Diffstat (limited to 'lib/system')
-rw-r--r--  lib/system/alloc.nim | 968
-rw-r--r--  lib/system/ansi_c.nim | 233
-rw-r--r--  lib/system/arc.nim | 267
-rw-r--r--  lib/system/arithm.nim | 411
-rw-r--r--  lib/system/arithmetics.nim | 405
-rw-r--r--  lib/system/assign.nim | 215
-rw-r--r--  lib/system/atomics.nim | 317
-rw-r--r--  lib/system/basic_types.nim | 94
-rw-r--r--  lib/system/bitmasks.nim | 39
-rw-r--r--  lib/system/cellseqs_v1.nim | 46
-rw-r--r--  lib/system/cellseqs_v2.nim | 53
-rw-r--r--  lib/system/cellsets.nim | 145
-rw-r--r--  lib/system/cgprocs.nim | 11
-rw-r--r--  lib/system/channels.nim | 300
-rw-r--r--  lib/system/channels_builtin.nim | 459
-rw-r--r--  lib/system/chcks.nim | 171
-rw-r--r--  lib/system/comparisons.nim | 337
-rw-r--r--  lib/system/compilation.nim | 209
-rw-r--r--  lib/system/coro_detection.nim | 20
-rw-r--r--  lib/system/countbits_impl.nim | 93
-rw-r--r--  lib/system/ctypes.nim | 84
-rw-r--r--  lib/system/cyclebreaker.nim | 184
-rw-r--r--  lib/system/debugger.nim | 305
-rw-r--r--  lib/system/deepcopy.nim | 93
-rw-r--r--  lib/system/dollars.nim | 147
-rw-r--r--  lib/system/dyncalls.nim | 109
-rw-r--r--  lib/system/embedded.nim | 29
-rw-r--r--  lib/system/endb.nim | 558
-rw-r--r--  lib/system/exceptions.nim | 122
-rw-r--r--  lib/system/excpt.nim | 664
-rw-r--r--  lib/system/fatal.nim | 58
-rw-r--r--  lib/system/formatfloat.nim | 6
-rw-r--r--  lib/system/gc.nim | 355
-rw-r--r--  lib/system/gc2.nim | 777
-rw-r--r--  lib/system/gc_common.nim | 251
-rw-r--r--  lib/system/gc_hooks.nim | 53
-rw-r--r--  lib/system/gc_interface.nim | 100
-rw-r--r--  lib/system/gc_ms.nim | 195
-rw-r--r--  lib/system/gc_regions.nim | 111
-rw-r--r--  lib/system/hti.nim | 53
-rw-r--r--  lib/system/inclrtl.nim | 31
-rw-r--r--  lib/system/indexerrors.nim | 15
-rw-r--r--  lib/system/indices.nim | 164
-rw-r--r--  lib/system/integerops.nim | 132
-rw-r--r--  lib/system/iterators.nim | 353
-rw-r--r--  lib/system/iterators_1.nim | 180
-rw-r--r--  lib/system/jssys.nim | 557
-rw-r--r--  lib/system/memalloc.nim | 449
-rw-r--r--  lib/system/memory.nim | 55
-rw-r--r--  lib/system/memtracker.nim | 27
-rw-r--r--  lib/system/mm/boehm.nim | 140
-rw-r--r--  lib/system/mm/go.nim | 153
-rw-r--r--  lib/system/mm/malloc.nim | 97
-rw-r--r--  lib/system/mm/none.nim | 46
-rw-r--r--  lib/system/mmdisp.nim | 540
-rw-r--r--  lib/system/nimscript.nim | 246
-rw-r--r--  lib/system/orc.nim | 543
-rw-r--r--  lib/system/osalloc.nim | 71
-rw-r--r--  lib/system/platforms.nim | 23
-rw-r--r--  lib/system/profiler.nim | 15
-rw-r--r--  lib/system/rawquits.nim | 27
-rw-r--r--  lib/system/repr.nim | 120
-rw-r--r--  lib/system/repr_impl.nim | 15
-rw-r--r--  lib/system/repr_v2.nim | 194
-rw-r--r--  lib/system/reprjs.nim | 106
-rw-r--r--  lib/system/seqs_v2.nim | 227
-rw-r--r--  lib/system/seqs_v2_reimpl.nim | 24
-rw-r--r--  lib/system/setops.nim | 89
-rw-r--r--  lib/system/sets.nim | 29
-rw-r--r--  lib/system/stacktraces.nim | 83
-rw-r--r--  lib/system/strmantle.nim | 263
-rw-r--r--  lib/system/strs_v2.nim | 224
-rw-r--r--  lib/system/sysio.nim | 434
-rw-r--r--  lib/system/syslocks.nim | 228
-rw-r--r--  lib/system/sysspawn.nim | 194
-rw-r--r--  lib/system/sysstr.nim | 511
-rw-r--r--  lib/system/threadids.nim | 103
-rw-r--r--  lib/system/threadimpl.nim | 111
-rw-r--r--  lib/system/threadlocalstorage.nim | 125
-rw-r--r--  lib/system/threads.nim | 716
-rw-r--r--  lib/system/timers.nim | 15
-rw-r--r--  lib/system/widestrs.nim | 168
82 files changed, 9628 insertions, 7262 deletions
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index 6aef4f411..3de6d8713 100644
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -11,6 +11,8 @@
 {.push profiler:off.}
 
 include osalloc
+import std/private/syslocks
+import std/sysatomics
 
 template track(op, address, size) =
   when defined(memTracker):
@@ -18,11 +20,42 @@ template track(op, address, size) =
 
 # We manage *chunks* of memory. Each chunk is a multiple of the page size.
 # Each chunk starts at an address that is divisible by the page size.
+# Small chunks may be divided into smaller, reusable cells to reduce the number of page allocations.
+
+# An allocation of a small pointer looks approximately like this:
+#[
+
+  alloc -> rawAlloc -> No free chunk available > Request a new page from tlsf -> result = chunk.data -------------+
+              |                                                                                                   |
+              v                                                                                                   |
+    Free chunk available                                                                                          |
+              |                                                                                                   |
+              v                                                                                                   v
+      Fetch shared cells -> No free cells available -> Advance acc -> result = chunk.data + chunk.acc -------> return
+    (may not add new cells)                                                                                       ^
+              |                                                                                                   |
+              v                                                                                                   |
+     Free cells available -> result = chunk.freeList -> Advance chunk.freeList -----------------------------------+
+]#
+# So the allocation is split into three paths, where the last path is preferred to avoid unnecessary page allocations.
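To make the three paths concrete, here is a minimal, self-contained toy model in Nim (ToyChunk, toyRawAlloc and the sizes are illustrative stand-ins, not the allocator's real types; the shared-cell fetch is folded into the free list): the free list is preferred, then the bump accumulator, and a fresh page is only requested once the chunk is exhausted.

  type
    ToyChunk = object
      freeList: seq[int]   # offsets of cells freed earlier (preferred path)
      acc: int             # bump-pointer offset into the chunk's data
      cellSize: int
      capacity: int

  proc toyRawAlloc(c: var ToyChunk): int =
    ## Returns an offset inside the chunk, or -1 when a new page would be needed.
    if c.freeList.len > 0:
      result = c.freeList.pop()            # reuse a free cell
    elif c.acc + c.cellSize <= c.capacity:
      result = c.acc                       # advance the accumulator
      inc c.acc, c.cellSize
    else:
      result = -1                          # chunk exhausted; request a page

  var c = ToyChunk(cellSize: 32, capacity: 64)
  doAssert toyRawAlloc(c) == 0    # accumulator
  doAssert toyRawAlloc(c) == 32   # accumulator again
  c.freeList.add 0                # pretend the first cell was deallocated
  doAssert toyRawAlloc(c) == 0    # free cells are preferred over new space
  doAssert toyRawAlloc(c) == -1   # nothing left: a new page would be requested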
+#
+#
+# A deallocation of a small pointer then looks like this:
+#[
+  dealloc -> rawDealloc -> chunk.owner == addr(a) --------------> This thread owns the chunk ------> The current chunk is active    -> Chunk is completely unused -----> Chunk references no foreign cells
+                                      |                                       |                   (Add cell into the current chunk)                 |                  Return the current chunk back to tlsf
+                                      |                                       |                                   |                                 |
+                                      v                                       v                                   v                                 v
+                      A different thread owns this chunk.     The current chunk is not active.          chunk.free was < size      Chunk references foreign cells, noop
+                      Add the cell to a.sharedFreeLists      Add the cell into the active chunk          Activate the chunk                       (end)
+                                    (end)                                    (end)                              (end)
+]#
+# So "true" deallocation is delayed for as long as possible in favor of reusing cells.
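The same decision tree, condensed into a hedged sketch; the boolean parameters stand in for the checks named in the diagram and are not the allocator's real fields:

  type
    ToyDeallocAction = enum
      PushToSharedFreeList   # a different thread owns the chunk
      LendToActiveChunk      # we own it, but another chunk of this size class is active
      ActivateChunk          # we own it and it previously had no space to give (chunk.free < size)
      KeepChunk              # we own it and it is active, but foreign cells keep it alive
      ReturnToTlsf           # we own it, it is completely unused and references no foreign cells

  proc classify(ownedByThisThread, isTheActiveChunk, hadFreeSpace,
                completelyUnused: bool; foreignCells: int): ToyDeallocAction =
    if not ownedByThisThread: result = PushToSharedFreeList
    elif not isTheActiveChunk: result = LendToActiveChunk
    elif not hadFreeSpace: result = ActivateChunk
    elif completelyUnused and foreignCells == 0: result = ReturnToTlsf
    else: result = KeepChunk

  doAssert classify(false, false, false, false, 0) == PushToSharedFreeList
  doAssert classify(true, true, true, true, 1) == KeepChunk
  doAssert classify(true, true, true, true, 0) == ReturnToTlsf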
 
 const
-  InitialMemoryRequest = 128 * PageSize # 0.5 MB
+  nimMinHeapPages {.intdefine.} = 128 # 0.5 MB
   SmallChunkSize = PageSize
-  MaxFli = 30
+  MaxFli = when sizeof(int) > 2: 30 else: 14
   MaxLog2Sli = 5 # 32, this cannot be increased without changing 'uint32'
                  # everywhere!
   MaxSli = 1 shl MaxLog2Sli
@@ -30,7 +63,7 @@ const
   RealFli = MaxFli - FliOffset
 
   # size of chunks in last matrix bin
-  MaxBigChunkSize = 1 shl MaxFli - 1 shl (MaxFli-MaxLog2Sli-1)
+  MaxBigChunkSize = int(1'i32 shl MaxFli - 1'i32 shl (MaxFli-MaxLog2Sli-1))
   HugeChunkSize = MaxBigChunkSize + 1
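For reference, the bin-size arithmetic spelled out (MaxFli = 30 and MaxLog2Sli = 5 on 32/64-bit targets, MaxFli = 14 on the 16-bit fallback):

  MaxBigChunkSize = 2^30 - 2^(30-5-1) = 1_073_741_824 - 16_777_216 = 1_056_964_608 bytes (~1008 MiB)
  MaxBigChunkSize = 2^14 - 2^8        = 16_384 - 256               = 16_128 bytes (16-bit targets)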
 
 type
@@ -38,44 +71,12 @@ type
   Trunk = object
     next: PTrunk         # all nodes are connected with this pointer
     key: int             # start address at bit 0
-    bits: array[0..IntsPerTrunk-1, int] # a bit vector
+    bits: array[0..IntsPerTrunk-1, uint] # a bit vector
 
   TrunkBuckets = array[0..255, PTrunk]
   IntSet = object
     data: TrunkBuckets
 
-type
-  AlignType = BiggestFloat
-  FreeCell {.final, pure.} = object
-    next: ptr FreeCell  # next free cell in chunk (overlaid with refcount)
-    zeroField: int       # 0 means cell is not used (overlaid with typ field)
-                         # 1 means cell is manually managed pointer
-                         # otherwise a PNimType is stored in there
-
-  PChunk = ptr BaseChunk
-  PBigChunk = ptr BigChunk
-  PSmallChunk = ptr SmallChunk
-  BaseChunk {.pure, inheritable.} = object
-    prevSize: int        # size of previous chunk; for coalescing
-                         # 0th bit == 1 if 'used
-    size: int            # if < PageSize it is a small chunk
-
-  SmallChunk = object of BaseChunk
-    next, prev: PSmallChunk  # chunks of the same size
-    freeList: ptr FreeCell
-    free: int            # how many bytes remain
-    acc: int             # accumulator for small object allocation
-    when defined(cpu32):
-      align: int
-    data: AlignType      # start of usable memory
-
-  BigChunk = object of BaseChunk # not necessarily > PageSize!
-    next, prev: PBigChunk    # chunks of the same (or bigger) size
-    data: AlignType      # start of usable memory
-
-template smallChunkOverhead(): untyped = sizeof(SmallChunk)-sizeof(AlignType)
-template bigChunkOverhead(): untyped = sizeof(BigChunk)-sizeof(AlignType)
-
 # ------------- chunk table ---------------------------------------------------
 # We use a PtrSet of chunk starts and a table[Page, chunksize] for chunk
 # endings of big chunks. This is needed by the merging operation. The only
@@ -96,26 +97,103 @@ type
     key, upperBound: int
     level: int
 
+const
+  RegionHasLock = false # hasThreadSupport and defined(gcDestructors)
+
+type
+  FreeCell {.final, pure.} = object
+    # A free cell is a pointer that has been freed, meaning it became available for reuse.
+    # It may become foreign if it is lent to a chunk that did not create it; doing so reduces the number of pages needed.
+    next: ptr FreeCell  # next free cell in chunk (overlaid with refcount)
+    when not defined(gcDestructors):
+      zeroField: int       # 0 means cell is not used (overlaid with typ field)
+                          # 1 means cell is manually managed pointer
+                          # otherwise a PNimType is stored in there
+    else:
+      alignment: int
+
+  PChunk = ptr BaseChunk
+  PBigChunk = ptr BigChunk
+  PSmallChunk = ptr SmallChunk
+  BaseChunk {.pure, inheritable.} = object
+    prevSize: int        # size of previous chunk; for coalescing
+                         # 0th bit == 1 if 'used'
+    size: int            # if < PageSize it is a small chunk
+    owner: ptr MemRegion
+
+  SmallChunk = object of BaseChunk
+    next, prev: PSmallChunk  # chunks of the same size
+    freeList: ptr FreeCell   # Singly linked list of cells. They may be from foreign chunks or from the current chunk.
+                             #  Should be `nil` when the chunk isn't active in `a.freeSmallChunks`.
+    free: int32              # Bytes this chunk is able to provide using both the accumulator and free cells.
+                             # When a cell is considered foreign, its source chunk's free field is NOT adjusted until it
+                             #  reaches dealloc while the source chunk is active.
+                             # Instead, the receiving chunk gains the capacity and thus reserves space in the foreign chunk.
+    acc: uint32              # Offset from data, used when there are no free cells available but the chunk is considered free.
+    foreignCells: int        # When a free cell is given to a chunk that is not its origin,
+                             #  both the cell and the source chunk are considered foreign.
+                             # Receiving a foreign cell can happen both when deallocating from another thread or when
+                             #  the active chunk in `a.freeSmallChunks` is not the current chunk.
+                             # Freeing a chunk while `foreignCells > 0` leaks memory as all references to it become lost.
+    data {.align: MemAlign.}: UncheckedArray[byte]      # start of usable memory
+
+  BigChunk = object of BaseChunk # not necessarily > PageSize!
+    next, prev: PBigChunk    # chunks of the same (or bigger) size
+    data {.align: MemAlign.}: UncheckedArray[byte]      # start of usable memory
+
   HeapLinks = object
     len: int
     chunks: array[30, (PBigChunk, int)]
     next: ptr HeapLinks
 
   MemRegion = object
-    minLargeObj, maxLargeObj: int
-    freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk]
+    when not defined(gcDestructors):
+      minLargeObj, maxLargeObj: int
+    freeSmallChunks: array[0..max(1, SmallChunkSize div MemAlign-1), PSmallChunk]
+      # List of available chunks per size class. Only one is expected to be active per class.
+    when defined(gcDestructors):
+      sharedFreeLists: array[0..max(1, SmallChunkSize div MemAlign-1), ptr FreeCell]
+        # When a thread frees a pointer it did not create, it must not adjust the counters.
+        # Instead, the cell is placed here and deferred until the next allocation.
     flBitmap: uint32
     slBitmap: array[RealFli, uint32]
     matrix: array[RealFli, array[MaxSli, PBigChunk]]
     llmem: PLLChunk
     currMem, maxMem, freeMem, occ: int # memory sizes (allocated from OS)
     lastSize: int # needed for the case that OS gives us pages linearly
+    when RegionHasLock:
+      lock: SysLock
+    when defined(gcDestructors):
+      sharedFreeListBigChunks: PBigChunk # make no attempt at avoiding false sharing for now for this object field
+
     chunkStarts: IntSet
-    root, deleted, last, freeAvlNodes: PAvlNode
-    locked, blockChunkSizeIncrease: bool # if locked, we cannot free pages.
+    when not defined(gcDestructors):
+      root, deleted, last, freeAvlNodes: PAvlNode
+    lockActive, locked, blockChunkSizeIncrease: bool # if locked, we cannot free pages.
     nextChunkSize: int
-    bottomData: AvlNode
+    when not defined(gcDestructors):
+      bottomData: AvlNode
     heapLinks: HeapLinks
+    when defined(nimTypeNames):
+      allocCounter, deallocCounter: int
+
+template smallChunkOverhead(): untyped = sizeof(SmallChunk)
+template bigChunkOverhead(): untyped = sizeof(BigChunk)
+
+when hasThreadSupport:
+  template loada(x: untyped): untyped = atomicLoadN(unsafeAddr x, ATOMIC_RELAXED)
+  template storea(x, y: untyped) = atomicStoreN(unsafeAddr x, y, ATOMIC_RELAXED)
+
+  when false:
+    # not yet required
+    template atomicStatDec(x, diff: untyped) = discard atomicSubFetch(unsafeAddr x, diff, ATOMIC_RELAXED)
+    template atomicStatInc(x, diff: untyped) = discard atomicAddFetch(unsafeAddr x, diff, ATOMIC_RELAXED)
+else:
+  template loada(x: untyped): untyped = x
+  template storea(x, y: untyped) = x = y
+
+template atomicStatDec(x, diff: untyped) = dec x, diff
+template atomicStatInc(x, diff: untyped) = inc x, diff
 
 const
   fsLookupTable: array[byte, int8] = [
@@ -135,10 +213,10 @@ const
   ]
 
 proc msbit(x: uint32): int {.inline.} =
-  let a = if x <= 0xff_ff:
+  let a = if x <= 0xff_ff'u32:
             (if x <= 0xff: 0 else: 8)
           else:
-            (if x <= 0xff_ff_ff: 16 else: 24)
+            (if x <= 0xff_ff_ff'u32: 16 else: 24)
   result = int(fsLookupTable[byte(x shr a)]) + a
 
 proc lsbit(x: uint32): int {.inline.} =
@@ -157,7 +235,7 @@ proc mappingSearch(r, fl, sl: var int) {.inline.} =
   let t = roundup((1 shl (msbit(uint32 r) - MaxLog2Sli)), PageSize) - 1
   r = r + t
   r = r and not t
-  r = min(r, MaxBigChunkSize)
+  r = min(r, MaxBigChunkSize).int
   fl = msbit(uint32 r)
   sl = (r shr (fl - MaxLog2Sli)) - MaxSli
   dec fl, FliOffset
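A worked example of the TLSF mapping above, assuming the usual 4 KiB PageSize (the first-level index is the size's most significant bit, the second-level index the next MaxLog2Sli bits):

  requested r = 20000:
    msbit(20000) = 14, so t = roundup(1 shl 9, PageSize) - 1 = 4095
    r = (20000 + 4095) and not 4095 = 20480          # rounded up to the bin granularity
    fl = msbit(20480) = 14
    sl = (20480 shr (14 - 5)) - 32 = 40 - 32 = 8
  so a 20000-byte request is served from matrix bin (fl - FliOffset, 8).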
@@ -222,16 +300,12 @@ proc addChunkToMatrix(a: var MemRegion; b: PBigChunk) =
   setBit(sl, a.slBitmap[fl])
   setBit(fl, a.flBitmap)
 
-{.push stack_trace: off.}
-proc initAllocator() = discard "nothing to do anymore"
-{.pop.}
-
 proc incCurrMem(a: var MemRegion, bytes: int) {.inline.} =
-  inc(a.currMem, bytes)
+  atomicStatInc(a.currMem, bytes)
 
 proc decCurrMem(a: var MemRegion, bytes: int) {.inline.} =
   a.maxMem = max(a.maxMem, a.currMem)
-  dec(a.currMem, bytes)
+  atomicStatDec(a.currMem, bytes)
 
 proc getMaxMem(a: var MemRegion): int =
   # Since we update maxPagesCount only when freeing pages,
@@ -239,6 +313,20 @@ proc getMaxMem(a: var MemRegion): int =
   # maximum of these both values here:
   result = max(a.currMem, a.maxMem)
 
+const nimMaxHeap {.intdefine.} = 0
+
+proc allocPages(a: var MemRegion, size: int): pointer =
+  when nimMaxHeap != 0:
+    if a.occ + size > nimMaxHeap * 1024 * 1024:
+      raiseOutOfMem()
+  osAllocPages(size)
+
+proc tryAllocPages(a: var MemRegion, size: int): pointer =
+  when nimMaxHeap != 0:
+    if a.occ + size > nimMaxHeap * 1024 * 1024:
+      raiseOutOfMem()
+  osTryAllocPages(size)
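Both limits are ordinary {.intdefine.} symbols and can therefore be set on the command line; the values below are purely illustrative:

  nim c -d:nimMinHeapPages=256 -d:nimMaxHeap=512 app.nim
  # nimMinHeapPages: minimum amount requested from the OS, in pages (256 pages = 1 MiB with 4 KiB pages)
  # nimMaxHeap:      hard upper bound on the heap, in MiB; 0 (the default) means unlimited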
+
 proc llAlloc(a: var MemRegion, size: int): pointer =
   # *low-level* alloc for the memory managers data structures. Deallocation
   # is done at the end of the allocator's life time.
@@ -248,49 +336,50 @@ proc llAlloc(a: var MemRegion, size: int): pointer =
     # is one page:
     sysAssert roundup(size+sizeof(LLChunk), PageSize) == PageSize, "roundup 6"
     var old = a.llmem # can be nil and is correct with nil
-    a.llmem = cast[PLLChunk](osAllocPages(PageSize))
-    when defined(avlcorruption):
+    a.llmem = cast[PLLChunk](allocPages(a, PageSize))
+    when defined(nimAvlcorruption):
       trackLocation(a.llmem, PageSize)
     incCurrMem(a, PageSize)
     a.llmem.size = PageSize - sizeof(LLChunk)
     a.llmem.acc = sizeof(LLChunk)
     a.llmem.next = old
-  result = cast[pointer](cast[ByteAddress](a.llmem) + a.llmem.acc)
+  result = cast[pointer](cast[int](a.llmem) + a.llmem.acc)
   dec(a.llmem.size, size)
   inc(a.llmem.acc, size)
   zeroMem(result, size)
 
-proc getBottom(a: var MemRegion): PAvlNode =
-  result = addr(a.bottomData)
-  if result.link[0] == nil:
-    result.link[0] = result
-    result.link[1] = result
-
-proc allocAvlNode(a: var MemRegion, key, upperBound: int): PAvlNode =
-  if a.freeAvlNodes != nil:
-    result = a.freeAvlNodes
-    a.freeAvlNodes = a.freeAvlNodes.link[0]
-  else:
-    result = cast[PAvlNode](llAlloc(a, sizeof(AvlNode)))
-    when defined(avlcorruption):
-      cprintf("tracking location: %p\n", result)
-  result.key = key
-  result.upperBound = upperBound
-  let bottom = getBottom(a)
-  result.link[0] = bottom
-  result.link[1] = bottom
-  result.level = 1
-  #when defined(avlcorruption):
-  #  track("allocAvlNode", result, sizeof(AvlNode))
-  sysAssert(bottom == addr(a.bottomData), "bottom data")
-  sysAssert(bottom.link[0] == bottom, "bottom link[0]")
-  sysAssert(bottom.link[1] == bottom, "bottom link[1]")
-
-proc deallocAvlNode(a: var MemRegion, n: PAvlNode) {.inline.} =
-  n.link[0] = a.freeAvlNodes
-  a.freeAvlNodes = n
-
-proc addHeapLink(a: var MemRegion; p: PBigChunk, size: int) =
+when not defined(gcDestructors):
+  proc getBottom(a: var MemRegion): PAvlNode =
+    result = addr(a.bottomData)
+    if result.link[0] == nil:
+      result.link[0] = result
+      result.link[1] = result
+
+  proc allocAvlNode(a: var MemRegion, key, upperBound: int): PAvlNode =
+    if a.freeAvlNodes != nil:
+      result = a.freeAvlNodes
+      a.freeAvlNodes = a.freeAvlNodes.link[0]
+    else:
+      result = cast[PAvlNode](llAlloc(a, sizeof(AvlNode)))
+      when defined(nimAvlcorruption):
+        cprintf("tracking location: %p\n", result)
+    result.key = key
+    result.upperBound = upperBound
+    let bottom = getBottom(a)
+    result.link[0] = bottom
+    result.link[1] = bottom
+    result.level = 1
+    #when defined(nimAvlcorruption):
+    #  track("allocAvlNode", result, sizeof(AvlNode))
+    sysAssert(bottom == addr(a.bottomData), "bottom data")
+    sysAssert(bottom.link[0] == bottom, "bottom link[0]")
+    sysAssert(bottom.link[1] == bottom, "bottom link[1]")
+
+  proc deallocAvlNode(a: var MemRegion, n: PAvlNode) {.inline.} =
+    n.link[0] = a.freeAvlNodes
+    a.freeAvlNodes = n
+
+proc addHeapLink(a: var MemRegion; p: PBigChunk, size: int): ptr HeapLinks =
   var it = addr(a.heapLinks)
   while it != nil and it.len >= it.chunks.len: it = it.next
   if it == nil:
@@ -299,12 +388,15 @@ proc addHeapLink(a: var MemRegion; p: PBigChunk, size: int) =
     a.heapLinks.next = n
     n.chunks[0] = (p, size)
     n.len = 1
+    result = n
   else:
     let L = it.len
     it.chunks[L] = (p, size)
     inc it.len
+    result = it
 
-include "system/avltree"
+when not defined(gcDestructors):
+  include "system/avltree"
 
 proc llDeallocAll(a: var MemRegion) =
   var it = a.llmem
@@ -334,21 +426,21 @@ proc contains(s: IntSet, key: int): bool =
   var t = intSetGet(s, key shr TrunkShift)
   if t != nil:
     var u = key and TrunkMask
-    result = (t.bits[u shr IntShift] and (1 shl (u and IntMask))) != 0
+    result = (t.bits[u shr IntShift] and (uint(1) shl (u and IntMask))) != 0
   else:
     result = false
 
 proc incl(a: var MemRegion, s: var IntSet, key: int) =
   var t = intSetPut(a, s, key shr TrunkShift)
   var u = key and TrunkMask
-  t.bits[u shr IntShift] = t.bits[u shr IntShift] or (1 shl (u and IntMask))
+  t.bits[u shr IntShift] = t.bits[u shr IntShift] or (uint(1) shl (u and IntMask))
 
 proc excl(s: var IntSet, key: int) =
   var t = intSetGet(s, key shr TrunkShift)
   if t != nil:
     var u = key and TrunkMask
     t.bits[u shr IntShift] = t.bits[u shr IntShift] and not
-        (1 shl (u and IntMask))
+        (uint(1) shl (u and IntMask))
 
 iterator elements(t: IntSet): int {.inline.} =
   # while traversing it is forbidden to change the set!
@@ -369,7 +461,7 @@ iterator elements(t: IntSet): int {.inline.} =
       r = r.next
 
 proc isSmallChunk(c: PChunk): bool {.inline.} =
-  return c.size <= SmallChunkSize-smallChunkOverhead()
+  result = c.size <= SmallChunkSize-smallChunkOverhead()
 
 proc chunkUnused(c: PChunk): bool {.inline.} =
   result = (c.prevSize and 1) == 0
@@ -385,8 +477,8 @@ iterator allObjects(m: var MemRegion): pointer {.inline.} =
           var c = cast[PSmallChunk](c)
 
           let size = c.size
-          var a = cast[ByteAddress](addr(c.data))
-          let limit = a + c.acc
+          var a = cast[int](addr(c.data))
+          let limit = a + c.acc.int
           while a <% limit:
             yield cast[pointer](a)
             a = a +% size
@@ -398,18 +490,19 @@ iterator allObjects(m: var MemRegion): pointer {.inline.} =
 proc iterToProc*(iter: typed, envType: typedesc; procName: untyped) {.
                       magic: "Plugin", compileTime.}
 
-proc isCell(p: pointer): bool {.inline.} =
-  result = cast[ptr FreeCell](p).zeroField >% 1
+when not defined(gcDestructors):
+  proc isCell(p: pointer): bool {.inline.} =
+    result = cast[ptr FreeCell](p).zeroField >% 1
 
 # ------------- chunk management ----------------------------------------------
 proc pageIndex(c: PChunk): int {.inline.} =
-  result = cast[ByteAddress](c) shr PageShift
+  result = cast[int](c) shr PageShift
 
 proc pageIndex(p: pointer): int {.inline.} =
-  result = cast[ByteAddress](p) shr PageShift
+  result = cast[int](p) shr PageShift
 
 proc pageAddr(p: pointer): PChunk {.inline.} =
-  result = cast[PChunk](cast[ByteAddress](p) and not PageMask)
+  result = cast[PChunk](cast[int](p) and not PageMask)
   #sysAssert(Contains(allocator.chunkStarts, pageIndex(result)))
 
 when false:
@@ -421,48 +514,44 @@ when false:
                 it, it.next, it.prev, it.size)
       it = it.next
 
-const nimMaxHeap {.intdefine.} = 0
-
 proc requestOsChunks(a: var MemRegion, size: int): PBigChunk =
   when not defined(emscripten):
     if not a.blockChunkSizeIncrease:
       let usedMem = a.occ #a.currMem # - a.freeMem
-      when nimMaxHeap != 0:
-        if usedMem > nimMaxHeap * 1024 * 1024:
-          raiseOutOfMem()
       if usedMem < 64 * 1024:
         a.nextChunkSize = PageSize*4
       else:
         a.nextChunkSize = min(roundup(usedMem shr 2, PageSize), a.nextChunkSize * 2)
-  var size = size
+        a.nextChunkSize = min(a.nextChunkSize, MaxBigChunkSize).int
 
+  var size = size
   if size > a.nextChunkSize:
-    result = cast[PBigChunk](osAllocPages(size))
+    result = cast[PBigChunk](allocPages(a, size))
   else:
-    result = cast[PBigChunk](osTryAllocPages(a.nextChunkSize))
+    result = cast[PBigChunk](tryAllocPages(a, a.nextChunkSize))
     if result == nil:
-      result = cast[PBigChunk](osAllocPages(size))
+      result = cast[PBigChunk](allocPages(a, size))
       a.blockChunkSizeIncrease = true
     else:
       size = a.nextChunkSize
 
   incCurrMem(a, size)
   inc(a.freeMem, size)
-  a.addHeapLink(result, size)
+  let heapLink = a.addHeapLink(result, size)
   when defined(debugHeapLinks):
     cprintf("owner: %p; result: %p; next pointer %p; size: %ld\n", addr(a),
-      result, result.heapLink, result.origSize)
+      result, heapLink, size)
 
   when defined(memtracker):
-    trackLocation(addr result.origSize, sizeof(int))
+    trackLocation(addr result.size, sizeof(int))
 
-  sysAssert((cast[ByteAddress](result) and PageMask) == 0, "requestOsChunks 1")
+  sysAssert((cast[int](result) and PageMask) == 0, "requestOsChunks 1")
   #zeroMem(result, size)
   result.next = nil
   result.prev = nil
   result.size = size
   # update next.prevSize:
-  var nxt = cast[ByteAddress](result) +% size
+  var nxt = cast[int](result) +% size
   sysAssert((nxt and PageMask) == 0, "requestOsChunks 2")
   var next = cast[PChunk](nxt)
   if pageIndex(next) in a.chunkStarts:
@@ -470,7 +559,7 @@ proc requestOsChunks(a: var MemRegion, size: int): PBigChunk =
     next.prevSize = size or (next.prevSize and 1)
   # set result.prevSize:
   var lastSize = if a.lastSize != 0: a.lastSize else: PageSize
-  var prv = cast[ByteAddress](result) -% lastSize
+  var prv = cast[int](result) -% lastSize
   sysAssert((nxt and PageMask) == 0, "requestOsChunks 3")
   var prev = cast[PChunk](prv)
   if pageIndex(prev) in a.chunkStarts and prev.size == lastSize:
@@ -516,21 +605,22 @@ proc listRemove[T](head: var T, c: T) {.inline.} =
 
 proc updatePrevSize(a: var MemRegion, c: PBigChunk,
                     prevSize: int) {.inline.} =
-  var ri = cast[PChunk](cast[ByteAddress](c) +% c.size)
-  sysAssert((cast[ByteAddress](ri) and PageMask) == 0, "updatePrevSize")
+  var ri = cast[PChunk](cast[int](c) +% c.size)
+  sysAssert((cast[int](ri) and PageMask) == 0, "updatePrevSize")
   if isAccessible(a, ri):
     ri.prevSize = prevSize or (ri.prevSize and 1)
 
 proc splitChunk2(a: var MemRegion, c: PBigChunk, size: int): PBigChunk =
-  result = cast[PBigChunk](cast[ByteAddress](c) +% size)
+  result = cast[PBigChunk](cast[int](c) +% size)
   result.size = c.size - size
-  track("result.origSize", addr result.origSize, sizeof(int))
-  # XXX check if these two nil assignments are dead code given
-  # addChunkToMatrix's implementation:
-  result.next = nil
-  result.prev = nil
+  track("result.size", addr result.size, sizeof(int))
+  when not defined(nimOptimizedSplitChunk):
+    # still active because of weird codegen issue on some of our CIs:
+    result.next = nil
+    result.prev = nil
   # size and not used:
   result.prevSize = size
+  result.owner = addr a
   sysAssert((size and 1) == 0, "splitChunk 2")
   sysAssert((size and PageMask) == 0,
       "splitChunk: size is not a multiple of the PageSize")
@@ -550,8 +640,8 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
   when coalescLeft:
     let prevSize = c.prevSize
     if prevSize != 0:
-      var le = cast[PChunk](cast[ByteAddress](c) -% prevSize)
-      sysAssert((cast[ByteAddress](le) and PageMask) == 0, "freeBigChunk 4")
+      var le = cast[PChunk](cast[int](c) -% prevSize)
+      sysAssert((cast[int](le) and PageMask) == 0, "freeBigChunk 4")
       if isAccessible(a, le) and chunkUnused(le):
         sysAssert(not isSmallChunk(le), "freeBigChunk 5")
         if not isSmallChunk(le) and le.size < MaxBigChunkSize:
@@ -561,11 +651,14 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
           c = cast[PBigChunk](le)
           if c.size > MaxBigChunkSize:
             let rest = splitChunk2(a, c, MaxBigChunkSize)
+            when defined(nimOptimizedSplitChunk):
+              rest.next = nil
+              rest.prev = nil
             addChunkToMatrix(a, c)
             c = rest
   when coalescRight:
-    var ri = cast[PChunk](cast[ByteAddress](c) +% c.size)
-    sysAssert((cast[ByteAddress](ri) and PageMask) == 0, "freeBigChunk 2")
+    var ri = cast[PChunk](cast[int](c) +% c.size)
+    sysAssert((cast[int](ri) and PageMask) == 0, "freeBigChunk 2")
     if isAccessible(a, ri) and chunkUnused(ri):
       sysAssert(not isSmallChunk(ri), "freeBigChunk 3")
       if not isSmallChunk(ri) and c.size < MaxBigChunkSize:
@@ -580,42 +673,62 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
 proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
   sysAssert(size > 0, "getBigChunk 2")
   var size = size # roundup(size, PageSize)
-  var fl, sl: int
+  var fl = 0
+  var sl = 0
   mappingSearch(size, fl, sl)
   sysAssert((size and PageMask) == 0, "getBigChunk: unaligned chunk")
   result = findSuitableBlock(a, fl, sl)
+
+  when RegionHasLock:
+    if not a.lockActive:
+      a.lockActive = true
+      initSysLock(a.lock)
+    acquireSys a.lock
+
   if result == nil:
-    if size < InitialMemoryRequest:
-      result = requestOsChunks(a, InitialMemoryRequest)
+    if size < nimMinHeapPages * PageSize:
+      result = requestOsChunks(a, nimMinHeapPages * PageSize)
       splitChunk(a, result, size)
     else:
       result = requestOsChunks(a, size)
       # if we over allocated split the chunk:
       if result.size > size:
         splitChunk(a, result, size)
+    result.owner = addr a
   else:
     removeChunkFromMatrix2(a, result, fl, sl)
     if result.size >= size + PageSize:
       splitChunk(a, result, size)
   # set 'used' to to true:
   result.prevSize = 1
-  track("setUsedToFalse", addr result.origSize, sizeof(int))
+  track("setUsedToFalse", addr result.size, sizeof(int))
+  sysAssert result.owner == addr a, "getBigChunk: No owner set!"
 
   incl(a, a.chunkStarts, pageIndex(result))
   dec(a.freeMem, size)
+  when RegionHasLock:
+    releaseSys a.lock
 
 proc getHugeChunk(a: var MemRegion; size: int): PBigChunk =
-  result = cast[PBigChunk](osAllocPages(size))
+  result = cast[PBigChunk](allocPages(a, size))
+  when RegionHasLock:
+    if not a.lockActive:
+      a.lockActive = true
+      initSysLock(a.lock)
+    acquireSys a.lock
   incCurrMem(a, size)
   # XXX add this to the heap links. But also remove it from it later.
   when false: a.addHeapLink(result, size)
-  sysAssert((cast[ByteAddress](result) and PageMask) == 0, "getHugeChunk")
+  sysAssert((cast[int](result) and PageMask) == 0, "getHugeChunk")
   result.next = nil
   result.prev = nil
   result.size = size
   # set 'used' to to true:
   result.prevSize = 1
+  result.owner = addr a
   incl(a, a.chunkStarts, pageIndex(result))
+  when RegionHasLock:
+    releaseSys a.lock
 
 proc freeHugeChunk(a: var MemRegion; c: PBigChunk) =
   let size = c.size
@@ -631,7 +744,8 @@ proc getSmallChunk(a: var MemRegion): PSmallChunk =
   result = cast[PSmallChunk](res)
 
 # -----------------------------------------------------------------------------
-proc isAllocatedPtr(a: MemRegion, p: pointer): bool {.benign.}
+when not defined(gcDestructors):
+  proc isAllocatedPtr(a: MemRegion, p: pointer): bool {.benign.}
 
 when true:
   template allocInv(a: MemRegion): bool = true
@@ -678,116 +792,180 @@ else:
   template trackSize(x) = discard
   template untrackSize(x) = discard
 
-when false:
-  # not yet used by the GCs
-  proc rawTryAlloc(a: var MemRegion; requestedSize: int): pointer =
-    sysAssert(allocInv(a), "rawAlloc: begin")
-    sysAssert(roundup(65, 8) == 72, "rawAlloc: roundup broken")
-    sysAssert(requestedSize >= sizeof(FreeCell), "rawAlloc: requested size too small")
-    var size = roundup(requestedSize, MemAlign)
-    inc a.occ, size
-    trackSize(size)
-    sysAssert(size >= requestedSize, "insufficient allocated size!")
-    #c_fprintf(stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
-    if size <= SmallChunkSize-smallChunkOverhead():
-      # allocate a small block: for small chunks, we use only its next pointer
-      var s = size div MemAlign
-      var c = a.freeSmallChunks[s]
-      if c == nil:
-        result = nil
-      else:
-        sysAssert c.size == size, "rawAlloc 6"
-        if c.freeList == nil:
-          sysAssert(c.acc + smallChunkOverhead() + size <= SmallChunkSize,
-                    "rawAlloc 7")
-          result = cast[pointer](cast[ByteAddress](addr(c.data)) +% c.acc)
-          inc(c.acc, size)
-        else:
-          result = c.freeList
-          sysAssert(c.freeList.zeroField == 0, "rawAlloc 8")
-          c.freeList = c.freeList.next
-        dec(c.free, size)
-        sysAssert((cast[ByteAddress](result) and (MemAlign-1)) == 0, "rawAlloc 9")
-        if c.free < size:
-          listRemove(a.freeSmallChunks[s], c)
-          sysAssert(allocInv(a), "rawAlloc: end listRemove test")
-        sysAssert(((cast[ByteAddress](result) and PageMask) - smallChunkOverhead()) %%
-                  size == 0, "rawAlloc 21")
-        sysAssert(allocInv(a), "rawAlloc: end small size")
+proc deallocBigChunk(a: var MemRegion, c: PBigChunk) =
+  when RegionHasLock:
+    acquireSys a.lock
+  dec a.occ, c.size
+  untrackSize(c.size)
+  sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case B)"
+  when not defined(gcDestructors):
+    a.deleted = getBottom(a)
+    del(a, a.root, cast[int](addr(c.data)))
+  if c.size >= HugeChunkSize: freeHugeChunk(a, c)
+  else: freeBigChunk(a, c)
+  when RegionHasLock:
+    releaseSys a.lock
+
+when defined(gcDestructors):
+  template atomicPrepend(head, elem: untyped) =
+    # see also https://en.cppreference.com/w/cpp/atomic/atomic_compare_exchange
+    when hasThreadSupport:
+      while true:
+        elem.next.storea head.loada
+        if atomicCompareExchangeN(addr head, addr elem.next, elem, weak = true, ATOMIC_RELEASE, ATOMIC_RELAXED):
+          break
     else:
-      inc size, bigChunkOverhead()
-      var fl, sl: int
-      mappingSearch(size, fl, sl)
-      sysAssert((size and PageMask) == 0, "getBigChunk: unaligned chunk")
-      let c = findSuitableBlock(a, fl, sl)
-      if c != nil:
-        removeChunkFromMatrix2(a, c, fl, sl)
-        if c.size >= size + PageSize:
-          splitChunk(a, c, size)
-        # set 'used' to to true:
-        c.prevSize = 1
-        incl(a, a.chunkStarts, pageIndex(c))
-        dec(a.freeMem, size)
-        result = addr(c.data)
-        sysAssert((cast[ByteAddress](c) and (MemAlign-1)) == 0, "rawAlloc 13")
-        sysAssert((cast[ByteAddress](c) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary")
-        if a.root == nil: a.root = getBottom(a)
-        add(a, a.root, cast[ByteAddress](result), cast[ByteAddress](result)+%size)
-      else:
-        result = nil
+      elem.next.storea head.loada
+      head.storea elem
+
+  proc addToSharedFreeListBigChunks(a: var MemRegion; c: PBigChunk) {.inline.} =
+    sysAssert c.next == nil, "c.next pointer must be nil"
+    atomicPrepend a.sharedFreeListBigChunks, c
+
+  proc addToSharedFreeList(c: PSmallChunk; f: ptr FreeCell; size: int) {.inline.} =
+    atomicPrepend c.owner.sharedFreeLists[size], f
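Both addToSharedFreeList procs are a classic lock-free prepend: remote threads push cells with a CAS loop, and the owning thread later detaches the whole list with a single exchange (see fetchSharedCells in rawAlloc below). A standalone sketch of the same scheme using std/atomics instead of the compiler intrinsics; Node, push and stealAll are illustrative names:

  import std/atomics

  type
    Node = ptr NodeObj
    NodeObj = object
      next: Node

  var head: Atomic[Node]

  proc push(n: Node) =
    ## Any thread: prepend `n` with a compare-and-swap loop.
    var old = head.load(moRelaxed)
    while true:
      n.next = old
      if head.compareExchangeWeak(old, n, moRelease, moRelaxed):
        break

  proc stealAll(): Node =
    ## Owning thread: detach the entire list at once.
    head.exchange(nil, moAcquire)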
+
+  const MaxSteps = 20
+
+  proc compensateCounters(a: var MemRegion; c: PSmallChunk; size: int) =
+    # rawDealloc did NOT do the usual:
+    # `inc(c.free, size); dec(a.occ, size)` because it wasn't the owner of these
+    # memory locations. We have to compensate here for these for the entire list.
+    var it = c.freeList
+    var total = 0
+    while it != nil:
+      inc total, size
+      let chunk = cast[PSmallChunk](pageAddr(it))
+      if c != chunk:
+        # The cell is foreign, potentially even from a foreign thread.
+        # It must block the current chunk from being freed, as doing so would leak memory.
+        inc c.foreignCells
+      it = it.next
+    # By not adjusting the foreign chunk we reserve space in it to prevent deallocation
+    inc(c.free, total)
+    dec(a.occ, total)
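A toy walk-through of the compensation (illustrative numbers only): suppose the owner steals a shared list of three 64-byte cells, two of which live in other chunks.

  var cFree, cForeign = 0
  var aOcc = 1000
  for isForeign in [false, true, true]:
    inc cFree, 64          # the receiving chunk gains capacity for every cell
    dec aOcc, 64           # the freed bytes finally leave the occupied counter
    if isForeign: inc cForeign
  doAssert (cFree, aOcc, cForeign) == (192, 808, 2)
  # The two source chunks keep their reservations; foreignCells > 0 now keeps
  # the receiving chunk from being returned to TLSF.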
+
+  proc freeDeferredObjects(a: var MemRegion; root: PBigChunk) =
+    var it = root
+    var maxIters = MaxSteps # make it time-bounded
+    while true:
+      let rest = it.next.loada
+      it.next.storea nil
+      deallocBigChunk(a, cast[PBigChunk](it))
+      if maxIters == 0:
+        if rest != nil:
+          addToSharedFreeListBigChunks(a, rest)
+          sysAssert a.sharedFreeListBigChunks != nil, "re-enqueing failed"
+        break
+      it = rest
+      dec maxIters
+      if it == nil: break
 
 proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
+  when defined(nimTypeNames):
+    inc(a.allocCounter)
   sysAssert(allocInv(a), "rawAlloc: begin")
   sysAssert(roundup(65, 8) == 72, "rawAlloc: roundup broken")
-  sysAssert(requestedSize >= sizeof(FreeCell), "rawAlloc: requested size too small")
   var size = roundup(requestedSize, MemAlign)
+  sysAssert(size >= sizeof(FreeCell), "rawAlloc: requested size too small")
   sysAssert(size >= requestedSize, "insufficient allocated size!")
   #c_fprintf(stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
+
   if size <= SmallChunkSize-smallChunkOverhead():
+    template fetchSharedCells(tc: PSmallChunk) =
+      # Consumes cells from (potentially) foreign threads from `a.sharedFreeLists[s]`
+      when defined(gcDestructors):
+        if tc.freeList == nil:
+          when hasThreadSupport:
+            # Steal the entire list from `sharedFreeList`:
+            tc.freeList = atomicExchangeN(addr a.sharedFreeLists[s], nil, ATOMIC_RELAXED)
+          else:
+            tc.freeList = a.sharedFreeLists[s]
+            a.sharedFreeLists[s] = nil
+          # if `tc.freeList` isn't nil, `tc` will gain capacity.
+          # We must calculate how much it gained and how many foreign cells are included.
+          compensateCounters(a, tc, size)
+
     # allocate a small block: for small chunks, we use only its next pointer
-    var s = size div MemAlign
+    let s = size div MemAlign
     var c = a.freeSmallChunks[s]
     if c == nil:
+      # There is no free chunk of the requested size available; we need a new one.
       c = getSmallChunk(a)
+      # init all fields in case memory didn't get zeroed
       c.freeList = nil
+      c.foreignCells = 0
       sysAssert c.size == PageSize, "rawAlloc 3"
       c.size = size
-      c.acc = size
-      c.free = SmallChunkSize - smallChunkOverhead() - size
+      c.acc = size.uint32
+      c.free = SmallChunkSize - smallChunkOverhead() - size.int32
+      sysAssert c.owner == addr(a), "rawAlloc: No owner set!"
       c.next = nil
       c.prev = nil
-      listAdd(a.freeSmallChunks[s], c)
+      # Shared cells are fetched here in case `c.size * 2 >= SmallChunkSize - smallChunkOverhead()`.
+      # For those single-cell chunks, we would otherwise have to allocate a new one almost every time.
+      fetchSharedCells(c)
+      if c.free >= size:
+        # Because removals from `a.freeSmallChunks[s]` only happen in the other alloc branch and during dealloc,
+        #  we must not add it to the list if it cannot be used the next time a pointer of `size` bytes is needed.
+        listAdd(a.freeSmallChunks[s], c)
       result = addr(c.data)
-      sysAssert((cast[ByteAddress](result) and (MemAlign-1)) == 0, "rawAlloc 4")
+      sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 4")
     else:
+      # There is a free chunk of the requested size available; use it.
       sysAssert(allocInv(a), "rawAlloc: begin c != nil")
       sysAssert c.next != c, "rawAlloc 5"
       #if c.size != size:
       #  c_fprintf(stdout, "csize: %lld; size %lld\n", c.size, size)
       sysAssert c.size == size, "rawAlloc 6"
       if c.freeList == nil:
-        sysAssert(c.acc + smallChunkOverhead() + size <= SmallChunkSize,
+        sysAssert(c.acc.int + smallChunkOverhead() + size <= SmallChunkSize,
                   "rawAlloc 7")
-        result = cast[pointer](cast[ByteAddress](addr(c.data)) +% c.acc)
+        result = cast[pointer](cast[int](addr(c.data)) +% c.acc.int)
         inc(c.acc, size)
       else:
+        # There are free cells available; prefer them over the accumulator.
         result = c.freeList
-        sysAssert(c.freeList.zeroField == 0, "rawAlloc 8")
+        when not defined(gcDestructors):
+          sysAssert(c.freeList.zeroField == 0, "rawAlloc 8")
         c.freeList = c.freeList.next
+        if cast[PSmallChunk](pageAddr(result)) != c:
+          # This cell isn't a blocker for the current chunk's deallocation anymore
+          dec(c.foreignCells)
+        else:
+          sysAssert(c == cast[PSmallChunk](pageAddr(result)), "rawAlloc: Bad cell")
+      # Even if the cell we return is foreign, the local chunk's capacity decreases.
+      # The capacity was previously reserved in the source chunk (when it first got allocated),
+      #  then added into the current chunk during dealloc,
+      #  so the source chunk will not be freed or leak memory because of this.
       dec(c.free, size)
-      sysAssert((cast[ByteAddress](result) and (MemAlign-1)) == 0, "rawAlloc 9")
+      sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 9")
       sysAssert(allocInv(a), "rawAlloc: end c != nil")
-    sysAssert(allocInv(a), "rawAlloc: before c.free < size")
-    if c.free < size:
-      sysAssert(allocInv(a), "rawAlloc: before listRemove test")
-      listRemove(a.freeSmallChunks[s], c)
-      sysAssert(allocInv(a), "rawAlloc: end listRemove test")
-    sysAssert(((cast[ByteAddress](result) and PageMask) - smallChunkOverhead()) %%
+      # We fetch deferred cells *after* advancing `c.freeList`/`acc` to adjust `c.free`.
+      # If after the adjustment it turns out there's free cells available,
+      #  the chunk stays in `a.freeSmallChunks[s]` and the need for a new chunk is delayed.
+      fetchSharedCells(c)
+      sysAssert(allocInv(a), "rawAlloc: before c.free < size")
+      if c.free < size:
+        # Even after fetching shared cells, the chunk has no usable memory left; it is no longer the active chunk.
+        sysAssert(allocInv(a), "rawAlloc: before listRemove test")
+        listRemove(a.freeSmallChunks[s], c)
+        sysAssert(allocInv(a), "rawAlloc: end listRemove test")
+    sysAssert(((cast[int](result) and PageMask) - smallChunkOverhead()) %%
                size == 0, "rawAlloc 21")
     sysAssert(allocInv(a), "rawAlloc: end small size")
     inc a.occ, size
     trackSize(c.size)
   else:
+    when defined(gcDestructors):
+      when hasThreadSupport:
+        let deferredFrees = atomicExchangeN(addr a.sharedFreeListBigChunks, nil, ATOMIC_RELAXED)
+      else:
+        let deferredFrees = a.sharedFreeListBigChunks
+        a.sharedFreeListBigChunks = nil
+      if deferredFrees != nil:
+        freeDeferredObjects(a, deferredFrees)
+
     size = requestedSize + bigChunkOverhead() #  roundup(requestedSize+bigChunkOverhead(), PageSize)
     # allocate a large block
     var c = if size >= HugeChunkSize: getHugeChunk(a, size)
@@ -795,163 +973,210 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
     sysAssert c.prev == nil, "rawAlloc 10"
     sysAssert c.next == nil, "rawAlloc 11"
     result = addr(c.data)
-    sysAssert((cast[ByteAddress](c) and (MemAlign-1)) == 0, "rawAlloc 13")
-    sysAssert((cast[ByteAddress](c) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary")
-    if a.root == nil: a.root = getBottom(a)
-    add(a, a.root, cast[ByteAddress](result), cast[ByteAddress](result)+%size)
+    sysAssert((cast[int](c) and (MemAlign-1)) == 0, "rawAlloc 13")
+    sysAssert((cast[int](c) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary")
+    when not defined(gcDestructors):
+      if a.root == nil: a.root = getBottom(a)
+      add(a, a.root, cast[int](result), cast[int](result)+%size)
     inc a.occ, c.size
     trackSize(c.size)
   sysAssert(isAccessible(a, result), "rawAlloc 14")
   sysAssert(allocInv(a), "rawAlloc: end")
-  when logAlloc: cprintf("var pointer_%p = alloc(%ld)\n", result, requestedSize)
+  when logAlloc: cprintf("var pointer_%p = alloc(%ld) # %p\n", result, requestedSize, addr a)
 
 proc rawAlloc0(a: var MemRegion, requestedSize: int): pointer =
   result = rawAlloc(a, requestedSize)
   zeroMem(result, requestedSize)
 
 proc rawDealloc(a: var MemRegion, p: pointer) =
+  when defined(nimTypeNames):
+    inc(a.deallocCounter)
   #sysAssert(isAllocatedPtr(a, p), "rawDealloc: no allocated pointer")
   sysAssert(allocInv(a), "rawDealloc: begin")
   var c = pageAddr(p)
+  sysAssert(c != nil, "rawDealloc: begin")
   if isSmallChunk(c):
     # `p` is within a small chunk:
     var c = cast[PSmallChunk](c)
-    var s = c.size
-    dec a.occ, s
-    untrackSize(s)
-    sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case A)"
-    sysAssert(((cast[ByteAddress](p) and PageMask) - smallChunkOverhead()) %%
-               s == 0, "rawDealloc 3")
+    let s = c.size
+    #       ^ We might access thread foreign storage here.
+    # The other thread cannot possibly free this block as it's still alive.
     var f = cast[ptr FreeCell](p)
-    #echo("setting to nil: ", $cast[ByteAddress](addr(f.zeroField)))
-    sysAssert(f.zeroField != 0, "rawDealloc 1")
-    f.zeroField = 0
-    f.next = c.freeList
-    c.freeList = f
-    when overwriteFree:
-      # set to 0xff to check for usage after free bugs:
-      c_memset(cast[pointer](cast[int](p) +% sizeof(FreeCell)), -1'i32,
-               s -% sizeof(FreeCell))
-    # check if it is not in the freeSmallChunks[s] list:
-    if c.free < s:
-      # add it to the freeSmallChunks[s] array:
-      listAdd(a.freeSmallChunks[s div MemAlign], c)
-      inc(c.free, s)
+    if c.owner == addr(a):
+      # We own the block, there is no foreign thread involved.
+      dec a.occ, s
+      untrackSize(s)
+      sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case A)"
+      sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
+                s == 0, "rawDealloc 3")
+      when not defined(gcDestructors):
+        #echo("setting to nil: ", $cast[int](addr(f.zeroField)))
+        sysAssert(f.zeroField != 0, "rawDealloc 1")
+        f.zeroField = 0
+      when overwriteFree:
+        # set to 0xff to check for usage after free bugs:
+        nimSetMem(cast[pointer](cast[int](p) +% sizeof(FreeCell)), -1'i32,
+                s -% sizeof(FreeCell))
+      let activeChunk = a.freeSmallChunks[s div MemAlign]
+      if activeChunk != nil and c != activeChunk:
+        # This pointer is not part of the active chunk; lend it out
+        #  and do not adjust the current chunk (same logic as compensateCounters).
+        # Putting the cell into the active chunk may prevent a queue of available
+        #  chunks from forming in a.freeSmallChunks[s div MemAlign].
+        #  Such a queue would otherwise waste memory in the form of free cells until we return to those chunks.
+        f.next = activeChunk.freeList
+        activeChunk.freeList = f # lend the cell
+        inc(activeChunk.free, s) # By not adjusting the current chunk's capacity it is prevented from being freed
+        inc(activeChunk.foreignCells) # The cell is now considered foreign from the perspective of the active chunk
+      else:
+        f.next = c.freeList
+        c.freeList = f
+        if c.free < s:
+          # The chunk could not have been active as it didn't have enough space to give
+          listAdd(a.freeSmallChunks[s div MemAlign], c)
+          inc(c.free, s)
+        else:
+          inc(c.free, s)
+          # Free only if the entire chunk is unused and there are no borrowed cells.
+          # If the chunk were to be freed while it references foreign cells,
+          #  the foreign chunks will leak memory and can never be freed.
+          if c.free == SmallChunkSize-smallChunkOverhead() and c.foreignCells == 0:
+            listRemove(a.freeSmallChunks[s div MemAlign], c)
+            c.size = SmallChunkSize
+            freeBigChunk(a, cast[PBigChunk](c))
     else:
-      inc(c.free, s)
-      if c.free == SmallChunkSize-smallChunkOverhead():
-        listRemove(a.freeSmallChunks[s div MemAlign], c)
-        c.size = SmallChunkSize
-        freeBigChunk(a, cast[PBigChunk](c))
-    sysAssert(((cast[ByteAddress](p) and PageMask) - smallChunkOverhead()) %%
+      when logAlloc: cprintf("dealloc(pointer_%p) # SMALL FROM %p CALLER %p\n", p, c.owner, addr(a))
+
+      when defined(gcDestructors):
+        addToSharedFreeList(c, f, s div MemAlign)
+    sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
                s == 0, "rawDealloc 2")
   else:
     # set to 0xff to check for usage after free bugs:
-    when overwriteFree: c_memset(p, -1'i32, c.size -% bigChunkOverhead())
-    # free big chunk
-    var c = cast[PBigChunk](c)
-    dec a.occ, c.size
-    untrackSize(c.size)
-    sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case B)"
-    a.deleted = getBottom(a)
-    del(a, a.root, cast[int](addr(c.data)))
-    if c.size >= HugeChunkSize: freeHugeChunk(a, c)
-    else: freeBigChunk(a, c)
+    when overwriteFree: nimSetMem(p, -1'i32, c.size -% bigChunkOverhead())
+    when logAlloc: cprintf("dealloc(pointer_%p) # BIG %p\n", p, c.owner)
+    when defined(gcDestructors):
+      if c.owner == addr(a):
+        deallocBigChunk(a, cast[PBigChunk](c))
+      else:
+        addToSharedFreeListBigChunks(c.owner[], cast[PBigChunk](c))
+    else:
+      deallocBigChunk(a, cast[PBigChunk](c))
+
   sysAssert(allocInv(a), "rawDealloc: end")
-  when logAlloc: cprintf("dealloc(pointer_%p)\n", p)
+  #when logAlloc: cprintf("dealloc(pointer_%p)\n", p)
 
-proc isAllocatedPtr(a: MemRegion, p: pointer): bool =
-  if isAccessible(a, p):
-    var c = pageAddr(p)
-    if not chunkUnused(c):
-      if isSmallChunk(c):
-        var c = cast[PSmallChunk](c)
-        var offset = (cast[ByteAddress](p) and (PageSize-1)) -%
-                     smallChunkOverhead()
-        result = (c.acc >% offset) and (offset %% c.size == 0) and
-          (cast[ptr FreeCell](p).zeroField >% 1)
-      else:
-        var c = cast[PBigChunk](c)
-        result = p == addr(c.data) and cast[ptr FreeCell](p).zeroField >% 1
+when not defined(gcDestructors):
+  proc isAllocatedPtr(a: MemRegion, p: pointer): bool =
+    if isAccessible(a, p):
+      var c = pageAddr(p)
+      if not chunkUnused(c):
+        if isSmallChunk(c):
+          var c = cast[PSmallChunk](c)
+          var offset = (cast[int](p) and (PageSize-1)) -%
+                      smallChunkOverhead()
+          result = (c.acc.int >% offset) and (offset %% c.size == 0) and
+            (cast[ptr FreeCell](p).zeroField >% 1)
+        else:
+          var c = cast[PBigChunk](c)
+          result = p == addr(c.data) and cast[ptr FreeCell](p).zeroField >% 1
 
-proc prepareForInteriorPointerChecking(a: var MemRegion) {.inline.} =
-  a.minLargeObj = lowGauge(a.root)
-  a.maxLargeObj = highGauge(a.root)
+  proc prepareForInteriorPointerChecking(a: var MemRegion) {.inline.} =
+    a.minLargeObj = lowGauge(a.root)
+    a.maxLargeObj = highGauge(a.root)
 
-proc interiorAllocatedPtr(a: MemRegion, p: pointer): pointer =
-  if isAccessible(a, p):
-    var c = pageAddr(p)
-    if not chunkUnused(c):
-      if isSmallChunk(c):
-        var c = cast[PSmallChunk](c)
-        var offset = (cast[ByteAddress](p) and (PageSize-1)) -%
-                     smallChunkOverhead()
-        if c.acc >% offset:
-          sysAssert(cast[ByteAddress](addr(c.data)) +% offset ==
-                    cast[ByteAddress](p), "offset is not what you think it is")
-          var d = cast[ptr FreeCell](cast[ByteAddress](addr(c.data)) +%
-                    offset -% (offset %% c.size))
-          if d.zeroField >% 1:
+  proc interiorAllocatedPtr(a: MemRegion, p: pointer): pointer =
+    if isAccessible(a, p):
+      var c = pageAddr(p)
+      if not chunkUnused(c):
+        if isSmallChunk(c):
+          var c = cast[PSmallChunk](c)
+          var offset = (cast[int](p) and (PageSize-1)) -%
+                      smallChunkOverhead()
+          if c.acc.int >% offset:
+            sysAssert(cast[int](addr(c.data)) +% offset ==
+                      cast[int](p), "offset is not what you think it is")
+            var d = cast[ptr FreeCell](cast[int](addr(c.data)) +%
+                      offset -% (offset %% c.size))
+            if d.zeroField >% 1:
+              result = d
+              sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
+        else:
+          var c = cast[PBigChunk](c)
+          var d = addr(c.data)
+          if p >= d and cast[ptr FreeCell](d).zeroField >% 1:
             result = d
             sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
-      else:
-        var c = cast[PBigChunk](c)
-        var d = addr(c.data)
-        if p >= d and cast[ptr FreeCell](d).zeroField >% 1:
-          result = d
-          sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
-  else:
-    var q = cast[int](p)
-    if q >=% a.minLargeObj and q <=% a.maxLargeObj:
-      # this check is highly effective! Test fails for 99,96% of all checks on
-      # an x86-64.
-      var avlNode = inRange(a.root, q)
-      if avlNode != nil:
-        var k = cast[pointer](avlNode.key)
-        var c = cast[PBigChunk](pageAddr(k))
-        sysAssert(addr(c.data) == k, " k is not the same as addr(c.data)!")
-        if cast[ptr FreeCell](k).zeroField >% 1:
-          result = k
-          sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
+    else:
+      var q = cast[int](p)
+      if q >=% a.minLargeObj and q <=% a.maxLargeObj:
+        # this check is highly effective! Test fails for 99,96% of all checks on
+        # an x86-64.
+        var avlNode = inRange(a.root, q)
+        if avlNode != nil:
+          var k = cast[pointer](avlNode.key)
+          var c = cast[PBigChunk](pageAddr(k))
+          sysAssert(addr(c.data) == k, " k is not the same as addr(c.data)!")
+          if cast[ptr FreeCell](k).zeroField >% 1:
+            result = k
+            sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
 
 proc ptrSize(p: pointer): int =
-  var x = cast[pointer](cast[ByteAddress](p) -% sizeof(FreeCell))
-  var c = pageAddr(p)
-  sysAssert(not chunkUnused(c), "ptrSize")
-  result = c.size -% sizeof(FreeCell)
-  if not isSmallChunk(c):
-    dec result, bigChunkOverhead()
+  when not defined(gcDestructors):
+    var x = cast[pointer](cast[int](p) -% sizeof(FreeCell))
+    var c = pageAddr(p)
+    sysAssert(not chunkUnused(c), "ptrSize")
+    result = c.size -% sizeof(FreeCell)
+    if not isSmallChunk(c):
+      dec result, bigChunkOverhead()
+  else:
+    var c = pageAddr(p)
+    sysAssert(not chunkUnused(c), "ptrSize")
+    result = c.size
+    if not isSmallChunk(c):
+      dec result, bigChunkOverhead()
 
 proc alloc(allocator: var MemRegion, size: Natural): pointer {.gcsafe.} =
-  result = rawAlloc(allocator, size+sizeof(FreeCell))
-  cast[ptr FreeCell](result).zeroField = 1 # mark it as used
-  sysAssert(not isAllocatedPtr(allocator, result), "alloc")
-  result = cast[pointer](cast[ByteAddress](result) +% sizeof(FreeCell))
-  track("alloc", result, size)
+  when not defined(gcDestructors):
+    result = rawAlloc(allocator, size+sizeof(FreeCell))
+    cast[ptr FreeCell](result).zeroField = 1 # mark it as used
+    sysAssert(not isAllocatedPtr(allocator, result), "alloc")
+    result = cast[pointer](cast[int](result) +% sizeof(FreeCell))
+    track("alloc", result, size)
+  else:
+    result = rawAlloc(allocator, size)
 
 proc alloc0(allocator: var MemRegion, size: Natural): pointer =
   result = alloc(allocator, size)
   zeroMem(result, size)
 
 proc dealloc(allocator: var MemRegion, p: pointer) =
-  sysAssert(p != nil, "dealloc: p is nil")
-  var x = cast[pointer](cast[ByteAddress](p) -% sizeof(FreeCell))
-  sysAssert(x != nil, "dealloc: x is nil")
-  sysAssert(isAccessible(allocator, x), "is not accessible")
-  sysAssert(cast[ptr FreeCell](x).zeroField == 1, "dealloc: object header corrupted")
-  rawDealloc(allocator, x)
-  sysAssert(not isAllocatedPtr(allocator, x), "dealloc: object still accessible")
-  track("dealloc", p, 0)
+  when not defined(gcDestructors):
+    sysAssert(p != nil, "dealloc: p is nil")
+    var x = cast[pointer](cast[int](p) -% sizeof(FreeCell))
+    sysAssert(x != nil, "dealloc: x is nil")
+    sysAssert(isAccessible(allocator, x), "is not accessible")
+    sysAssert(cast[ptr FreeCell](x).zeroField == 1, "dealloc: object header corrupted")
+    rawDealloc(allocator, x)
+    sysAssert(not isAllocatedPtr(allocator, x), "dealloc: object still accessible")
+    track("dealloc", p, 0)
+  else:
+    rawDealloc(allocator, p)
 
 proc realloc(allocator: var MemRegion, p: pointer, newsize: Natural): pointer =
   if newsize > 0:
-    result = alloc0(allocator, newsize)
+    result = alloc(allocator, newsize)
     if p != nil:
       copyMem(result, p, min(ptrSize(p), newsize))
       dealloc(allocator, p)
   elif p != nil:
     dealloc(allocator, p)
 
+proc realloc0(allocator: var MemRegion, p: pointer, oldsize, newsize: Natural): pointer =
+  result = realloc(allocator, p, newsize)
+  if newsize > oldsize:
+    zeroMem(cast[pointer](cast[uint](result) + uint(oldsize)), newsize - oldsize)
+
 proc deallocOsPages(a: var MemRegion) =
   # we free every 'ordinarily' allocated page by iterating over the page bits:
   var it = addr(a.heapLinks)
@@ -961,7 +1186,7 @@ proc deallocOsPages(a: var MemRegion) =
       let (p, size) = it.chunks[i]
       when defined(debugHeapLinks):
         cprintf("owner %p; dealloc A: %p size: %ld; next: %p\n", addr(a),
-          it, it.origSize, next)
+          it, size, next)
       sysAssert size >= PageSize, "origSize too small"
       osDeallocPages(p, size)
     it = next
@@ -975,33 +1200,42 @@ proc getOccupiedMem(a: MemRegion): int {.inline.} =
   result = a.occ
   # a.currMem - a.freeMem
 
+when defined(nimTypeNames):
+  proc getMemCounters(a: MemRegion): (int, int) {.inline.} =
+    (a.allocCounter, a.deallocCounter)
+
 # ---------------------- thread memory region -------------------------------
 
-template instantiateForRegion(allocator: untyped) =
+template instantiateForRegion(allocator: untyped) {.dirty.} =
   {.push stackTrace: off.}
 
-  when defined(fulldebug):
+  when defined(nimFulldebug):
     proc interiorAllocatedPtr*(p: pointer): pointer =
       result = interiorAllocatedPtr(allocator, p)
 
     proc isAllocatedPtr*(p: pointer): bool =
-      let p = cast[pointer](cast[ByteAddress](p)-%ByteAddress(sizeof(Cell)))
+      let p = cast[pointer](cast[int](p)-%ByteAddress(sizeof(Cell)))
       result = isAllocatedPtr(allocator, p)
 
   proc deallocOsPages = deallocOsPages(allocator)
 
-  proc alloc(size: Natural): pointer =
+  proc allocImpl(size: Natural): pointer =
     result = alloc(allocator, size)
 
-  proc alloc0(size: Natural): pointer =
+  proc alloc0Impl(size: Natural): pointer =
     result = alloc0(allocator, size)
 
-  proc dealloc(p: pointer) =
+  proc deallocImpl(p: pointer) =
     dealloc(allocator, p)
 
-  proc realloc(p: pointer, newsize: Natural): pointer =
+  proc reallocImpl(p: pointer, newSize: Natural): pointer =
     result = realloc(allocator, p, newSize)
 
+  proc realloc0Impl(p: pointer, oldSize, newSize: Natural): pointer =
+    result = realloc(allocator, p, newSize)
+    if newSize > oldSize:
+      zeroMem(cast[pointer](cast[uint](result) + uint(oldSize)), newSize - oldSize)
+
   when false:
     proc countFreeMem(): int =
       # only used for assertions
@@ -1010,64 +1244,94 @@ template instantiateForRegion(allocator: untyped) =
         inc(result, it.size)
         it = it.next
 
+  when hasThreadSupport and not defined(gcDestructors):
+    proc addSysExitProc(quitProc: proc() {.noconv.}) {.importc: "atexit", header: "<stdlib.h>".}
+
+    var sharedHeap: MemRegion
+    var heapLock: SysLock
+    initSysLock(heapLock)
+    addSysExitProc(proc() {.noconv.} = deinitSys(heapLock))
+
   proc getFreeMem(): int =
-    result = allocator.freeMem
     #sysAssert(result == countFreeMem())
+    result = allocator.freeMem
+
+  proc getTotalMem(): int =
+    result = allocator.currMem
 
-  proc getTotalMem(): int = return allocator.currMem
-  proc getOccupiedMem(): int = return allocator.occ #getTotalMem() - getFreeMem()
-  proc getMaxMem*(): int = return getMaxMem(allocator)
+  proc getOccupiedMem(): int =
+    result = allocator.occ #getTotalMem() - getFreeMem()
+
+  proc getMaxMem*(): int =
+    result = getMaxMem(allocator)
+
+  when defined(nimTypeNames):
+    proc getMemCounters*(): (int, int) = getMemCounters(allocator)
 
   # -------------------- shared heap region ----------------------------------
-  when hasThreadSupport:
-    var sharedHeap: MemRegion
-    var heapLock: SysLock
-    initSysLock(heapLock)
 
-  proc allocShared(size: Natural): pointer =
-    when hasThreadSupport:
+  proc allocSharedImpl(size: Natural): pointer =
+    when hasThreadSupport and not defined(gcDestructors):
       acquireSys(heapLock)
       result = alloc(sharedHeap, size)
       releaseSys(heapLock)
     else:
-      result = alloc(size)
+      result = allocImpl(size)
 
-  proc allocShared0(size: Natural): pointer =
-    result = allocShared(size)
+  proc allocShared0Impl(size: Natural): pointer =
+    result = allocSharedImpl(size)
     zeroMem(result, size)
 
-  proc deallocShared(p: pointer) =
-    when hasThreadSupport:
+  proc deallocSharedImpl(p: pointer) =
+    when hasThreadSupport and not defined(gcDestructors):
       acquireSys(heapLock)
       dealloc(sharedHeap, p)
       releaseSys(heapLock)
     else:
-      dealloc(p)
+      deallocImpl(p)
 
-  proc reallocShared(p: pointer, newsize: Natural): pointer =
-    when hasThreadSupport:
+  proc reallocSharedImpl(p: pointer, newSize: Natural): pointer =
+    when hasThreadSupport and not defined(gcDestructors):
       acquireSys(heapLock)
-      result = realloc(sharedHeap, p, newsize)
+      result = realloc(sharedHeap, p, newSize)
       releaseSys(heapLock)
     else:
-      result = realloc(p, newSize)
+      result = reallocImpl(p, newSize)
 
-  when hasThreadSupport:
-
-    template sharedMemStatsShared(v: int) {.immediate.} =
+  proc reallocShared0Impl(p: pointer, oldSize, newSize: Natural): pointer =
+    when hasThreadSupport and not defined(gcDestructors):
       acquireSys(heapLock)
-      result = v
+      result = realloc0(sharedHeap, p, oldSize, newSize)
       releaseSys(heapLock)
+    else:
+      result = realloc0Impl(p, oldSize, newSize)
+
+  when hasThreadSupport:
+    when defined(gcDestructors):
+      proc getFreeSharedMem(): int =
+        allocator.freeMem
+
+      proc getTotalSharedMem(): int =
+        allocator.currMem
+
+      proc getOccupiedSharedMem(): int =
+        allocator.occ
+
+    else:
+      template sharedMemStatsShared(v: int) =
+        acquireSys(heapLock)
+        result = v
+        releaseSys(heapLock)
 
-    proc getFreeSharedMem(): int =
-      sharedMemStatsShared(sharedHeap.freeMem)
+      proc getFreeSharedMem(): int =
+        sharedMemStatsShared(sharedHeap.freeMem)
 
-    proc getTotalSharedMem(): int =
-      sharedMemStatsShared(sharedHeap.currMem)
+      proc getTotalSharedMem(): int =
+        sharedMemStatsShared(sharedHeap.currMem)
 
-    proc getOccupiedSharedMem(): int =
-      sharedMemStatsShared(sharedHeap.occ)
-      #sharedMemStatsShared(sharedHeap.currMem - sharedHeap.freeMem)
+      proc getOccupiedSharedMem(): int =
+        sharedMemStatsShared(sharedHeap.occ)
+        #sharedMemStatsShared(sharedHeap.currMem - sharedHeap.freeMem)
   {.pop.}
 
 {.pop.}
diff --git a/lib/system/ansi_c.nim b/lib/system/ansi_c.nim
index 0bac979e7..3098e17d6 100644
--- a/lib/system/ansi_c.nim
+++ b/lib/system/ansi_c.nim
@@ -7,114 +7,227 @@
 #    distribution, for details about the copyright.
 #
 
-# This include file contains headers of Ansi C procs
+# This module contains headers of Ansi C procs
 # and definitions of Ansi C types in Nim syntax
 # All symbols are prefixed with 'c_' to avoid ambiguities
 
-{.push hints:off}
+{.push hints:off, stack_trace: off, profiler: off.}
 
-proc c_memchr(s: pointer, c: cint, n: csize): pointer {.
+proc c_memchr*(s: pointer, c: cint, n: csize_t): pointer {.
   importc: "memchr", header: "<string.h>".}
-proc c_memcmp(a, b: pointer, size: csize): cint {.
+proc c_memcmp*(a, b: pointer, size: csize_t): cint {.
   importc: "memcmp", header: "<string.h>", noSideEffect.}
-proc c_memcpy(a, b: pointer, size: csize): pointer {.
+proc c_memcpy*(a, b: pointer, size: csize_t): pointer {.
   importc: "memcpy", header: "<string.h>", discardable.}
-proc c_memmove(a, b: pointer, size: csize): pointer {.
+proc c_memmove*(a, b: pointer, size: csize_t): pointer {.
   importc: "memmove", header: "<string.h>",discardable.}
-proc c_memset(p: pointer, value: cint, size: csize): pointer {.
+proc c_memset*(p: pointer, value: cint, size: csize_t): pointer {.
   importc: "memset", header: "<string.h>", discardable.}
-proc c_strcmp(a, b: cstring): cint {.
+proc c_strcmp*(a, b: cstring): cint {.
   importc: "strcmp", header: "<string.h>", noSideEffect.}
+proc c_strlen*(a: cstring): csize_t {.
+  importc: "strlen", header: "<string.h>", noSideEffect.}
+proc c_abort*() {.
+  importc: "abort", header: "<stdlib.h>", noSideEffect, noreturn.}
 
-when defined(linux) and defined(amd64):
+
+when defined(nimBuiltinSetjmp):
+  type
+    C_JmpBuf* = array[5, pointer]
+elif defined(linux) and defined(amd64):
   type
-    C_JmpBuf {.importc: "jmp_buf", header: "<setjmp.h>", bycopy.} = object
+    C_JmpBuf* {.importc: "jmp_buf", header: "<setjmp.h>", bycopy.} = object
         abi: array[200 div sizeof(clong), clong]
 else:
   type
-    C_JmpBuf {.importc: "jmp_buf", header: "<setjmp.h>".} = object
+    C_JmpBuf* {.importc: "jmp_buf", header: "<setjmp.h>".} = object
 
 
+type CSighandlerT = proc (a: cint) {.noconv.}
 when defined(windows):
   const
-    SIGABRT = cint(22)
-    SIGFPE = cint(8)
-    SIGILL = cint(4)
-    SIGINT = cint(2)
-    SIGSEGV = cint(11)
+    SIGABRT* = cint(22)
+    SIGFPE* = cint(8)
+    SIGILL* = cint(4)
+    SIGINT* = cint(2)
+    SIGSEGV* = cint(11)
     SIGTERM = cint(15)
+    SIG_DFL* = cast[CSighandlerT](0)
 elif defined(macosx) or defined(linux) or defined(freebsd) or
      defined(openbsd) or defined(netbsd) or defined(solaris) or
-     defined(dragonfly):
+     defined(dragonfly) or defined(nintendoswitch) or defined(genode) or
+     defined(aix) or hostOS == "standalone":
   const
-    SIGABRT = cint(6)
-    SIGFPE = cint(8)
-    SIGILL = cint(4)
-    SIGINT = cint(2)
-    SIGSEGV = cint(11)
-    SIGTERM = cint(15)
-    SIGPIPE = cint(13)
+    SIGABRT* = cint(6)
+    SIGFPE* = cint(8)
+    SIGILL* = cint(4)
+    SIGINT* = cint(2)
+    SIGSEGV* = cint(11)
+    SIGTERM* = cint(15)
+    SIGPIPE* = cint(13)
+    SIG_DFL* = CSighandlerT(nil)
+elif defined(haiku):
+  const
+    SIGABRT* = cint(6)
+    SIGFPE* = cint(8)
+    SIGILL* = cint(4)
+    SIGINT* = cint(2)
+    SIGSEGV* = cint(11)
+    SIGTERM* = cint(15)
+    SIGPIPE* = cint(7)
+    SIG_DFL* = CSighandlerT(nil)
 else:
-  when NoFakeVars:
+  when defined(nimscript):
     {.error: "SIGABRT not ported to your platform".}
   else:
     var
-      SIGINT {.importc: "SIGINT", nodecl.}: cint
-      SIGSEGV {.importc: "SIGSEGV", nodecl.}: cint
-      SIGABRT {.importc: "SIGABRT", nodecl.}: cint
-      SIGFPE {.importc: "SIGFPE", nodecl.}: cint
-      SIGILL {.importc: "SIGILL", nodecl.}: cint
+      SIGINT* {.importc: "SIGINT", nodecl.}: cint
+      SIGSEGV* {.importc: "SIGSEGV", nodecl.}: cint
+      SIGABRT* {.importc: "SIGABRT", nodecl.}: cint
+      SIGFPE* {.importc: "SIGFPE", nodecl.}: cint
+      SIGILL* {.importc: "SIGILL", nodecl.}: cint
+      SIG_DFL* {.importc: "SIG_DFL", nodecl.}: CSighandlerT
     when defined(macosx) or defined(linux):
-      var SIGPIPE {.importc: "SIGPIPE", nodecl.}: cint
+      var SIGPIPE* {.importc: "SIGPIPE", nodecl.}: cint
 
 when defined(macosx):
-  const SIGBUS = cint(10)
-else:
-  template SIGBUS: untyped = SIGSEGV
+  const SIGBUS* = cint(10)
+elif defined(haiku):
+  const SIGBUS* = cint(30)
 
-when defined(nimSigSetjmp) and not defined(nimStdSetjmp):
-  proc c_longjmp(jmpb: C_JmpBuf, retval: cint) {.
+# "nimRawSetjmp" is defined by default for certain platforms, so we need the
+# "nimStdSetjmp" escape hatch with it.
+when defined(nimSigSetjmp):
+  proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) {.
     header: "<setjmp.h>", importc: "siglongjmp".}
-  template c_setjmp(jmpb: C_JmpBuf): cint =
+  proc c_setjmp*(jmpb: C_JmpBuf): cint =
     proc c_sigsetjmp(jmpb: C_JmpBuf, savemask: cint): cint {.
       header: "<setjmp.h>", importc: "sigsetjmp".}
     c_sigsetjmp(jmpb, 0)
+elif defined(nimBuiltinSetjmp):
+  proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) =
+    # Apple's Clang++ has trouble converting array names to pointers, so we need
+    # to be very explicit here.
+    proc c_builtin_longjmp(jmpb: ptr pointer, retval: cint) {.
+      importc: "__builtin_longjmp", nodecl.}
+    # The second parameter needs to be 1 and sometimes the C/C++ compiler checks it.
+    c_builtin_longjmp(unsafeAddr jmpb[0], 1)
+  proc c_setjmp*(jmpb: C_JmpBuf): cint =
+    proc c_builtin_setjmp(jmpb: ptr pointer): cint {.
+      importc: "__builtin_setjmp", nodecl.}
+    c_builtin_setjmp(unsafeAddr jmpb[0])
+
 elif defined(nimRawSetjmp) and not defined(nimStdSetjmp):
-  proc c_longjmp(jmpb: C_JmpBuf, retval: cint) {.
-    header: "<setjmp.h>", importc: "_longjmp".}
-  proc c_setjmp(jmpb: C_JmpBuf): cint {.
-    header: "<setjmp.h>", importc: "_setjmp".}
+  when defined(windows):
+    # No `_longjmp()` on Windows.
+    proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) {.
+      header: "<setjmp.h>", importc: "longjmp".}
+    when defined(vcc) or defined(clangcl):
+      proc c_setjmp*(jmpb: C_JmpBuf): cint {.
+        header: "<setjmp.h>", importc: "setjmp".}
+    else:
+      # The Windows `_setjmp()` takes two arguments, with the second being an
+      # undocumented buffer used by the SEH mechanism for stack unwinding.
+      # Mingw-w64 has been trying to get it right for years, but it's still
+      # prone to stack corruption during unwinding, so we disable that by setting
+      # it to NULL.
+      # More details: https://github.com/status-im/nimbus-eth2/issues/3121
+      when defined(nimHasStyleChecks):
+        {.push styleChecks: off.}
+
+      proc c_setjmp*(jmpb: C_JmpBuf): cint =
+        proc c_setjmp_win(jmpb: C_JmpBuf, ctx: pointer): cint {.
+          header: "<setjmp.h>", importc: "_setjmp".}
+        c_setjmp_win(jmpb, nil)
+
+      when defined(nimHasStyleChecks):
+        {.pop.}
+  else:
+    proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) {.
+      header: "<setjmp.h>", importc: "_longjmp".}
+    proc c_setjmp*(jmpb: C_JmpBuf): cint {.
+      header: "<setjmp.h>", importc: "_setjmp".}
 else:
-  proc c_longjmp(jmpb: C_JmpBuf, retval: cint) {.
+  proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) {.
     header: "<setjmp.h>", importc: "longjmp".}
-  proc c_setjmp(jmpb: C_JmpBuf): cint {.
+  proc c_setjmp*(jmpb: C_JmpBuf): cint {.
     header: "<setjmp.h>", importc: "setjmp".}
 
-type c_sighandler_t = proc (a: cint) {.noconv.}
-proc c_signal(sign: cint, handler: proc (a: cint) {.noconv.}): c_sighandler_t {.
+proc c_signal*(sign: cint, handler: CSighandlerT): CSighandlerT {.
   importc: "signal", header: "<signal.h>", discardable.}
+proc c_raise*(sign: cint): cint {.importc: "raise", header: "<signal.h>".}
 
-proc c_fprintf(f: File, frmt: cstring): cint {.
+type
+  CFile {.importc: "FILE", header: "<stdio.h>",
+          incompleteStruct.} = object
+  CFilePtr* = ptr CFile ## The type representing a file handle.
+
+# duplicated between io and ansi_c
+const stdioUsesMacros = (defined(osx) or defined(freebsd) or defined(dragonfly)) and not defined(emscripten)
+const stderrName = when stdioUsesMacros: "__stderrp" else: "stderr"
+const stdoutName = when stdioUsesMacros: "__stdoutp" else: "stdout"
+const stdinName = when stdioUsesMacros: "__stdinp" else: "stdin"
+
+var
+  cstderr* {.importc: stderrName, header: "<stdio.h>".}: CFilePtr
+  cstdout* {.importc: stdoutName, header: "<stdio.h>".}: CFilePtr
+  cstdin* {.importc: stdinName, header: "<stdio.h>".}: CFilePtr
+
+proc c_fprintf*(f: CFilePtr, frmt: cstring): cint {.
   importc: "fprintf", header: "<stdio.h>", varargs, discardable.}
-proc c_printf(frmt: cstring): cint {.
+proc c_printf*(frmt: cstring): cint {.
   importc: "printf", header: "<stdio.h>", varargs, discardable.}
 
-proc c_sprintf(buf, frmt: cstring): cint {.
+proc c_fputs*(c: cstring, f: CFilePtr): cint {.
+  importc: "fputs", header: "<stdio.h>", discardable.}
+proc c_fputc*(c: char, f: CFilePtr): cint {.
+  importc: "fputc", header: "<stdio.h>", discardable.}
+
+proc c_sprintf*(buf, frmt: cstring): cint {.
   importc: "sprintf", header: "<stdio.h>", varargs, noSideEffect.}
   # we use it only in a way that cannot lead to security issues
 
-when defined(windows):
-  proc c_fileno(f: File): cint {.
-      importc: "_fileno", header: "<stdio.h>".}
+proc c_snprintf*(buf: cstring, n: csize_t, frmt: cstring): cint {.
+  importc: "snprintf", header: "<stdio.h>", varargs, noSideEffect.}
+
+when defined(zephyr) and not defined(zephyrUseLibcMalloc):
+  proc c_malloc*(size: csize_t): pointer {.
+    importc: "k_malloc", header: "<kernel.h>".}
+  proc c_calloc*(nmemb, size: csize_t): pointer {.
+    importc: "k_calloc", header: "<kernel.h>".}
+  proc c_free*(p: pointer) {.
+    importc: "k_free", header: "<kernel.h>".}
+  proc c_realloc*(p: pointer, newsize: csize_t): pointer =
+    # Zephyr's kernel malloc doesn't support realloc
+    result = c_malloc(newSize)
+    # match the ansi c behavior
+    if not result.isNil():
+      copyMem(result, p, newSize)
+      c_free(p)
 else:
-  proc c_fileno(f: File): cint {.
-      importc: "fileno", header: "<fcntl.h>".}
+  proc c_malloc*(size: csize_t): pointer {.
+    importc: "malloc", header: "<stdlib.h>".}
+  proc c_calloc*(nmemb, size: csize_t): pointer {.
+    importc: "calloc", header: "<stdlib.h>".}
+  proc c_free*(p: pointer) {.
+    importc: "free", header: "<stdlib.h>".}
+  proc c_realloc*(p: pointer, newsize: csize_t): pointer {.
+    importc: "realloc", header: "<stdlib.h>".}
+
+proc c_fwrite*(buf: pointer, size, n: csize_t, f: CFilePtr): csize_t {.
+  importc: "fwrite", header: "<stdio.h>".}
+
+proc c_fflush*(f: CFilePtr): cint {.
+  importc: "fflush", header: "<stdio.h>".}
+
+proc rawWriteString*(f: CFilePtr, s: cstring, length: int) {.compilerproc, nonReloadable, inline.} =
+  # we cannot throw an exception here!
+  discard c_fwrite(s, 1, cast[csize_t](length), f)
+  discard c_fflush(f)
 
-proc c_malloc(size: csize): pointer {.
-  importc: "malloc", header: "<stdlib.h>".}
-proc c_free(p: pointer) {.
-  importc: "free", header: "<stdlib.h>".}
-proc c_realloc(p: pointer, newsize: csize): pointer {.
-  importc: "realloc", header: "<stdlib.h>".}
+proc rawWrite*(f: CFilePtr, s: cstring) {.compilerproc, nonReloadable, inline.} =
+  # we cannot throw an exception here!
+  discard c_fwrite(s, 1, c_strlen(s), f)
+  discard c_fflush(f)
 
-{.pop}
+{.pop.}
diff --git a/lib/system/arc.nim b/lib/system/arc.nim
new file mode 100644
index 000000000..d001fcaa5
--- /dev/null
+++ b/lib/system/arc.nim
@@ -0,0 +1,267 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2019 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+#[
+In this new runtime we simplify the object layouts a bit: The runtime type
+information is only accessed for the objects that have it and it's always
+at offset 0 then. The ``ref`` object header is independent from the
+runtime type and only contains a reference count.
+]#
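# [Editorial sketch] The layout described above, drawn out; a managed
# allocation consists of the header immediately followed by the user payload:
#
#   [ RefHeader: rc (+ optional debug fields) ][ user object payload ... ]
#   ^ start of the allocation (Cell)            ^ pointer handed to user code
#
# A payload pointer is mapped back to its header by subtracting
# sizeof(RefHeader), which is what the `head` template below does.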
+
+when defined(gcOrc):
+  const
+    rcIncrement = 0b10000 # so that lowest 4 bits are not touched
+    rcMask = 0b1111
+    rcShift = 4      # shift by rcShift to get the reference counter
+
+else:
+  const
+    rcIncrement = 0b1000 # so that lowest 3 bits are not touched
+    rcMask = 0b111
+    rcShift = 3      # shift by rcShift to get the reference counter
+
+const
+  orcLeakDetector = defined(nimOrcLeakDetector)
+
+type
+  RefHeader = object
+    rc: int # the object header is now a single RC field.
+            # we could remove it in non-debug builds for the 'owned ref'
+            # design but this seems unwise.
+    when defined(gcOrc):
+      rootIdx: int # thanks to this we can delete potential cycle roots
+                   # in O(1) without doubly linked lists
+    when defined(nimArcDebug) or defined(nimArcIds):
+      refId: int
+    when defined(gcOrc) and orcLeakDetector:
+      filename: cstring
+      line: int
+
+  Cell = ptr RefHeader
+
+template setFrameInfo(c: Cell) =
+  when orcLeakDetector:
+    if framePtr != nil and framePtr.prev != nil:
+      c.filename = framePtr.prev.filename
+      c.line = framePtr.prev.line
+    else:
+      c.filename = nil
+      c.line = 0
+
+template head(p: pointer): Cell =
+  cast[Cell](cast[int](p) -% sizeof(RefHeader))
+
+const
+  traceCollector = defined(traceArc)
+
+when defined(nimArcDebug):
+  include cellsets
+
+  const traceId = 20 # 1037
+
+  var gRefId: int
+  var freedCells: CellSet
+elif defined(nimArcIds):
+  var gRefId: int
+
+  const traceId = -1
+
+when defined(gcAtomicArc) and hasThreadSupport:
+  template decrement(cell: Cell): untyped =
+    discard atomicDec(cell.rc, rcIncrement)
+  template increment(cell: Cell): untyped =
+    discard atomicInc(cell.rc, rcIncrement)
+  template count(x: Cell): untyped =
+    atomicLoadN(x.rc.addr, ATOMIC_ACQUIRE) shr rcShift
+else:
+  template decrement(cell: Cell): untyped =
+    dec(cell.rc, rcIncrement)
+  template increment(cell: Cell): untyped =
+    inc(cell.rc, rcIncrement)
+  template count(x: Cell): untyped =
+    x.rc shr rcShift
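# [Editorial illustration] With the non-ORC constants above (rcShift = 3,
# rcIncrement = 0b1000), the low 3 bits of `rc` are reserved and never change
# on increment/decrement:
#   rc = 0b0_000  -> count == 0   (freshly allocated; one owning reference)
#   rc = 0b1_000  -> count == 1
#   rc = 0b10_000 -> count == 2
# `count(x)` recovers the counter as `x.rc shr rcShift`.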
+
+proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} =
+  let hdrSize = align(sizeof(RefHeader), alignment)
+  let s = size + hdrSize
+  when defined(nimscript):
+    discard
+  else:
+    result = alignedAlloc0(s, alignment) +! hdrSize
+  when defined(nimArcDebug) or defined(nimArcIds):
+    head(result).refId = gRefId
+    atomicInc gRefId
+    if head(result).refId == traceId:
+      writeStackTrace()
+      cfprintf(cstderr, "[nimNewObj] %p %ld\n", result, head(result).count)
+  when traceCollector:
+    cprintf("[Allocated] %p result: %p\n", result -! sizeof(RefHeader), result)
+  setFrameInfo head(result)
+
+proc nimNewObjUninit(size, alignment: int): pointer {.compilerRtl.} =
+  # Same as 'nimNewObj' but does not initialize the memory to zero.
+  # The codegen proved for us that this is not necessary.
+  let hdrSize = align(sizeof(RefHeader), alignment)
+  let s = size + hdrSize
+  when defined(nimscript):
+    discard
+  else:
+    result = cast[ptr RefHeader](alignedAlloc(s, alignment) +! hdrSize)
+  head(result).rc = 0
+  when defined(gcOrc):
+    head(result).rootIdx = 0
+  when defined(nimArcDebug):
+    head(result).refId = gRefId
+    atomicInc gRefId
+    if head(result).refId == traceId:
+      writeStackTrace()
+      cfprintf(cstderr, "[nimNewObjUninit] %p %ld\n", result, head(result).count)
+
+  when traceCollector:
+    cprintf("[Allocated] %p result: %p\n", result -! sizeof(RefHeader), result)
+  setFrameInfo head(result)
+
+proc nimDecWeakRef(p: pointer) {.compilerRtl, inl.} =
+  decrement head(p)
+
+proc isUniqueRef*[T](x: ref T): bool {.inline.} =
+  ## Returns true if the object `x` points to is uniquely referenced. Such
+  ## an object can potentially be passed over to a different thread safely,
+  ## if great care is taken. This queries the internal reference count of
+  ## the object, which is subject to lots of optimizations! In other words,
+  ## the value of `isUniqueRef` can depend on the compiler version and
+  ## optimizer settings used.
+  ## Nevertheless it is a very valuable debugging tool and can be used to
+  ## specify the constraints of a threading-related API
+  ## via `assert isUniqueRef(x)`.
+  head(cast[pointer](x)).rc == 0
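# [Editorial usage sketch] Standalone annotation with illustrative names;
# assumes --mm:arc or --mm:orc, and as the doc comment notes the exact answers
# can depend on optimizations such as cursor inference:
type EditorNote = ref object
  value: int

let editorA = EditorNote(value: 1)
echo isUniqueRef(editorA)   # typically true: only `editorA` refers to the object
let editorB = editorA
echo isUniqueRef(editorA)   # typically false once `editorB` also refers to it
echo editorB.value          # keep `editorB` live so its reference is observable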
+
+proc nimIncRef(p: pointer) {.compilerRtl, inl.} =
+  when defined(nimArcDebug):
+    if head(p).refId == traceId:
+      writeStackTrace()
+      cfprintf(cstderr, "[IncRef] %p %ld\n", p, head(p).count)
+
+  increment head(p)
+  when traceCollector:
+    cprintf("[INCREF] %p\n", head(p))
+
+when not defined(gcOrc) or defined(nimThinout):
+  proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
+    # This is only used by the old RTTI mechanism and we know
+    # that 'dest[]' is nil and needs no destruction, which is really handy
+    # as we cannot reliably destroy an object of unknown
+    # compile-time type.
+    dest[] = src
+    if src != nil: nimIncRef src
+
+when not defined(nimscript) and defined(nimArcDebug):
+  proc deallocatedRefId*(p: pointer): int =
+    ## Returns the ref's ID if the ref was already deallocated. This
+    ## is a memory corruption check. Returns 0 if there is no error.
+    let c = head(p)
+    if freedCells.data != nil and freedCells.contains(c):
+      result = c.refId
+    else:
+      result = 0
+
+proc nimRawDispose(p: pointer, alignment: int) {.compilerRtl.} =
+  when not defined(nimscript):
+    when traceCollector:
+      cprintf("[Freed] %p\n", p -! sizeof(RefHeader))
+    when defined(nimOwnedEnabled):
+      if head(p).rc >= rcIncrement:
+        cstderr.rawWrite "[FATAL] dangling references exist\n"
+        rawQuit 1
+    when defined(nimArcDebug):
+      # we do NOT really free the memory here in order to reliably detect use-after-frees
+      if freedCells.data == nil: init(freedCells)
+      freedCells.incl head(p)
+    else:
+      let hdrSize = align(sizeof(RefHeader), alignment)
+      alignedDealloc(p -! hdrSize, alignment)
+
+template `=dispose`*[T](x: owned(ref T)) = nimRawDispose(cast[pointer](x), T.alignOf)
+#proc dispose*(x: pointer) = nimRawDispose(x)
+
+proc nimDestroyAndDispose(p: pointer) {.compilerRtl, raises: [].} =
+  let rti = cast[ptr PNimTypeV2](p)
+  if rti.destructor != nil:
+    cast[DestructorProc](rti.destructor)(p)
+  when false:
+    cstderr.rawWrite cast[ptr PNimTypeV2](p)[].name
+    cstderr.rawWrite "\n"
+    if d == nil:
+      cstderr.rawWrite "bah, nil\n"
+    else:
+      cstderr.rawWrite "has destructor!\n"
+  nimRawDispose(p, rti.align)
+
+when defined(gcOrc):
+  when defined(nimThinout):
+    include cyclebreaker
+  else:
+    include orc
+    #include cyclecollector
+
+proc nimDecRefIsLast(p: pointer): bool {.compilerRtl, inl.} =
+  if p != nil:
+    var cell = head(p)
+
+    when defined(nimArcDebug):
+      if cell.refId == traceId:
+        writeStackTrace()
+        cfprintf(cstderr, "[DecRef] %p %ld\n", p, cell.count)
+
+    when defined(gcAtomicArc) and hasThreadSupport:
+      # `atomicDec` returns the new value
+      if atomicDec(cell.rc, rcIncrement) == -rcIncrement:
+        result = true
+        when traceCollector:
+          cprintf("[ABOUT TO DESTROY] %p\n", cell)
+    else:
+      if cell.count == 0:
+        result = true
+        when traceCollector:
+          cprintf("[ABOUT TO DESTROY] %p\n", cell)
+      else:
+        decrement cell
+        # According to Lins it's correct to do nothing else here.
+        when traceCollector:
+          cprintf("[DECREF] %p\n", cell)
+
+proc GC_unref*[T](x: ref T) =
+  ## New runtime only supports this operation for 'ref T'.
+  var y {.cursor.} = x
+  `=destroy`(y)
+
+proc GC_ref*[T](x: ref T) =
+  ## New runtime only supports this operation for 'ref T'.
+  if x != nil: nimIncRef(cast[pointer](x))
+
+when not defined(gcOrc):
+  template GC_fullCollect* =
+    ## Forces a full garbage collection pass. With `--mm:arc` a nop.
+    discard
+
+template setupForeignThreadGc* =
+  ## With `--mm:arc` a nop.
+  discard
+
+template tearDownForeignThreadGc* =
+  ## With `--mm:arc` a nop.
+  discard
+
+proc isObjDisplayCheck(source: PNimTypeV2, targetDepth: int16, token: uint32): bool {.compilerRtl, inl.} =
+  result = targetDepth <= source.depth and source.display[targetDepth] == token
+
+when defined(gcDestructors):
+  proc nimGetVTable(p: pointer, index: int): pointer
+        {.compilerRtl, inline, raises: [].} =
+    result = cast[ptr PNimTypeV2](p).vTable[index]
diff --git a/lib/system/arithm.nim b/lib/system/arithm.nim
deleted file mode 100644
index 69c558799..000000000
--- a/lib/system/arithm.nim
+++ /dev/null
@@ -1,411 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-
-# simple integer arithmetic with overflow checking
-
-proc raiseOverflow {.compilerproc, noinline.} =
-  # a single proc to reduce code size to a minimum
-  sysFatal(OverflowError, "over- or underflow")
-
-proc raiseDivByZero {.compilerproc, noinline.} =
-  sysFatal(DivByZeroError, "division by zero")
-
-when defined(builtinOverflow):
-  # Builtin compiler functions for improved performance
-  when sizeof(clong) == 8:
-    proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_saddl_overflow", nodecl, nosideeffect.}
-
-    proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_ssubl_overflow", nodecl, nosideeffect.}
-
-    proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_smull_overflow", nodecl, nosideeffect.}
-
-  elif sizeof(clonglong) == 8:
-    proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_saddll_overflow", nodecl, nosideeffect.}
-
-    proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_ssubll_overflow", nodecl, nosideeffect.}
-
-    proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_smulll_overflow", nodecl, nosideeffect.}
-
-  when sizeof(int) == 8:
-    proc addIntOverflow(a, b: int, c: var int): bool {.inline.} =
-      addInt64Overflow(a, b, c)
-
-    proc subIntOverflow(a, b: int, c: var int): bool {.inline.} =
-      subInt64Overflow(a, b, c)
-
-    proc mulIntOverflow(a, b: int, c: var int): bool {.inline.} =
-      mulInt64Overflow(a, b, c)
-
-  elif sizeof(int) == 4 and sizeof(cint) == 4:
-    proc addIntOverflow(a, b: int, c: var int): bool {.
-      importc: "__builtin_sadd_overflow", nodecl, nosideeffect.}
-
-    proc subIntOverflow(a, b: int, c: var int): bool {.
-      importc: "__builtin_ssub_overflow", nodecl, nosideeffect.}
-
-    proc mulIntOverflow(a, b: int, c: var int): bool {.
-      importc: "__builtin_smul_overflow", nodecl, nosideeffect.}
-
-  proc addInt64(a, b: int64): int64 {.compilerProc, inline.} =
-    if addInt64Overflow(a, b, result):
-      raiseOverflow()
-
-  proc subInt64(a, b: int64): int64 {.compilerProc, inline.} =
-    if subInt64Overflow(a, b, result):
-      raiseOverflow()
-
-  proc mulInt64(a, b: int64): int64 {.compilerproc, inline.} =
-    if mulInt64Overflow(a, b, result):
-      raiseOverflow()
-else:
-  proc addInt64(a, b: int64): int64 {.compilerProc, inline.} =
-    result = a +% b
-    if (result xor a) >= int64(0) or (result xor b) >= int64(0):
-      return result
-    raiseOverflow()
-
-  proc subInt64(a, b: int64): int64 {.compilerProc, inline.} =
-    result = a -% b
-    if (result xor a) >= int64(0) or (result xor not b) >= int64(0):
-      return result
-    raiseOverflow()
-
-  #
-  # This code has been inspired by Python's source code.
-  # The native int product x*y is either exactly right or *way* off, being
-  # just the last n bits of the true product, where n is the number of bits
-  # in an int (the delivered product is the true product plus i*2**n for
-  # some integer i).
-  #
-  # The native float64 product x*y is subject to three
-  # rounding errors: on a sizeof(int)==8 box, each cast to double can lose
-  # info, and even on a sizeof(int)==4 box, the multiplication can lose info.
-  # But, unlike the native int product, it's not in *range* trouble:  even
-  # if sizeof(int)==32 (256-bit ints), the product easily fits in the
-  # dynamic range of a float64. So the leading 50 (or so) bits of the float64
-  # product are correct.
-  #
-  # We check these two ways against each other, and declare victory if they're
-  # approximately the same. Else, because the native int product is the only
-  # one that can lose catastrophic amounts of information, it's the native int
-  # product that must have overflowed.
-  #
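# [Editorial worked example] of the check described above, assuming 64-bit
# ints; this is an annotation, not part of the deleted file:
let exA = 1'i64 shl 40
let exB = 1'i64 shl 40
# the truncated native product: 2^80 mod 2^64 == 0
let exWrapped = cast[int64](cast[uint64](exA) * cast[uint64](exB))
# float64 keeps the leading bits of the true product: about 1.2089e24
let exFloatProd = float64(exA) * float64(exB)
# 32.0 * abs(float64(exWrapped) - exFloatProd) far exceeds abs(exFloatProd),
# so the check concludes that the native product overflowed.
echo exWrapped, " ", exFloatProd   # prints 0 and about 1.2089e24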
-  proc mulInt64(a, b: int64): int64 {.compilerproc.} =
-    var
-      resAsFloat, floatProd: float64
-    result = a *% b
-    floatProd = toBiggestFloat(a) # conversion
-    floatProd = floatProd * toBiggestFloat(b)
-    resAsFloat = toBiggestFloat(result)
-
-    # Fast path for normal case: small multiplicands, and no info
-    # is lost in either method.
-    if resAsFloat == floatProd: return result
-
-    # Somebody somewhere lost info. Close enough, or way off? Note
-    # that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
-    # The difference either is or isn't significant compared to the
-    # true value (of which floatProd is a good approximation).
-
-    # abs(diff)/abs(prod) <= 1/32 iff
-    #   32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
-    if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
-      return result
-    raiseOverflow()
-
-proc negInt64(a: int64): int64 {.compilerProc, inline.} =
-  if a != low(int64): return -a
-  raiseOverflow()
-
-proc absInt64(a: int64): int64 {.compilerProc, inline.} =
-  if a != low(int64):
-    if a >= 0: return a
-    else: return -a
-  raiseOverflow()
-
-proc divInt64(a, b: int64): int64 {.compilerProc, inline.} =
-  if b == int64(0):
-    raiseDivByZero()
-  if a == low(int64) and b == int64(-1):
-    raiseOverflow()
-  return a div b
-
-proc modInt64(a, b: int64): int64 {.compilerProc, inline.} =
-  if b == int64(0):
-    raiseDivByZero()
-  return a mod b
-
-proc absInt(a: int): int {.compilerProc, inline.} =
-  if a != low(int):
-    if a >= 0: return a
-    else: return -a
-  raiseOverflow()
-
-const
-  asmVersion = defined(I386) and (defined(vcc) or defined(wcc) or
-               defined(dmc) or defined(gcc) or defined(llvm_gcc))
-    # my Version of Borland C++Builder does not have
-    # tasm32, which is needed for assembler blocks
-    # this is why Borland is not included in the 'when'
-
-when asmVersion and not defined(gcc) and not defined(llvm_gcc):
-  # assembler optimized versions for compilers that
-  # have an intel syntax assembler:
-  proc addInt(a, b: int): int {.compilerProc, asmNoStackFrame.} =
-    # a in eax, and b in edx
-    asm """
-        mov eax, ecx
-        add eax, edx
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-        ret
-    """
-
-  proc subInt(a, b: int): int {.compilerProc, asmNoStackFrame.} =
-    asm """
-        mov eax, ecx
-        sub eax, edx
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-        ret
-    """
-
-  proc negInt(a: int): int {.compilerProc, asmNoStackFrame.} =
-    asm """
-        mov eax, ecx
-        neg eax
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-        ret
-    """
-
-  proc divInt(a, b: int): int {.compilerProc, asmNoStackFrame.} =
-    asm """
-        mov eax, ecx
-        mov ecx, edx
-        xor edx, edx
-        idiv ecx
-        jno  theEnd
-        call `raiseOverflow`
-      theEnd:
-        ret
-    """
-
-  proc modInt(a, b: int): int {.compilerProc, asmNoStackFrame.} =
-    asm """
-        mov eax, ecx
-        mov ecx, edx
-        xor edx, edx
-        idiv ecx
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-        mov eax, edx
-        ret
-    """
-
-  proc mulInt(a, b: int): int {.compilerProc, asmNoStackFrame.} =
-    asm """
-        mov eax, ecx
-        mov ecx, edx
-        xor edx, edx
-        imul ecx
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-        ret
-    """
-
-elif false: # asmVersion and (defined(gcc) or defined(llvm_gcc)):
-  proc addInt(a, b: int): int {.compilerProc, inline.} =
-    # don't use a pure proc here!
-    asm """
-      "addl %%ecx, %%eax\n"
-      "jno 1\n"
-      "call _raiseOverflow\n"
-      "1: \n"
-      :"=a"(`result`)
-      :"a"(`a`), "c"(`b`)
-    """
-    #".intel_syntax noprefix"
-    #/* Intel syntax here */
-    #".att_syntax"
-
-  proc subInt(a, b: int): int {.compilerProc, inline.} =
-    asm """ "subl %%ecx,%%eax\n"
-            "jno 1\n"
-            "call _raiseOverflow\n"
-            "1: \n"
-           :"=a"(`result`)
-           :"a"(`a`), "c"(`b`)
-    """
-
-  proc mulInt(a, b: int): int {.compilerProc, inline.} =
-    asm """  "xorl %%edx, %%edx\n"
-             "imull %%ecx\n"
-             "jno 1\n"
-             "call _raiseOverflow\n"
-             "1: \n"
-            :"=a"(`result`)
-            :"a"(`a`), "c"(`b`)
-            :"%edx"
-    """
-
-  proc negInt(a: int): int {.compilerProc, inline.} =
-    asm """ "negl %%eax\n"
-            "jno 1\n"
-            "call _raiseOverflow\n"
-            "1: \n"
-           :"=a"(`result`)
-           :"a"(`a`)
-    """
-
-  proc divInt(a, b: int): int {.compilerProc, inline.} =
-    asm """  "xorl %%edx, %%edx\n"
-             "idivl %%ecx\n"
-             "jno 1\n"
-             "call _raiseOverflow\n"
-             "1: \n"
-            :"=a"(`result`)
-            :"a"(`a`), "c"(`b`)
-            :"%edx"
-    """
-
-  proc modInt(a, b: int): int {.compilerProc, inline.} =
-    asm """  "xorl %%edx, %%edx\n"
-             "idivl %%ecx\n"
-             "jno 1\n"
-             "call _raiseOverflow\n"
-             "1: \n"
-             "movl %%edx, %%eax"
-            :"=a"(`result`)
-            :"a"(`a`), "c"(`b`)
-            :"%edx"
-    """
-
-when not declared(addInt) and defined(builtinOverflow):
-  proc addInt(a, b: int): int {.compilerProc, inline.} =
-    if addIntOverflow(a, b, result):
-      raiseOverflow()
-
-when not declared(subInt) and defined(builtinOverflow):
-  proc subInt(a, b: int): int {.compilerProc, inline.} =
-    if subIntOverflow(a, b, result):
-      raiseOverflow()
-
-when not declared(mulInt) and defined(builtinOverflow):
-  proc mulInt(a, b: int): int {.compilerProc, inline.} =
-    if mulIntOverflow(a, b, result):
-      raiseOverflow()
-
-# Platform independent versions of the above (slower!)
-when not declared(addInt):
-  proc addInt(a, b: int): int {.compilerProc, inline.} =
-    result = a +% b
-    if (result xor a) >= 0 or (result xor b) >= 0:
-      return result
-    raiseOverflow()
-
-when not declared(subInt):
-  proc subInt(a, b: int): int {.compilerProc, inline.} =
-    result = a -% b
-    if (result xor a) >= 0 or (result xor not b) >= 0:
-      return result
-    raiseOverflow()
-
-when not declared(negInt):
-  proc negInt(a: int): int {.compilerProc, inline.} =
-    if a != low(int): return -a
-    raiseOverflow()
-
-when not declared(divInt):
-  proc divInt(a, b: int): int {.compilerProc, inline.} =
-    if b == 0:
-      raiseDivByZero()
-    if a == low(int) and b == -1:
-      raiseOverflow()
-    return a div b
-
-when not declared(modInt):
-  proc modInt(a, b: int): int {.compilerProc, inline.} =
-    if b == 0:
-      raiseDivByZero()
-    return a mod b
-
-when not declared(mulInt):
-  #
-  # This code has been inspired by Python's source code.
-  # The native int product x*y is either exactly right or *way* off, being
-  # just the last n bits of the true product, where n is the number of bits
-  # in an int (the delivered product is the true product plus i*2**n for
-  # some integer i).
-  #
-  # The native float64 product x*y is subject to three
-  # rounding errors: on a sizeof(int)==8 box, each cast to double can lose
-  # info, and even on a sizeof(int)==4 box, the multiplication can lose info.
-  # But, unlike the native int product, it's not in *range* trouble:  even
-  # if sizeof(int)==32 (256-bit ints), the product easily fits in the
-  # dynamic range of a float64. So the leading 50 (or so) bits of the float64
-  # product are correct.
-  #
-  # We check these two ways against each other, and declare victory if
-  # they're approximately the same. Else, because the native int product is
-  # the only one that can lose catastrophic amounts of information, it's the
-  # native int product that must have overflowed.
-  #
-  proc mulInt(a, b: int): int {.compilerProc.} =
-    var
-      resAsFloat, floatProd: float
-
-    result = a *% b
-    floatProd = toFloat(a) * toFloat(b)
-    resAsFloat = toFloat(result)
-
-    # Fast path for normal case: small multiplicands, and no info
-    # is lost in either method.
-    if resAsFloat == floatProd: return result
-
-    # Somebody somewhere lost info. Close enough, or way off? Note
-    # that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
-    # The difference either is or isn't significant compared to the
-    # true value (of which floatProd is a good approximation).
-
-    # abs(diff)/abs(prod) <= 1/32 iff
-    #   32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
-    if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
-      return result
-    raiseOverflow()
-
-# We avoid setting the FPU control word here for compatibility with libraries
-# written in other languages.
-
-proc raiseFloatInvalidOp {.noinline.} =
-  sysFatal(FloatInvalidOpError, "FPU operation caused a NaN result")
-
-proc nanCheck(x: float64) {.compilerProc, inline.} =
-  if x != x: raiseFloatInvalidOp()
-
-proc raiseFloatOverflow(x: float64) {.noinline.} =
-  if x > 0.0:
-    sysFatal(FloatOverflowError, "FPU operation caused an overflow")
-  else:
-    sysFatal(FloatUnderflowError, "FPU operations caused an underflow")
-
-proc infCheck(x: float64) {.compilerProc, inline.} =
-  if x != 0.0 and x*0.5 == x: raiseFloatOverflow(x)
diff --git a/lib/system/arithmetics.nim b/lib/system/arithmetics.nim
new file mode 100644
index 000000000..e229a0f4b
--- /dev/null
+++ b/lib/system/arithmetics.nim
@@ -0,0 +1,405 @@
+proc succ*[T, V: Ordinal](x: T, y: V = 1): T {.magic: "Succ", noSideEffect.} =
+  ## Returns the `y`-th successor (default: 1) of the value `x`.
+  ##
+  ## If such a value does not exist, `OverflowDefect` is raised
+  ## or a compile time error occurs.
+  runnableExamples:
+    assert succ(5) == 6
+    assert succ(5, 3) == 8
+
+proc pred*[T, V: Ordinal](x: T, y: V = 1): T {.magic: "Pred", noSideEffect.} =
+  ## Returns the `y`-th predecessor (default: 1) of the value `x`.
+  ##
+  ## If such a value does not exist, `OverflowDefect` is raised
+  ## or a compile time error occurs.
+  runnableExamples:
+    assert pred(5) == 4
+    assert pred(5, 3) == 2
+
+proc inc*[T, V: Ordinal](x: var T, y: V = 1) {.magic: "Inc", noSideEffect.} =
+  ## Increments the ordinal `x` by `y`.
+  ##
+  ## If such a value does not exist, `OverflowDefect` is raised or a compile
+  ## time error occurs. This is a short notation for: `x = succ(x, y)`.
+  runnableExamples:
+    var i = 2
+    inc(i)
+    assert i == 3
+    inc(i, 3)
+    assert i == 6
+
+proc dec*[T, V: Ordinal](x: var T, y: V = 1) {.magic: "Dec", noSideEffect.} =
+  ## Decrements the ordinal `x` by `y`.
+  ##
+  ## If such a value does not exist, `OverflowDefect` is raised or a compile
+  ## time error occurs. This is a short notation for: `x = pred(x, y)`.
+  runnableExamples:
+    var i = 2
+    dec(i)
+    assert i == 1
+    dec(i, 3)
+    assert i == -2
+
+
+
+# --------------------------------------------------------------------------
+# built-in operators
+
+# integer calculations:
+proc `+`*(x: int): int {.magic: "UnaryPlusI", noSideEffect.}
+  ## Unary `+` operator for an integer. Has no effect.
+proc `+`*(x: int8): int8 {.magic: "UnaryPlusI", noSideEffect.}
+proc `+`*(x: int16): int16 {.magic: "UnaryPlusI", noSideEffect.}
+proc `+`*(x: int32): int32 {.magic: "UnaryPlusI", noSideEffect.}
+proc `+`*(x: int64): int64 {.magic: "UnaryPlusI", noSideEffect.}
+
+proc `-`*(x: int): int {.magic: "UnaryMinusI", noSideEffect.}
+  ## Unary `-` operator for an integer. Negates `x`.
+proc `-`*(x: int8): int8 {.magic: "UnaryMinusI", noSideEffect.}
+proc `-`*(x: int16): int16 {.magic: "UnaryMinusI", noSideEffect.}
+proc `-`*(x: int32): int32 {.magic: "UnaryMinusI", noSideEffect.}
+proc `-`*(x: int64): int64 {.magic: "UnaryMinusI64", noSideEffect.}
+
+proc `not`*(x: int): int {.magic: "BitnotI", noSideEffect.} =
+  ## Computes the `bitwise complement` of the integer `x`.
+  runnableExamples:
+    assert not 0'u8 == 255
+    assert not 0'i8 == -1
+    assert not 1000'u16 == 64535
+    assert not 1000'i16 == -1001
+proc `not`*(x: int8): int8 {.magic: "BitnotI", noSideEffect.}
+proc `not`*(x: int16): int16 {.magic: "BitnotI", noSideEffect.}
+proc `not`*(x: int32): int32 {.magic: "BitnotI", noSideEffect.}
+proc `not`*(x: int64): int64 {.magic: "BitnotI", noSideEffect.}
+
+proc `+`*(x, y: int): int {.magic: "AddI", noSideEffect.}
+  ## Binary `+` operator for an integer.
+proc `+`*(x, y: int8): int8 {.magic: "AddI", noSideEffect.}
+proc `+`*(x, y: int16): int16 {.magic: "AddI", noSideEffect.}
+proc `+`*(x, y: int32): int32 {.magic: "AddI", noSideEffect.}
+proc `+`*(x, y: int64): int64 {.magic: "AddI", noSideEffect.}
+
+proc `-`*(x, y: int): int {.magic: "SubI", noSideEffect.}
+  ## Binary `-` operator for an integer.
+proc `-`*(x, y: int8): int8 {.magic: "SubI", noSideEffect.}
+proc `-`*(x, y: int16): int16 {.magic: "SubI", noSideEffect.}
+proc `-`*(x, y: int32): int32 {.magic: "SubI", noSideEffect.}
+proc `-`*(x, y: int64): int64 {.magic: "SubI", noSideEffect.}
+
+proc `*`*(x, y: int): int {.magic: "MulI", noSideEffect.}
+  ## Binary `*` operator for an integer.
+proc `*`*(x, y: int8): int8 {.magic: "MulI", noSideEffect.}
+proc `*`*(x, y: int16): int16 {.magic: "MulI", noSideEffect.}
+proc `*`*(x, y: int32): int32 {.magic: "MulI", noSideEffect.}
+proc `*`*(x, y: int64): int64 {.magic: "MulI", noSideEffect.}
+
+proc `div`*(x, y: int): int {.magic: "DivI", noSideEffect.} =
+  ## Computes the integer division.
+  ##
+  ## This is roughly the same as `math.trunc(x/y).int`.
+  runnableExamples:
+    assert (1 div 2) == 0
+    assert (2 div 2) == 1
+    assert (3 div 2) == 1
+    assert (7 div 3) == 2
+    assert (-7 div 3) == -2
+    assert (7 div -3) == -2
+    assert (-7 div -3) == 2
+proc `div`*(x, y: int8): int8 {.magic: "DivI", noSideEffect.}
+proc `div`*(x, y: int16): int16 {.magic: "DivI", noSideEffect.}
+proc `div`*(x, y: int32): int32 {.magic: "DivI", noSideEffect.}
+proc `div`*(x, y: int64): int64 {.magic: "DivI", noSideEffect.}
+
+proc `mod`*(x, y: int): int {.magic: "ModI", noSideEffect.} =
+  ## Computes the integer modulo operation (remainder).
+  ##
+  ## This is the same as `x - (x div y) * y`.
+  runnableExamples:
+    assert (7 mod 5) == 2
+    assert (-7 mod 5) == -2
+    assert (7 mod -5) == 2
+    assert (-7 mod -5) == -2
+proc `mod`*(x, y: int8): int8 {.magic: "ModI", noSideEffect.}
+proc `mod`*(x, y: int16): int16 {.magic: "ModI", noSideEffect.}
+proc `mod`*(x, y: int32): int32 {.magic: "ModI", noSideEffect.}
+proc `mod`*(x, y: int64): int64 {.magic: "ModI", noSideEffect.}
+
+when defined(nimOldShiftRight):
+  const shrDepMessage = "`shr` will become sign preserving."
+  proc `shr`*(x: int, y: SomeInteger): int {.magic: "ShrI", noSideEffect, deprecated: shrDepMessage.}
+  proc `shr`*(x: int8, y: SomeInteger): int8 {.magic: "ShrI", noSideEffect, deprecated: shrDepMessage.}
+  proc `shr`*(x: int16, y: SomeInteger): int16 {.magic: "ShrI", noSideEffect, deprecated: shrDepMessage.}
+  proc `shr`*(x: int32, y: SomeInteger): int32 {.magic: "ShrI", noSideEffect, deprecated: shrDepMessage.}
+  proc `shr`*(x: int64, y: SomeInteger): int64 {.magic: "ShrI", noSideEffect, deprecated: shrDepMessage.}
+else:
+  proc `shr`*(x: int, y: SomeInteger): int {.magic: "AshrI", noSideEffect.} =
+    ## Computes the `shift right` operation of `x` and `y`, filling
+    ## vacant bit positions with the sign bit.
+    ##
+    ## **Note**: `Operator precedence <manual.html#syntax-precedence>`_
+    ## is different than in *C*.
+    ##
+    ## See also:
+    ## * `ashr func<#ashr,int,SomeInteger>`_ for arithmetic shift right
+    runnableExamples:
+      assert 0b0001_0000'i8 shr 2 == 0b0000_0100'i8
+      assert 0b0000_0001'i8 shr 1 == 0b0000_0000'i8
+      assert 0b1000_0000'i8 shr 4 == 0b1111_1000'i8
+      assert -1 shr 5 == -1
+      assert 1 shr 5 == 0
+      assert 16 shr 2 == 4
+      assert -16 shr 2 == -4
+  proc `shr`*(x: int8, y: SomeInteger): int8 {.magic: "AshrI", noSideEffect.}
+  proc `shr`*(x: int16, y: SomeInteger): int16 {.magic: "AshrI", noSideEffect.}
+  proc `shr`*(x: int32, y: SomeInteger): int32 {.magic: "AshrI", noSideEffect.}
+  proc `shr`*(x: int64, y: SomeInteger): int64 {.magic: "AshrI", noSideEffect.}
+
+
+proc `shl`*(x: int, y: SomeInteger): int {.magic: "ShlI", noSideEffect.} =
+  ## Computes the `shift left` operation of `x` and `y`.
+  ##
+  ## **Note**: `Operator precedence <manual.html#syntax-precedence>`_
+  ## is different than in *C*.
+  runnableExamples:
+    assert 1'i32 shl 4 == 0x0000_0010
+    assert 1'i64 shl 4 == 0x0000_0000_0000_0010
+proc `shl`*(x: int8, y: SomeInteger): int8 {.magic: "ShlI", noSideEffect.}
+proc `shl`*(x: int16, y: SomeInteger): int16 {.magic: "ShlI", noSideEffect.}
+proc `shl`*(x: int32, y: SomeInteger): int32 {.magic: "ShlI", noSideEffect.}
+proc `shl`*(x: int64, y: SomeInteger): int64 {.magic: "ShlI", noSideEffect.}
+
+proc ashr*(x: int, y: SomeInteger): int {.magic: "AshrI", noSideEffect.} =
+  ## Shifts right by pushing copies of the leftmost bit in from the left,
+  ## and lets the rightmost bits fall off.
+  ##
+  ## Note that `ashr` is not an operator so use the normal function
+  ## call syntax for it.
+  ##
+  ## See also:
+  ## * `shr func<#shr,int,SomeInteger>`_
+  runnableExamples:
+    assert ashr(0b0001_0000'i8, 2) == 0b0000_0100'i8
+    assert ashr(0b1000_0000'i8, 8) == 0b1111_1111'i8
+    assert ashr(0b1000_0000'i8, 1) == 0b1100_0000'i8
+proc ashr*(x: int8, y: SomeInteger): int8 {.magic: "AshrI", noSideEffect.}
+proc ashr*(x: int16, y: SomeInteger): int16 {.magic: "AshrI", noSideEffect.}
+proc ashr*(x: int32, y: SomeInteger): int32 {.magic: "AshrI", noSideEffect.}
+proc ashr*(x: int64, y: SomeInteger): int64 {.magic: "AshrI", noSideEffect.}
+
+proc `and`*(x, y: int): int {.magic: "BitandI", noSideEffect.} =
+  ## Computes the `bitwise and` of numbers `x` and `y`.
+  runnableExamples:
+    assert (0b0011 and 0b0101) == 0b0001
+    assert (0b0111 and 0b1100) == 0b0100
+proc `and`*(x, y: int8): int8 {.magic: "BitandI", noSideEffect.}
+proc `and`*(x, y: int16): int16 {.magic: "BitandI", noSideEffect.}
+proc `and`*(x, y: int32): int32 {.magic: "BitandI", noSideEffect.}
+proc `and`*(x, y: int64): int64 {.magic: "BitandI", noSideEffect.}
+
+proc `or`*(x, y: int): int {.magic: "BitorI", noSideEffect.} =
+  ## Computes the `bitwise or` of numbers `x` and `y`.
+  runnableExamples:
+    assert (0b0011 or 0b0101) == 0b0111
+    assert (0b0111 or 0b1100) == 0b1111
+proc `or`*(x, y: int8): int8 {.magic: "BitorI", noSideEffect.}
+proc `or`*(x, y: int16): int16 {.magic: "BitorI", noSideEffect.}
+proc `or`*(x, y: int32): int32 {.magic: "BitorI", noSideEffect.}
+proc `or`*(x, y: int64): int64 {.magic: "BitorI", noSideEffect.}
+
+proc `xor`*(x, y: int): int {.magic: "BitxorI", noSideEffect.} =
+  ## Computes the `bitwise xor` of numbers `x` and `y`.
+  runnableExamples:
+    assert (0b0011 xor 0b0101) == 0b0110
+    assert (0b0111 xor 0b1100) == 0b1011
+proc `xor`*(x, y: int8): int8 {.magic: "BitxorI", noSideEffect.}
+proc `xor`*(x, y: int16): int16 {.magic: "BitxorI", noSideEffect.}
+proc `xor`*(x, y: int32): int32 {.magic: "BitxorI", noSideEffect.}
+proc `xor`*(x, y: int64): int64 {.magic: "BitxorI", noSideEffect.}
+
+# unsigned integer operations:
+proc `not`*(x: uint): uint {.magic: "BitnotI", noSideEffect.}
+  ## Computes the `bitwise complement` of the integer `x`.
+proc `not`*(x: uint8): uint8 {.magic: "BitnotI", noSideEffect.}
+proc `not`*(x: uint16): uint16 {.magic: "BitnotI", noSideEffect.}
+proc `not`*(x: uint32): uint32 {.magic: "BitnotI", noSideEffect.}
+proc `not`*(x: uint64): uint64 {.magic: "BitnotI", noSideEffect.}
+
+proc `shr`*(x: uint, y: SomeInteger): uint {.magic: "ShrI", noSideEffect.}
+  ## Computes the `shift right` operation of `x` and `y`.
+proc `shr`*(x: uint8, y: SomeInteger): uint8 {.magic: "ShrI", noSideEffect.}
+proc `shr`*(x: uint16, y: SomeInteger): uint16 {.magic: "ShrI", noSideEffect.}
+proc `shr`*(x: uint32, y: SomeInteger): uint32 {.magic: "ShrI", noSideEffect.}
+proc `shr`*(x: uint64, y: SomeInteger): uint64 {.magic: "ShrI", noSideEffect.}
+
+proc `shl`*(x: uint, y: SomeInteger): uint {.magic: "ShlI", noSideEffect.}
+  ## Computes the `shift left` operation of `x` and `y`.
+proc `shl`*(x: uint8, y: SomeInteger): uint8 {.magic: "ShlI", noSideEffect.}
+proc `shl`*(x: uint16, y: SomeInteger): uint16 {.magic: "ShlI", noSideEffect.}
+proc `shl`*(x: uint32, y: SomeInteger): uint32 {.magic: "ShlI", noSideEffect.}
+proc `shl`*(x: uint64, y: SomeInteger): uint64 {.magic: "ShlI", noSideEffect.}
+
+proc `and`*(x, y: uint): uint {.magic: "BitandI", noSideEffect.}
+  ## Computes the `bitwise and` of numbers `x` and `y`.
+proc `and`*(x, y: uint8): uint8 {.magic: "BitandI", noSideEffect.}
+proc `and`*(x, y: uint16): uint16 {.magic: "BitandI", noSideEffect.}
+proc `and`*(x, y: uint32): uint32 {.magic: "BitandI", noSideEffect.}
+proc `and`*(x, y: uint64): uint64 {.magic: "BitandI", noSideEffect.}
+
+proc `or`*(x, y: uint): uint {.magic: "BitorI", noSideEffect.}
+  ## Computes the `bitwise or` of numbers `x` and `y`.
+proc `or`*(x, y: uint8): uint8 {.magic: "BitorI", noSideEffect.}
+proc `or`*(x, y: uint16): uint16 {.magic: "BitorI", noSideEffect.}
+proc `or`*(x, y: uint32): uint32 {.magic: "BitorI", noSideEffect.}
+proc `or`*(x, y: uint64): uint64 {.magic: "BitorI", noSideEffect.}
+
+proc `xor`*(x, y: uint): uint {.magic: "BitxorI", noSideEffect.}
+  ## Computes the `bitwise xor` of numbers `x` and `y`.
+proc `xor`*(x, y: uint8): uint8 {.magic: "BitxorI", noSideEffect.}
+proc `xor`*(x, y: uint16): uint16 {.magic: "BitxorI", noSideEffect.}
+proc `xor`*(x, y: uint32): uint32 {.magic: "BitxorI", noSideEffect.}
+proc `xor`*(x, y: uint64): uint64 {.magic: "BitxorI", noSideEffect.}
+
+proc `+`*(x, y: uint): uint {.magic: "AddU", noSideEffect.}
+  ## Binary `+` operator for unsigned integers.
+proc `+`*(x, y: uint8): uint8 {.magic: "AddU", noSideEffect.}
+proc `+`*(x, y: uint16): uint16 {.magic: "AddU", noSideEffect.}
+proc `+`*(x, y: uint32): uint32 {.magic: "AddU", noSideEffect.}
+proc `+`*(x, y: uint64): uint64 {.magic: "AddU", noSideEffect.}
+
+proc `-`*(x, y: uint): uint {.magic: "SubU", noSideEffect.}
+  ## Binary `-` operator for unsigned integers.
+proc `-`*(x, y: uint8): uint8 {.magic: "SubU", noSideEffect.}
+proc `-`*(x, y: uint16): uint16 {.magic: "SubU", noSideEffect.}
+proc `-`*(x, y: uint32): uint32 {.magic: "SubU", noSideEffect.}
+proc `-`*(x, y: uint64): uint64 {.magic: "SubU", noSideEffect.}
+
+proc `*`*(x, y: uint): uint {.magic: "MulU", noSideEffect.}
+  ## Binary `*` operator for unsigned integers.
+proc `*`*(x, y: uint8): uint8 {.magic: "MulU", noSideEffect.}
+proc `*`*(x, y: uint16): uint16 {.magic: "MulU", noSideEffect.}
+proc `*`*(x, y: uint32): uint32 {.magic: "MulU", noSideEffect.}
+proc `*`*(x, y: uint64): uint64 {.magic: "MulU", noSideEffect.}
+
+proc `div`*(x, y: uint): uint {.magic: "DivU", noSideEffect.}
+  ## Computes the integer division for unsigned integers.
+  ## This is roughly the same as `trunc(x/y)`.
+proc `div`*(x, y: uint8): uint8 {.magic: "DivU", noSideEffect.}
+proc `div`*(x, y: uint16): uint16 {.magic: "DivU", noSideEffect.}
+proc `div`*(x, y: uint32): uint32 {.magic: "DivU", noSideEffect.}
+proc `div`*(x, y: uint64): uint64 {.magic: "DivU", noSideEffect.}
+
+proc `mod`*(x, y: uint): uint {.magic: "ModU", noSideEffect.}
+  ## Computes the integer modulo operation (remainder) for unsigned integers.
+  ## This is the same as `x - (x div y) * y`.
+proc `mod`*(x, y: uint8): uint8 {.magic: "ModU", noSideEffect.}
+proc `mod`*(x, y: uint16): uint16 {.magic: "ModU", noSideEffect.}
+proc `mod`*(x, y: uint32): uint32 {.magic: "ModU", noSideEffect.}
+proc `mod`*(x, y: uint64): uint64 {.magic: "ModU", noSideEffect.}
+
+proc `+=`*[T: SomeInteger](x: var T, y: T) {.
+  magic: "Inc", noSideEffect.}
+  ## Increments an integer.
+
+proc `-=`*[T: SomeInteger](x: var T, y: T) {.
+  magic: "Dec", noSideEffect.}
+  ## Decrements an integer.
+
+proc `*=`*[T: SomeInteger](x: var T, y: T) {.
+  inline, noSideEffect.} =
+  ## Binary `*=` operator for integers.
+  x = x * y
+
+# floating point operations:
+proc `+`*(x: float32): float32 {.magic: "UnaryPlusF64", noSideEffect.}
+proc `-`*(x: float32): float32 {.magic: "UnaryMinusF64", noSideEffect.}
+proc `+`*(x, y: float32): float32 {.magic: "AddF64", noSideEffect.}
+proc `-`*(x, y: float32): float32 {.magic: "SubF64", noSideEffect.}
+proc `*`*(x, y: float32): float32 {.magic: "MulF64", noSideEffect.}
+proc `/`*(x, y: float32): float32 {.magic: "DivF64", noSideEffect.}
+
+proc `+`*(x: float): float {.magic: "UnaryPlusF64", noSideEffect.}
+proc `-`*(x: float): float {.magic: "UnaryMinusF64", noSideEffect.}
+proc `+`*(x, y: float): float {.magic: "AddF64", noSideEffect.}
+proc `-`*(x, y: float): float {.magic: "SubF64", noSideEffect.}
+proc `*`*(x, y: float): float {.magic: "MulF64", noSideEffect.}
+proc `/`*(x, y: float): float {.magic: "DivF64", noSideEffect.}
+
+proc `+=`*[T: float|float32|float64] (x: var T, y: T) {.
+  inline, noSideEffect.} =
+  ## Increments in place a floating point number.
+  x = x + y
+
+proc `-=`*[T: float|float32|float64] (x: var T, y: T) {.
+  inline, noSideEffect.} =
+  ## Decrements in place a floating point number.
+  x = x - y
+
+proc `*=`*[T: float|float32|float64] (x: var T, y: T) {.
+  inline, noSideEffect.} =
+  ## Multiplies in place a floating point number.
+  x = x * y
+
+proc `/=`*(x: var float64, y: float64) {.inline, noSideEffect.} =
+  ## Divides in place a floating point number.
+  x = x / y
+
+proc `/=`*[T: float|float32](x: var T, y: T) {.inline, noSideEffect.} =
+  ## Divides in place a floating point number.
+  x = x / y
+
+# the following have to be included in system, not imported for some reason:
+
+proc `+%`*(x, y: int): int {.inline.} =
+  ## Treats `x` and `y` as unsigned and adds them.
+  ##
+  ## The result is truncated to fit into the result type.
+  ## This implements modulo arithmetic. No overflow errors are possible.
+  cast[int](cast[uint](x) + cast[uint](y))
+proc `+%`*(x, y: int8): int8 {.inline.}   = cast[int8](cast[uint8](x) + cast[uint8](y))
+proc `+%`*(x, y: int16): int16 {.inline.} = cast[int16](cast[uint16](x) + cast[uint16](y))
+proc `+%`*(x, y: int32): int32 {.inline.} = cast[int32](cast[uint32](x) + cast[uint32](y))
+proc `+%`*(x, y: int64): int64 {.inline.} = cast[int64](cast[uint64](x) + cast[uint64](y))
+
+proc `-%`*(x, y: int): int {.inline.} =
+  ## Treats `x` and `y` as unsigned and subtracts them.
+  ##
+  ## The result is truncated to fit into the result type.
+  ## This implements modulo arithmetic. No overflow errors are possible.
+  cast[int](cast[uint](x) - cast[uint](y))
+proc `-%`*(x, y: int8): int8 {.inline.}   = cast[int8](cast[uint8](x) - cast[uint8](y))
+proc `-%`*(x, y: int16): int16 {.inline.} = cast[int16](cast[uint16](x) - cast[uint16](y))
+proc `-%`*(x, y: int32): int32 {.inline.} = cast[int32](cast[uint32](x) - cast[uint32](y))
+proc `-%`*(x, y: int64): int64 {.inline.} = cast[int64](cast[uint64](x) - cast[uint64](y))
+
+proc `*%`*(x, y: int): int {.inline.} =
+  ## Treats `x` and `y` as unsigned and multiplies them.
+  ##
+  ## The result is truncated to fit into the result type.
+  ## This implements modulo arithmetic. No overflow errors are possible.
+  cast[int](cast[uint](x) * cast[uint](y))
+proc `*%`*(x, y: int8): int8 {.inline.}   = cast[int8](cast[uint8](x) * cast[uint8](y))
+proc `*%`*(x, y: int16): int16 {.inline.} = cast[int16](cast[uint16](x) * cast[uint16](y))
+proc `*%`*(x, y: int32): int32 {.inline.} = cast[int32](cast[uint32](x) * cast[uint32](y))
+proc `*%`*(x, y: int64): int64 {.inline.} = cast[int64](cast[uint64](x) * cast[uint64](y))
+
+proc `/%`*(x, y: int): int {.inline.} =
+  ## Treats `x` and `y` as unsigned and divides them.
+  ##
+  ## The result is truncated to fit into the result type.
+  ## This implements modulo arithmetic. No overflow errors are possible.
+  cast[int](cast[uint](x) div cast[uint](y))
+proc `/%`*(x, y: int8): int8 {.inline.}   = cast[int8](cast[uint8](x) div cast[uint8](y))
+proc `/%`*(x, y: int16): int16 {.inline.} = cast[int16](cast[uint16](x) div cast[uint16](y))
+proc `/%`*(x, y: int32): int32 {.inline.} = cast[int32](cast[uint32](x) div cast[uint32](y))
+proc `/%`*(x, y: int64): int64 {.inline.} = cast[int64](cast[uint64](x) div cast[uint64](y))
+
+proc `%%`*(x, y: int): int {.inline.} =
+  ## Treats `x` and `y` as unsigned and computes the modulo of `x` and `y`.
+  ##
+  ## The result is truncated to fit into the result type.
+  ## This implements modulo arithmetic. No overflow errors are possible.
+  cast[int](cast[uint](x) mod cast[uint](y))
+proc `%%`*(x, y: int8): int8 {.inline.}   = cast[int8](cast[uint8](x) mod cast[uint8](y))
+proc `%%`*(x, y: int16): int16 {.inline.} = cast[int16](cast[uint16](x) mod cast[uint16](y))
+proc `%%`*(x, y: int32): int32 {.inline.} = cast[int32](cast[uint32](x) mod cast[uint32](y))
+proc `%%`*(x, y: int64): int64 {.inline.} = cast[int64](cast[uint64](x) mod cast[uint64](y))
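
A minimal sketch of how the unsigned `div`/`mod` procs and the wrap-around `%` operators above behave, assuming a 64-bit `int` (the concrete values are illustrative only):

```Nim
# The `%` operators reinterpret signed operands as unsigned, so they wrap
# around instead of raising an overflow defect.
doAssert(high(int) +% 1 == low(int))    # wrap-around addition
doAssert(-1 %% 4 == 3)                  # -1 is treated as the largest unsigned value
doAssert(7'u32 div 2'u32 == 3'u32)      # truncating division
doAssert(7'u32 mod 2'u32 == 1'u32)      # same as 7 - (7 div 2) * 2
```
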
diff --git a/lib/system/assign.nim b/lib/system/assign.nim
index 16b56aba7..9f4cbc0fe 100644
--- a/lib/system/assign.nim
+++ b/lib/system/assign.nim
@@ -7,14 +7,16 @@
 #    distribution, for details about the copyright.
 #
 
+include seqs_v2_reimpl
+
 proc genericResetAux(dest: pointer, n: ptr TNimNode) {.benign.}
 
 proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) {.benign.}
 proc genericAssignAux(dest, src: pointer, n: ptr TNimNode,
                       shallow: bool) {.benign.} =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   case n.kind
   of nkSlot:
     genericAssignAux(cast[pointer](d +% n.offset),
@@ -38,46 +40,67 @@ proc genericAssignAux(dest, src: pointer, n: ptr TNimNode,
   #  echo "ugh memory corruption! ", n.kind
   #  quit 1
 
+template deepSeqAssignImpl(operation, additionalArg) {.dirty.} =
+  var d = cast[ptr NimSeqV2Reimpl](dest)
+  var s = cast[ptr NimSeqV2Reimpl](src)
+  d.len = s.len
+  let elem = mt.base
+  d.p = cast[ptr NimSeqPayloadReimpl](newSeqPayload(s.len, elem.size, elem.align))
+
+  let bs = elem.size
+  let ba = elem.align
+  let headerSize = align(sizeof(NimSeqPayloadBase), ba)
+
+  for i in 0..d.len-1:
+    operation(d.p +! (headerSize+i*bs), s.p +! (headerSize+i*bs), mt.base, additionalArg)
+
 proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   sysAssert(mt != nil, "genericAssignAux 2")
   case mt.kind
   of tyString:
-    var x = cast[PPointer](dest)
-    var s2 = cast[PPointer](s)[]
-    if s2 == nil or shallow or (
-        cast[PGenericSeq](s2).reserved and seqShallowFlag) != 0:
-      unsureAsgnRef(x, s2)
+    when defined(nimSeqsV2):
+      var x = cast[ptr NimStringV2](dest)
+      var s2 = cast[ptr NimStringV2](s)[]
+      nimAsgnStrV2(x[], s2)
     else:
-      unsureAsgnRef(x, copyString(cast[NimString](s2)))
+      var x = cast[PPointer](dest)
+      var s2 = cast[PPointer](s)[]
+      if s2 == nil or shallow or (
+          cast[PGenericSeq](s2).reserved and seqShallowFlag) != 0:
+        unsureAsgnRef(x, s2)
+      else:
+        unsureAsgnRef(x, copyString(cast[NimString](s2)))
   of tySequence:
-    var s2 = cast[PPointer](src)[]
-    var seq = cast[PGenericSeq](s2)
-    var x = cast[PPointer](dest)
-    if s2 == nil or shallow or (seq.reserved and seqShallowFlag) != 0:
-      # this can happen! nil sequences are allowed
-      unsureAsgnRef(x, s2)
-      return
-    sysAssert(dest != nil, "genericAssignAux 3")
-    if ntfNoRefs in mt.base.flags:
-      var ss = nimNewSeqOfCap(mt, seq.len)
-      cast[PGenericSeq](ss).len = seq.len
-      unsureAsgnRef(x, ss)
-      var dst = cast[ByteAddress](cast[PPointer](dest)[])
-      copyMem(cast[pointer](dst +% GenericSeqSize),
-              cast[pointer](cast[ByteAddress](s2) +% GenericSeqSize),
-              seq.len * mt.base.size)
+    when defined(nimSeqsV2):
+      deepSeqAssignImpl(genericAssignAux, shallow)
     else:
-      unsureAsgnRef(x, newSeq(mt, seq.len))
-      var dst = cast[ByteAddress](cast[PPointer](dest)[])
-      for i in 0..seq.len-1:
-        genericAssignAux(
-          cast[pointer](dst +% i *% mt.base.size +% GenericSeqSize),
-          cast[pointer](cast[ByteAddress](s2) +% i *% mt.base.size +%
-                      GenericSeqSize),
-          mt.base, shallow)
+      var s2 = cast[PPointer](src)[]
+      var seq = cast[PGenericSeq](s2)
+      var x = cast[PPointer](dest)
+      if s2 == nil or shallow or (seq.reserved and seqShallowFlag) != 0:
+        # this can happen! nil sequences are allowed
+        unsureAsgnRef(x, s2)
+        return
+      sysAssert(dest != nil, "genericAssignAux 3")
+      if ntfNoRefs in mt.base.flags:
+        var ss = nimNewSeqOfCap(mt, seq.len)
+        cast[PGenericSeq](ss).len = seq.len
+        unsureAsgnRef(x, ss)
+        var dst = cast[int](cast[PPointer](dest)[])
+        copyMem(cast[pointer](dst +% align(GenericSeqSize, mt.base.align)),
+                cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align)),
+                seq.len *% mt.base.size)
+      else:
+        unsureAsgnRef(x, newSeq(mt, seq.len))
+        var dst = cast[int](cast[PPointer](dest)[])
+        for i in 0..seq.len-1:
+          genericAssignAux(
+            cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size ),
+            cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size ),
+            mt.base, shallow)
   of tyObject:
     var it = mt.base
     # don't use recursion here on the PNimType because the subtype
@@ -88,14 +111,23 @@ proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) =
     genericAssignAux(dest, src, mt.node, shallow)
     # we need to copy m_type field for tyObject, as it could be empty for
     # sequence reallocations:
-    var pint = cast[ptr PNimType](dest)
-    # We need to copy the *static* type not the dynamic type:
-    #   if p of TB:
-    #     var tbObj = TB(p)
-    #     tbObj of TC # needs to be false!
-    #c_fprintf(stdout, "%s %s\n", pint[].name, mt.name)
-    chckObjAsgn(cast[ptr PNimType](src)[], mt)
-    pint[] = mt # cast[ptr PNimType](src)[]
+    when defined(nimSeqsV2):
+      var pint = cast[ptr PNimTypeV2](dest)
+      #chckObjAsgn(cast[ptr PNimTypeV2](src)[].typeInfoV2, mt)
+      pint[] = cast[PNimTypeV2](mt.typeInfoV2)
+    else:
+      var pint = cast[ptr PNimType](dest)
+      # We need to copy the *static* type not the dynamic type:
+      #   if p of TB:
+      #     var tbObj = TB(p)
+      #     tbObj of TC # needs to be false!
+      #c_fprintf(stdout, "%s %s\n", pint[].name, mt.name)
+      let srcType = cast[ptr PNimType](src)[]
+      if srcType != nil:
+        # `!= nil` needed because of cases where object is not initialized properly (see bug #16706)
+        # note that you can have `srcType == nil` yet `src != nil`
+        chckObjAsgn(srcType, mt)
+      pint[] = mt # cast[ptr PNimType](src)[]
   of tyTuple:
     genericAssignAux(dest, src, mt.node, shallow)
   of tyArray, tyArrayConstr:
@@ -104,26 +136,13 @@ proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) =
                        cast[pointer](s +% i *% mt.base.size), mt.base, shallow)
   of tyRef:
     unsureAsgnRef(cast[PPointer](dest), cast[PPointer](s)[])
-  of tyOptAsRef:
-    let s2 = cast[PPointer](src)[]
-    let d = cast[PPointer](dest)
-    if s2 == nil:
-      unsureAsgnRef(d, s2)
-    else:
-      when declared(usrToCell):
-        let realType = usrToCell(s2).typ
-      else:
-        let realType = if mt.base.kind == tyObject: cast[ptr PNimType](s2)[]
-                       else: mt.base
-      var z = newObj(realType, realType.base.size)
-      genericAssignAux(d, addr z, mt.base, shallow)
   else:
     copyMem(dest, src, mt.size) # copy raw bits
 
-proc genericAssign(dest, src: pointer, mt: PNimType) {.compilerProc.} =
+proc genericAssign(dest, src: pointer, mt: PNimType) {.compilerproc.} =
   genericAssignAux(dest, src, mt, false)
 
-proc genericShallowAssign(dest, src: pointer, mt: PNimType) {.compilerProc.} =
+proc genericShallowAssign(dest, src: pointer, mt: PNimType) {.compilerproc.} =
   genericAssignAux(dest, src, mt, true)
 
 when false:
@@ -143,35 +162,34 @@ when false:
     of tyPtr: k = "ptr"
     of tyRef: k = "ref"
     of tyVar: k = "var"
-    of tyOptAsRef: k = "optref"
     of tySequence: k = "seq"
     of tyProc: k = "proc"
     of tyPointer: k = "range"
     of tyOpenArray: k = "openarray"
     of tyString: k = "string"
-    of tyCString: k = "cstring"
+    of tyCstring: k = "cstring"
     of tyInt: k = "int"
     of tyInt32: k = "int32"
     else: k = "other"
     cprintf("%s %ld\n", k, t.size)
     debugNimType(t.base)
 
-proc genericSeqAssign(dest, src: pointer, mt: PNimType) {.compilerProc.} =
+proc genericSeqAssign(dest, src: pointer, mt: PNimType) {.compilerproc.} =
   var src = src # ugly, but I like to stress the parser sometimes :-)
   genericAssign(dest, addr(src), mt)
 
 proc genericAssignOpenArray(dest, src: pointer, len: int,
                             mt: PNimType) {.compilerproc.} =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   for i in 0..len-1:
     genericAssign(cast[pointer](d +% i *% mt.base.size),
                   cast[pointer](s +% i *% mt.base.size), mt.base)
 
-proc objectInit(dest: pointer, typ: PNimType) {.compilerProc, benign.}
+proc objectInit(dest: pointer, typ: PNimType) {.compilerproc, benign.}
 proc objectInitAux(dest: pointer, n: ptr TNimNode) {.benign.} =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case n.kind
   of nkNone: sysAssert(false, "objectInitAux")
   of nkSlot: objectInit(cast[pointer](d +% n.offset), n.typ)
@@ -185,13 +203,17 @@ proc objectInitAux(dest: pointer, n: ptr TNimNode) {.benign.} =
 proc objectInit(dest: pointer, typ: PNimType) =
   # the generic init proc that takes care of initialization of complex
   # objects on the stack or heap
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case typ.kind
   of tyObject:
     # iterate over any structural type
     # here we have to init the type field:
-    var pint = cast[ptr PNimType](dest)
-    pint[] = typ
+    when defined(nimSeqsV2):
+      var pint = cast[ptr PNimTypeV2](dest)
+      pint[] = cast[PNimTypeV2](typ.typeInfoV2)
+    else:
+      var pint = cast[ptr PNimType](dest)
+      pint[] = typ
     objectInitAux(dest, typ.node)
   of tyTuple:
     objectInitAux(dest, typ.node)
@@ -202,14 +224,9 @@ proc objectInit(dest: pointer, typ: PNimType) =
 
 # ---------------------- assign zero -----------------------------------------
 
-proc nimDestroyRange[T](r: T) {.compilerProc.} =
-  # internal proc used for destroying sequences and arrays
-  mixin `=destroy`
-  for i in countup(0, r.len - 1): `=destroy`(r[i])
-
-proc genericReset(dest: pointer, mt: PNimType) {.compilerProc, benign.}
+proc genericReset(dest: pointer, mt: PNimType) {.compilerproc, benign.}
 proc genericResetAux(dest: pointer, n: ptr TNimNode) =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case n.kind
   of nkNone: sysAssert(false, "genericResetAux")
   of nkSlot: genericReset(cast[pointer](d +% n.offset), n.typ)
@@ -221,18 +238,35 @@ proc genericResetAux(dest: pointer, n: ptr TNimNode) =
     zeroMem(cast[pointer](d +% n.offset), n.typ.size)
 
 proc genericReset(dest: pointer, mt: PNimType) =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   sysAssert(mt != nil, "genericReset 2")
   case mt.kind
-  of tyString, tyRef, tyOptAsRef, tySequence:
+  of tyRef:
     unsureAsgnRef(cast[PPointer](dest), nil)
+  of tyString:
+    when defined(nimSeqsV2):
+      var s = cast[ptr NimStringV2](dest)
+      frees(s[])
+      zeroMem(dest, mt.size)
+    else:
+      unsureAsgnRef(cast[PPointer](dest), nil)
+  of tySequence:
+    when defined(nimSeqsV2):
+      frees(cast[ptr NimSeqV2Reimpl](dest)[])
+      zeroMem(dest, mt.size)
+    else:
+      unsureAsgnRef(cast[PPointer](dest), nil)
   of tyTuple:
     genericResetAux(dest, mt.node)
   of tyObject:
     genericResetAux(dest, mt.node)
     # also reset the type field for tyObject, for correct branch switching!
-    var pint = cast[ptr PNimType](dest)
-    pint[] = nil
+    when defined(nimSeqsV2):
+      var pint = cast[ptr PNimTypeV2](dest)
+      pint[] = nil
+    else:
+      var pint = cast[ptr PNimType](dest)
+      pint[] = nil
   of tyArray, tyArrayConstr:
     for i in 0..(mt.size div mt.base.size)-1:
       genericReset(cast[pointer](d +% i *% mt.base.size), mt.base)
@@ -241,15 +275,24 @@ proc genericReset(dest: pointer, mt: PNimType) =
 
 proc selectBranch(discVal, L: int,
                   a: ptr array[0x7fff, ptr TNimNode]): ptr TNimNode =
-  result = a[L] # a[L] contains the ``else`` part (but may be nil)
   if discVal <% L:
-    var x = a[discVal]
-    if x != nil: result = x
+    result = a[discVal]
+    if result == nil:
+      result = a[L]
+  else:
+    result = a[L] # a[L] contains the ``else`` part (but may be nil)
 
 proc FieldDiscriminantCheck(oldDiscVal, newDiscVal: int,
                             a: ptr array[0x7fff, ptr TNimNode],
-                            L: int) {.compilerProc.} =
-  var oldBranch = selectBranch(oldDiscVal, L, a)
-  var newBranch = selectBranch(newDiscVal, L, a)
-  if newBranch != oldBranch and oldDiscVal != 0:
-    sysFatal(FieldError, "assignment to discriminant changes object branch")
+                            L: int) {.compilerproc.} =
+  let oldBranch = selectBranch(oldDiscVal, L, a)
+  let newBranch = selectBranch(newDiscVal, L, a)
+  when defined(nimOldCaseObjects):
+    if newBranch != oldBranch and oldDiscVal != 0:
+      sysFatal(FieldDefect, "assignment to discriminant changes object branch")
+  else:
+    if newBranch != oldBranch:
+      if oldDiscVal != 0:
+        sysFatal(FieldDefect, "assignment to discriminant changes object branch")
+      else:
+        sysFatal(FieldDefect, "assignment to discriminant changes object branch; compile with -d:nimOldCaseObjects for a transition period")
diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim
deleted file mode 100644
index 56ebde823..000000000
--- a/lib/system/atomics.nim
+++ /dev/null
@@ -1,317 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2015 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-# Atomic operations for Nim.
-{.push stackTrace:off.}
-
-const someGcc = defined(gcc) or defined(llvm_gcc) or defined(clang)
-
-when someGcc and hasThreadSupport:
-  type AtomMemModel* = distinct cint
-
-  var ATOMIC_RELAXED* {.importc: "__ATOMIC_RELAXED", nodecl.}: AtomMemModel
-    ## No barriers or synchronization.
-  var ATOMIC_CONSUME* {.importc: "__ATOMIC_CONSUME", nodecl.}: AtomMemModel
-    ## Data dependency only for both barrier and
-    ## synchronization with another thread.
-  var ATOMIC_ACQUIRE* {.importc: "__ATOMIC_ACQUIRE", nodecl.}: AtomMemModel
-    ## Barrier to hoisting of code and synchronizes with
-    ## release (or stronger)
-    ## semantic stores from another thread.
-  var ATOMIC_RELEASE* {.importc: "__ATOMIC_RELEASE", nodecl.}: AtomMemModel
-    ## Barrier to sinking of code and synchronizes with
-    ## acquire (or stronger)
-    ## semantic loads from another thread.
-  var ATOMIC_ACQ_REL* {.importc: "__ATOMIC_ACQ_REL", nodecl.}: AtomMemModel
-    ## Full barrier in both directions and synchronizes
-    ## with acquire loads
-    ## and release stores in another thread.
-  var ATOMIC_SEQ_CST* {.importc: "__ATOMIC_SEQ_CST", nodecl.}: AtomMemModel
-    ## Full barrier in both directions and synchronizes
-    ## with acquire loads
-    ## and release stores in all threads.
-
-  type
-    AtomType* = SomeNumber|pointer|ptr|char|bool
-      ## Type Class representing valid types for use with atomic procs
-  {.deprecated: [TAtomType: AtomType].}
-
-  proc atomicLoadN*[T: AtomType](p: ptr T, mem: AtomMemModel): T {.
-    importc: "__atomic_load_n", nodecl.}
-    ## This proc implements an atomic load operation. It returns the contents at p.
-    ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_ACQUIRE, ATOMIC_CONSUME.
-
-  proc atomicLoad*[T: AtomType](p, ret: ptr T, mem: AtomMemModel) {.
-    importc: "__atomic_load", nodecl.}
-    ## This is the generic version of an atomic load. It returns the contents at p in ret.
-
-  proc atomicStoreN*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel) {.
-    importc: "__atomic_store_n", nodecl.}
-    ## This proc implements an atomic store operation. It writes val at p.
-    ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, and ATOMIC_RELEASE.
-
-  proc atomicStore*[T: AtomType](p, val: ptr T, mem: AtomMemModel) {.
-    importc: "__atomic_store", nodecl.}
-    ## This is the generic version of an atomic store. It stores the value of val at p
-
-  proc atomicExchangeN*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_exchange_n", nodecl.}
-    ## This proc implements an atomic exchange operation. It writes val at p,
-    ## and returns the previous contents at p.
-    ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_ACQUIRE, ATOMIC_RELEASE, ATOMIC_ACQ_REL
-
-  proc atomicExchange*[T: AtomType](p, val, ret: ptr T, mem: AtomMemModel) {.
-    importc: "__atomic_exchange", nodecl.}
-    ## This is the generic version of an atomic exchange. It stores the contents at val at p.
-    ## The original value at p is copied into ret.
-
-  proc atomicCompareExchangeN*[T: AtomType](p, expected: ptr T, desired: T,
-    weak: bool, success_memmodel: AtomMemModel, failure_memmodel: AtomMemModel): bool {.
-    importc: "__atomic_compare_exchange_n ", nodecl.}
-    ## This proc implements an atomic compare and exchange operation. This compares the
-    ## contents at p with the contents at expected and if equal, writes desired at p.
-    ## If they are not equal, the current contents at p is written into expected.
-    ## Weak is true for weak compare_exchange, and false for the strong variation.
-    ## Many targets only offer the strong variation and ignore the parameter.
-    ## When in doubt, use the strong variation.
-    ## True is returned if desired is written at p and the execution is considered
-    ## to conform to the memory model specified by success_memmodel. There are no
-    ## restrictions on what memory model can be used here. False is returned otherwise,
-    ## and the execution is considered to conform to failure_memmodel. This memory model
-    ## cannot be __ATOMIC_RELEASE nor __ATOMIC_ACQ_REL. It also cannot be a stronger model
-    ## than that specified by success_memmodel.
-
-  proc atomicCompareExchange*[T: AtomType](p, expected, desired: ptr T,
-    weak: bool, success_memmodel: AtomMemModel, failure_memmodel: AtomMemModel): bool {.
-    importc: "__atomic_compare_exchange", nodecl.}
-    ## This proc implements the generic version of atomic_compare_exchange.
-    ## The proc is virtually identical to atomic_compare_exchange_n, except the desired
-    ## value is also a pointer.
-
-  ## Perform the operation return the new value, all memory models are valid
-  proc atomicAddFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_add_fetch", nodecl.}
-  proc atomicSubFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_sub_fetch", nodecl.}
-  proc atomicOrFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_or_fetch ", nodecl.}
-  proc atomicAndFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_and_fetch", nodecl.}
-  proc atomicXorFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_xor_fetch", nodecl.}
-  proc atomicNandFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_nand_fetch ", nodecl.}
-
-  ## Perform the operation return the old value, all memory models are valid
-  proc atomicFetchAdd*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_fetch_add", nodecl.}
-  proc atomicFetchSub*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_fetch_sub", nodecl.}
-  proc atomicFetchOr*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_fetch_or", nodecl.}
-  proc atomicFetchAnd*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_fetch_and", nodecl.}
-  proc atomicFetchXor*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_fetch_xor", nodecl.}
-  proc atomicFetchNand*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_fetch_nand", nodecl.}
-
-  proc atomicTestAndSet*(p: pointer, mem: AtomMemModel): bool {.
-    importc: "__atomic_test_and_set", nodecl.}
-    ## This built-in function performs an atomic test-and-set operation on the byte at p.
-    ## The byte is set to some implementation defined nonzero “set” value and the return
-    ## value is true if and only if the previous contents were “set”.
-    ## All memory models are valid.
-
-  proc atomicClear*(p: pointer, mem: AtomMemModel) {.
-    importc: "__atomic_clear", nodecl.}
-    ## This built-in function performs an atomic clear operation at p.
-    ## After the operation, at p contains 0.
-    ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_RELEASE
-
-  proc atomicThreadFence*(mem: AtomMemModel) {.
-    importc: "__atomic_thread_fence", nodecl.}
-    ## This built-in function acts as a synchronization fence between threads based
-    ## on the specified memory model. All memory orders are valid.
-
-  proc atomicSignalFence*(mem: AtomMemModel) {.
-    importc: "__atomic_signal_fence", nodecl.}
-    ## This built-in function acts as a synchronization fence between a thread and
-    ## signal handlers based in the same thread. All memory orders are valid.
-
-  proc atomicAlwaysLockFree*(size: int, p: pointer): bool {.
-    importc: "__atomic_always_lock_free", nodecl.}
-    ## This built-in function returns true if objects of size bytes always generate
-    ## lock free atomic instructions for the target architecture. size must resolve
-    ## to a compile-time constant and the result also resolves to a compile-time constant.
-    ## ptr is an optional pointer to the object that may be used to determine alignment.
-    ## A value of 0 indicates typical alignment should be used. The compiler may also
-    ## ignore this parameter.
-
-  proc atomicIsLockFree*(size: int, p: pointer): bool {.
-    importc: "__atomic_is_lock_free", nodecl.}
-    ## This built-in function returns true if objects of size bytes always generate
-    ## lock free atomic instructions for the target architecture. If it is not known
-    ## to be lock free a call is made to a runtime routine named __atomic_is_lock_free.
-    ## ptr is an optional pointer to the object that may be used to determine alignment.
-    ## A value of 0 indicates typical alignment should be used. The compiler may also
-    ## ignore this parameter.
-
-  template fence*() = atomicThreadFence(ATOMIC_SEQ_CST)
-elif defined(vcc) and hasThreadSupport:
-  when defined(cpp):
-    when sizeof(int) == 8:
-      proc addAndFetch*(p: ptr int, val: int): int {.
-        importcpp: "_InterlockedExchangeAdd64(static_cast<NI volatile *>(#), #)",
-        header: "<intrin.h>".}
-    else:
-      proc addAndFetch*(p: ptr int, val: int): int {.
-        importcpp: "_InterlockedExchangeAdd(reinterpret_cast<LONG volatile *>(#), static_cast<LONG>(#))",
-        header: "<intrin.h>".}
-  else:
-    when sizeof(int) == 8:
-      proc addAndFetch*(p: ptr int, val: int): int {.
-        importc: "_InterlockedExchangeAdd64", header: "<intrin.h>".}
-    else:
-      proc addAndFetch*(p: ptr int, val: int): int {.
-        importc: "_InterlockedExchangeAdd", header: "<intrin.h>".}
-
-  proc fence*() {.importc: "_ReadWriteBarrier", header: "<intrin.h>".}
-
-else:
-  proc addAndFetch*(p: ptr int, val: int): int {.inline.} =
-    inc(p[], val)
-    result = p[]
-
-proc atomicInc*(memLoc: var int, x: int = 1): int =
-  when someGcc and hasThreadSupport:
-    result = atomic_add_fetch(memLoc.addr, x, ATOMIC_RELAXED)
-  elif defined(vcc) and hasThreadSupport:
-    result = addAndFetch(memLoc.addr, x)
-    inc(result, x)
-  else:
-    inc(memLoc, x)
-    result = memLoc
-
-proc atomicDec*(memLoc: var int, x: int = 1): int =
-  when someGcc and hasThreadSupport:
-    when declared(atomic_sub_fetch):
-      result = atomic_sub_fetch(memLoc.addr, x, ATOMIC_RELAXED)
-    else:
-      result = atomic_add_fetch(memLoc.addr, -x, ATOMIC_RELAXED)
-  elif defined(vcc) and hasThreadSupport:
-    result = addAndFetch(memLoc.addr, -x)
-    dec(result, x)
-  else:
-    dec(memLoc, x)
-    result = memLoc
-
-when defined(vcc):
-  when defined(cpp):
-    proc interlockedCompareExchange64(p: pointer; exchange, comparand: int64): int64
-      {.importcpp: "_InterlockedCompareExchange64(static_cast<NI64 volatile *>(#), #, #)", header: "<intrin.h>".}
-    proc interlockedCompareExchange32(p: pointer; exchange, comparand: int32): int32
-      {.importcpp: "_InterlockedCompareExchange(static_cast<NI volatile *>(#), #, #)", header: "<intrin.h>".}
-    proc interlockedCompareExchange8(p: pointer; exchange, comparand: byte): byte
-      {.importcpp: "_InterlockedCompareExchange8(static_cast<char volatile *>(#), #, #)", header: "<intrin.h>".}
-  else:
-    proc interlockedCompareExchange64(p: pointer; exchange, comparand: int64): int64
-      {.importc: "_InterlockedCompareExchange64", header: "<intrin.h>".}
-    proc interlockedCompareExchange32(p: pointer; exchange, comparand: int32): int32
-      {.importc: "_InterlockedCompareExchange", header: "<intrin.h>".}
-    proc interlockedCompareExchange8(p: pointer; exchange, comparand: byte): byte
-      {.importc: "_InterlockedCompareExchange8", header: "<intrin.h>".}
-
-  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
-    when sizeof(T) == 8:
-      interlockedCompareExchange64(p, cast[int64](newValue), cast[int64](oldValue)) ==
-        cast[int64](oldValue)
-    elif sizeof(T) == 4:
-      interlockedCompareExchange32(p, cast[int32](newValue), cast[int32](oldValue)) ==
-        cast[int32](oldValue)
-    elif sizeof(T) == 1:
-      interlockedCompareExchange8(p, cast[byte](newValue), cast[byte](oldValue)) ==
-        cast[byte](oldValue)
-    else:
-      {.error: "invalid CAS instruction".}
-
-elif defined(tcc):
-  when defined(amd64):
-    {.emit:"""
-static int __tcc_cas(int *ptr, int oldVal, int newVal)
-{
-    unsigned char ret;
-    __asm__ __volatile__ (
-            "  lock\n"
-            "  cmpxchgq %2,%1\n"
-            "  sete %0\n"
-            : "=q" (ret), "=m" (*ptr)
-            : "r" (newVal), "m" (*ptr), "a" (oldVal)
-            : "memory");
-
-    if (ret)
-      return 0;
-    else
-      return 1;
-}
-""".}
-  else:
-    #assert sizeof(int) == 4
-    {.emit:"""
-static int __tcc_cas(int *ptr, int oldVal, int newVal)
-{
-    unsigned char ret;
-    __asm__ __volatile__ (
-            "  lock\n"
-            "  cmpxchgl %2,%1\n"
-            "  sete %0\n"
-            : "=q" (ret), "=m" (*ptr)
-            : "r" (newVal), "m" (*ptr), "a" (oldVal)
-            : "memory");
-
-    if (ret)
-      return 0;
-    else
-      return 1;
-}
-""".}
-
-  proc tcc_cas(p: ptr int; oldValue, newValue: int): bool
-    {.importc: "__tcc_cas", nodecl.}
-  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
-    tcc_cas(cast[ptr int](p), cast[int](oldValue), cast[int](newValue))
-else:
-  # this is valid for GCC and Intel C++
-  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool
-    {.importc: "__sync_bool_compare_and_swap", nodecl.}
-  # XXX is this valid for 'int'?
-
-
-when (defined(x86) or defined(amd64)) and defined(vcc):
-  proc cpuRelax* {.importc: "YieldProcessor", header: "<windows.h>".}
-elif (defined(x86) or defined(amd64)) and (someGcc or defined(bcc)):
-  proc cpuRelax* {.inline.} =
-    {.emit: """asm volatile("pause" ::: "memory");""".}
-elif someGcc or defined(tcc):
-  proc cpuRelax* {.inline.} =
-    {.emit: """asm volatile("" ::: "memory");""".}
-elif defined(icl):
-  proc cpuRelax* {.importc: "_mm_pause", header: "xmmintrin.h".}
-elif false:
-  from os import sleep
-
-  proc cpuRelax* {.inline.} = os.sleep(1)
-
-when not declared(fence) and hasThreadSupport:
-  # XXX fixme
-  proc fence*() {.inline.} =
-    var dummy: bool
-    discard cas(addr dummy, false, true)
-
-{.pop.}
diff --git a/lib/system/basic_types.nim b/lib/system/basic_types.nim
new file mode 100644
index 000000000..bf81b9b6a
--- /dev/null
+++ b/lib/system/basic_types.nim
@@ -0,0 +1,94 @@
+type
+  int* {.magic: Int.}         ## Default integer type; bitwidth depends on
+                              ## architecture, but is always the same as a pointer.
+  int8* {.magic: Int8.}       ## Signed 8 bit integer type.
+  int16* {.magic: Int16.}     ## Signed 16 bit integer type.
+  int32* {.magic: Int32.}     ## Signed 32 bit integer type.
+  int64* {.magic: Int64.}     ## Signed 64 bit integer type.
+  uint* {.magic: UInt.}       ## Unsigned default integer type.
+  uint8* {.magic: UInt8.}     ## Unsigned 8 bit integer type.
+  uint16* {.magic: UInt16.}   ## Unsigned 16 bit integer type.
+  uint32* {.magic: UInt32.}   ## Unsigned 32 bit integer type.
+  uint64* {.magic: UInt64.}   ## Unsigned 64 bit integer type.
+
+type
+  float* {.magic: Float.}     ## Default floating point type.
+  float32* {.magic: Float32.} ## 32 bit floating point type.
+  float64* {.magic: Float.}   ## 64 bit floating point type.
+
+# 'float64' is now an alias to 'float'; this solves many problems
+
+type
+  char* {.magic: Char.}         ## Built-in 8 bit character type (unsigned).
+  string* {.magic: String.}     ## Built-in string type.
+  cstring* {.magic: Cstring.}   ## Built-in cstring (*compatible string*) type.
+  pointer* {.magic: Pointer.}   ## Built-in pointer type, use the `addr`
+                                ## operator to get a pointer to a variable.
+
+  typedesc* {.magic: TypeDesc.} ## Meta type to denote a type description.
+
+type
+  `ptr`*[T] {.magic: Pointer.}   ## Built-in generic untraced pointer type.
+  `ref`*[T] {.magic: Pointer.}   ## Built-in generic traced pointer type.
+
+  `nil` {.magic: "Nil".}
+
+  void* {.magic: "VoidType".}    ## Meta type to denote the absence of any type.
+  auto* {.magic: Expr.}          ## Meta type for automatic type determination.
+  any* {.deprecated: "Deprecated since v1.5; Use auto instead.".} = distinct auto  ## Deprecated; Use `auto` instead. See https://github.com/nim-lang/RFCs/issues/281
+  untyped* {.magic: Expr.}       ## Meta type to denote an expression that
+                                 ## is not resolved (for templates).
+  typed* {.magic: Stmt.}         ## Meta type to denote an expression that
+                                 ## is resolved (for templates).
+
+type # we need to start a new type section here, so that ``0`` can have a type
+  bool* {.magic: "Bool".} = enum ## Built-in boolean type.
+    false = 0, true = 1
+
+const
+  on* = true    ## Alias for `true`.
+  off* = false  ## Alias for `false`.
+
+type
+  SomeSignedInt* = int|int8|int16|int32|int64
+    ## Type class matching all signed integer types.
+
+  SomeUnsignedInt* = uint|uint8|uint16|uint32|uint64
+    ## Type class matching all unsigned integer types.
+
+  SomeInteger* = SomeSignedInt|SomeUnsignedInt
+    ## Type class matching all integer types.
+
+  SomeFloat* = float|float32|float64
+    ## Type class matching all floating point number types.
+
+  SomeNumber* = SomeInteger|SomeFloat
+    ## Type class matching all number types.
+
+  SomeOrdinal* = int|int8|int16|int32|int64|bool|enum|uint|uint8|uint16|uint32|uint64
+    ## Type class matching all ordinal types; however, this includes enums with
+    ## holes. See also `Ordinal`.
+
+
+{.push warning[GcMem]: off, warning[Uninit]: off.}
+{.push hints: off.}
+
+proc `not`*(x: bool): bool {.magic: "Not", noSideEffect.}
+  ## Boolean not; returns true if `x == false`.
+
+proc `and`*(x, y: bool): bool {.magic: "And", noSideEffect.}
+  ## Boolean `and`; returns true if `x == y == true` (if both arguments
+  ## are true).
+  ##
+  ## Evaluation is lazy: if `x` is false, `y` will not even be evaluated.
+proc `or`*(x, y: bool): bool {.magic: "Or", noSideEffect.}
+  ## Boolean `or`; returns true if `not (not x and not y)` (if any of
+  ## the arguments is true).
+  ##
+  ## Evaluation is lazy: if `x` is true, `y` will not even be evaluated.
+proc `xor`*(x, y: bool): bool {.magic: "Xor", noSideEffect.}
+  ## Boolean `exclusive or`; returns true if `x != y` (if either argument
+  ## is true while the other is false).
+
+{.pop.}
+{.pop.}
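
The boolean operators above evaluate their second operand lazily, and the type classes can constrain generic procs. A minimal sketch (the `noisy` and `half` helpers are made up for illustration):

```Nim
proc noisy(): bool =
  echo "evaluated"
  result = true

# Lazy evaluation: the right operand is skipped once the result is known.
doAssert(not (false and noisy()))   # prints nothing
doAssert(true or noisy())           # prints nothing
doAssert(true xor false)

# A type class restricts which types a generic proc accepts:
proc half[T: SomeInteger](x: T): T = x div 2
doAssert(half(9) == 4)
```
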
diff --git a/lib/system/bitmasks.nim b/lib/system/bitmasks.nim
new file mode 100644
index 000000000..0663247c2
--- /dev/null
+++ b/lib/system/bitmasks.nim
@@ -0,0 +1,39 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Page size of the system; in most cases 4096 bytes. For exotic OS or
+# CPU this needs to be changed:
+const
+  PageShift = when defined(nimPage256) or defined(cpu16): 3
+              elif defined(nimPage512): 9
+              elif defined(nimPage1k): 10
+              else: 12 # \ # my tests showed no improvements for using larger page sizes.
+
+  PageSize = 1 shl PageShift
+  PageMask = PageSize-1
+
+
+  MemAlign = # also minimal allocatable memory block
+    when defined(nimMemAlignTiny): 4
+    elif defined(useMalloc):
+      when defined(amd64): 16 
+      else: 8
+    else: 16
+
+  BitsPerPage = PageSize div MemAlign
+  UnitsPerPage = BitsPerPage div (sizeof(int)*8)
+    # how many ints do we need to describe a page:
+    # on 32 bit systems this is only 16 (!)
+
+  TrunkShift = 9
+  BitsPerTrunk = 1 shl TrunkShift # needs to be power of 2 and divisible by 64
+  TrunkMask = BitsPerTrunk - 1
+  IntsPerTrunk = BitsPerTrunk div (sizeof(int)*8)
+  IntShift = 5 + ord(sizeof(int) == 8) # 5 or 6, depending on int width
+  IntMask = 1 shl IntShift - 1
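
For orientation, here is a sketch of how the constants above work out on a typical 64-bit target, assuming none of the `nimPage*`, `nimMemAlignTiny` or `useMalloc` switches are defined:

```Nim
# PageShift = 12  ->  PageSize  = 4096, PageMask = 4095
# MemAlign  = 16  ->  BitsPerPage  = 4096 div 16 = 256 bits per page
#                     UnitsPerPage = 256 div 64  = 4 ints per page bitmap
# IntShift  = 6   ->  IntMask = 63
static:
  doAssert(1 shl 12 == 4096)
  doAssert(4096 div 16 == 256)
  doAssert(256 div (8 * 8) == 4)
```
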
diff --git a/lib/system/cellseqs_v1.nim b/lib/system/cellseqs_v1.nim
new file mode 100644
index 000000000..1a305aa42
--- /dev/null
+++ b/lib/system/cellseqs_v1.nim
@@ -0,0 +1,46 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2019 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# ------------------- cell seq handling ---------------------------------------
+
+type
+  PCellArray = ptr UncheckedArray[PCell]
+  CellSeq {.final, pure.} = object
+    len, cap: int
+    d: PCellArray
+
+proc contains(s: CellSeq, c: PCell): bool {.inline.} =
+  for i in 0 ..< s.len:
+    if s.d[i] == c:
+      return true
+  return false
+
+proc resize(s: var CellSeq) =
+  s.cap = s.cap * 3 div 2
+  let d = cast[PCellArray](alloc(s.cap * sizeof(PCell)))
+  copyMem(d, s.d, s.len * sizeof(PCell))
+  dealloc(s.d)
+  s.d = d
+
+proc add(s: var CellSeq, c: PCell) {.inline.} =
+  if s.len >= s.cap:
+    resize(s)
+  s.d[s.len] = c
+  inc(s.len)
+
+proc init(s: var CellSeq, cap: int = 1024) =
+  s.len = 0
+  s.cap = cap
+  s.d = cast[PCellArray](alloc0(cap * sizeof(PCell)))
+
+proc deinit(s: var CellSeq) =
+  dealloc(s.d)
+  s.d = nil
+  s.len = 0
+  s.cap = 0
diff --git a/lib/system/cellseqs_v2.nim b/lib/system/cellseqs_v2.nim
new file mode 100644
index 000000000..c6c7b1a8e
--- /dev/null
+++ b/lib/system/cellseqs_v2.nim
@@ -0,0 +1,53 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2019 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Cell seqs for cyclebreaker and cyclicrefs_v2.
+
+type
+  CellTuple[T] = (T, PNimTypeV2)
+  CellArray[T] = ptr UncheckedArray[CellTuple[T]]
+  CellSeq[T] = object
+    len, cap: int
+    d: CellArray[T]
+
+proc resize[T](s: var CellSeq[T]) =
+  s.cap = s.cap * 3 div 2
+  var newSize = s.cap * sizeof(CellTuple[T])
+  when compileOption("threads"):
+    s.d = cast[CellArray[T]](reallocShared(s.d, newSize))
+  else:
+    s.d = cast[CellArray[T]](realloc(s.d, newSize))
+
+proc add[T](s: var CellSeq[T], c: T, t: PNimTypeV2) {.inline.} =
+  if s.len >= s.cap:
+    s.resize()
+  s.d[s.len] = (c, t)
+  inc(s.len)
+
+proc init[T](s: var CellSeq[T], cap: int = 1024) =
+  s.len = 0
+  s.cap = cap
+  when compileOption("threads"):
+    s.d = cast[CellArray[T]](allocShared(uint(s.cap * sizeof(CellTuple[T]))))
+  else:
+    s.d = cast[CellArray[T]](alloc(s.cap * sizeof(CellTuple[T])))
+
+proc deinit[T](s: var CellSeq[T]) =
+  if s.d != nil:
+    when compileOption("threads"):
+      deallocShared(s.d)
+    else:
+      dealloc(s.d)
+    s.d = nil
+  s.len = 0
+  s.cap = 0
+
+proc pop[T](s: var CellSeq[T]): (T, PNimTypeV2) =
+  result = s.d[s.len-1]
+  dec s.len
diff --git a/lib/system/cellsets.nim b/lib/system/cellsets.nim
index f26cb86ab..92036c226 100644
--- a/lib/system/cellsets.nim
+++ b/lib/system/cellsets.nim
@@ -7,27 +7,69 @@
 #    distribution, for details about the copyright.
 #
 
-# Efficient set of pointers for the GC (and repr)
 
-type
-  RefCount = int
+#[
+
+Efficient set of pointers for the GC (and repr)
+-----------------------------------------------
+
+The GC depends on an extremely efficient datastructure for storing a
+set of pointers - this is called a `CellSet` in the source code.
+Inserting, deleting and searching are done in constant time. However,
+modifying a `CellSet` during traversal leads to undefined behaviour.
+
+All operations on a CellSet have to perform efficiently. Because a CellSet can
+become huge, a hash table alone is not suitable for this.
+
+We use a mixture of bitset and hash table for this. The hash table maps *pages*
+to a page descriptor. The page descriptor contains a bit for any possible cell
+address within this page. So including a cell is done as follows:
 
-  Cell {.pure.} = object
-    refcount: RefCount  # the refcount and some flags
-    typ: PNimType
-    when trackAllocationSource:
-      filename: cstring
-      line: int
-    when useCellIds:
-      id: int
+- Find the page descriptor for the page the cell belongs to.
+- Set the appropriate bit in the page descriptor indicating that the
+  cell points to the start of a memory block.
 
-  PCell = ptr Cell
+Removing a cell is analogous - the bit has to be set to zero.
+Single page descriptors are never deleted from the hash table. This is not
+needed, as the data structure needs to be rebuilt periodically anyway.
 
+Complete traversal is done in this way::
+
+  for each page descriptor d:
+    for each bit in d:
+      if bit == 1:
+        traverse the pointer belonging to this bit
+
+]#
+
+when defined(gcOrc) or defined(gcArc) or defined(gcAtomicArc):
+  type
+    PCell = Cell
+
+  when not declaredInScope(PageShift):
+    include bitmasks
+
+else:
+  type
+    RefCount = int
+
+    Cell {.pure.} = object
+      refcount: RefCount  # the refcount and some flags
+      typ: PNimType
+      when trackAllocationSource:
+        filename: cstring
+        line: int
+      when useCellIds:
+        id: int
+
+    PCell = ptr Cell
+
+type
   PPageDesc = ptr PageDesc
   BitIndex = range[0..UnitsPerPage-1]
   PageDesc {.final, pure.} = object
     next: PPageDesc # all nodes are connected with this pointer
-    key: ByteAddress   # start address at bit 0
+    key: uint   # start address at bit 0
     bits: array[BitIndex, int] # a bit vector
 
   PPageDescArray = ptr UncheckedArray[PPageDesc]
@@ -35,40 +77,11 @@ type
     counter, max: int
     head: PPageDesc
     data: PPageDescArray
-  PCellArray = ptr UncheckedArray[PCell]
-  CellSeq {.final, pure.} = object
-    len, cap: int
-    d: PCellArray
-{.deprecated: [TCell: Cell, TBitIndex: BitIndex, TPageDesc: PageDesc,
-              TRefCount: RefCount, TCellSet: CellSet, TCellSeq: CellSeq].}
-# ------------------- cell seq handling ---------------------------------------
-
-proc contains(s: CellSeq, c: PCell): bool {.inline.} =
-  for i in 0 .. s.len-1:
-    if s.d[i] == c: return true
-  return false
-
-proc add(s: var CellSeq, c: PCell) {.inline.} =
-  if s.len >= s.cap:
-    s.cap = s.cap * 3 div 2
-    var d = cast[PCellArray](alloc(s.cap * sizeof(PCell)))
-    copyMem(d, s.d, s.len * sizeof(PCell))
-    dealloc(s.d)
-    s.d = d
-    # XXX: realloc?
-  s.d[s.len] = c
-  inc(s.len)
-
-proc init(s: var CellSeq, cap: int = 1024) =
-  s.len = 0
-  s.cap = cap
-  s.d = cast[PCellArray](alloc0(cap * sizeof(PCell)))
-
-proc deinit(s: var CellSeq) =
-  dealloc(s.d)
-  s.d = nil
-  s.len = 0
-  s.cap = 0
+
+when defined(gcOrc) or defined(gcArc) or defined(gcAtomicArc):
+  discard
+else:
+  include cellseqs_v1
 
 # ------------------- cell set handling ---------------------------------------
 
@@ -98,7 +111,7 @@ proc nextTry(h, maxHash: int): int {.inline.} =
   # generates each int in range(maxHash) exactly once (see any text on
   # random-number generation for proof).
 
-proc cellSetGet(t: CellSet, key: ByteAddress): PPageDesc =
+proc cellSetGet(t: CellSet, key: uint): PPageDesc =
   var h = cast[int](key) and t.max
   while t.data[h] != nil:
     if t.data[h].key == key: return t.data[h]
@@ -123,7 +136,7 @@ proc cellSetEnlarge(t: var CellSet) =
   dealloc(t.data)
   t.data = n
 
-proc cellSetPut(t: var CellSet, key: ByteAddress): PPageDesc =
+proc cellSetPut(t: var CellSet, key: uint): PPageDesc =
   var h = cast[int](key) and t.max
   while true:
     var x = t.data[h]
@@ -147,33 +160,33 @@ proc cellSetPut(t: var CellSet, key: ByteAddress): PPageDesc =
 # ---------- slightly higher level procs --------------------------------------
 
 proc contains(s: CellSet, cell: PCell): bool =
-  var u = cast[ByteAddress](cell)
+  var u = cast[uint](cell)
   var t = cellSetGet(s, u shr PageShift)
   if t != nil:
-    u = (u %% PageSize) /% MemAlign
+    u = (u mod PageSize) div MemAlign
     result = (t.bits[u shr IntShift] and (1 shl (u and IntMask))) != 0
   else:
     result = false
 
-proc incl(s: var CellSet, cell: PCell) {.noinline.} =
-  var u = cast[ByteAddress](cell)
+proc incl(s: var CellSet, cell: PCell) =
+  var u = cast[uint](cell)
   var t = cellSetPut(s, u shr PageShift)
-  u = (u %% PageSize) /% MemAlign
+  u = (u mod PageSize) div MemAlign
   t.bits[u shr IntShift] = t.bits[u shr IntShift] or (1 shl (u and IntMask))
 
 proc excl(s: var CellSet, cell: PCell) =
-  var u = cast[ByteAddress](cell)
+  var u = cast[uint](cell)
   var t = cellSetGet(s, u shr PageShift)
   if t != nil:
-    u = (u %% PageSize) /% MemAlign
+    u = (u mod PageSize) div MemAlign
     t.bits[u shr IntShift] = (t.bits[u shr IntShift] and
                               not (1 shl (u and IntMask)))
 
 proc containsOrIncl(s: var CellSet, cell: PCell): bool =
-  var u = cast[ByteAddress](cell)
+  var u = cast[uint](cell)
   var t = cellSetGet(s, u shr PageShift)
   if t != nil:
-    u = (u %% PageSize) /% MemAlign
+    u = (u mod PageSize) div MemAlign
     result = (t.bits[u shr IntShift] and (1 shl (u and IntMask))) != 0
     if not result:
       t.bits[u shr IntShift] = t.bits[u shr IntShift] or
@@ -186,15 +199,15 @@ iterator elements(t: CellSet): PCell {.inline.} =
   # while traversing it is forbidden to add pointers to the tree!
   var r = t.head
   while r != nil:
-    var i = 0
-    while i <= high(r.bits):
+    var i: uint = 0
+    while int(i) <= high(r.bits):
       var w = r.bits[i] # taking a copy of r.bits[i] here is correct, because
       # modifying operations are not allowed during traversation
-      var j = 0
+      var j: uint = 0
       while w != 0:         # test all remaining bits for zero
         if (w and 1) != 0:  # the bit is set!
           yield cast[PCell]((r.key shl PageShift) or
-                              (i shl IntShift +% j) *% MemAlign)
+                              (i shl IntShift + j) * MemAlign)
         inc(j)
         w = w shr 1
       inc(i)
@@ -239,16 +252,16 @@ iterator elementsExcept(t, s: CellSet): PCell {.inline.} =
   var r = t.head
   while r != nil:
     let ss = cellSetGet(s, r.key)
-    var i = 0
-    while i <= high(r.bits):
+    var i:uint = 0
+    while int(i) <= high(r.bits):
       var w = r.bits[i]
       if ss != nil:
         w = w and not ss.bits[i]
-      var j = 0
+      var j:uint = 0
       while w != 0:
         if (w and 1) != 0:
           yield cast[PCell]((r.key shl PageShift) or
-                              (i shl IntShift +% j) *% MemAlign)
+                              (i shl IntShift + j) * MemAlign)
         inc(j)
         w = w shr 1
       inc(i)
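
A hypothetical walk-through of the address mapping used by `incl`, `excl` and `contains` above, assuming the usual 64-bit defaults (`PageShift = 12`, `MemAlign = 16`, `IntShift = 6`); the cell address is made up for illustration:

```Nim
let cellAddr = 0x1000_2340'u
let pageKey  = cellAddr shr 12             # 0x10002: key into the page hash table
let slot     = (cellAddr mod 4096) div 16  # 52: cell slot within the page
let word     = slot shr 6                  # 0: index into the page's `bits` array
let bit      = slot and 63                 # 52: bit within that word
doAssert((pageKey, word, bit) == (0x10002'u, 0'u, 52'u))
```
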
diff --git a/lib/system/cgprocs.nim b/lib/system/cgprocs.nim
index 660c68116..9a7645f9b 100644
--- a/lib/system/cgprocs.nim
+++ b/lib/system/cgprocs.nim
@@ -8,14 +8,3 @@
 #
 
 # Headers for procs that the code generator depends on ("compilerprocs")
-
-type
-  LibHandle = pointer       # private type
-  ProcAddr = pointer        # library loading and loading of procs:
-{.deprecated: [TLibHandle: LibHandle, TProcAddr: ProcAddr].}
-
-proc nimLoadLibrary(path: string): LibHandle {.compilerproc.}
-proc nimUnloadLibrary(lib: LibHandle) {.compilerproc.}
-proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr {.compilerproc.}
-
-proc nimLoadLibraryError(path: string) {.compilerproc, noinline.}
diff --git a/lib/system/channels.nim b/lib/system/channels.nim
deleted file mode 100644
index 254b87dfc..000000000
--- a/lib/system/channels.nim
+++ /dev/null
@@ -1,300 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2015 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## Channel support for threads. **Note**: This is part of the system module.
-## Do not import it directly. To activate thread support you need to compile
-## with the ``--threads:on`` command line switch.
-##
-## **Note:** The current implementation of message passing does
-## not work with cyclic data structures.
-## **Note:** Channels cannot be passed between threads. Use globals or pass
-## them by `ptr`.
-
-when not declared(NimString):
-  {.error: "You must not import this module explicitly".}
-
-type
-  pbytes = ptr array[0.. 0xffff, byte]
-  RawChannel {.pure, final.} = object ## msg queue for a thread
-    rd, wr, count, mask, maxItems: int
-    data: pbytes
-    lock: SysLock
-    cond: SysCond
-    elemType: PNimType
-    ready: bool
-    region: MemRegion
-  PRawChannel = ptr RawChannel
-  LoadStoreMode = enum mStore, mLoad
-  Channel* {.gcsafe.}[TMsg] = RawChannel ## a channel for thread communication
-
-const ChannelDeadMask = -2
-
-proc initRawChannel(p: pointer, maxItems: int) =
-  var c = cast[PRawChannel](p)
-  initSysLock(c.lock)
-  initSysCond(c.cond)
-  c.mask = -1
-  c.maxItems = maxItems
-
-proc deinitRawChannel(p: pointer) =
-  var c = cast[PRawChannel](p)
-  # we need to grab the lock to be safe against sending threads!
-  acquireSys(c.lock)
-  c.mask = ChannelDeadMask
-  deallocOsPages(c.region)
-  deinitSys(c.lock)
-  deinitSysCond(c.cond)
-
-proc storeAux(dest, src: pointer, mt: PNimType, t: PRawChannel,
-              mode: LoadStoreMode) {.benign.}
-
-proc storeAux(dest, src: pointer, n: ptr TNimNode, t: PRawChannel,
-              mode: LoadStoreMode) {.benign.} =
-  var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
-  case n.kind
-  of nkSlot: storeAux(cast[pointer](d +% n.offset),
-                      cast[pointer](s +% n.offset), n.typ, t, mode)
-  of nkList:
-    for i in 0..n.len-1: storeAux(dest, src, n.sons[i], t, mode)
-  of nkCase:
-    copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset),
-            n.typ.size)
-    var m = selectBranch(src, n)
-    if m != nil: storeAux(dest, src, m, t, mode)
-  of nkNone: sysAssert(false, "storeAux")
-
-proc storeAux(dest, src: pointer, mt: PNimType, t: PRawChannel,
-              mode: LoadStoreMode) =
-  template `+!`(p: pointer; x: int): pointer =
-    cast[pointer](cast[int](p) +% x)
-
-  var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
-  sysAssert(mt != nil, "mt == nil")
-  case mt.kind
-  of tyString:
-    if mode == mStore:
-      var x = cast[PPointer](dest)
-      var s2 = cast[PPointer](s)[]
-      if s2 == nil:
-        x[] = nil
-      else:
-        var ss = cast[NimString](s2)
-        var ns = cast[NimString](alloc(t.region, ss.len+1 + GenericSeqSize))
-        copyMem(ns, ss, ss.len+1 + GenericSeqSize)
-        x[] = ns
-    else:
-      var x = cast[PPointer](dest)
-      var s2 = cast[PPointer](s)[]
-      if s2 == nil:
-        unsureAsgnRef(x, s2)
-      else:
-        let y = copyDeepString(cast[NimString](s2))
-        #echo "loaded ", cast[int](y), " ", cast[string](y)
-        unsureAsgnRef(x, y)
-        dealloc(t.region, s2)
-  of tySequence:
-    var s2 = cast[PPointer](src)[]
-    var seq = cast[PGenericSeq](s2)
-    var x = cast[PPointer](dest)
-    if s2 == nil:
-      if mode == mStore:
-        x[] = nil
-      else:
-        unsureAsgnRef(x, nil)
-    else:
-      sysAssert(dest != nil, "dest == nil")
-      if mode == mStore:
-        x[] = alloc0(t.region, seq.len *% mt.base.size +% GenericSeqSize)
-      else:
-        unsureAsgnRef(x, newSeq(mt, seq.len))
-      var dst = cast[ByteAddress](cast[PPointer](dest)[])
-      var dstseq = cast[PGenericSeq](dst)
-      dstseq.len = seq.len
-      dstseq.reserved = seq.len
-      for i in 0..seq.len-1:
-        storeAux(
-          cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize),
-          cast[pointer](cast[ByteAddress](s2) +% i *% mt.base.size +%
-                        GenericSeqSize),
-          mt.base, t, mode)
-      if mode != mStore: dealloc(t.region, s2)
-  of tyObject:
-    if mt.base != nil:
-      storeAux(dest, src, mt.base, t, mode)
-    else:
-      # copy type field:
-      var pint = cast[ptr PNimType](dest)
-      pint[] = cast[ptr PNimType](src)[]
-    storeAux(dest, src, mt.node, t, mode)
-  of tyTuple:
-    storeAux(dest, src, mt.node, t, mode)
-  of tyArray, tyArrayConstr:
-    for i in 0..(mt.size div mt.base.size)-1:
-      storeAux(cast[pointer](d +% i*% mt.base.size),
-               cast[pointer](s +% i*% mt.base.size), mt.base, t, mode)
-  of tyRef, tyOptAsRef:
-    var s = cast[PPointer](src)[]
-    var x = cast[PPointer](dest)
-    if s == nil:
-      if mode == mStore:
-        x[] = nil
-      else:
-        unsureAsgnRef(x, nil)
-    else:
-      #let size = if mt.base.kind == tyObject: cast[ptr PNimType](s)[].size
-      #           else: mt.base.size
-      if mode == mStore:
-        let dyntype = when declared(usrToCell): usrToCell(s).typ
-                      else: mt
-        let size = dyntype.base.size
-        # we store the real dynamic 'ref type' at offset 0, so that
-        # no information is lost
-        let a = alloc0(t.region, size+sizeof(pointer))
-        x[] = a
-        cast[PPointer](a)[] = dyntype
-        storeAux(a +! sizeof(pointer), s, dyntype.base, t, mode)
-      else:
-        let dyntype = cast[ptr PNimType](s)[]
-        var obj = newObj(dyntype, dyntype.base.size)
-        unsureAsgnRef(x, obj)
-        storeAux(x[], s +! sizeof(pointer), dyntype.base, t, mode)
-        dealloc(t.region, s)
-  else:
-    copyMem(dest, src, mt.size) # copy raw bits
-
-proc rawSend(q: PRawChannel, data: pointer, typ: PNimType) =
-  ## adds an `item` to the end of the queue `q`.
-  var cap = q.mask+1
-  if q.count >= cap:
-    # start with capacity for 2 entries in the queue:
-    if cap == 0: cap = 1
-    var n = cast[pbytes](alloc0(q.region, cap*2*typ.size))
-    var z = 0
-    var i = q.rd
-    var c = q.count
-    while c > 0:
-      dec c
-      copyMem(addr(n[z*typ.size]), addr(q.data[i*typ.size]), typ.size)
-      i = (i + 1) and q.mask
-      inc z
-    if q.data != nil: dealloc(q.region, q.data)
-    q.data = n
-    q.mask = cap*2 - 1
-    q.wr = q.count
-    q.rd = 0
-  storeAux(addr(q.data[q.wr * typ.size]), data, typ, q, mStore)
-  inc q.count
-  q.wr = (q.wr + 1) and q.mask
-
-proc rawRecv(q: PRawChannel, data: pointer, typ: PNimType) =
-  sysAssert q.count > 0, "rawRecv"
-  dec q.count
-  storeAux(data, addr(q.data[q.rd * typ.size]), typ, q, mLoad)
-  q.rd = (q.rd + 1) and q.mask
-
-template lockChannel(q, action): untyped =
-  acquireSys(q.lock)
-  action
-  releaseSys(q.lock)
-
-proc sendImpl(q: PRawChannel, typ: PNimType, msg: pointer, noBlock: bool): bool =
-  if q.mask == ChannelDeadMask:
-    sysFatal(DeadThreadError, "cannot send message; thread died")
-  acquireSys(q.lock)
-  if q.maxItems > 0:
-    # Wait until count is less than maxItems
-    if noBlock and q.count >= q.maxItems:
-      releaseSys(q.lock)
-      return
-
-    while q.count >= q.maxItems:
-      waitSysCond(q.cond, q.lock)
-
-  rawSend(q, msg, typ)
-  q.elemType = typ
-  releaseSys(q.lock)
-  signalSysCond(q.cond)
-  result = true
-
-proc send*[TMsg](c: var Channel[TMsg], msg: TMsg) {.inline.} =
-  ## sends a message to a thread. `msg` is deeply copied.
-  discard sendImpl(cast[PRawChannel](addr c), cast[PNimType](getTypeInfo(msg)), unsafeAddr(msg), false)
-
-proc trySend*[TMsg](c: var Channel[TMsg], msg: TMsg): bool {.inline.} =
-  ## Tries to send a message to a thread. `msg` is deeply copied. Doesn't block.
-  ## Returns `false` if the message was not sent because number of pending items
-  ## in the cannel exceeded `maxItems`.
-  sendImpl(cast[PRawChannel](addr c), cast[PNimType](getTypeInfo(msg)), unsafeAddr(msg), true)
-
-proc llRecv(q: PRawChannel, res: pointer, typ: PNimType) =
-  q.ready = true
-  while q.count <= 0:
-    waitSysCond(q.cond, q.lock)
-  q.ready = false
-  if typ != q.elemType:
-    releaseSys(q.lock)
-    sysFatal(ValueError, "cannot receive message of wrong type")
-  rawRecv(q, res, typ)
-  if q.maxItems > 0 and q.count == q.maxItems - 1:
-    # Parent thread is awaiting in send. Wake it up.
-    signalSysCond(q.cond)
-
-proc recv*[TMsg](c: var Channel[TMsg]): TMsg =
-  ## receives a message from the channel `c`. This blocks until
-  ## a message has arrived! You may use ``peek`` to avoid the blocking.
-  var q = cast[PRawChannel](addr(c))
-  acquireSys(q.lock)
-  llRecv(q, addr(result), cast[PNimType](getTypeInfo(result)))
-  releaseSys(q.lock)
-
-proc tryRecv*[TMsg](c: var Channel[TMsg]): tuple[dataAvailable: bool,
-                                                  msg: TMsg] =
-  ## Tries to receive a message from the channel `c`, but this can fail
-  ## for all sort of reasons, including contention. If it fails,
-  ## it returns ``(false, default(msg))`` otherwise it
-  ## returns ``(true, msg)``.
-  var q = cast[PRawChannel](addr(c))
-  if q.mask != ChannelDeadMask:
-    if tryAcquireSys(q.lock):
-      if q.count > 0:
-        llRecv(q, addr(result.msg), cast[PNimType](getTypeInfo(result.msg)))
-        result.dataAvailable = true
-      releaseSys(q.lock)
-
-proc peek*[TMsg](c: var Channel[TMsg]): int =
-  ## returns the current number of messages in the channel `c`. Returns -1
-  ## if the channel has been closed. **Note**: This is dangerous to use
-  ## as it encourages races. It's much better to use ``tryRecv`` instead.
-  var q = cast[PRawChannel](addr(c))
-  if q.mask != ChannelDeadMask:
-    lockChannel(q):
-      result = q.count
-  else:
-    result = -1
-
-proc open*[TMsg](c: var Channel[TMsg], maxItems: int = 0) =
-  ## opens a channel `c` for inter thread communication. The `send` operation
-  ## will block until number of unprocessed items is less than `maxItems`.
-  ## For unlimited queue set `maxItems` to 0.
-  initRawChannel(addr(c), maxItems)
-
-proc close*[TMsg](c: var Channel[TMsg]) =
-  ## closes a channel `c` and frees its associated resources.
-  deinitRawChannel(addr(c))
-
-proc ready*[TMsg](c: var Channel[TMsg]): bool =
-  ## returns true iff some thread is waiting on the channel `c` for
-  ## new messages.
-  var q = cast[PRawChannel](addr(c))
-  result = q.ready
-
diff --git a/lib/system/channels_builtin.nim b/lib/system/channels_builtin.nim
new file mode 100644
index 000000000..02b4d8cbf
--- /dev/null
+++ b/lib/system/channels_builtin.nim
@@ -0,0 +1,459 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Channel support for threads.
+##
+## **Note**: This is part of the system module. Do not import it directly.
+## To activate thread support compile with the `--threads:on` command line switch.
+##
+## **Note:** Channels are designed for the `Thread` type. They are unstable when
+## used with `spawn`.
+##
+## **Note:** The current implementation of message passing does
+## not work with cyclic data structures.
+##
+## **Note:** Channels cannot be passed between threads. Use globals or pass
+## them by `ptr`.
+##
+## Example
+## =======
+## The following is a simple example of two different ways to use channels:
+## blocking and non-blocking.
+##
+##   ```Nim
+##   # Be sure to compile with --threads:on.
+##   # The channels and threads modules are part of system and should not be
+##   # imported.
+##   import std/os
+##
+##   # Channels can either be:
+##   #  - declared at the module level, or
+##   #  - passed to procedures by ptr (raw pointer) -- see note on safety.
+##   #
+##   # For simplicity, in this example a channel is declared at module scope.
+##   # Channels are generic, and they include support for passing objects between
+##   # threads.
+##   # Note that objects passed through channels will be deeply copied.
+##   var chan: Channel[string]
+##
+##   # This proc will be run in another thread using the threads module.
+##   proc firstWorker() =
+##     chan.send("Hello World!")
+##
+##   # This is another proc to run in a background thread. This proc takes a while
+##   # to send the message since it sleeps for 2 seconds (or 2000 milliseconds).
+##   proc secondWorker() =
+##     sleep(2000)
+##     chan.send("Another message")
+##
+##   # Initialize the channel.
+##   chan.open()
+##
+##   # Launch the worker.
+##   var worker1: Thread[void]
+##   createThread(worker1, firstWorker)
+##
+##   # Block until the message arrives, then print it out.
+##   echo chan.recv() # "Hello World!"
+##
+##   # Wait for the thread to exit before moving on to the next example.
+##   worker1.joinThread()
+##
+##   # Launch the other worker.
+##   var worker2: Thread[void]
+##   createThread(worker2, secondWorker)
+##   # This time, use a non-blocking approach with tryRecv.
+##   # Since the main thread is not blocked, it could be used to perform other
+##   # useful work while it waits for data to arrive on the channel.
+##   while true:
+##     let tried = chan.tryRecv()
+##     if tried.dataAvailable:
+##       echo tried.msg # "Another message"
+##       break
+##
+##     echo "Pretend I'm doing useful work..."
+##     # For this example, sleep in order not to flood stdout with the above
+##     # message.
+##     sleep(400)
+##
+##   # Wait for the second thread to exit before cleaning up the channel.
+##   worker2.joinThread()
+##
+##   # Clean up the channel.
+##   chan.close()
+##   ```
+##
+## Sample output
+## -------------
+## The program should output something similar to this, but keep in mind that
+## exact results may vary in the real world:
+##
+##     Hello World!
+##     Pretend I'm doing useful work...
+##     Pretend I'm doing useful work...
+##     Pretend I'm doing useful work...
+##     Pretend I'm doing useful work...
+##     Pretend I'm doing useful work...
+##     Another message
+##
+## Passing Channels Safely
+## -----------------------
+## Note that when passing objects to procedures on another thread by pointer
+## (for example through a thread's argument), objects created using the default
+## allocator will use thread-local, GC-managed memory. Thus it is generally
+## safer to store channel objects in global variables (as in the above example),
+## in which case they will use a process-wide (thread-safe) shared heap.
+##
+## However, it is possible to manually allocate shared memory for channels
+## using e.g. `system.allocShared0` and pass these pointers through thread
+## arguments:
+##
+##   ```Nim
+##   proc worker(channel: ptr Channel[string]) =
+##     let greeting = channel[].recv()
+##     echo greeting
+##
+##   proc localChannelExample() =
+##     # Use allocShared0 to allocate some shared-heap memory and zero it.
+##     # The usual warnings about dealing with raw pointers apply. Exercise caution.
+##     var channel = cast[ptr Channel[string]](
+##       allocShared0(sizeof(Channel[string]))
+##     )
+##     channel[].open()
+##     # Create a thread which will receive the channel as an argument.
+##     var thread: Thread[ptr Channel[string]]
+##     createThread(thread, worker, channel)
+##     channel[].send("Hello from the main thread!")
+##     # Clean up resources.
+##     thread.joinThread()
+##     channel[].close()
+##     deallocShared(channel)
+##
+##   localChannelExample() # "Hello from the main thread!"
+##   ```
+
+when not declared(ThisIsSystem):
+  {.error: "You must not import this module explicitly".}
+
+import std/private/syslocks
+
+type
+  pbytes = ptr UncheckedArray[byte]
+  RawChannel {.pure, final.} = object ## msg queue for a thread
+    rd, wr, count, mask, maxItems: int
+    data: pbytes
+    lock: SysLock
+    cond: SysCond
+    elemType: PNimType
+    ready: bool
+    when not usesDestructors:
+      region: MemRegion
+  PRawChannel = ptr RawChannel
+  LoadStoreMode = enum mStore, mLoad
+  Channel*[TMsg] {.gcsafe.} = RawChannel ## a channel for thread communication
+
+const ChannelDeadMask = -2
+
+proc initRawChannel(p: pointer, maxItems: int) =
+  var c = cast[PRawChannel](p)
+  initSysLock(c.lock)
+  initSysCond(c.cond)
+  c.mask = -1
+  c.maxItems = maxItems
+
+proc deinitRawChannel(p: pointer) =
+  var c = cast[PRawChannel](p)
+  # we need to grab the lock to be safe against sending threads!
+  acquireSys(c.lock)
+  c.mask = ChannelDeadMask
+  when not usesDestructors:
+    deallocOsPages(c.region)
+  else:
+    if c.data != nil: deallocShared(c.data)
+  deinitSys(c.lock)
+  deinitSysCond(c.cond)
+
+when not usesDestructors:
+
+  proc storeAux(dest, src: pointer, mt: PNimType, t: PRawChannel,
+                mode: LoadStoreMode) {.benign.}
+
+  proc storeAux(dest, src: pointer, n: ptr TNimNode, t: PRawChannel,
+                mode: LoadStoreMode) {.benign.} =
+    var
+      d = cast[int](dest)
+      s = cast[int](src)
+    case n.kind
+    of nkSlot: storeAux(cast[pointer](d +% n.offset),
+                        cast[pointer](s +% n.offset), n.typ, t, mode)
+    of nkList:
+      for i in 0..n.len-1: storeAux(dest, src, n.sons[i], t, mode)
+    of nkCase:
+      copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset),
+              n.typ.size)
+      var m = selectBranch(src, n)
+      if m != nil: storeAux(dest, src, m, t, mode)
+    of nkNone: sysAssert(false, "storeAux")
+
+  proc storeAux(dest, src: pointer, mt: PNimType, t: PRawChannel,
+                mode: LoadStoreMode) =
+    template `+!`(p: pointer; x: int): pointer =
+      cast[pointer](cast[int](p) +% x)
+
+    var
+      d = cast[int](dest)
+      s = cast[int](src)
+    sysAssert(mt != nil, "mt == nil")
+    case mt.kind
+    of tyString:
+      if mode == mStore:
+        var x = cast[PPointer](dest)
+        var s2 = cast[PPointer](s)[]
+        if s2 == nil:
+          x[] = nil
+        else:
+          var ss = cast[NimString](s2)
+          var ns = cast[NimString](alloc(t.region, GenericSeqSize + ss.len+1))
+          copyMem(ns, ss, ss.len+1 + GenericSeqSize)
+          x[] = ns
+      else:
+        var x = cast[PPointer](dest)
+        var s2 = cast[PPointer](s)[]
+        if s2 == nil:
+          unsureAsgnRef(x, s2)
+        else:
+          let y = copyDeepString(cast[NimString](s2))
+          #echo "loaded ", cast[int](y), " ", cast[string](y)
+          unsureAsgnRef(x, y)
+          dealloc(t.region, s2)
+    of tySequence:
+      var s2 = cast[PPointer](src)[]
+      var seq = cast[PGenericSeq](s2)
+      var x = cast[PPointer](dest)
+      if s2 == nil:
+        if mode == mStore:
+          x[] = nil
+        else:
+          unsureAsgnRef(x, nil)
+      else:
+        sysAssert(dest != nil, "dest == nil")
+        if mode == mStore:
+          x[] = alloc0(t.region, align(GenericSeqSize, mt.base.align) +% seq.len *% mt.base.size)
+        else:
+          unsureAsgnRef(x, newSeq(mt, seq.len))
+        var dst = cast[int](cast[PPointer](dest)[])
+        var dstseq = cast[PGenericSeq](dst)
+        dstseq.len = seq.len
+        dstseq.reserved = seq.len
+        for i in 0..seq.len-1:
+          storeAux(
+            cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
+            cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align) +%
+                          i *% mt.base.size),
+            mt.base, t, mode)
+        if mode != mStore: dealloc(t.region, s2)
+    of tyObject:
+      if mt.base != nil:
+        storeAux(dest, src, mt.base, t, mode)
+      else:
+        # copy type field:
+        var pint = cast[ptr PNimType](dest)
+        pint[] = cast[ptr PNimType](src)[]
+      storeAux(dest, src, mt.node, t, mode)
+    of tyTuple:
+      storeAux(dest, src, mt.node, t, mode)
+    of tyArray, tyArrayConstr:
+      for i in 0..(mt.size div mt.base.size)-1:
+        storeAux(cast[pointer](d +% i *% mt.base.size),
+                cast[pointer](s +% i *% mt.base.size), mt.base, t, mode)
+    of tyRef:
+      var s = cast[PPointer](src)[]
+      var x = cast[PPointer](dest)
+      if s == nil:
+        if mode == mStore:
+          x[] = nil
+        else:
+          unsureAsgnRef(x, nil)
+      else:
+        #let size = if mt.base.kind == tyObject: cast[ptr PNimType](s)[].size
+        #           else: mt.base.size
+        if mode == mStore:
+          let dyntype = when declared(usrToCell): usrToCell(s).typ
+                        else: mt
+          let size = dyntype.base.size
+          # we store the real dynamic 'ref type' at offset 0, so that
+          # no information is lost
+          let a = alloc0(t.region, size+sizeof(pointer))
+          x[] = a
+          cast[PPointer](a)[] = dyntype
+          storeAux(a +! sizeof(pointer), s, dyntype.base, t, mode)
+        else:
+          let dyntype = cast[ptr PNimType](s)[]
+          var obj = newObj(dyntype, dyntype.base.size)
+          unsureAsgnRef(x, obj)
+          storeAux(x[], s +! sizeof(pointer), dyntype.base, t, mode)
+          dealloc(t.region, s)
+    else:
+      copyMem(dest, src, mt.size) # copy raw bits
+
+proc rawSend(q: PRawChannel, data: pointer, typ: PNimType) =
+  ## Adds an item (the `data` argument) to the end of the queue `q`.
+  var cap = q.mask+1
+  if q.count >= cap:
+    # start with capacity for 2 entries in the queue:
+    if cap == 0: cap = 1
+    when not usesDestructors:
+      var n = cast[pbytes](alloc0(q.region, cap*2*typ.size))
+    else:
+      var n = cast[pbytes](allocShared0(cap*2*typ.size))
+    var z = 0
+    var i = q.rd
+    var c = q.count
+    while c > 0:
+      dec c
+      copyMem(addr(n[z*typ.size]), addr(q.data[i*typ.size]), typ.size)
+      i = (i + 1) and q.mask
+      inc z
+    if q.data != nil:
+      when not usesDestructors:
+        dealloc(q.region, q.data)
+      else:
+        deallocShared(q.data)
+    q.data = n
+    q.mask = cap*2 - 1
+    q.wr = q.count
+    q.rd = 0
+  when not usesDestructors:
+    storeAux(addr(q.data[q.wr * typ.size]), data, typ, q, mStore)
+  else:
+    copyMem(addr(q.data[q.wr * typ.size]), data, typ.size)
+  inc q.count
+  q.wr = (q.wr + 1) and q.mask
+
+proc rawRecv(q: PRawChannel, data: pointer, typ: PNimType) =
+  sysAssert q.count > 0, "rawRecv"
+  dec q.count
+  when not usesDestructors:
+    storeAux(data, addr(q.data[q.rd * typ.size]), typ, q, mLoad)
+  else:
+    copyMem(data, addr(q.data[q.rd * typ.size]), typ.size)
+  q.rd = (q.rd + 1) and q.mask
+
+template lockChannel(q, action): untyped =
+  acquireSys(q.lock)
+  action
+  releaseSys(q.lock)
+
+proc sendImpl(q: PRawChannel, typ: PNimType, msg: pointer, noBlock: bool): bool =
+  if q.mask == ChannelDeadMask:
+    sysFatal(DeadThreadDefect, "cannot send message; thread died")
+  acquireSys(q.lock)
+  if q.maxItems > 0:
+    # Wait until count is less than maxItems
+    if noBlock and q.count >= q.maxItems:
+      releaseSys(q.lock)
+      return
+
+    while q.count >= q.maxItems:
+      waitSysCond(q.cond, q.lock)
+
+  rawSend(q, msg, typ)
+  q.elemType = typ
+  signalSysCond(q.cond)
+  releaseSys(q.lock)
+  result = true
+
+proc send*[TMsg](c: var Channel[TMsg], msg: sink TMsg) {.inline.} =
+  ## Sends a message to a thread. `msg` is deeply copied.
+  discard sendImpl(cast[PRawChannel](addr c), cast[PNimType](getTypeInfo(msg)), unsafeAddr(msg), false)
+  when defined(gcDestructors):
+    wasMoved(msg)
+
+proc trySend*[TMsg](c: var Channel[TMsg], msg: sink TMsg): bool {.inline.} =
+  ## Tries to send a message to a thread.
+  ##
+  ## `msg` is deeply copied. Doesn't block.
+  ##
+  ## Returns `false` if the message was not sent because the number of pending
+  ## items in the channel exceeded `maxItems`.
+  result = sendImpl(cast[PRawChannel](addr c), cast[PNimType](getTypeInfo(msg)), unsafeAddr(msg), true)
+  when defined(gcDestructors):
+    if result:
+      wasMoved(msg)
+
+proc llRecv(q: PRawChannel, res: pointer, typ: PNimType) =
+  q.ready = true
+  while q.count <= 0:
+    waitSysCond(q.cond, q.lock)
+  q.ready = false
+  if typ != q.elemType:
+    releaseSys(q.lock)
+    raise newException(ValueError, "cannot receive message of wrong type")
+  rawRecv(q, res, typ)
+  if q.maxItems > 0 and q.count == q.maxItems - 1:
+    # Parent thread is awaiting in send. Wake it up.
+    signalSysCond(q.cond)
+
+proc recv*[TMsg](c: var Channel[TMsg]): TMsg =
+  ## Receives a message from the channel `c`.
+  ##
+  ## This blocks until a message has arrived!
+  ## You may use `peek proc <#peek,Channel[TMsg]>`_ to avoid the blocking.
+  var q = cast[PRawChannel](addr(c))
+  acquireSys(q.lock)
+  llRecv(q, addr(result), cast[PNimType](getTypeInfo(result)))
+  releaseSys(q.lock)
+
+proc tryRecv*[TMsg](c: var Channel[TMsg]): tuple[dataAvailable: bool,
+                                                  msg: TMsg] =
+  ## Tries to receive a message from the channel `c`, but this can fail
+  ## for all sort of reasons, including contention.
+  ##
+  ## If it fails, it returns `(false, default(msg))` otherwise it
+  ## returns `(true, msg)`.
+  var q = cast[PRawChannel](addr(c))
+  if q.mask != ChannelDeadMask:
+    if tryAcquireSys(q.lock):
+      if q.count > 0:
+        llRecv(q, addr(result.msg), cast[PNimType](getTypeInfo(result.msg)))
+        result.dataAvailable = true
+      releaseSys(q.lock)
+
+proc peek*[TMsg](c: var Channel[TMsg]): int =
+  ## Returns the current number of messages in the channel `c`.
+  ##
+  ## Returns -1 if the channel has been closed.
+  ##
+  ## **Note**: This is dangerous to use as it encourages races.
+  ## It's much better to use `tryRecv proc <#tryRecv,Channel[TMsg]>`_ instead.
+  var q = cast[PRawChannel](addr(c))
+  if q.mask != ChannelDeadMask:
+    lockChannel(q):
+      result = q.count
+  else:
+    result = -1
+
+proc open*[TMsg](c: var Channel[TMsg], maxItems: int = 0) =
+  ## Opens a channel `c` for inter thread communication.
+  ##
+  ## The `send` operation will block until the number of unprocessed items is
+  ## less than `maxItems`.
+  ##
+  ## For an unlimited queue, set `maxItems` to 0.
+  initRawChannel(addr(c), maxItems)
+
+proc close*[TMsg](c: var Channel[TMsg]) =
+  ## Closes a channel `c` and frees its associated resources.
+  deinitRawChannel(addr(c))
+
+proc ready*[TMsg](c: var Channel[TMsg]): bool =
+  ## Returns true if some thread is waiting on the channel `c` for
+  ## new messages.
+  var q = cast[PRawChannel](addr(c))
+  result = q.ready
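The bounded-queue behaviour documented above for `open(maxItems)`, `trySend` and
`peek` can be summarised in a small sketch (illustrative only; assumes threads
are enabled, e.g. `--threads:on`):

```nim
var chan: Channel[int]
chan.open(maxItems = 2)        # bounded queue: `send` would block at 2 pending items
doAssert chan.trySend(1)       # accepted, 1 item pending
doAssert chan.trySend(2)       # accepted, 2 items pending
doAssert not chan.trySend(3)   # rejected without blocking: maxItems reached
doAssert chan.peek() == 2
doAssert chan.recv() == 1      # FIFO order; frees one slot
doAssert chan.trySend(3)       # accepted again
chan.close()
```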
diff --git a/lib/system/chcks.nim b/lib/system/chcks.nim
index d3651f659..b48855964 100644
--- a/lib/system/chcks.nim
+++ b/lib/system/chcks.nim
@@ -8,24 +8,72 @@
 #
 
 # Implementation of some runtime checks.
+include system/indexerrors
+when defined(nimPreviewSlimSystem):
+  import std/formatfloat
 
 proc raiseRangeError(val: BiggestInt) {.compilerproc, noinline.} =
   when hostOS == "standalone":
-    sysFatal(RangeError, "value out of range")
+    sysFatal(RangeDefect, "value out of range")
   else:
-    sysFatal(RangeError, "value out of range: ", $val)
+    sysFatal(RangeDefect, "value out of range: ", $val)
+
+proc raiseIndexError4(l1, h1, h2: int) {.compilerproc, noinline.} =
+  sysFatal(IndexDefect, "index out of bounds: " & $l1 & ".." & $h1 & " notin 0.." & $(h2 - 1))
+
+proc raiseIndexError3(i, a, b: int) {.compilerproc, noinline.} =
+  sysFatal(IndexDefect, formatErrorIndexBound(i, a, b))
+
+proc raiseIndexError2(i, n: int) {.compilerproc, noinline.} =
+  sysFatal(IndexDefect, formatErrorIndexBound(i, n))
 
 proc raiseIndexError() {.compilerproc, noinline.} =
-  sysFatal(IndexError, "index out of bounds")
+  sysFatal(IndexDefect, "index out of bounds")
 
 proc raiseFieldError(f: string) {.compilerproc, noinline.} =
-  sysFatal(FieldError, f, " is not accessible")
+  ## remove after bootstrap > 1.5.1
+  sysFatal(FieldDefect, f)
+
+when defined(nimV2):
+  proc raiseFieldError2(f: string, discVal: int) {.compilerproc, noinline.} =
+    ## raised when field is inaccessible given runtime value of discriminant
+    sysFatal(FieldDefect, f & $discVal & "'")
+
+  proc raiseFieldErrorStr(f: string, discVal: string) {.compilerproc, noinline.} =
+    ## raised when field is inaccessible given runtime value of discriminant
+    sysFatal(FieldDefect, formatFieldDefect(f, discVal))
+else:
+  proc raiseFieldError2(f: string, discVal: string) {.compilerproc, noinline.} =
+    ## raised when field is inaccessible given runtime value of discriminant
+    sysFatal(FieldDefect, formatFieldDefect(f, discVal))
+
+proc raiseRangeErrorI(i, a, b: BiggestInt) {.compilerproc, noinline.} =
+  when defined(standalone):
+    sysFatal(RangeDefect, "value out of range")
+  else:
+    sysFatal(RangeDefect, "value out of range: " & $i & " notin " & $a & " .. " & $b)
+
+proc raiseRangeErrorF(i, a, b: float) {.compilerproc, noinline.} =
+  when defined(standalone):
+    sysFatal(RangeDefect, "value out of range")
+  else:
+    sysFatal(RangeDefect, "value out of range: " & $i & " notin " & $a & " .. " & $b)
+
+proc raiseRangeErrorU(i, a, b: uint64) {.compilerproc, noinline.} =
+  # todo: better error reporting
+  sysFatal(RangeDefect, "value out of range")
+
+proc raiseRangeErrorNoArgs() {.compilerproc, noinline.} =
+  sysFatal(RangeDefect, "value out of range")
+
+proc raiseObjectConversionError() {.compilerproc, noinline.} =
+  sysFatal(ObjectConversionDefect, "invalid object conversion")
 
 proc chckIndx(i, a, b: int): int =
   if i >= a and i <= b:
     return i
   else:
-    raiseIndexError()
+    raiseIndexError3(i, a, b)
 
 proc chckRange(i, a, b: int): int =
   if i >= a and i <= b:
@@ -39,68 +87,75 @@ proc chckRange64(i, a, b: int64): int64 {.compilerproc.} =
   else:
     raiseRangeError(i)
 
+proc chckRangeU(i, a, b: uint64): uint64 {.compilerproc.} =
+  if i >= a and i <= b:
+    return i
+  else:
+    sysFatal(RangeDefect, "value out of range")
+
 proc chckRangeF(x, a, b: float): float =
   if x >= a and x <= b:
     return x
   else:
     when hostOS == "standalone":
-      sysFatal(RangeError, "value out of range")
+      sysFatal(RangeDefect, "value out of range")
     else:
-      sysFatal(RangeError, "value out of range: ", $x)
+      sysFatal(RangeDefect, "value out of range: ", $x)
 
 proc chckNil(p: pointer) =
   if p == nil:
-    sysFatal(NilAccessError, "attempt to write to a nil address")
-
-when defined(nimNewRuntime):
-  proc chckMove(b: bool) {.compilerproc.} =
-    if not b:
-      sysFatal(MoveError, "attempt to access an object that was moved")
+    sysFatal(NilAccessDefect, "attempt to write to a nil address")
 
 proc chckNilDisp(p: pointer) {.compilerproc.} =
   if p == nil:
-    sysFatal(NilAccessError, "cannot dispatch; dispatcher is nil")
-
-proc chckObj(obj, subclass: PNimType) {.compilerproc.} =
-  # checks if obj is of type subclass:
-  var x = obj
-  if x == subclass: return # optimized fast path
-  while x != subclass:
-    if x == nil:
-      sysFatal(ObjectConversionError, "invalid object conversion")
-    x = x.base
-
-proc chckObjAsgn(a, b: PNimType) {.compilerproc, inline.} =
-  if a != b:
-    sysFatal(ObjectAssignmentError, "invalid object assignment")
-
-type ObjCheckCache = array[0..1, PNimType]
-
-proc isObjSlowPath(obj, subclass: PNimType;
-                   cache: var ObjCheckCache): bool {.noinline.} =
-  # checks if obj is of type subclass:
-  var x = obj.base
-  while x != subclass:
-    if x == nil:
-      cache[0] = obj
-      return false
-    x = x.base
-  cache[1] = obj
-  return true
-
-proc isObjWithCache(obj, subclass: PNimType;
-                    cache: var ObjCheckCache): bool {.compilerProc, inline.} =
-  if obj == subclass: return true
-  if obj.base == subclass: return true
-  if cache[0] == obj: return false
-  if cache[1] == obj: return true
-  return isObjSlowPath(obj, subclass, cache)
-
-proc isObj(obj, subclass: PNimType): bool {.compilerproc.} =
-  # checks if obj is of type subclass:
-  var x = obj
-  if x == subclass: return true # optimized fast path
-  while x != subclass:
-    if x == nil: return false
-    x = x.base
-  return true
+    sysFatal(NilAccessDefect, "cannot dispatch; dispatcher is nil")
+
+when not defined(nimV2):
+
+  proc chckObj(obj, subclass: PNimType) {.compilerproc.} =
+    # checks if obj is of type subclass:
+    var x = obj
+    if x == subclass: return # optimized fast path
+    while x != subclass:
+      if x == nil:
+        sysFatal(ObjectConversionDefect, "invalid object conversion")
+      x = x.base
+
+  proc chckObjAsgn(a, b: PNimType) {.compilerproc, inline.} =
+    if a != b:
+      sysFatal(ObjectAssignmentDefect, "invalid object assignment")
+
+  type ObjCheckCache = array[0..1, PNimType]
+
+  proc isObjSlowPath(obj, subclass: PNimType;
+                    cache: var ObjCheckCache): bool {.noinline.} =
+    # checks if obj is of type subclass:
+    var x = obj.base
+    while x != subclass:
+      if x == nil:
+        cache[0] = obj
+        return false
+      x = x.base
+    cache[1] = obj
+    return true
+
+  proc isObjWithCache(obj, subclass: PNimType;
+                      cache: var ObjCheckCache): bool {.compilerproc, inline.} =
+    if obj == subclass: return true
+    if obj.base == subclass: return true
+    if cache[0] == obj: return false
+    if cache[1] == obj: return true
+    return isObjSlowPath(obj, subclass, cache)
+
+  proc isObj(obj, subclass: PNimType): bool {.compilerproc.} =
+    # checks if obj is of type subclass:
+    var x = obj
+    if x == subclass: return true # optimized fast path
+    while x != subclass:
+      if x == nil: return false
+      x = x.base
+    return true
+
+when defined(nimV2):
+  proc raiseObjectCaseTransition() {.compilerproc.} =
+    sysFatal(FieldDefect, "assignment to discriminant changes object branch")
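The object-conversion checks above (`chckObj` / `raiseObjectConversionError`) are
what fire for an invalid down-conversion. A minimal sketch, assuming the default
settings where defects are catchable:

```nim
import std/assertions

type
  Base = ref object of RootObj
  Sub = ref object of Base

let b: Base = Base()
doAssertRaises(ObjectConversionDefect):
  discard Sub(b)   # `b` does not actually refer to a `Sub`, so the runtime check raises
```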
diff --git a/lib/system/comparisons.nim b/lib/system/comparisons.nim
new file mode 100644
index 000000000..a8d78bb93
--- /dev/null
+++ b/lib/system/comparisons.nim
@@ -0,0 +1,337 @@
+# comparison operators:
+proc `==`*[Enum: enum](x, y: Enum): bool {.magic: "EqEnum", noSideEffect.} =
+  ## Checks whether values within the *same enum* have the same underlying value.
+  runnableExamples:
+    type
+      Enum1 = enum
+        field1 = 3, field2
+      Enum2 = enum
+        place1, place2 = 3
+    var
+      e1 = field1
+      e2 = place2.ord.Enum1
+    assert e1 == e2
+    assert not compiles(e1 == place2) # raises error
+proc `==`*(x, y: pointer): bool {.magic: "EqRef", noSideEffect.} =
+  ## Checks for equality between two `pointer` variables.
+  runnableExamples:
+    var # this is a wildly dangerous example
+      a = cast[pointer](0)
+      b = cast[pointer](nil)
+    assert a == b # true due to the special meaning of `nil`/0 as a pointer
+proc `==`*(x, y: string): bool {.magic: "EqStr", noSideEffect.}
+  ## Checks for equality between two `string` variables.
+
+proc `==`*(x, y: char): bool {.magic: "EqCh", noSideEffect.}
+  ## Checks for equality between two `char` variables.
+proc `==`*(x, y: bool): bool {.magic: "EqB", noSideEffect.}
+  ## Checks for equality between two `bool` variables.
+proc `==`*[T](x, y: set[T]): bool {.magic: "EqSet", noSideEffect.} =
+  ## Checks for equality between two variables of type `set`.
+  runnableExamples:
+    assert {1, 2, 2, 3} == {1, 2, 3} # duplication in sets is ignored
+
+proc `==`*[T](x, y: ref T): bool {.magic: "EqRef", noSideEffect.}
+  ## Checks that two `ref` variables refer to the same item.
+proc `==`*[T](x, y: ptr T): bool {.magic: "EqRef", noSideEffect.}
+  ## Checks that two `ptr` variables refer to the same item.
+proc `==`*[T: proc | iterator](x, y: T): bool {.magic: "EqProc", noSideEffect.}
+  ## Checks that two `proc` variables refer to the same procedure.
+
+proc `<=`*[Enum: enum](x, y: Enum): bool {.magic: "LeEnum", noSideEffect.}
+proc `<=`*(x, y: string): bool {.magic: "LeStr", noSideEffect.} =
+  ## Compares two strings and returns true if `x` is lexicographically
+  ## before `y` (uppercase letters come before lowercase letters).
+  runnableExamples:
+    let
+      a = "abc"
+      b = "abd"
+      c = "ZZZ"
+    assert a <= b
+    assert a <= a
+    assert not (a <= c)
+
+proc `<=`*(x, y: char): bool {.magic: "LeCh", noSideEffect.} =
+  ## Compares two chars and returns true if `x` is lexicographically
+  ## before `y` (uppercase letters come before lowercase letters).
+  runnableExamples:
+    let
+      a = 'a'
+      b = 'b'
+      c = 'Z'
+    assert a <= b
+    assert a <= a
+    assert not (a <= c)
+
+proc `<=`*[T](x, y: set[T]): bool {.magic: "LeSet", noSideEffect.} =
+  ## Returns true if `x` is a subset of `y`.
+  ##
+  ## A subset `x` has all of its members in `y`, but `y` doesn't necessarily
+  ## have more members than `x`. That is, `x` can be equal to `y`.
+  runnableExamples:
+    let
+      a = {3, 5}
+      b = {1, 3, 5, 7}
+      c = {2}
+    assert a <= b
+    assert a <= a
+    assert not (a <= c)
+
+proc `<=`*(x, y: bool): bool {.magic: "LeB", noSideEffect.}
+proc `<=`*[T](x, y: ref T): bool {.magic: "LePtr", noSideEffect.}
+proc `<=`*(x, y: pointer): bool {.magic: "LePtr", noSideEffect.}
+
+proc `<`*[Enum: enum](x, y: Enum): bool {.magic: "LtEnum", noSideEffect.}
+proc `<`*(x, y: string): bool {.magic: "LtStr", noSideEffect.} =
+  ## Compares two strings and returns true if `x` is lexicographically
+  ## before `y` (uppercase letters come before lowercase letters).
+  runnableExamples:
+    let
+      a = "abc"
+      b = "abd"
+      c = "ZZZ"
+    assert a < b
+    assert not (a < a)
+    assert not (a < c)
+
+proc `<`*(x, y: char): bool {.magic: "LtCh", noSideEffect.} =
+  ## Compares two chars and returns true if `x` is lexicographically
+  ## before `y` (uppercase letters come before lowercase letters).
+  runnableExamples:
+    let
+      a = 'a'
+      b = 'b'
+      c = 'Z'
+    assert a < b
+    assert not (a < a)
+    assert not (a < c)
+
+proc `<`*[T](x, y: set[T]): bool {.magic: "LtSet", noSideEffect.} =
+  ## Returns true if `x` is a strict or proper subset of `y`.
+  ##
+  ## A strict or proper subset `x` has all of its members in `y` but `y` has
+  ## more elements than `x`.
+  runnableExamples:
+    let
+      a = {3, 5}
+      b = {1, 3, 5, 7}
+      c = {2}
+    assert a < b
+    assert not (a < a)
+    assert not (a < c)
+
+proc `<`*(x, y: bool): bool {.magic: "LtB", noSideEffect.}
+proc `<`*[T](x, y: ref T): bool {.magic: "LtPtr", noSideEffect.}
+proc `<`*[T](x, y: ptr T): bool {.magic: "LtPtr", noSideEffect.}
+proc `<`*(x, y: pointer): bool {.magic: "LtPtr", noSideEffect.}
+
+when not defined(nimHasCallsitePragma):
+  {.pragma: callsite.}
+
+template `!=`*(x, y: untyped): untyped {.callsite.} =
+  ## Not-equals operator. This is a shorthand for `not (x == y)`.
+  not (x == y)
+
+template `>=`*(x, y: untyped): untyped {.callsite.} =
+  ## "is greater or equals" operator. This is the same as `y <= x`.
+  y <= x
+
+template `>`*(x, y: untyped): untyped {.callsite.} =
+  ## "is greater" operator. This is the same as `y < x`.
+  y < x
+
+
+proc `==`*(x, y: int): bool {.magic: "EqI", noSideEffect.}
+  ## Compares two integers for equality.
+proc `==`*(x, y: int8): bool {.magic: "EqI", noSideEffect.}
+proc `==`*(x, y: int16): bool {.magic: "EqI", noSideEffect.}
+proc `==`*(x, y: int32): bool {.magic: "EqI", noSideEffect.}
+proc `==`*(x, y: int64): bool {.magic: "EqI", noSideEffect.}
+
+proc `<=`*(x, y: int): bool {.magic: "LeI", noSideEffect.}
+  ## Returns true if `x` is less than or equal to `y`.
+proc `<=`*(x, y: int8): bool {.magic: "LeI", noSideEffect.}
+proc `<=`*(x, y: int16): bool {.magic: "LeI", noSideEffect.}
+proc `<=`*(x, y: int32): bool {.magic: "LeI", noSideEffect.}
+proc `<=`*(x, y: int64): bool {.magic: "LeI", noSideEffect.}
+
+proc `<`*(x, y: int): bool {.magic: "LtI", noSideEffect.}
+  ## Returns true if `x` is less than `y`.
+proc `<`*(x, y: int8): bool {.magic: "LtI", noSideEffect.}
+proc `<`*(x, y: int16): bool {.magic: "LtI", noSideEffect.}
+proc `<`*(x, y: int32): bool {.magic: "LtI", noSideEffect.}
+proc `<`*(x, y: int64): bool {.magic: "LtI", noSideEffect.}
+
+proc `<=`*(x, y: uint): bool {.magic: "LeU", noSideEffect.}
+  ## Returns true if `x <= y`.
+proc `<=`*(x, y: uint8): bool {.magic: "LeU", noSideEffect.}
+proc `<=`*(x, y: uint16): bool {.magic: "LeU", noSideEffect.}
+proc `<=`*(x, y: uint32): bool {.magic: "LeU", noSideEffect.}
+proc `<=`*(x, y: uint64): bool {.magic: "LeU", noSideEffect.}
+
+proc `<`*(x, y: uint): bool {.magic: "LtU", noSideEffect.}
+  ## Returns true if `x < y`.
+proc `<`*(x, y: uint8): bool {.magic: "LtU", noSideEffect.}
+proc `<`*(x, y: uint16): bool {.magic: "LtU", noSideEffect.}
+proc `<`*(x, y: uint32): bool {.magic: "LtU", noSideEffect.}
+proc `<`*(x, y: uint64): bool {.magic: "LtU", noSideEffect.}
+
+proc `<=%`*(x, y: int): bool {.inline.} =
+  ## Treats `x` and `y` as unsigned and compares them.
+  ## Returns true if `unsigned(x) <= unsigned(y)`.
+  cast[uint](x) <= cast[uint](y)
+proc `<=%`*(x, y: int8): bool {.inline.} = cast[uint8](x) <= cast[uint8](y)
+proc `<=%`*(x, y: int16): bool {.inline.} = cast[uint16](x) <= cast[uint16](y)
+proc `<=%`*(x, y: int32): bool {.inline.} = cast[uint32](x) <= cast[uint32](y)
+proc `<=%`*(x, y: int64): bool {.inline.} = cast[uint64](x) <= cast[uint64](y)
+
+proc `<%`*(x, y: int): bool {.inline.} =
+  ## Treats `x` and `y` as unsigned and compares them.
+  ## Returns true if `unsigned(x) < unsigned(y)`.
+  cast[uint](x) < cast[uint](y)
+proc `<%`*(x, y: int8): bool {.inline.} = cast[uint8](x) < cast[uint8](y)
+proc `<%`*(x, y: int16): bool {.inline.} = cast[uint16](x) < cast[uint16](y)
+proc `<%`*(x, y: int32): bool {.inline.} = cast[uint32](x) < cast[uint32](y)
+proc `<%`*(x, y: int64): bool {.inline.} = cast[uint64](x) < cast[uint64](y)
+
+template `>=%`*(x, y: untyped): untyped = y <=% x
+  ## Treats `x` and `y` as unsigned and compares them.
+  ## Returns true if `unsigned(x) >= unsigned(y)`.
+
+template `>%`*(x, y: untyped): untyped = y <% x
+  ## Treats `x` and `y` as unsigned and compares them.
+  ## Returns true if `unsigned(x) > unsigned(y)`.
+
+proc `==`*(x, y: uint): bool {.magic: "EqI", noSideEffect.}
+  ## Compares two unsigned integers for equality.
+proc `==`*(x, y: uint8): bool {.magic: "EqI", noSideEffect.}
+proc `==`*(x, y: uint16): bool {.magic: "EqI", noSideEffect.}
+proc `==`*(x, y: uint32): bool {.magic: "EqI", noSideEffect.}
+proc `==`*(x, y: uint64): bool {.magic: "EqI", noSideEffect.}
+
+proc `<=`*(x, y: float32): bool {.magic: "LeF64", noSideEffect.}
+proc `<=`*(x, y: float): bool {.magic: "LeF64", noSideEffect.}
+
+proc `<`*(x, y: float32): bool {.magic: "LtF64", noSideEffect.}
+proc `<`*(x, y: float): bool {.magic: "LtF64", noSideEffect.}
+
+proc `==`*(x, y: float32): bool {.magic: "EqF64", noSideEffect.}
+proc `==`*(x, y: float): bool {.magic: "EqF64", noSideEffect.}
+
+{.push stackTrace: off.}
+
+proc min*(x, y: int): int {.magic: "MinI", noSideEffect.} =
+  if x <= y: x else: y
+proc min*(x, y: int8): int8 {.magic: "MinI", noSideEffect.} =
+  if x <= y: x else: y
+proc min*(x, y: int16): int16 {.magic: "MinI", noSideEffect.} =
+  if x <= y: x else: y
+proc min*(x, y: int32): int32 {.magic: "MinI", noSideEffect.} =
+  if x <= y: x else: y
+proc min*(x, y: int64): int64 {.magic: "MinI", noSideEffect.} =
+  ## The minimum value of two integers.
+  if x <= y: x else: y
+proc min*(x, y: float32): float32 {.noSideEffect, inline.} =
+  if x <= y or y != y: x else: y
+proc min*(x, y: float64): float64 {.noSideEffect, inline.} =
+  if x <= y or y != y: x else: y
+proc min*[T: not SomeFloat](x, y: T): T {.inline.} =
+  ## Generic minimum operator of 2 values based on `<=`.
+  if x <= y: x else: y
+
+proc max*(x, y: int): int {.magic: "MaxI", noSideEffect.} =
+  if y <= x: x else: y
+proc max*(x, y: int8): int8 {.magic: "MaxI", noSideEffect.} =
+  if y <= x: x else: y
+proc max*(x, y: int16): int16 {.magic: "MaxI", noSideEffect.} =
+  if y <= x: x else: y
+proc max*(x, y: int32): int32 {.magic: "MaxI", noSideEffect.} =
+  if y <= x: x else: y
+proc max*(x, y: int64): int64 {.magic: "MaxI", noSideEffect.} =
+  ## The maximum value of two integers.
+  if y <= x: x else: y
+proc max*(x, y: float32): float32 {.noSideEffect, inline.} =
+  if y <= x or y != y: x else: y
+proc max*(x, y: float64): float64 {.noSideEffect, inline.} =
+  if y <= x or y != y: x else: y
+proc max*[T: not SomeFloat](x, y: T): T {.inline.} =
+  ## Generic maximum operator of 2 values based on `<=`.
+  if y <= x: x else: y
+
+
+proc min*[T](x: openArray[T]): T =
+  ## The minimum value of `x`. `T` needs to have a `<` operator.
+  result = x[0]
+  for i in 1..high(x):
+    if x[i] < result: result = x[i]
+
+proc max*[T](x: openArray[T]): T =
+  ## The maximum value of `x`. `T` needs to have a `<` operator.
+  result = x[0]
+  for i in 1..high(x):
+    if result < x[i]: result = x[i]
+
+{.pop.} # stackTrace: off
+
+
+proc clamp*[T](x, a, b: T): T =
+  ## Limits the value `x` within the interval \[a, b].
+  ## This proc is equivalent to but faster than `max(a, min(b, x))`.
+  ## 
+  ## .. warning:: `a <= b` is assumed and will not be checked (currently).
+  ##
+  ## **See also:**
+  ## `math.clamp` for a version that takes a `Slice[T]` instead.
+  runnableExamples:
+    assert (1.4).clamp(0.0, 1.0) == 1.0
+    assert (0.5).clamp(0.0, 1.0) == 0.5
+    assert 4.clamp(1, 3) == max(1, min(3, 4))
+  if x < a: return a
+  if x > b: return b
+  return x
+
+
+proc `==`*[I, T](x, y: array[I, T]): bool =
+  for f in low(x)..high(x):
+    if x[f] != y[f]:
+      return
+  result = true
+
+proc `==`*[T](x, y: openArray[T]): bool =
+  if x.len != y.len:
+    return false
+  for f in low(x)..high(x):
+    if x[f] != y[f]:
+      return false
+  result = true
+
+
+proc `==`*[T](x, y: seq[T]): bool {.noSideEffect.} =
+  ## Generic equals operator for sequences: relies on an `==` operator for
+  ## the element type `T`.
+  when nimvm:
+    if x.len == 0 and y.len == 0:
+      return true
+  else:
+    when not defined(js):
+      proc seqToPtr[T](x: seq[T]): pointer {.inline, noSideEffect.} =
+        when defined(nimSeqsV2):
+          result = cast[NimSeqV2[T]](x).p
+        else:
+          result = cast[pointer](x)
+
+      if seqToPtr(x) == seqToPtr(y):
+        return true
+    else:
+      var sameObject = false
+      {.emit: """`sameObject` = `x` === `y`;""".}
+      if sameObject: return true
+
+  if x.len != y.len:
+    return false
+
+  for i in 0..x.len-1:
+    if x[i] != y[i]:
+      return false
+
+  return true
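One detail worth remembering from the operators above: the `%` variants compare
the underlying bit patterns as unsigned values, so negative numbers sort after
every non-negative number. A short sketch:

```nim
doAssert not (-1 <% 0)   # cast[uint](-1) is the largest possible uint
doAssert 0 <% -1
doAssert -1 >% 0         # `>%` is just `y <% x`
```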
diff --git a/lib/system/compilation.nim b/lib/system/compilation.nim
new file mode 100644
index 000000000..cdb976ed5
--- /dev/null
+++ b/lib/system/compilation.nim
@@ -0,0 +1,209 @@
+const
+  NimMajor* {.intdefine.}: int = 2
+    ## is the major number of Nim's version. Example:
+    ##   ```nim
+    ##   when (NimMajor, NimMinor, NimPatch) >= (1, 3, 1): discard
+    ##   ```
+    # see also std/private/since
+
+  NimMinor* {.intdefine.}: int = 2
+    ## is the minor number of Nim's version.
+    ## Odd for devel, even for releases.
+
+  NimPatch* {.intdefine.}: int = 1
+    ## is the patch number of Nim's version.
+    ## Odd for devel, even for releases.
+
+{.push profiler: off.}
+let nimvm* {.magic: "Nimvm", compileTime.}: bool = false
+  ## May be used only in `when` expression.
+  ## It is true in Nim VM context and false otherwise.
+{.pop.}
+
+const
+  isMainModule* {.magic: "IsMainModule".}: bool = false
+    ## True only when accessed in the main module. This works thanks to
+    ## compiler magic. It is useful to embed testing code in a module.
+
+  CompileDate* {.magic: "CompileDate".}: string = "0000-00-00"
+    ## The date (in UTC) of compilation as a string of the form
+    ## `YYYY-MM-DD`. This works thanks to compiler magic.
+
+  CompileTime* {.magic: "CompileTime".}: string = "00:00:00"
+    ## The time (in UTC) of compilation as a string of the form
+    ## `HH:MM:SS`. This works thanks to compiler magic.
+
+proc defined*(x: untyped): bool {.magic: "Defined", noSideEffect, compileTime.}
+  ## Special compile-time procedure that checks whether `x` is
+  ## defined.
+  ##
+  ## `x` is an external symbol introduced through the compiler's
+  ## `-d:x switch <nimc.html#compiler-usage-compileminustime-symbols>`_ to enable
+  ## build time conditionals:
+  ##   ```nim
+  ##   when not defined(release):
+  ##     # Do here programmer friendly expensive sanity checks.
+  ##   # Put here the normal code
+  ##   ```
+  ##
+  ## See also:
+  ## * `compileOption <#compileOption,string>`_ for `on|off` options
+  ## * `compileOption <#compileOption,string,string>`_ for enum options
+  ## * `define pragmas <manual.html#implementation-specific-pragmas-compileminustime-define-pragmas>`_
+
+proc declared*(x: untyped): bool {.magic: "Declared", noSideEffect, compileTime.}
+  ## Special compile-time procedure that checks whether `x` is
+  ## declared. `x` has to be an identifier or a qualified identifier.
+  ##
+  ## This can be used to check whether a library provides a certain
+  ## feature or not:
+  ##   ```nim
+  ##   when not declared(strutils.toUpper):
+  ##     # provide our own toUpper proc here, because strutils is
+  ##     # missing it.
+  ##   ```
+  ##
+  ## See also:
+  ## * `declaredInScope <#declaredInScope,untyped>`_
+
+proc declaredInScope*(x: untyped): bool {.magic: "DeclaredInScope", noSideEffect, compileTime.}
+  ## Special compile-time procedure that checks whether `x` is
+  ## declared in the current scope. `x` has to be an identifier.
+
+proc compiles*(x: untyped): bool {.magic: "Compiles", noSideEffect, compileTime.} =
+  ## Special compile-time procedure that checks whether `x` can be compiled
+  ## without any semantic error.
+  ## This can be used to check whether a type supports some operation:
+  ##   ```nim
+  ##   when compiles(3 + 4):
+  ##     echo "'+' for integers is available"
+  ##   ```
+  discard
+
+proc astToStr*[T](x: T): string {.magic: "AstToStr", noSideEffect.}
+  ## Converts the AST of `x` into a string representation. This is very useful
+  ## for debugging.
+
+proc runnableExamples*(rdoccmd = "", body: untyped) {.magic: "RunnableExamples".} =
+  ## A section you should use to mark `runnable example`:idx: code with.
+  ##
+  ## - In normal debug and release builds code within
+  ##   a `runnableExamples` section is ignored.
+  ## - The documentation generator is aware of these examples and considers them
+  ##   part of the `##` doc comment. As the last step of documentation
+  ##   generation each runnableExample is put in its own file `$file_examples$i.nim`,
+  ##   compiled and tested. The collected examples are
+  ##   put into their own module to ensure the examples do not refer to
+  ##   non-exported symbols.
+  runnableExamples:
+    proc timesTwo*(x: int): int =
+      ## This proc doubles a number.
+      runnableExamples:
+        # at module scope
+        const exported* = 123
+        assert timesTwo(5) == 10
+        block: # at block scope
+          defer: echo "done"
+      runnableExamples "-d:foo -b:cpp":
+        import std/compilesettings
+        assert querySetting(backend) == "cpp"
+        assert defined(foo)
+      runnableExamples "-r:off": ## this one is only compiled
+        import std/browsers
+        openDefaultBrowser "https://forum.nim-lang.org/"
+      2 * x
+
+proc compileOption*(option: string): bool {.
+  magic: "CompileOption", noSideEffect.} =
+  ## Can be used to determine an `on|off` compile-time option.
+  ##
+  ## See also:
+  ## * `compileOption <#compileOption,string,string>`_ for enum options
+  ## * `defined <#defined,untyped>`_
+  ## * `std/compilesettings module <compilesettings.html>`_
+  runnableExamples("--floatChecks:off"):
+    static: doAssert not compileOption("floatchecks")
+    {.push floatChecks: on.}
+    static: doAssert compileOption("floatchecks")
+    # floating point NaN and Inf checks enabled in this scope
+    {.pop.}
+
+proc compileOption*(option, arg: string): bool {.
+  magic: "CompileOptionArg", noSideEffect.} =
+  ## Can be used to determine an enum compile-time option.
+  ##
+  ## See also:
+  ## * `compileOption <#compileOption,string>`_ for `on|off` options
+  ## * `defined <#defined,untyped>`_
+  ## * `std/compilesettings module <compilesettings.html>`_
+  runnableExamples:
+    when compileOption("opt", "size") and compileOption("gc", "boehm"):
+      discard "compiled with optimization for size and uses Boehm's GC"
+
+template currentSourcePath*: string = instantiationInfo(-1, true).filename
+  ## Returns the full file-system path of the current source.
+  ##
+  ## To get the directory containing the current source, use it with
+  ## `ospaths2.parentDir() <ospaths2.html#parentDir%2Cstring>`_ as
+  ## `currentSourcePath.parentDir()`.
+  ##
+  ## The path returned by this template is set at compile time.
+  ##
+  ## See the docstring of `macros.getProjectPath() <macros.html#getProjectPath>`_
+  ## for an example to see the distinction between the `currentSourcePath()`
+  ## and `getProjectPath()`.
+  ##
+  ## See also:
+  ## * `ospaths2.getCurrentDir() proc <ospaths2.html#getCurrentDir>`_
+
+proc slurp*(filename: string): string {.magic: "Slurp".}
+  ## This is an alias for `staticRead <#staticRead,string>`_.
+
+proc staticRead*(filename: string): string {.magic: "Slurp".}
+  ## Compile-time `readFile <syncio.html#readFile,string>`_ proc for easy
+  ## `resource`:idx: embedding:
+  ##
+  ## The maximum file size that `staticRead` and `slurp` can read is
+  ## near or equal to the *free* memory of the device you are using to compile.
+  ##   ```nim
+  ##   const myResource = staticRead"mydatafile.bin"
+  ##   ```
+  ##
+  ## `slurp <#slurp,string>`_ is an alias for `staticRead`.
+
+proc gorge*(command: string, input = "", cache = ""): string {.
+  magic: "StaticExec".} = discard
+  ## This is an alias for `staticExec <#staticExec,string,string,string>`_.
+
+proc staticExec*(command: string, input = "", cache = ""): string {.
+  magic: "StaticExec".} = discard
+  ## Executes an external process at compile-time and returns its text output
+  ## (stdout + stderr).
+  ##
+  ## If `input` is not an empty string, it will be passed as a standard input
+  ## to the executed program.
+  ##   ```nim
+  ##   const buildInfo = "Revision " & staticExec("git rev-parse HEAD") &
+  ##                     "\nCompiled on " & staticExec("uname -v")
+  ##   ```
+  ##
+  ## `gorge <#gorge,string,string,string>`_ is an alias for `staticExec`.
+  ##
+  ## Note that you can use this proc inside a pragma like
+  ## `passc <manual.html#implementation-specific-pragmas-passc-pragma>`_ or
+  ## `passl <manual.html#implementation-specific-pragmas-passl-pragma>`_.
+  ##
+  ## If `cache` is not empty, the results of `staticExec` are cached within
+  ## the `nimcache` directory. Use `--forceBuild` to get rid of this caching
+  ## behaviour. `command & input & cache` (the concatenated string) is
+  ## used to determine whether the entry in the cache is still valid. You can
+  ## use versioning information for `cache`:
+  ##   ```nim
+  ##   const stateMachine = staticExec("dfaoptimizer", "input", "0.8.0")
+  ##   ```
+
+proc gorgeEx*(command: string, input = "", cache = ""): tuple[output: string,
+                                                              exitCode: int] =
+  ## Similar to `gorge <#gorge,string,string,string>`_ but also returns the
+  ## precious exit code.
+  discard
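Unlike `gorge`/`staticExec`, `gorgeEx` also exposes the exit code, which makes it
easy to probe the build environment at compile time. A sketch, assuming `git` is
available on the machine doing the compilation:

```nim
const res = gorgeEx("git --version")   # runs at compile time
when res.exitCode == 0:
  static: echo "building with ", res.output
```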
diff --git a/lib/system/coro_detection.nim b/lib/system/coro_detection.nim
new file mode 100644
index 000000000..f6c1b5c15
--- /dev/null
+++ b/lib/system/coro_detection.nim
@@ -0,0 +1,20 @@
+## Coroutine detection logic
+
+template coroutinesSupportedPlatform(): bool =
+  when defined(sparc) or defined(ELATE) or defined(boehmgc) or defined(gogc) or
+    defined(nogc) or defined(gcRegions) or defined(gcMarkAndSweep):
+    false
+  else:
+    true
+
+when defined(nimCoroutines):
+  # Explicit opt-in.
+  when not coroutinesSupportedPlatform():
+    {.error: "Coroutines are not supported on this architecture and/or garbage collector.".}
+  const nimCoroutines* = true
+elif defined(noNimCoroutines):
+  # Explicit opt-out.
+  const nimCoroutines* = false
+else:
+  # Autodetect coroutine support.
+  const nimCoroutines* = false
diff --git a/lib/system/countbits_impl.nim b/lib/system/countbits_impl.nim
new file mode 100644
index 000000000..34969cb32
--- /dev/null
+++ b/lib/system/countbits_impl.nim
@@ -0,0 +1,93 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2012 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Contains the used algorithms for counting bits.
+
+from std/private/bitops_utils import forwardImpl, castToUnsigned
+
+const useBuiltins* = not defined(noIntrinsicsBitOpts)
+const noUndefined* = defined(noUndefinedBitOpts)
+const useGCC_builtins* = (defined(gcc) or defined(llvm_gcc) or
+                         defined(clang)) and useBuiltins
+const useICC_builtins* = defined(icc) and useBuiltins
+const useVCC_builtins* = defined(vcc) and useBuiltins
+const arch64* = sizeof(int) == 8
+
+template countBitsImpl(n: uint32): int =
+  # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  var v = uint32(n)
+  v = v - ((v shr 1'u32) and 0x55555555'u32)
+  v = (v and 0x33333333'u32) + ((v shr 2'u32) and 0x33333333'u32)
+  (((v + (v shr 4'u32) and 0xF0F0F0F'u32) * 0x1010101'u32) shr 24'u32).int
+
+template countBitsImpl(n: uint64): int =
+  # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  var v = uint64(n)
+  v = v - ((v shr 1'u64) and 0x5555555555555555'u64)
+  v = (v and 0x3333333333333333'u64) + ((v shr 2'u64) and 0x3333333333333333'u64)
+  v = (v + (v shr 4'u64) and 0x0F0F0F0F0F0F0F0F'u64)
+  ((v * 0x0101010101010101'u64) shr 56'u64).int
+
+
+when useGCC_builtins:
+  # Returns the number of set 1-bits in value.
+  proc builtin_popcount(x: cuint): cint {.importc: "__builtin_popcount", cdecl.}
+  proc builtin_popcountll(x: culonglong): cint {.
+      importc: "__builtin_popcountll", cdecl.}
+
+elif useVCC_builtins:
+  # Counts the number of one bits (population count) in a 16-, 32-, or 64-bit unsigned integer.
+  func builtin_popcnt16(a2: uint16): uint16 {.
+      importc: "__popcnt16", header: "<intrin.h>".}
+  func builtin_popcnt32(a2: uint32): uint32 {.
+      importc: "__popcnt", header: "<intrin.h>".}
+  func builtin_popcnt64(a2: uint64): uint64 {.
+      importc: "__popcnt64", header: "<intrin.h>".}
+
+elif useICC_builtins:
+  # Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm
+  # see also: https://software.intel.com/en-us/node/523362
+  # Count the number of bits set to 1 in an integer a, and return that count in dst.
+  func builtin_popcnt32(a: cint): cint {.
+      importc: "_popcnt", header: "<immintrin.h>".}
+  func builtin_popcnt64(a: uint64): cint {.
+      importc: "_popcnt64", header: "<immintrin.h>".}
+
+
+func countSetBitsImpl*(x: SomeInteger): int {.inline.} =
+  ## Counts the set bits in an integer (also called `Hamming weight`:idx:).
+  # TODO: figure out if ICC supports _popcnt32/_popcnt64 on platforms without POPCNT,
+  # like GCC and MSVC do.
+  let x = x.castToUnsigned
+  when nimvm:
+    result = forwardImpl(countBitsImpl, x)
+  else:
+    when useGCC_builtins:
+      when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int
+      else: result = builtin_popcountll(x.culonglong).int
+    elif useVCC_builtins:
+      when sizeof(x) <= 2: result = builtin_popcnt16(x.uint16).int
+      elif sizeof(x) <= 4: result = builtin_popcnt32(x.uint32).int
+      elif arch64: result = builtin_popcnt64(x.uint64).int
+      else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32).int +
+                     builtin_popcnt32((x.uint64 shr 32'u64).uint32).int
+    elif useICC_builtins:
+      when sizeof(x) <= 4: result = builtin_popcnt32(x.cint).int
+      elif arch64: result = builtin_popcnt64(x.uint64).int
+      else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).cint).int +
+                     builtin_popcnt32((x.uint64 shr 32'u64).cint).int
+    else:
+      when sizeof(x) <= 4: result = countBitsImpl(x.uint32)
+      else: result = countBitsImpl(x.uint64)
+
+proc countBits32*(n: uint32): int {.compilerproc, inline.} =
+  result = countSetBitsImpl(n)
+
+proc countBits64*(n: uint64): int {.compilerproc, inline.} =
+  result = countSetBitsImpl(n)
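As a quick sanity check of the SWAR formula used by `countBitsImpl` above, the
public population-count API in `std/bitops` gives the expected results:

```nim
import std/bitops

doAssert countSetBits(0b1011_0001'u32) == 4             # bits 0, 4, 5 and 7 are set
doAssert countSetBits(0xF0F0_F0F0_F0F0_F0F0'u64) == 32  # 4 set bits per byte, 8 bytes
```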
diff --git a/lib/system/ctypes.nim b/lib/system/ctypes.nim
new file mode 100644
index 000000000..b788274bd
--- /dev/null
+++ b/lib/system/ctypes.nim
@@ -0,0 +1,84 @@
+## Some type definitions for compatibility between different
+## backends and platforms.
+
+type
+  BiggestInt* = int64
+    ## is an alias for the biggest signed integer type the Nim compiler
+    ## supports. Currently this is `int64`, but it is platform-dependent
+    ## in general.
+
+  BiggestFloat* = float64
+    ## is an alias for the biggest floating point type the Nim
+    ## compiler supports. Currently this is `float64`, but it is
+    ## platform-dependent in general.
+
+  BiggestUInt* = uint64
+    ## is an alias for the biggest unsigned integer type the Nim compiler
+    ## supports. Currently this is `uint64`, but it is platform-dependent
+    ## in general.
+
+when defined(windows):
+  type
+    clong* {.importc: "long", nodecl.} = int32
+      ## This is the same as the type `long` in *C*.
+    culong* {.importc: "unsigned long", nodecl.} = uint32
+      ## This is the same as the type `unsigned long` in *C*.
+else:
+  type
+    clong* {.importc: "long", nodecl.} = int
+      ## This is the same as the type `long` in *C*.
+    culong* {.importc: "unsigned long", nodecl.} = uint
+      ## This is the same as the type `unsigned long` in *C*.
+
+type # these work for most platforms:
+  cchar* {.importc: "char", nodecl.} = char
+    ## This is the same as the type `char` in *C*.
+  cschar* {.importc: "signed char", nodecl.} = int8
+    ## This is the same as the type `signed char` in *C*.
+  cshort* {.importc: "short", nodecl.} = int16
+    ## This is the same as the type `short` in *C*.
+  cint* {.importc: "int", nodecl.} = int32
+    ## This is the same as the type `int` in *C*.
+  csize_t* {.importc: "size_t", nodecl.} = uint
+    ## This is the same as the type `size_t` in *C*.
+  clonglong* {.importc: "long long", nodecl.} = int64
+    ## This is the same as the type `long long` in *C*.
+  cfloat* {.importc: "float", nodecl.} = float32
+    ## This is the same as the type `float` in *C*.
+  cdouble* {.importc: "double", nodecl.} = float64
+    ## This is the same as the type `double` in *C*.
+  clongdouble* {.importc: "long double", nodecl.} = BiggestFloat
+    ## This is the same as the type `long double` in *C*.
+    ## This C type is not supported by Nim's code generator.
+
+  cuchar* {.importc: "unsigned char", nodecl, deprecated: "use `char` or `uint8` instead".} = char
+    ## Deprecated: Use `uint8` instead.
+  cushort* {.importc: "unsigned short", nodecl.} = uint16
+    ## This is the same as the type `unsigned short` in *C*.
+  cuint* {.importc: "unsigned int", nodecl.} = uint32
+    ## This is the same as the type `unsigned int` in *C*.
+  culonglong* {.importc: "unsigned long long", nodecl.} = uint64
+    ## This is the same as the type `unsigned long long` in *C*.
+
+type
+  ByteAddress* {.deprecated: "use `uint`".} = int
+    ## is the signed integer type that should be used for converting
+    ## pointers to integer addresses for readability.
+
+  cstringArray* {.importc: "char**", nodecl.} = ptr UncheckedArray[cstring]
+    ## This is binary compatible to the type `char**` in *C*. The array's
+    ## high value is large enough to disable bounds checking in practice.
+    ## Use `cstringArrayToSeq proc <#cstringArrayToSeq,cstringArray,Natural>`_
+    ## to convert it into a `seq[string]`.
+
+when not defined(nimPreviewSlimSystem):
+  # pollutes namespace
+  type
+    PFloat32* {.deprecated: "use `ptr float32`".} = ptr float32
+      ## An alias for `ptr float32`.
+    PFloat64* {.deprecated: "use `ptr float64`".} = ptr float64
+      ## An alias for `ptr float64`.
+    PInt64* {.deprecated: "use `ptr int64`".} = ptr int64
+      ## An alias for `ptr int64`.
+    PInt32* {.deprecated: "use `ptr int32`".} = ptr int32
+      ## An alias for `ptr int32`.
diff --git a/lib/system/cyclebreaker.nim b/lib/system/cyclebreaker.nim
new file mode 100644
index 000000000..45b0a5a65
--- /dev/null
+++ b/lib/system/cyclebreaker.nim
@@ -0,0 +1,184 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2020 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+#[
+A Cycle breaker for Nim
+-----------------------
+
+Instead of "collecting" cycles with all of its pitfalls we will break cycles.
+We exploit that every 'ref' can be 'nil' for this and so get away without
+a distinction between weak and strong pointers. The required runtime
+mechanisms are the same though: We need to be able to traverse the graph.
+This design has the tremendous benefit that it doesn't require a dedicated
+'rawDispose' operation and that it plays well with Nim's cost model.
+The cost of freeing a subgraph with cycles is 2 * N rather than N, that's all.
+
+Cycles do not have to be prepared via .acyclic and there are no multiple
+pointless traversals; only a single proc, `breakCycles`, is exposed by this
+separate module.
+
+Algorithm
+---------
+
+We traverse the graph and mark the nodes we have already visited. If a node
+is already marked, we set the pointer that leads to it to 'nil'
+and decrement the reference count of the cell it pointed at.
+
+Note that multiple paths to the same object do not mean
+we found a cycle; it only means the node is shared.
+
+
+   a -------> b <----- c
+   |          ^        ^
+   +----------+        |
+   |                   |
+   +-------------------+
+
+If we simply remove all links to already processed nodes we end up with:
+
+   a -------> b        c
+   |                   ^
+   +                   |
+   |                   |
+   +-------------------+
+
+That seems acceptable; no leak is produced. This implies that the standard
+depth-first traversal suffices.
+
+]#
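To make the algorithm sketched above concrete, here is a minimal usage sketch of the exported `thinout` proc defined further down in this file. It assumes a build that actually selects this cycle-breaker runtime (the exact compiler switches depend on the Nim version); the `Node` type is invented for the example:

```Nim
type
  Node = ref object
    next: Node

var a = Node()
a.next = Node()
a.next.next = a   # a back pointer: a cycle that plain reference counting would leak
thinout(a)        # the depth-first traversal nils the link to the already-visited node
assert a.next.next == nil
```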
+
+include cellseqs_v2
+
+const
+  colGreen = 0b000
+  colYellow = 0b001
+  colRed = 0b010
+  colorMask = 0b011
+
+type
+  TraceProc = proc (p, env: pointer) {.nimcall, benign.}
+  DisposeProc = proc (p: pointer) {.nimcall, benign.}
+
+template color(c): untyped = c.rc and colorMask
+template setColor(c, col) =
+  c.rc = c.rc and not colorMask or col
+
+proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
+  let h = head(p)
+  inc h.rc, rcIncrement
+
+proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} = discard
+
+type
+  GcEnv = object
+    traceStack: CellSeq[ptr pointer]
+
+proc trace(p: pointer; desc: PNimTypeV2; j: var GcEnv) {.inline.} =
+  when false:
+    cprintf("[Trace] desc: %p %p\n", desc, p)
+    cprintf("[Trace] trace: %p\n", desc.traceImpl)
+  if desc.traceImpl != nil:
+    cast[TraceProc](desc.traceImpl)(p, addr(j))
+
+proc nimTraceRef(q: pointer; desc: PNimTypeV2; env: pointer) {.compilerRtl.} =
+  let p = cast[ptr pointer](q)
+  when traceCollector:
+    cprintf("[Trace] raw: %p\n", p)
+    cprintf("[Trace] deref: %p\n", p[])
+  if p[] != nil:
+    var j = cast[ptr GcEnv](env)
+    j.traceStack.add(p, desc)
+
+proc nimTraceRefDyn(q: pointer; env: pointer) {.compilerRtl.} =
+  let p = cast[ptr pointer](q)
+  when traceCollector:
+    cprintf("[TraceDyn] raw: %p\n", p)
+    cprintf("[TraceDyn] deref: %p\n", p[])
+  if p[] != nil:
+    var j = cast[ptr GcEnv](env)
+    j.traceStack.add(p, cast[ptr PNimTypeV2](p[])[])
+
+var markerGeneration: int
+
+proc breakCycles(s: Cell; desc: PNimTypeV2) =
+  let markerColor = if (markerGeneration and 1) == 0: colRed
+                    else: colYellow
+  atomicInc markerGeneration
+  when traceCollector:
+    cprintf("[BreakCycles] starting: %p %s RC %ld trace proc %p\n",
+      s, desc.name, s.rc shr rcShift, desc.traceImpl)
+
+  var j: GcEnv
+  init j.traceStack
+  s.setColor markerColor
+  trace(s +! sizeof(RefHeader), desc, j)
+
+  while j.traceStack.len > 0:
+    let (u, desc) = j.traceStack.pop()
+    let p = u[]
+    let t = head(p)
+    if t.color != markerColor:
+      t.setColor markerColor
+      trace(p, desc, j)
+      when traceCollector:
+        cprintf("[BreakCycles] followed: %p RC %ld\n", t, t.rc shr rcShift)
+    else:
+      if (t.rc shr rcShift) > 0:
+        dec t.rc, rcIncrement
+        # mark as a link that the produced destructor does not have to follow:
+        u[] = nil
+        when traceCollector:
+          cprintf("[BreakCycles] niled out: %p RC %ld\n", t, t.rc shr rcShift)
+      else:
+        # mark it anyway as a link that the produced destructor does not have to follow:
+        u[] = nil
+        when traceCollector:
+          cprintf("[Bug] %p %s RC %ld\n", t, desc.name, t.rc shr rcShift)
+  deinit j.traceStack
+
+proc thinout*[T](x: ref T) {.inline.} =
+  ## turn the subgraph starting with `x` into its spanning tree by
+  ## `nil`'ing out any pointers that would harm the spanning tree
+  ## structure. Any back pointers that introduced cycles
+  ## and thus would keep the graph from being freed are `nil`'ed.
+  ## This is a form of cycle collection that works well with Nim's ARC
+  ## and its associated cost model.
+  proc getDynamicTypeInfo[T](x: T): PNimTypeV2 {.magic: "GetTypeInfoV2", noSideEffect.}
+
+  breakCycles(head(cast[pointer](x)), getDynamicTypeInfo(x[]))
+
+proc thinout*[T: proc](x: T) {.inline.} =
+  proc rawEnv[T: proc](x: T): pointer {.noSideEffect, inline.} =
+    {.emit: """
+    `result` = `x`.ClE_0;
+    """.}
+
+  let p = rawEnv(x)
+  breakCycles(head(p), cast[ptr PNimTypeV2](p)[])
+
+proc nimDecRefIsLastCyclicDyn(p: pointer): bool {.compilerRtl, inl.} =
+  if p != nil:
+    var cell = head(p)
+    if (cell.rc and not rcMask) == 0:
+      result = true
+      #cprintf("[DESTROY] %p\n", p)
+    else:
+      dec cell.rc, rcIncrement
+      # According to Lins it's correct to do nothing else here.
+      #cprintf("[DeCREF] %p\n", p)
+
+proc nimDecRefIsLastCyclicStatic(p: pointer; desc: PNimTypeV2): bool {.compilerRtl, inl.} =
+  if p != nil:
+    var cell = head(p)
+    if (cell.rc and not rcMask) == 0:
+      result = true
+      #cprintf("[DESTROY] %p %s\n", p, desc.name)
+    else:
+      dec cell.rc, rcIncrement
+      #cprintf("[DeCREF] %p %s %ld\n", p, desc.name, cell.rc)
diff --git a/lib/system/debugger.nim b/lib/system/debugger.nim
deleted file mode 100644
index 937c0d6f0..000000000
--- a/lib/system/debugger.nim
+++ /dev/null
@@ -1,305 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2013 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## This file implements basic features for any debugger.
-
-type
-  VarSlot* {.compilerproc, final.} = object ## a slot in a frame
-    address*: pointer ## the variable's address
-    typ*: PNimType    ## the variable's type
-    name*: cstring    ## the variable's name; for globals this is "module.name"
-
-  PExtendedFrame = ptr ExtendedFrame
-  ExtendedFrame = object  # If the debugger is enabled the compiler
-                           # provides an extended frame. Of course
-                           # only slots that are
-                           # needed are allocated and not 10_000,
-                           # except for the global data description.
-    f: TFrame
-    slots: array[0..10_000, VarSlot]
-{.deprecated: [TVarSlot: VarSlot, TExtendedFrame: ExtendedFrame].}
-
-var
-  dbgGlobalData: ExtendedFrame  # this reserves much space, but
-                                # for now it is the most practical way
-
-proc dbgRegisterGlobal(name: cstring, address: pointer,
-                       typ: PNimType) {.compilerproc.} =
-  let i = dbgGlobalData.f.len
-  if i >= high(dbgGlobalData.slots):
-    #debugOut("[Warning] cannot register global ")
-    return
-  dbgGlobalData.slots[i].name = name
-  dbgGlobalData.slots[i].typ = typ
-  dbgGlobalData.slots[i].address = address
-  inc(dbgGlobalData.f.len)
-
-proc getLocal*(frame: PFrame; slot: int): VarSlot {.inline.} =
-  ## retrieves the meta data for the local variable at `slot`. CAUTION: An
-  ## invalid `slot` value causes a corruption!
-  result = cast[PExtendedFrame](frame).slots[slot]
-
-proc getGlobalLen*(): int {.inline.} =
-  ## gets the number of registered globals.
-  result = dbgGlobalData.f.len
-
-proc getGlobal*(slot: int): VarSlot {.inline.} =
-  ## retrieves the meta data for the global variable at `slot`. CAUTION: An
-  ## invalid `slot` value causes a corruption!
-  result = dbgGlobalData.slots[slot]
-
-# ------------------- breakpoint support ------------------------------------
-
-type
-  Breakpoint* = object   ## represents a break point
-    low*, high*: int     ## range from low to high; if disabled
-                         ## both low and high are set to their negative values
-    filename*: cstring   ## the filename of the breakpoint
-
-var
-  dbgBP: array[0..127, Breakpoint] # breakpoints
-  dbgBPlen: int
-  dbgBPbloom: int64  # we use a bloom filter to speed up breakpoint checking
-
-  dbgFilenames*: array[0..300, cstring] ## registered filenames;
-                                        ## 'nil' terminated
-  dbgFilenameLen: int
-
-proc dbgRegisterFilename(filename: cstring) {.compilerproc.} =
-  # XXX we could check for duplicates here for DLL support
-  dbgFilenames[dbgFilenameLen] = filename
-  inc dbgFilenameLen
-
-proc dbgRegisterBreakpoint(line: int,
-                           filename, name: cstring) {.compilerproc.} =
-  let x = dbgBPlen
-  if x >= high(dbgBP):
-    #debugOut("[Warning] cannot register breakpoint")
-    return
-  inc(dbgBPlen)
-  dbgBP[x].filename = filename
-  dbgBP[x].low = line
-  dbgBP[x].high = line
-  dbgBPbloom = dbgBPbloom or line
-
-proc addBreakpoint*(filename: cstring, lo, hi: int): bool =
-  let x = dbgBPlen
-  if x >= high(dbgBP): return false
-  inc(dbgBPlen)
-  result = true
-  dbgBP[x].filename = filename
-  dbgBP[x].low = lo
-  dbgBP[x].high = hi
-  for line in lo..hi: dbgBPbloom = dbgBPbloom or line
-
-const
-  FileSystemCaseInsensitive = defined(windows) or defined(dos) or defined(os2)
-
-proc fileMatches(c, bp: cstring): bool =
-  # bp = breakpoint filename
-  # c = current filename
-  # we consider it a match if bp is a suffix of c
-  # and the character for the suffix does not exist or
-  # is one of: \  /  :
-  # depending on the OS case does not matter!
-  var blen: int = bp.len
-  var clen: int = c.len
-  if blen > clen: return false
-  # check for \ /  :
-  if clen-blen-1 >= 0 and c[clen-blen-1] notin {'\\', '/', ':'}:
-    return false
-  var i = 0
-  while i < blen:
-    var x = bp[i]
-    var y = c[i+clen-blen]
-    when FileSystemCaseInsensitive:
-      if x >= 'A' and x <= 'Z': x = chr(ord(x) - ord('A') + ord('a'))
-      if y >= 'A' and y <= 'Z': y = chr(ord(y) - ord('A') + ord('a'))
-    if x != y: return false
-    inc(i)
-  return true
-
-proc canonFilename*(filename: cstring): cstring =
-  ## returns 'nil' if the filename cannot be found.
-  for i in 0 .. dbgFilenameLen-1:
-    result = dbgFilenames[i]
-    if fileMatches(result, filename): return result
-  result = nil
-
-iterator listBreakpoints*(): ptr Breakpoint =
-  ## lists all breakpoints.
-  for i in 0..dbgBPlen-1: yield addr(dbgBP[i])
-
-proc isActive*(b: ptr Breakpoint): bool = b.low > 0
-proc flip*(b: ptr Breakpoint) =
-  ## enables or disables 'b' depending on its current state.
-  b.low = -b.low; b.high = -b.high
-
-proc checkBreakpoints*(filename: cstring, line: int): ptr Breakpoint =
-  ## in which breakpoint (if any) we are.
-  if (dbgBPbloom and line) != line: return nil
-  for b in listBreakpoints():
-    if line >= b.low and line <= b.high and filename == b.filename: return b
-
-# ------------------- watchpoint support ------------------------------------
-
-type
-  Hash = int
-  Watchpoint {.pure, final.} = object
-    name: cstring
-    address: pointer
-    typ: PNimType
-    oldValue: Hash
-{.deprecated: [THash: Hash, TWatchpoint: Watchpoint].}
-
-var
-  watchpoints: array[0..99, Watchpoint]
-  watchpointsLen: int
-
-proc `!&`(h: Hash, val: int): Hash {.inline.} =
-  result = h +% val
-  result = result +% result shl 10
-  result = result xor (result shr 6)
-
-proc `!$`(h: Hash): Hash {.inline.} =
-  result = h +% h shl 3
-  result = result xor (result shr 11)
-  result = result +% result shl 15
-
-proc hash(data: pointer, size: int): Hash =
-  var h: Hash = 0
-  var p = cast[cstring](data)
-  var i = 0
-  var s = size
-  while s > 0:
-    h = h !& ord(p[i])
-    inc(i)
-    dec(s)
-  result = !$h
-
-proc hashGcHeader(data: pointer): Hash =
-  const headerSize = sizeof(int)*2
-  result = hash(cast[pointer](cast[int](data) -% headerSize), headerSize)
-
-proc genericHashAux(dest: pointer, mt: PNimType, shallow: bool,
-                    h: Hash): Hash
-proc genericHashAux(dest: pointer, n: ptr TNimNode, shallow: bool,
-                    h: Hash): Hash =
-  var d = cast[ByteAddress](dest)
-  case n.kind
-  of nkSlot:
-    result = genericHashAux(cast[pointer](d +% n.offset), n.typ, shallow, h)
-  of nkList:
-    result = h
-    for i in 0..n.len-1:
-      result = result !& genericHashAux(dest, n.sons[i], shallow, result)
-  of nkCase:
-    result = h !& hash(cast[pointer](d +% n.offset), n.typ.size)
-    var m = selectBranch(dest, n)
-    if m != nil: result = genericHashAux(dest, m, shallow, result)
-  of nkNone: sysAssert(false, "genericHashAux")
-
-proc genericHashAux(dest: pointer, mt: PNimType, shallow: bool,
-                    h: Hash): Hash =
-  sysAssert(mt != nil, "genericHashAux 2")
-  case mt.kind
-  of tyString:
-    var x = cast[PPointer](dest)[]
-    result = h
-    if x != nil:
-      let s = cast[NimString](x)
-      when defined(trackGcHeaders):
-        result = result !& hashGcHeader(x)
-      else:
-        result = result !& hash(x, s.len)
-  of tySequence:
-    var x = cast[PPointer](dest)
-    var dst = cast[ByteAddress](cast[PPointer](dest)[])
-    result = h
-    if dst != 0:
-      when defined(trackGcHeaders):
-        result = result !& hashGcHeader(cast[PPointer](dest)[])
-      else:
-        for i in 0..cast[PGenericSeq](dst).len-1:
-          result = result !& genericHashAux(
-            cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize),
-            mt.base, shallow, result)
-  of tyObject, tyTuple:
-    # we don't need to copy m_type field for tyObject, as they are equal anyway
-    result = genericHashAux(dest, mt.node, shallow, h)
-  of tyArray, tyArrayConstr:
-    let d = cast[ByteAddress](dest)
-    result = h
-    for i in 0..(mt.size div mt.base.size)-1:
-      result = result !& genericHashAux(cast[pointer](d +% i*% mt.base.size),
-                                        mt.base, shallow, result)
-  of tyRef:
-    when defined(trackGcHeaders):
-      var s = cast[PPointer](dest)[]
-      if s != nil:
-        result = result !& hashGcHeader(s)
-    else:
-      if shallow:
-        result = h !& hash(dest, mt.size)
-      else:
-        result = h
-        var s = cast[PPointer](dest)[]
-        if s != nil:
-          result = result !& genericHashAux(s, mt.base, shallow, result)
-  else:
-    result = h !& hash(dest, mt.size) # hash raw bits
-
-proc genericHash(dest: pointer, mt: PNimType): int =
-  result = genericHashAux(dest, mt, false, 0)
-
-proc dbgRegisterWatchpoint(address: pointer, name: cstring,
-                           typ: PNimType) {.compilerproc.} =
-  let L = watchPointsLen
-  for i in 0 .. pred(L):
-    if watchPoints[i].name == name:
-      # address may have changed:
-      watchPoints[i].address = address
-      return
-  if L >= watchPoints.high:
-    #debugOut("[Warning] cannot register watchpoint")
-    return
-  watchPoints[L].name = name
-  watchPoints[L].address = address
-  watchPoints[L].typ = typ
-  watchPoints[L].oldValue = genericHash(address, typ)
-  inc watchPointsLen
-
-proc dbgUnregisterWatchpoints*() =
-  watchPointsLen = 0
-
-var
-  dbgLineHook*: proc () {.nimcall.}
-    ## set this variable to provide a procedure that should be called before
-    ## each executed instruction. This should only be used by debuggers!
-    ## Only code compiled with the ``debugger:on`` switch calls this hook.
-
-  dbgWatchpointHook*: proc (watchpointName: cstring) {.nimcall.}
-
-proc checkWatchpoints =
-  let L = watchPointsLen
-  for i in 0 .. pred(L):
-    let newHash = genericHash(watchPoints[i].address, watchPoints[i].typ)
-    if newHash != watchPoints[i].oldValue:
-      dbgWatchpointHook(watchPoints[i].name)
-      watchPoints[i].oldValue = newHash
-
-proc endb(line: int, file: cstring) {.compilerproc, noinline.} =
-  # This proc is called before every Nim code line!
-  if framePtr == nil: return
-  if dbgWatchpointHook != nil: checkWatchpoints()
-  framePtr.line = line # this is done here for smaller code size!
-  framePtr.filename = file
-  if dbgLineHook != nil: dbgLineHook()
-
-include "system/endb"
diff --git a/lib/system/deepcopy.nim b/lib/system/deepcopy.nim
index 750da00cf..72d35f518 100644
--- a/lib/system/deepcopy.nim
+++ b/lib/system/deepcopy.nim
@@ -7,10 +7,13 @@
 #    distribution, for details about the copyright.
 #
 
+const
+  TableSize = when sizeof(int) <= 2: 0xff else: 0xff_ffff
+
 type
   PtrTable = ptr object
     counter, max: int
-    data: array[0xff_ffff, (pointer, pointer)]
+    data: array[TableSize, (pointer, pointer)]
 
 template hashPtr(key: pointer): int = cast[int](key) shr 8
 template allocPtrTable: untyped =
@@ -58,8 +61,8 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType;
 proc genericDeepCopyAux(dest, src: pointer, n: ptr TNimNode;
                         tab: var PtrTable) {.benign.} =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   case n.kind
   of nkSlot:
     genericDeepCopyAux(cast[pointer](d +% n.offset),
@@ -82,33 +85,40 @@ proc genericDeepCopyAux(dest, src: pointer, n: ptr TNimNode;
 
 proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   sysAssert(mt != nil, "genericDeepCopyAux 2")
   case mt.kind
   of tyString:
-    var x = cast[PPointer](dest)
-    var s2 = cast[PPointer](s)[]
-    if s2 == nil:
-      unsureAsgnRef(x, s2)
+    when defined(nimSeqsV2):
+      var x = cast[ptr NimStringV2](dest)
+      var s2 = cast[ptr NimStringV2](s)[]
+      nimAsgnStrV2(x[], s2)
     else:
-      unsureAsgnRef(x, copyDeepString(cast[NimString](s2)))
+      var x = cast[PPointer](dest)
+      var s2 = cast[PPointer](s)[]
+      if s2 == nil:
+        unsureAsgnRef(x, s2)
+      else:
+        unsureAsgnRef(x, copyDeepString(cast[NimString](s2)))
   of tySequence:
-    var s2 = cast[PPointer](src)[]
-    var seq = cast[PGenericSeq](s2)
-    var x = cast[PPointer](dest)
-    if s2 == nil:
-      unsureAsgnRef(x, s2)
-      return
-    sysAssert(dest != nil, "genericDeepCopyAux 3")
-    unsureAsgnRef(x, newSeq(mt, seq.len))
-    var dst = cast[ByteAddress](cast[PPointer](dest)[])
-    for i in 0..seq.len-1:
-      genericDeepCopyAux(
-        cast[pointer](dst +% i *% mt.base.size +% GenericSeqSize),
-        cast[pointer](cast[ByteAddress](s2) +% i *% mt.base.size +%
-                     GenericSeqSize),
-        mt.base, tab)
+    when defined(nimSeqsV2):
+      deepSeqAssignImpl(genericDeepCopyAux, tab)
+    else:
+      var s2 = cast[PPointer](src)[]
+      var seq = cast[PGenericSeq](s2)
+      var x = cast[PPointer](dest)
+      if s2 == nil:
+        unsureAsgnRef(x, s2)
+        return
+      sysAssert(dest != nil, "genericDeepCopyAux 3")
+      unsureAsgnRef(x, newSeq(mt, seq.len))
+      var dst = cast[int](cast[PPointer](dest)[])
+      for i in 0..seq.len-1:
+        genericDeepCopyAux(
+          cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
+          cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
+          mt.base, tab)
   of tyObject:
     # we need to copy m_type field for tyObject, as it could be empty for
     # sequence reallocations:
@@ -124,13 +134,16 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) =
     for i in 0..(mt.size div mt.base.size)-1:
       genericDeepCopyAux(cast[pointer](d +% i *% mt.base.size),
                          cast[pointer](s +% i *% mt.base.size), mt.base, tab)
-  of tyRef, tyOptAsRef:
+  of tyRef:
     let s2 = cast[PPointer](src)[]
     if s2 == nil:
       unsureAsgnRef(cast[PPointer](dest), s2)
     elif mt.base.deepcopy != nil:
       let z = mt.base.deepcopy(s2)
-      unsureAsgnRef(cast[PPointer](dest), z)
+      when defined(nimSeqsV2):
+        cast[PPointer](dest)[] = z
+      else:
+        unsureAsgnRef(cast[PPointer](dest), z)
     else:
       let z = tab.get(s2)
       if z == nil:
@@ -147,10 +160,16 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) =
             let x = usrToCell(s2)
             let realType = x.typ
             sysAssert realType == mt, " types do differ"
-          # this version should work for any possible GC:
-          let typ = if mt.base.kind == tyObject: cast[ptr PNimType](s2)[] else: mt.base
-          let z = newObj(mt, typ.size)
-          unsureAsgnRef(cast[PPointer](dest), z)
+          when defined(nimSeqsV2):
+            let typ = if mt.base.kind == tyObject: cast[PNimType](cast[ptr PNimTypeV2](s2)[].typeInfoV1)
+                      else: mt.base
+            let z = nimNewObj(typ.size, typ.align)
+            cast[PPointer](dest)[] = z
+          else:
+            # this version should work for any other GC:
+            let typ = if mt.base.kind == tyObject: cast[ptr PNimType](s2)[] else: mt.base
+            let z = newObj(mt, typ.size)
+            unsureAsgnRef(cast[PPointer](dest), z)
           tab.put(s2, z)
           genericDeepCopyAux(z, s2, typ, tab)
       else:
@@ -165,14 +184,14 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) =
   else:
     copyMem(dest, src, mt.size)
 
-proc genericDeepCopy(dest, src: pointer, mt: PNimType) {.compilerProc.} =
-  GC_disable()
+proc genericDeepCopy(dest, src: pointer, mt: PNimType) {.compilerproc.} =
+  when not defined(nimSeqsV2): GC_disable()
   var tab = initPtrTable()
   genericDeepCopyAux(dest, src, mt, tab)
   deinit tab
-  GC_enable()
+  when not defined(nimSeqsV2): GC_enable()
 
-proc genericSeqDeepCopy(dest, src: pointer, mt: PNimType) {.compilerProc.} =
+proc genericSeqDeepCopy(dest, src: pointer, mt: PNimType) {.compilerproc.} =
   # also invoked for 'string'
   var src = src
   genericDeepCopy(dest, addr(src), mt)
@@ -180,8 +199,8 @@ proc genericSeqDeepCopy(dest, src: pointer, mt: PNimType) {.compilerProc.} =
 proc genericDeepCopyOpenArray(dest, src: pointer, len: int,
                             mt: PNimType) {.compilerproc.} =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   for i in 0..len-1:
     genericDeepCopy(cast[pointer](d +% i *% mt.base.size),
                     cast[pointer](s +% i *% mt.base.size), mt.base)
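The pointer table used throughout `genericDeepCopyAux` maps every source pointer to its copy, so shared substructure (and hence cycles) survives a deep copy instead of being duplicated. A minimal sketch of the observable effect, assuming `deepCopy` is enabled for the chosen memory management mode (under ARC/ORC this requires `--deepcopy:on`); the `Node`/`Pair` types are invented for the example:

```Nim
type
  Node = ref object
    data: int
  Pair = object
    a, b: Node

let shared = Node(data: 1)
let p = Pair(a: shared, b: shared)   # both fields reference the same node
var q: Pair
deepCopy(q, p)
assert q.a == q.b                    # sharing is preserved in the copy ...
assert q.a != p.a                    # ... but with a freshly allocated node
```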
diff --git a/lib/system/dollars.nim b/lib/system/dollars.nim
new file mode 100644
index 000000000..89a739d5a
--- /dev/null
+++ b/lib/system/dollars.nim
@@ -0,0 +1,147 @@
+## `$` is Nim's general way of spelling `toString`:idx:.
+runnableExamples:
+  assert $0.1 == "0.1"
+  assert $(-2*3) == "-6"
+
+import std/private/[digitsutils, miscdollars]
+
+when not defined(nimPreviewSlimSystem):
+  import std/formatfloat
+  export addFloat
+
+  func `$`*(x: float | float32): string =
+    ## Outplace version of `addFloat`.
+    result.addFloat(x)
+
+proc `$`*(x: int): string {.raises: [].} =
+  ## Outplace version of `addInt`.
+  result.addInt(x)
+
+proc `$`*(x: int64): string {.raises: [].} =
+  ## Outplace version of `addInt`.
+  result.addInt(x)
+
+proc `$`*(x: uint64): string {.raises: [].} =
+  ## Outplace version of `addInt`.
+  addInt(result, x)
+
+# same as old `ctfeWhitelist` behavior, whether or not this is a good idea.
+template gen(T) =
+  # xxx simplify this by supporting this in compiler: int{lit} | uint64{lit} | int64{lit}
+  func `$`*(x: T{lit}): string {.compileTime.} = result.addInt(x)
+gen(int)
+gen(uint64)
+gen(int64)
+
+
+proc `$`*(x: bool): string {.magic: "BoolToStr", noSideEffect.}
+  ## The stringify operator for a boolean argument. Returns `x`
+  ## converted to the string "false" or "true".
+
+proc `$`*(x: char): string {.magic: "CharToStr", noSideEffect.}
+  ## The stringify operator for a character argument. Returns `x`
+  ## converted to a string.
+  ##   ```Nim
+  ##   assert $'c' == "c"
+  ##   ```
+
+proc `$`*(x: cstring): string {.magic: "CStrToStr", noSideEffect.}
+  ## The stringify operator for a CString argument. Returns `x`
+  ## converted to a string.
+
+proc `$`*(x: string): string {.magic: "StrToStr", noSideEffect.}
+  ## The stringify operator for a string argument. Returns `x`
+  ## as it is. This operator is useful for generic code, so
+  ## that `$expr` also works if `expr` is already a string.
+
+proc `$`*[Enum: enum](x: Enum): string {.magic: "EnumToStr", noSideEffect.}
+  ## The stringify operator for an enumeration argument. This works for
+  ## any enumeration type thanks to compiler magic.
+  ##
+  ## If a `$` operator for a concrete enumeration is provided, this is
+  ## used instead. (In other words: *Overwriting* is possible.)
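A small sketch of the overriding rule mentioned above, with an invented enum: once a `$` for the concrete enum type is in scope, it is preferred over the generic `EnumToStr` magic.

```Nim
type Color = enum red, green, blue

assert $green == "green"      # default rendering via the EnumToStr magic

proc `$`(c: Color): string =
  "Color." & $ord(c)          # using $ord avoids calling ourselves recursively

assert $green == "Color.1"    # the concrete overload now wins
```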
+
+proc `$`*(t: typedesc): string {.magic: "TypeTrait".}
+  ## Returns the name of the given type.
+  ##
+  ## For more procedures dealing with `typedesc`, see
+  ## `typetraits module <typetraits.html>`_.
+  ##
+  ##   ```Nim
+  ##   doAssert $(typeof(42)) == "int"
+  ##   doAssert $(typeof("Foo")) == "string"
+  ##   static: doAssert $(typeof(@['A', 'B'])) == "seq[char]"
+  ##   ```
+
+proc `$`*[T: tuple](x: T): string =
+  ## Generic `$` operator for tuples that is lifted from the components
+  ## of `x`. Example:
+  ##   ```Nim
+  ##   $(23, 45) == "(23, 45)"
+  ##   $(a: 23, b: 45) == "(a: 23, b: 45)"
+  ##   $() == "()"
+  ##   ```
+  tupleObjectDollar(result, x)
+
+when not defined(nimPreviewSlimSystem):
+  import std/objectdollar
+  export objectdollar
+
+proc collectionToString[T](x: T, prefix, separator, suffix: string): string =
+  result = prefix
+  var firstElement = true
+  for value in items(x):
+    if firstElement:
+      firstElement = false
+    else:
+      result.add(separator)
+
+    when value isnot string and value isnot seq and compiles(value.isNil):
+      # this branch should not be necessary
+      if value.isNil:
+        result.add "nil"
+      else:
+        result.addQuoted(value)
+    else:
+      result.addQuoted(value)
+  result.add(suffix)
+
+proc `$`*[T](x: set[T]): string =
+  ## Generic `$` operator for sets that is lifted from the components
+  ## of `x`. Example:
+  ##   ```Nim
+  ##   ${23, 45} == "{23, 45}"
+  ##   ```
+  collectionToString(x, "{", ", ", "}")
+
+proc `$`*[T](x: seq[T]): string =
+  ## Generic `$` operator for seqs that is lifted from the components
+  ## of `x`. Example:
+  ##   ```Nim
+  ##   $(@[23, 45]) == "@[23, 45]"
+  ##   ```
+  collectionToString(x, "@[", ", ", "]")
+
+proc `$`*[T, U](x: HSlice[T, U]): string =
+  ## Generic `$` operator for slices that is lifted from the components
+  ## of `x`. Example:
+  ##   ```Nim
+  ##   $(1 .. 5) == "1 .. 5"
+  ##   ```
+  result = $x.a
+  result.add(" .. ")
+  result.add($x.b)
+
+
+when not defined(nimNoArrayToString):
+  proc `$`*[T, IDX](x: array[IDX, T]): string =
+    ## Generic `$` operator for arrays that is lifted from the components.
+    collectionToString(x, "[", ", ", "]")
+
+proc `$`*[T](x: openArray[T]): string =
+  ## Generic `$` operator for openarrays that is lifted from the components
+  ## of `x`. Example:
+  ##   ```Nim
+  ##   $(@[23, 45].toOpenArray(0, 1)) == "[23, 45]"
+  ##   ```
+  collectionToString(x, "[", ", ", "]")
diff --git a/lib/system/dyncalls.nim b/lib/system/dyncalls.nim
index f1ff307da..2162b234f 100644
--- a/lib/system/dyncalls.nim
+++ b/lib/system/dyncalls.nim
@@ -17,35 +17,44 @@
 const
   NilLibHandle: LibHandle = nil
 
-proc c_fwrite(buf: pointer, size, n: csize, f: File): cint {.
-  importc: "fwrite", header: "<stdio.h>".}
-
-proc rawWrite(f: File, s: string) =
-  # we cannot throw an exception here!
-  discard c_fwrite(cstring(s), 1, s.len, f)
-
 proc nimLoadLibraryError(path: string) =
   # carefully written to avoid memory allocation:
-  stderr.rawWrite("could not load: ")
-  stderr.rawWrite(path)
-  stderr.rawWrite("\n")
+  const prefix = "could not load: "
+  cstderr.rawWrite(prefix)
+  cstderr.rawWrite(path)
   when not defined(nimDebugDlOpen) and not defined(windows):
-    stderr.rawWrite("compile with -d:nimDebugDlOpen for more information\n")
-  when defined(windows) and defined(guiapp):
-    # Because console output is not shown in GUI apps, display error as message box:
-    const prefix = "could not load: "
-    var msg: array[1000, char]
-    copyMem(msg[0].addr, prefix.cstring, prefix.len)
-    copyMem(msg[prefix.len].addr, path.cstring, min(path.len + 1, 1000 - prefix.len))
-    discard MessageBoxA(0, msg[0].addr, nil, 0)
-  quit(1)
-
-proc procAddrError(name: cstring) {.noinline.} =
+    cstderr.rawWrite("\n(compile with -d:nimDebugDlOpen for more information)")
+  when defined(windows):
+    const badExe = "\n(bad format; library may be wrong architecture)"
+    let loadError = GetLastError()
+    if loadError == ERROR_BAD_EXE_FORMAT:
+      cstderr.rawWrite(badExe)
+    when defined(guiapp):
+      # Because console output is not shown in GUI apps, display the error as a
+      # message box instead:
+      var
+        msg: array[1000, char]
+        msgLeft = msg.len - 1 # leave (at least) one for nullchar
+        msgIdx = 0
+      copyMem(msg[msgIdx].addr, prefix.cstring, prefix.len)
+      msgLeft -= prefix.len
+      msgIdx += prefix.len
+      let pathLen = min(path.len, msgLeft)
+      copyMem(msg[msgIdx].addr, path.cstring, pathLen)
+      msgLeft -= pathLen
+      msgIdx += pathLen
+      if loadError == ERROR_BAD_EXE_FORMAT and msgLeft >= badExe.len:
+        copyMem(msg[msgIdx].addr, badExe.cstring, badExe.len)
+      discard MessageBoxA(nil, msg[0].addr, nil, 0)
+  cstderr.rawWrite("\n")
+  rawQuit(1)
+
+proc procAddrError(name: cstring) {.compilerproc, nonReloadable, hcrInline.} =
   # carefully written to avoid memory allocation:
-  stderr.rawWrite("could not import: ")
-  stderr.write(name)
-  stderr.rawWrite("\n")
-  quit(1)
+  cstderr.rawWrite("could not import: ")
+  cstderr.rawWrite(name)
+  cstderr.rawWrite("\n")
+  rawQuit(1)
 
 # this code was inspired from Lua's source code:
 # Lua - An Extensible Extension Language
@@ -82,12 +91,15 @@ when defined(posix):
     dlclose(lib)
 
   proc nimLoadLibrary(path: string): LibHandle =
-    result = dlopen(path, RTLD_NOW)
+    let flags =
+      when defined(globalSymbols): RTLD_NOW or RTLD_GLOBAL
+      else: RTLD_NOW
+    result = dlopen(path, flags)
     when defined(nimDebugDlOpen):
       let error = dlerror()
       if error != nil:
-        stderr.write(error)
-        stderr.rawWrite("\n")
+        cstderr.rawWrite(error)
+        cstderr.rawWrite("\n")
 
   proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
     result = dlsym(lib, name)
@@ -125,11 +137,11 @@ elif defined(windows) or defined(dos):
   proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
     result = getProcAddress(cast[THINSTANCE](lib), name)
     if result != nil: return
-    const decorated_length = 250
-    var decorated: array[decorated_length, char]
+    const decoratedLength = 250
+    var decorated: array[decoratedLength, char]
     decorated[0] = '_'
     var m = 1
-    while m < (decorated_length - 5):
+    while m < (decoratedLength - 5):
       if name[m - 1] == '\x00': break
       decorated[m] = name[m - 1]
       inc(m)
@@ -149,23 +161,38 @@ elif defined(windows) or defined(dos):
         dec(m)
         k = k div 10
         if k == 0: break
-      when defined(nimNoArrayToCstringConversion):
-        result = getProcAddress(cast[THINSTANCE](lib), addr decorated)
-      else:
-        result = getProcAddress(cast[THINSTANCE](lib), decorated)
+      result = getProcAddress(cast[THINSTANCE](lib), cast[cstring](addr decorated))
       if result != nil: return
     procAddrError(name)
 
 elif defined(genode):
 
-  proc nimUnloadLibrary(lib: LibHandle) {.
-    error: "nimUnloadLibrary not implemented".}
+  proc nimUnloadLibrary(lib: LibHandle) =
+    raiseAssert("nimUnloadLibrary not implemented")
 
-  proc nimLoadLibrary(path: string): LibHandle {.
-    error: "nimLoadLibrary not implemented".}
+  proc nimLoadLibrary(path: string): LibHandle =
+    raiseAssert("nimLoadLibrary not implemented")
+
+  proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
+    raiseAssert("nimGetProcAddr not implemented")
 
-  proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr {.
-    error: "nimGetProcAddr not implemented".}
+elif defined(nintendoswitch) or defined(freertos) or defined(zephyr) or defined(nuttx):
+  proc nimUnloadLibrary(lib: LibHandle) =
+    cstderr.rawWrite("nimUnLoadLibrary not implemented")
+    cstderr.rawWrite("\n")
+    rawQuit(1)
+
+  proc nimLoadLibrary(path: string): LibHandle =
+    cstderr.rawWrite("nimLoadLibrary not implemented")
+    cstderr.rawWrite("\n")
+    rawQuit(1)
+
+
+  proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
+    cstderr.rawWrite("nimGetProAddr not implemented")
+    cstderr.rawWrite(name)
+    cstderr.rawWrite("\n")
+    rawQuit(1)
 
 else:
   {.error: "no implementation for dyncalls".}
diff --git a/lib/system/embedded.nim b/lib/system/embedded.nim
index 4d453fcca..ea6776f58 100644
--- a/lib/system/embedded.nim
+++ b/lib/system/embedded.nim
@@ -19,8 +19,9 @@ proc nimFrame(s: PFrame) {.compilerRtl, inl, exportc: "nimFrame".} = discard
 proc popFrame {.compilerRtl, inl.} = discard
 
 proc setFrame(s: PFrame) {.compilerRtl, inl.} = discard
-proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} = discard
-proc popSafePoint {.compilerRtl, inl.} = discard
+when not gotoBasedExceptions:
+  proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} = discard
+  proc popSafePoint {.compilerRtl, inl.} = discard
 proc pushCurrentException(e: ref Exception) {.compilerRtl, inl.} = discard
 proc popCurrentException {.compilerRtl, inl.} = discard
 
@@ -29,18 +30,32 @@ const
   nativeStackTraceSupported = false
   hasSomeStackTrace = false
 
-proc quitOrDebug() {.inline.} =
-  quit(1)
+proc quitOrDebug() {.noreturn, importc: "abort", header: "<stdlib.h>", nodecl.}
 
 proc raiseException(e: ref Exception, ename: cstring) {.compilerRtl.} =
-  sysFatal(ReraiseError, "exception handling is not available")
+  sysFatal(ReraiseDefect, "exception handling is not available")
+
+proc raiseExceptionEx(e: sink(ref Exception), ename, procname, filename: cstring,
+                      line: int) {.compilerRtl.} =
+  sysFatal(ReraiseDefect, "exception handling is not available")
 
 proc reraiseException() {.compilerRtl.} =
-  sysFatal(ReraiseError, "no exception to reraise")
+  sysFatal(ReraiseDefect, "no exception to reraise")
 
 proc writeStackTrace() = discard
 
+proc unsetControlCHook() = discard
 proc setControlCHook(hook: proc () {.noconv.}) = discard
 
 proc closureIterSetupExc(e: ref Exception) {.compilerproc, inline.} =
-  sysFatal(ReraiseError, "exception handling is not available")
+  sysFatal(ReraiseDefect, "exception handling is not available")
+
+when gotoBasedExceptions:
+  var nimInErrorMode {.threadvar.}: bool
+
+  proc nimErrorFlag(): ptr bool {.compilerRtl, inl.} =
+    result = addr(nimInErrorMode)
+
+  proc nimTestErrorFlag() {.compilerRtl.} =
+    if nimInErrorMode:
+      sysFatal(ReraiseDefect, "exception handling is not available")
diff --git a/lib/system/endb.nim b/lib/system/endb.nim
deleted file mode 100644
index d51ae29df..000000000
--- a/lib/system/endb.nim
+++ /dev/null
@@ -1,558 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2013 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-# This file implements the embedded debugger that can be linked
-# with the application. Mostly we do not use dynamic memory here as that
-# would interfere with the GC and trigger ON/OFF errors if the
-# user program corrupts memory. Unfortunately, for displaying
-# variables we use the ``system.repr()`` proc which uses Nim
-# strings and thus allocates memory from the heap. Pity, but
-# I do not want to implement ``repr()`` twice.
-
-const
-  EndbBeg = "*** endb"
-  EndbEnd = "***\n"
-
-type
-  StaticStr = object
-    len: int
-    data: array[0..100, char]
-
-  BreakpointFilename = object
-    b: ptr Breakpoint
-    filename: StaticStr
-
-  DbgState = enum
-    dbOff,        # debugger is turned off
-    dbStepInto,   # debugger is in tracing mode
-    dbStepOver,
-    dbSkipCurrent,
-    dbQuiting,    # debugger wants to quit
-    dbBreakpoints # debugger is only interested in breakpoints
-{.deprecated: [TStaticStr: StaticStr, TBreakpointFilename: BreakpointFilename,
-              TDbgState: DbgState].}
-
-var
-  dbgUser: StaticStr    # buffer for user input; first command is ``step_into``
-                        # needs to be global cause we store the last command
-                        # in it
-  dbgState: DbgState    # state of debugger
-  dbgSkipToFrame: PFrame # frame to be skipped to
-
-  maxDisplayRecDepth: int = 5 # do not display too much data!
-
-  brkPoints: array[0..127, BreakpointFilename]
-
-proc setLen(s: var StaticStr, newLen=0) =
-  s.len = newLen
-  s.data[newLen] = '\0'
-
-proc add(s: var StaticStr, c: char) =
-  if s.len < high(s.data)-1:
-    s.data[s.len] = c
-    s.data[s.len+1] = '\0'
-    inc s.len
-
-proc add(s: var StaticStr, c: cstring) =
-  var i = 0
-  while c[i] != '\0':
-    add s, c[i]
-    inc i
-
-proc assign(s: var StaticStr, c: cstring) =
-  setLen(s)
-  add s, c
-
-proc `==`(a, b: StaticStr): bool =
-  if a.len == b.len:
-    for i in 0 .. a.len-1:
-      if a.data[i] != b.data[i]: return false
-    return true
-
-proc `==`(a: StaticStr, b: cstring): bool =
-  result = c_strcmp(unsafeAddr a.data, b) == 0
-
-proc write(f: File, s: StaticStr) =
-  write(f, cstring(unsafeAddr s.data))
-
-proc listBreakPoints() =
-  write(stdout, EndbBeg)
-  write(stdout, "| Breakpoints:\n")
-  for b in listBreakpoints():
-    write(stdout, abs(b.low))
-    if b.high != b.low:
-      write(stdout, "..")
-      write(stdout, abs(b.high))
-    write(stdout, " ")
-    write(stdout, b.filename)
-    if b.isActive:
-      write(stdout, " [disabled]\n")
-    else:
-      write(stdout, "\n")
-  write(stdout, EndbEnd)
-
-proc openAppend(filename: cstring): File =
-  var p: pointer = fopen(filename, "ab")
-  if p != nil:
-    result = cast[File](p)
-    write(result, "----------------------------------------\n")
-
-proc dbgRepr(p: pointer, typ: PNimType): string =
-  var cl: ReprClosure
-  initReprClosure(cl)
-  cl.recDepth = maxDisplayRecDepth
-  # locks for the GC turned out to be a bad idea...
-  # inc(recGcLock)
-  result = ""
-  reprAux(result, p, typ, cl)
-  # dec(recGcLock)
-  deinitReprClosure(cl)
-
-proc writeVariable(stream: File, slot: VarSlot) =
-  write(stream, slot.name)
-  write(stream, " = ")
-  writeLine(stream, dbgRepr(slot.address, slot.typ))
-
-proc listFrame(stream: File, f: PFrame) =
-  write(stream, EndbBeg)
-  write(stream, "| Frame (")
-  write(stream, f.len)
-  write(stream, " slots):\n")
-  for i in 0 .. f.len-1:
-    writeLine(stream, getLocal(f, i).name)
-  write(stream, EndbEnd)
-
-proc listLocals(stream: File, f: PFrame) =
-  write(stream, EndbBeg)
-  write(stream, "| Frame (")
-  write(stream, f.len)
-  write(stream, " slots):\n")
-  for i in 0 .. f.len-1:
-    writeVariable(stream, getLocal(f, i))
-  write(stream, EndbEnd)
-
-proc listGlobals(stream: File) =
-  write(stream, EndbBeg)
-  write(stream, "| Globals:\n")
-  for i in 0 .. getGlobalLen()-1:
-    writeLine(stream, getGlobal(i).name)
-  write(stream, EndbEnd)
-
-proc debugOut(msg: cstring) =
-  # the *** *** markers are for easy recognition of debugger
-  # output for external frontends.
-  write(stdout, EndbBeg)
-  write(stdout, "| ")
-  write(stdout, msg)
-  write(stdout, EndbEnd)
-
-proc dbgFatal(msg: cstring) =
-  debugOut(msg)
-  dbgAborting = true # the debugger wants to abort
-  quit(1)
-
-proc dbgShowCurrentProc(dbgFramePointer: PFrame) =
-  if dbgFramePointer != nil:
-    write(stdout, "*** endb| now in proc: ")
-    write(stdout, dbgFramePointer.procname)
-    write(stdout, " ***\n")
-  else:
-    write(stdout, "*** endb| (proc name not available) ***\n")
-
-proc dbgShowExecutionPoint() =
-  write(stdout, "*** endb| ")
-  write(stdout, framePtr.filename)
-  write(stdout, "(")
-  write(stdout, framePtr.line)
-  write(stdout, ") ")
-  write(stdout, framePtr.procname)
-  write(stdout, " ***\n")
-
-proc scanAndAppendWord(src: cstring, a: var StaticStr, start: int): int =
-  result = start
-  # skip whitespace:
-  while src[result] in {'\t', ' '}: inc(result)
-  while true:
-    case src[result]
-    of 'a'..'z', '0'..'9': add(a, src[result])
-    of '_': discard # just skip it
-    of 'A'..'Z': add(a, chr(ord(src[result]) - ord('A') + ord('a')))
-    else: break
-    inc(result)
-
-proc scanWord(src: cstring, a: var StaticStr, start: int): int =
-  setlen(a)
-  result = scanAndAppendWord(src, a, start)
-
-proc scanFilename(src: cstring, a: var StaticStr, start: int): int =
-  result = start
-  setLen a
-  while src[result] in {'\t', ' '}: inc(result)
-  while src[result] notin {'\t', ' ', '\0'}:
-    add(a, src[result])
-    inc(result)
-
-proc scanNumber(src: cstring, a: var int, start: int): int =
-  result = start
-  a = 0
-  while src[result] in {'\t', ' '}: inc(result)
-  while true:
-    case src[result]
-    of '0'..'9': a = a * 10 + ord(src[result]) - ord('0')
-    of '_': discard # skip underscores (nice for long line numbers)
-    else: break
-    inc(result)
-
-proc dbgHelp() =
-  debugOut("""
-list of commands (see the manual for further help):
-              GENERAL
-h, help                 display this help message
-q, quit                 quit the debugger and the program
-<ENTER>                 repeat the previous debugger command
-              EXECUTING
-s, step                 single step, stepping into routine calls
-n, next                 single step, without stepping into routine calls
-f, skipcurrent          continue execution until the current routine finishes
-c, continue, r, run     continue execution until the next breakpoint
-i, ignore               continue execution, ignore all breakpoints
-              BREAKPOINTS
-b, break [fromline [toline]] [file]
-                        set a new breakpoint for line and file
-                        if line or file are omitted the current one is used
-breakpoints             display the entire breakpoint list
-toggle fromline [file]  enable or disable a breakpoint
-filenames               list all valid filenames
-              DATA DISPLAY
-e, eval <expr>          evaluate the expression <expr>
-o, out <file> <expr>    evaluate <expr> and write it to <file>
-w, where                display the current execution point
-stackframe [file]       display current stack frame [and write it to file]
-u, up                   go up in the call stack
-d, down                 go down in the call stack
-bt, backtrace           display the entire call stack
-l, locals               display available local variables
-g, globals              display available global variables
-maxdisplay <integer>    set the display's recursion maximum
-""")
-
-proc invalidCommand() =
-  debugOut("[Warning] invalid command ignored (type 'h' for help) ")
-
-proc hasExt(s: cstring): bool =
-  # returns true if s has a filename extension
-  var i = 0
-  while s[i] != '\0':
-    if s[i] == '.': return true
-    inc i
-
-proc parseBreakpoint(s: cstring, start: int): Breakpoint =
-  var dbgTemp: StaticStr
-  var i = scanNumber(s, result.low, start)
-  if result.low == 0: result.low = framePtr.line
-  i = scanNumber(s, result.high, i)
-  if result.high == 0: result.high = result.low
-  i = scanFilename(s, dbgTemp, i)
-  if dbgTemp.len != 0:
-    if not hasExt(addr dbgTemp.data): add(dbgTemp, ".nim")
-    result.filename = canonFilename(addr dbgTemp.data)
-    if result.filename.isNil:
-      debugOut("[Warning] no breakpoint could be set; unknown filename ")
-      return
-  else:
-    result.filename = framePtr.filename
-
-proc createBreakPoint(s: cstring, start: int) =
-  let br = parseBreakpoint(s, start)
-  if not br.filename.isNil:
-    if not addBreakpoint(br.filename, br.low, br.high):
-      debugOut("[Warning] no breakpoint could be set; out of breakpoint space ")
-
-proc breakpointToggle(s: cstring, start: int) =
-  var a = parseBreakpoint(s, start)
-  if not a.filename.isNil:
-    var b = checkBreakpoints(a.filename, a.low)
-    if not b.isNil: b.flip
-    else: debugOut("[Warning] unknown breakpoint ")
-
-proc dbgEvaluate(stream: File, s: cstring, start: int, f: PFrame) =
-  var dbgTemp: StaticStr
-  var i = scanWord(s, dbgTemp, start)
-  while s[i] in {' ', '\t'}: inc(i)
-  var v: VarSlot
-  if s[i] == '.':
-    inc(i)
-    add(dbgTemp, '.')
-    i = scanAndAppendWord(s, dbgTemp, i)
-    for i in 0 .. getGlobalLen()-1:
-      let v = getGlobal(i)
-      if c_strcmp(v.name, addr dbgTemp.data) == 0:
-        writeVariable(stream, v)
-  else:
-    for i in 0 .. f.len-1:
-      let v = getLocal(f, i)
-      if c_strcmp(v.name, addr dbgTemp.data) == 0:
-        writeVariable(stream, v)
-
-proc dbgOut(s: cstring, start: int, currFrame: PFrame) =
-  var dbgTemp: StaticStr
-  var i = scanFilename(s, dbgTemp, start)
-  if dbgTemp.len == 0:
-    invalidCommand()
-    return
-  var stream = openAppend(addr dbgTemp.data)
-  if stream == nil:
-    debugOut("[Warning] could not open or create file ")
-    return
-  dbgEvaluate(stream, s, i, currFrame)
-  close(stream)
-
-proc dbgStackFrame(s: cstring, start: int, currFrame: PFrame) =
-  var dbgTemp: StaticStr
-  var i = scanFilename(s, dbgTemp, start)
-  if dbgTemp.len == 0:
-    # just write it to stdout:
-    listFrame(stdout, currFrame)
-  else:
-    var stream = openAppend(addr dbgTemp.data)
-    if stream == nil:
-      debugOut("[Warning] could not open or create file ")
-      return
-    listFrame(stream, currFrame)
-    close(stream)
-
-proc readLine(f: File, line: var StaticStr): bool =
-  while true:
-    var c = c_fgetc(f)
-    if c < 0'i32:
-      if line.len > 0: break
-      else: return false
-    if c == 10'i32: break # LF
-    if c == 13'i32:  # CR
-      c = c_fgetc(f) # is the next char LF?
-      if c != 10'i32: discard c_ungetc(c, f) # no, put the character back
-      break
-    add line, chr(int(c))
-  result = true
-
-proc listFilenames() =
-  write(stdout, EndbBeg)
-  write(stdout, "| Files:\n")
-  var i = 0
-  while true:
-    let x = dbgFilenames[i]
-    if x.isNil: break
-    write(stdout, x)
-    write(stdout, "\n")
-    inc i
-  write(stdout, EndbEnd)
-
-proc dbgWriteStackTrace(f: PFrame)
-proc commandPrompt() =
-  # if we return from this routine, user code executes again
-  var
-    again = true
-    dbgFramePtr = framePtr # for going down and up the stack
-    dbgDown = 0 # how often we did go down
-    dbgTemp: StaticStr
-
-  while again:
-    write(stdout, "*** endb| >>")
-    let oldLen = dbgUser.len
-    dbgUser.len = 0
-    if not readLine(stdin, dbgUser): break
-    if dbgUser.len == 0: dbgUser.len = oldLen
-    # now look what we have to do:
-    var i = scanWord(addr dbgUser.data, dbgTemp, 0)
-    template `?`(x: untyped): untyped = dbgTemp == cstring(x)
-    if ?"s" or ?"step":
-      dbgState = dbStepInto
-      again = false
-    elif ?"n" or ?"next":
-      dbgState = dbStepOver
-      dbgSkipToFrame = framePtr
-      again = false
-    elif ?"f" or ?"skipcurrent":
-      dbgState = dbSkipCurrent
-      dbgSkipToFrame = framePtr.prev
-      again = false
-    elif ?"c" or ?"continue" or ?"r" or ?"run":
-      dbgState = dbBreakpoints
-      again = false
-    elif ?"i" or ?"ignore":
-      dbgState = dbOff
-      again = false
-    elif ?"h" or ?"help":
-      dbgHelp()
-    elif ?"q" or ?"quit":
-      dbgState = dbQuiting
-      dbgAborting = true
-      again = false
-      quit(1) # BUGFIX: quit with error code > 0
-    elif ?"e" or ?"eval":
-      var
-        prevState = dbgState
-        prevSkipFrame = dbgSkipToFrame
-      dbgState = dbSkipCurrent
-      dbgEvaluate(stdout, addr dbgUser.data, i, dbgFramePtr)
-      dbgState = prevState
-      dbgSkipToFrame = prevSkipFrame
-    elif ?"o" or ?"out":
-      dbgOut(addr dbgUser.data, i, dbgFramePtr)
-    elif ?"stackframe":
-      dbgStackFrame(addr dbgUser.data, i, dbgFramePtr)
-    elif ?"w" or ?"where":
-      dbgShowExecutionPoint()
-    elif ?"l" or ?"locals":
-      var
-        prevState = dbgState
-        prevSkipFrame = dbgSkipToFrame
-      dbgState = dbSkipCurrent
-      listLocals(stdout, dbgFramePtr)
-      dbgState = prevState
-      dbgSkipToFrame = prevSkipFrame
-    elif ?"g" or ?"globals":
-      var
-        prevState = dbgState
-        prevSkipFrame = dbgSkipToFrame
-      dbgState = dbSkipCurrent
-      listGlobals(stdout)
-      dbgState = prevState
-      dbgSkipToFrame = prevSkipFrame
-    elif ?"u" or ?"up":
-      if dbgDown <= 0:
-        debugOut("[Warning] cannot go up any further ")
-      else:
-        dbgFramePtr = framePtr
-        for j in 0 .. dbgDown-2: # BUGFIX
-          dbgFramePtr = dbgFramePtr.prev
-        dec(dbgDown)
-      dbgShowCurrentProc(dbgFramePtr)
-    elif ?"d" or ?"down":
-      if dbgFramePtr != nil:
-        inc(dbgDown)
-        dbgFramePtr = dbgFramePtr.prev
-        dbgShowCurrentProc(dbgFramePtr)
-      else:
-        debugOut("[Warning] cannot go down any further ")
-    elif ?"bt" or ?"backtrace":
-      dbgWriteStackTrace(framePtr)
-    elif ?"b" or ?"break":
-      createBreakPoint(addr dbgUser.data, i)
-    elif ?"breakpoints":
-      listBreakPoints()
-    elif ?"toggle":
-      breakpointToggle(addr dbgUser.data, i)
-    elif ?"filenames":
-      listFilenames()
-    elif ?"maxdisplay":
-      var parsed: int
-      i = scanNumber(addr dbgUser.data, parsed, i)
-      if dbgUser.data[i-1] in {'0'..'9'}:
-        if parsed == 0: maxDisplayRecDepth = -1
-        else: maxDisplayRecDepth = parsed
-      else:
-        invalidCommand()
-    else: invalidCommand()
-
-proc endbStep() =
-  # we get into here if an unhandled exception has been raised
-  # XXX: do not allow the user to run the program any further?
-  # XXX: BUG: the frame is lost here!
-  dbgShowExecutionPoint()
-  commandPrompt()
-
-proc dbgWriteStackTrace(f: PFrame) =
-  const
-    firstCalls = 32
-  var
-    it = f
-    i = 0
-    total = 0
-    tempFrames: array[0..127, PFrame]
-  # setup long head:
-  while it != nil and i <= high(tempFrames)-firstCalls:
-    tempFrames[i] = it
-    inc(i)
-    inc(total)
-    it = it.prev
-  # go up the stack to count 'total':
-  var b = it
-  while it != nil:
-    inc(total)
-    it = it.prev
-  var skipped = 0
-  if total > len(tempFrames):
-    # skip N
-    skipped = total-i-firstCalls+1
-    for j in 1..skipped:
-      if b != nil: b = b.prev
-    # create '...' entry:
-    tempFrames[i] = nil
-    inc(i)
-  # setup short tail:
-  while b != nil and i <= high(tempFrames):
-    tempFrames[i] = b
-    inc(i)
-    b = b.prev
-  for j in countdown(i-1, 0):
-    if tempFrames[j] == nil:
-      write(stdout, "(")
-      write(stdout, skipped)
-      write(stdout, " calls omitted) ...")
-    else:
-      write(stdout, tempFrames[j].filename)
-      if tempFrames[j].line > 0:
-        write(stdout, '(')
-        write(stdout, tempFrames[j].line)
-        write(stdout, ')')
-      write(stdout, ' ')
-      write(stdout, tempFrames[j].procname)
-    write(stdout, "\n")
-
-proc checkForBreakpoint =
-  let b = checkBreakpoints(framePtr.filename, framePtr.line)
-  if b != nil:
-    write(stdout, "*** endb| reached ")
-    write(stdout, framePtr.filename)
-    write(stdout, "(")
-    write(stdout, framePtr.line)
-    write(stdout, ") ")
-    write(stdout, framePtr.procname)
-    write(stdout, " ***\n")
-    commandPrompt()
-
-proc lineHookImpl() {.nimcall.} =
-  case dbgState
-  of dbStepInto:
-    # we really want the command prompt here:
-    dbgShowExecutionPoint()
-    commandPrompt()
-  of dbSkipCurrent, dbStepOver: # skip current routine
-    if framePtr == dbgSkipToFrame:
-      dbgShowExecutionPoint()
-      commandPrompt()
-    else:
-      # breakpoints are wanted though (I guess)
-      checkForBreakpoint()
-  of dbBreakpoints:
-    # debugger is only interested in breakpoints
-    checkForBreakpoint()
-  else: discard
-
-proc watchpointHookImpl(name: cstring) {.nimcall.} =
-  dbgWriteStackTrace(framePtr)
-  debugOut(name)
-
-proc initDebugger {.inline.} =
-  dbgState = dbStepInto
-  dbgUser.len = 1
-  dbgUser.data[0] = 's'
-  dbgWatchpointHook = watchpointHookImpl
-  dbgLineHook = lineHookImpl
diff --git a/lib/system/exceptions.nim b/lib/system/exceptions.nim
new file mode 100644
index 000000000..63588f858
--- /dev/null
+++ b/lib/system/exceptions.nim
@@ -0,0 +1,122 @@
+## Exception and effect types used in Nim code.
+
+type
+  TimeEffect* = object of RootEffect   ## Time effect.
+  IOEffect* = object of RootEffect     ## IO effect.
+  ReadIOEffect* = object of IOEffect   ## Effect describing a read IO operation.
+  WriteIOEffect* = object of IOEffect  ## Effect describing a write IO operation.
+  ExecIOEffect* = object of IOEffect   ## Effect describing an executing IO operation.
+
+type
+  IOError* = object of CatchableError ## \
+    ## Raised if an IO error occurred.
+  EOFError* = object of IOError ## \
+    ## Raised if an IO "end of file" error occurred.
+  OSError* = object of CatchableError ## \
+    ## Raised if an operating system service failed.
+    errorCode*: int32 ## OS-defined error code describing this error.
+  LibraryError* = object of OSError ## \
+    ## Raised if a dynamic library could not be loaded.
+  ResourceExhaustedError* = object of CatchableError ## \
+    ## Raised if a resource request could not be fulfilled.
+  ArithmeticDefect* = object of Defect ## \
+    ## Raised if any kind of arithmetic error occurred.
+  DivByZeroDefect* = object of ArithmeticDefect ## \
+    ## Raised for runtime integer divide-by-zero errors.
+
+  OverflowDefect* = object of ArithmeticDefect ## \
+    ## Raised for runtime integer overflows.
+    ##
+    ## This happens for calculations whose results are too large to fit in the
+    ## provided bits.
+  AccessViolationDefect* = object of Defect ## \
+    ## Raised for invalid memory access errors.
+  AssertionDefect* = object of Defect ## \
+    ## Raised when an assertion is proven wrong.
+    ##
+    ## Usually the result of using the `assert() template
+    ## <assertions.html#assert.t,untyped,string>`_.
+  ValueError* = object of CatchableError ## \
+    ## Raised for string and object conversion errors.
+  KeyError* = object of ValueError ## \
+    ## Raised if a key cannot be found in a table.
+    ##
+    ## Mostly used by the `tables <tables.html>`_ module, it can also be raised
+    ## by other collection modules like `sets <sets.html>`_ or `strtabs
+    ## <strtabs.html>`_.
+  OutOfMemDefect* = object of Defect ## \
+    ## Raised for unsuccessful attempts to allocate memory.
+  IndexDefect* = object of Defect ## \
+    ## Raised if an array index is out of bounds.
+
+  FieldDefect* = object of Defect ## \
+    ## Raised if a record field is not accessible because its discriminant's
+    ## value does not fit.
+  RangeDefect* = object of Defect ## \
+    ## Raised if a range check error occurred.
+  StackOverflowDefect* = object of Defect ## \
+    ## Raised if the hardware stack used for subroutine calls overflowed.
+  ReraiseDefect* = object of Defect ## \
+    ## Raised if there is no exception to reraise.
+  ObjectAssignmentDefect* = object of Defect ## \
+    ## Raised if an object is assigned to a variable of its parent's object type.
+  ObjectConversionDefect* = object of Defect ## \
+    ## Raised if an object is converted to an incompatible object type.
+    ## You can use the `of` operator to check whether a conversion will succeed.
+  FloatingPointDefect* = object of Defect ## \
+    ## Base class for floating point exceptions.
+  FloatInvalidOpDefect* = object of FloatingPointDefect ## \
+    ## Raised by invalid operations according to IEEE.
+    ##
+    ## Raised by `0.0/0.0`, for example.
+  FloatDivByZeroDefect* = object of FloatingPointDefect ## \
+    ## Raised by division by zero.
+    ##
+    ## Divisor is zero and dividend is a finite nonzero number.
+  FloatOverflowDefect* = object of FloatingPointDefect ## \
+    ## Raised for overflows.
+    ##
+    ## The operation produced a result that exceeds the range of the exponent.
+  FloatUnderflowDefect* = object of FloatingPointDefect ## \
+    ## Raised for underflows.
+    ##
+    ## The operation produced a result that is too small to be represented as a
+    ## normal number.
+  FloatInexactDefect* = object of FloatingPointDefect ## \
+    ## Raised for inexact results.
+    ##
+    ## The operation produced a result that cannot be represented with infinite
+    ## precision -- for example: `2.0 / 3.0, log(1.1)`
+    ##
+    ## **Note**: Nim currently does not detect these!
+  DeadThreadDefect* = object of Defect ## \
+    ## Raised if an attempt is made to send a message to a dead thread.
+  NilAccessDefect* = object of Defect ## \
+    ## Raised on dereferences of `nil` pointers.
+    ##
+    ## This is only raised if the `segfaults module <segfaults.html>`_ was imported!
+
+when not defined(nimPreviewSlimSystem):
+  type
+    ArithmeticError* {.deprecated: "See corresponding Defect".} = ArithmeticDefect
+    DivByZeroError* {.deprecated: "See corresponding Defect".} = DivByZeroDefect
+    OverflowError* {.deprecated: "See corresponding Defect".} = OverflowDefect
+    AccessViolationError* {.deprecated: "See corresponding Defect".} = AccessViolationDefect
+    AssertionError* {.deprecated: "See corresponding Defect".} = AssertionDefect
+    OutOfMemError* {.deprecated: "See corresponding Defect".} = OutOfMemDefect
+    IndexError* {.deprecated: "See corresponding Defect".} = IndexDefect
+
+    FieldError* {.deprecated: "See corresponding Defect".} = FieldDefect
+    RangeError* {.deprecated: "See corresponding Defect".} = RangeDefect
+    StackOverflowError* {.deprecated: "See corresponding Defect".} = StackOverflowDefect
+    ReraiseError* {.deprecated: "See corresponding Defect".} = ReraiseDefect
+    ObjectAssignmentError* {.deprecated: "See corresponding Defect".} = ObjectAssignmentDefect
+    ObjectConversionError* {.deprecated: "See corresponding Defect".} = ObjectConversionDefect
+    FloatingPointError* {.deprecated: "See corresponding Defect".} = FloatingPointDefect
+    FloatInvalidOpError* {.deprecated: "See corresponding Defect".} = FloatInvalidOpDefect
+    FloatDivByZeroError* {.deprecated: "See corresponding Defect".} = FloatDivByZeroDefect
+    FloatOverflowError* {.deprecated: "See corresponding Defect".} = FloatOverflowDefect
+    FloatUnderflowError* {.deprecated: "See corresponding Defect".} = FloatUnderflowDefect
+    FloatInexactError* {.deprecated: "See corresponding Defect".} = FloatInexactDefect
+    DeadThreadError* {.deprecated: "See corresponding Defect".} = DeadThreadDefect
+    NilAccessError* {.deprecated: "See corresponding Defect".} = NilAccessDefect
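As a quick orientation for the renamed hierarchy above (not part of the patch): `Defect` covers programming bugs that are not meant to be caught, while `CatchableError` descendants such as `ValueError` or `IOError` signal recoverable conditions. A minimal sketch; the proc `parsePort` is made up for illustration:

```nim
import std/strutils

proc parsePort(s: string): int =
  result = parseInt(s)           # may raise ValueError
  if result notin 1..65535:
    raise newException(ValueError, "port out of range: " & s)

when isMainModule:
  try:
    echo parsePort("8080")
  except ValueError as e:        # CatchableError branch; Defects propagate
    echo "bad input: ", e.msg
```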
diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim
index dabfe010e..dae5c4a4a 100644
--- a/lib/system/excpt.nim
+++ b/lib/system/excpt.nim
@@ -10,34 +10,55 @@
 # Exception handling code. Carefully coded so that tiny programs which do not
 # use the heap (nor exceptions) do not include the GC or memory allocator.
 
+import std/private/miscdollars
+import stacktraces
+
+const noStacktraceAvailable = "No stack traceback available\n"
+
 var
   errorMessageWriter*: (proc(msg: string) {.tags: [WriteIOEffect], benign,
                                             nimcall.})
     ## Function that will be called
-    ## instead of stdmsg.write when printing stacktrace.
+    ## instead of `stdmsg.write` when printing stacktrace.
     ## Unstable API.
 
-when not defined(windows) or not defined(guiapp):
-  proc writeToStdErr(msg: cstring) = write(stdmsg, msg)
+when defined(windows):
+  proc GetLastError(): int32 {.header: "<windows.h>", nodecl.}
+  const ERROR_BAD_EXE_FORMAT = 193
 
+when not defined(windows) or not defined(guiapp):
+  proc writeToStdErr(msg: cstring) = rawWrite(cstderr, msg)
+  proc writeToStdErr(msg: cstring, length: int) =
+    rawWriteString(cstderr, msg, length)
 else:
-  proc MessageBoxA(hWnd: cint, lpText, lpCaption: cstring, uType: int): int32 {.
+  proc MessageBoxA(hWnd: pointer, lpText, lpCaption: cstring, uType: int): int32 {.
     header: "<windows.h>", nodecl.}
-
   proc writeToStdErr(msg: cstring) =
-    discard MessageBoxA(0, msg, nil, 0)
+    discard MessageBoxA(nil, msg, nil, 0)
+  proc writeToStdErr(msg: cstring, length: int) =
+    discard MessageBoxA(nil, msg, nil, 0)
+
+proc writeToStdErr(msg: string) {.inline.} =
+  # fix bug #13115: handles '\0' correctly, unlike the default implicit conversion to cstring
+  writeToStdErr(msg.cstring, msg.len)
 
-proc showErrorMessage(data: cstring) {.gcsafe.} =
+proc showErrorMessage(data: cstring, length: int) {.gcsafe, raises: [].} =
+  var toWrite = true
   if errorMessageWriter != nil:
-    errorMessageWriter($data)
-  else:
-    writeToStdErr(data)
+    try:
+      errorMessageWriter($data)
+      toWrite = false
+    except:
+      discard
+  if toWrite:
+    when defined(genode):
+      # stderr not available by default, use the LOG session
+      echo data
+    else:
+      writeToStdErr(data, length)
 
-proc quitOrDebug() {.inline.} =
-  when not defined(endb):
-    quit(1)
-  else:
-    endbStep() # call the debugger
+proc showErrorMessage2(data: string) {.inline.} =
+  showErrorMessage(data.cstring, data.len)
 
 proc chckIndx(i, a, b: int): int {.inline, compilerproc, benign.}
 proc chckRange(i, a, b: int): int {.inline, compilerproc, benign.}
@@ -50,28 +71,47 @@ type
     len: int
     prev: ptr GcFrameHeader
 
+when NimStackTraceMsgs:
+  var frameMsgBuf* {.threadvar.}: string
+
+when not defined(nimV2):
+  var
+    framePtr {.threadvar.}: PFrame
+
 var
-  framePtr {.threadvar.}: PFrame
-  excHandler {.threadvar.}: PSafePoint
-    # list of exception handlers
-    # a global variable for the root of all try blocks
   currException {.threadvar.}: ref Exception
-  raise_counter {.threadvar.}: uint
 
-  gcFramePtr {.threadvar.}: GcFrame
+when not gotoBasedExceptions:
+  var
+    excHandler {.threadvar.}: PSafePoint
+      # list of exception handlers
+      # a global variable for the root of all try blocks
+    gcFramePtr {.threadvar.}: GcFrame
 
-type
-  FrameState = tuple[gcFramePtr: GcFrame, framePtr: PFrame,
-                     excHandler: PSafePoint, currException: ref Exception]
+when gotoBasedExceptions:
+  type
+    FrameState = tuple[framePtr: PFrame,
+                      currException: ref Exception]
+else:
+  type
+    FrameState = tuple[gcFramePtr: GcFrame, framePtr: PFrame,
+                      excHandler: PSafePoint, currException: ref Exception]
 
 proc getFrameState*(): FrameState {.compilerRtl, inl.} =
-  return (gcFramePtr, framePtr, excHandler, currException)
+  when gotoBasedExceptions:
+    return (framePtr, currException)
+  else:
+    return (gcFramePtr, framePtr, excHandler, currException)
 
 proc setFrameState*(state: FrameState) {.compilerRtl, inl.} =
-  gcFramePtr = state.gcFramePtr
-  framePtr = state.framePtr
-  excHandler = state.excHandler
-  currException = state.currException
+  when gotoBasedExceptions:
+    framePtr = state.framePtr
+    currException = state.currException
+  else:
+    gcFramePtr = state.gcFramePtr
+    framePtr = state.framePtr
+    excHandler = state.excHandler
+    currException = state.currException
 
 proc getFrame*(): PFrame {.compilerRtl, inl.} = framePtr
 
@@ -93,54 +133,44 @@ when false:
 proc setFrame*(s: PFrame) {.compilerRtl, inl.} =
   framePtr = s
 
-proc getGcFrame*(): GcFrame {.compilerRtl, inl.} = gcFramePtr
-proc popGcFrame*() {.compilerRtl, inl.} = gcFramePtr = gcFramePtr.prev
-proc setGcFrame*(s: GcFrame) {.compilerRtl, inl.} = gcFramePtr = s
-proc pushGcFrame*(s: GcFrame) {.compilerRtl, inl.} =
-  s.prev = gcFramePtr
-  zeroMem(cast[pointer](cast[int](s)+%sizeof(GcFrameHeader)), s.len*sizeof(pointer))
-  gcFramePtr = s
+when not gotoBasedExceptions:
+  proc getGcFrame*(): GcFrame {.compilerRtl, inl.} = gcFramePtr
+  proc popGcFrame*() {.compilerRtl, inl.} = gcFramePtr = gcFramePtr.prev
+  proc setGcFrame*(s: GcFrame) {.compilerRtl, inl.} = gcFramePtr = s
+  proc pushGcFrame*(s: GcFrame) {.compilerRtl, inl.} =
+    s.prev = gcFramePtr
+    zeroMem(cast[pointer](cast[int](s)+%sizeof(GcFrameHeader)), s.len*sizeof(pointer))
+    gcFramePtr = s
 
-proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =
-  s.hasRaiseAction = false
-  s.prev = excHandler
-  excHandler = s
+  proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =
+    s.prev = excHandler
+    excHandler = s
 
-proc popSafePoint {.compilerRtl, inl.} =
-  excHandler = excHandler.prev
+  proc popSafePoint {.compilerRtl, inl.} =
+    excHandler = excHandler.prev
 
-proc pushCurrentException(e: ref Exception) {.compilerRtl, inl.} =
+proc pushCurrentException(e: sink(ref Exception)) {.compilerRtl, inl.} =
   e.up = currException
   currException = e
+  #showErrorMessage2 "A"
 
 proc popCurrentException {.compilerRtl, inl.} =
   currException = currException.up
+  #showErrorMessage2 "B"
 
 proc popCurrentExceptionEx(id: uint) {.compilerRtl.} =
-  # in cpp backend exceptions can pop-up in the different order they were raised, example #5628
-  if currException.raise_id == id:
-    currException = currException.up
-  else:
-    var cur = currException.up
-    var prev = currException
-    while cur != nil and cur.raise_id != id:
-      prev = cur
-      cur = cur.up
-    if cur == nil:
-      showErrorMessage("popCurrentExceptionEx() exception was not found in the exception stack. Aborting...")
-      quitOrDebug()
-    prev.up = cur.up
+  discard "only for bootstrapping compatbility"
 
 proc closureIterSetupExc(e: ref Exception) {.compilerproc, inline.} =
-  if not e.isNil:
-    currException = e
+  currException = e
 
 # some platforms have native support for stack traces:
 const
-  nativeStackTraceSupported* = (defined(macosx) or defined(linux)) and
+  nativeStackTraceSupported = (defined(macosx) or defined(linux)) and
                               not NimStackTrace
-  hasSomeStackTrace = NimStackTrace or
-    defined(nativeStackTrace) and nativeStackTraceSupported
+  hasSomeStackTrace = NimStackTrace or defined(nimStackTraceOverride) or
+    (defined(nativeStackTrace) and nativeStackTraceSupported)
+
 
 when defined(nativeStacktrace) and nativeStackTraceSupported:
   type
@@ -158,13 +188,13 @@ when defined(nativeStacktrace) and nativeStackTraceSupported:
 
   when not hasThreadSupport:
     var
-      tempAddresses: array[0..127, pointer] # should not be alloc'd on stack
+      tempAddresses: array[maxStackTraceLines, pointer] # should not be alloc'd on stack
       tempDlInfo: TDl_info
 
   proc auxWriteStackTraceWithBacktrace(s: var string) =
     when hasThreadSupport:
       var
-        tempAddresses: array[0..127, pointer] # but better than a threadvar
+        tempAddresses: array[maxStackTraceLines, pointer] # but better than a threadvar
         tempDlInfo: TDl_info
     # This is allowed to be expensive since it only happens during crashes
     # (but this way you don't need manual stack tracing)
@@ -190,13 +220,9 @@ when defined(nativeStacktrace) and nativeStackTraceSupported:
           # interested in
           enabled = true
 
-when not hasThreadSupport:
+when hasSomeStackTrace and not hasThreadSupport:
   var
-    tempFrames: array[0..127, PFrame] # should not be alloc'd on stack
-
-const
-  reraisedFromBegin = -10
-  reraisedFromEnd = -100
+    tempFrames: array[maxStackTraceLines, PFrame] # should not be alloc'd on stack
 
 template reraisedFrom(z): untyped =
   StackTraceEntry(procname: nil, line: z, filename: nil)
@@ -209,87 +235,106 @@ proc auxWriteStackTrace(f: PFrame; s: var seq[StackTraceEntry]) =
     inc(i)
     it = it.prev
   var last = i-1
-  if s.isNil:
-    s = newSeq[StackTraceEntry](i)
-  else:
-    last = s.len + i - 1
-    s.setLen(last+1)
+  when true: # not defined(gcDestructors):
+    if s.len == 0:
+      s = newSeq[StackTraceEntry](i)
+    else:
+      last = s.len + i - 1
+      s.setLen(last+1)
   it = f
   while it != nil:
     s[last] = StackTraceEntry(procname: it.procname,
                               line: it.line,
                               filename: it.filename)
+    when NimStackTraceMsgs:
+      let first = if it.prev == nil: 0 else: it.prev.frameMsgLen
+      if it.frameMsgLen > first:
+        s[last].frameMsg.setLen(it.frameMsgLen - first)
+        # somehow string slicing not available here
+        for i in first .. it.frameMsgLen-1:
+          s[last].frameMsg[i-first] = frameMsgBuf[i]
     it = it.prev
     dec last
 
-template addFrameEntry(s, f: untyped) =
+template addFrameEntry(s: var string, f: StackTraceEntry|PFrame) =
   var oldLen = s.len
-  add(s, f.filename)
-  if f.line > 0:
-    add(s, '(')
-    add(s, $f.line)
-    add(s, ')')
+  s.toLocation(f.filename, f.line, 0)
   for k in 1..max(1, 25-(s.len-oldLen)): add(s, ' ')
   add(s, f.procname)
+  when NimStackTraceMsgs:
+    when typeof(f) is StackTraceEntry:
+      add(s, f.frameMsg)
+    else:
+      var first = if f.prev == nil: 0 else: f.prev.frameMsgLen
+      for i in first..<f.frameMsgLen: add(s, frameMsgBuf[i])
   add(s, "\n")
 
-proc `$`(s: seq[StackTraceEntry]): string =
+proc `$`(stackTraceEntries: seq[StackTraceEntry]): string =
+  when defined(nimStackTraceOverride):
+    let s = addDebuggingInfo(stackTraceEntries)
+  else:
+    let s = stackTraceEntries
+
   result = newStringOfCap(2000)
   for i in 0 .. s.len-1:
     if s[i].line == reraisedFromBegin: result.add "[[reraised from:\n"
     elif s[i].line == reraisedFromEnd: result.add "]]\n"
     else: addFrameEntry(result, s[i])
 
-proc auxWriteStackTrace(f: PFrame, s: var string) =
-  when hasThreadSupport:
+when hasSomeStackTrace:
+
+  proc auxWriteStackTrace(f: PFrame, s: var string) =
+    when hasThreadSupport:
+      var
+        tempFrames: array[maxStackTraceLines, PFrame] # but better than a threadvar
+    const
+      firstCalls = 32
     var
-      tempFrames: array[0..127, PFrame] # but better than a threadvar
-  const
-    firstCalls = 32
-  var
-    it = f
-    i = 0
-    total = 0
-  # setup long head:
-  while it != nil and i <= high(tempFrames)-firstCalls:
-    tempFrames[i] = it
-    inc(i)
-    inc(total)
-    it = it.prev
-  # go up the stack to count 'total':
-  var b = it
-  while it != nil:
-    inc(total)
-    it = it.prev
-  var skipped = 0
-  if total > len(tempFrames):
-    # skip N
-    skipped = total-i-firstCalls+1
-    for j in 1..skipped:
-      if b != nil: b = b.prev
-    # create '...' entry:
-    tempFrames[i] = nil
-    inc(i)
-  # setup short tail:
-  while b != nil and i <= high(tempFrames):
-    tempFrames[i] = b
-    inc(i)
-    b = b.prev
-  for j in countdown(i-1, 0):
-    if tempFrames[j] == nil:
-      add(s, "(")
-      add(s, $skipped)
-      add(s, " calls omitted) ...\n")
-    else:
-      addFrameEntry(s, tempFrames[j])
+      it = f
+      i = 0
+      total = 0
+    # setup long head:
+    while it != nil and i <= high(tempFrames)-firstCalls:
+      tempFrames[i] = it
+      inc(i)
+      inc(total)
+      it = it.prev
+    # go up the stack to count 'total':
+    var b = it
+    while it != nil:
+      inc(total)
+      it = it.prev
+    var skipped = 0
+    if total > len(tempFrames):
+      # skip N
+      skipped = total-i-firstCalls+1
+      for j in 1..skipped:
+        if b != nil: b = b.prev
+      # create '...' entry:
+      tempFrames[i] = nil
+      inc(i)
+    # setup short tail:
+    while b != nil and i <= high(tempFrames):
+      tempFrames[i] = b
+      inc(i)
+      b = b.prev
+    for j in countdown(i-1, 0):
+      if tempFrames[j] == nil:
+        add(s, "(")
+        add(s, $skipped)
+        add(s, " calls omitted) ...\n")
+      else:
+        addFrameEntry(s, tempFrames[j])
 
-proc stackTraceAvailable*(): bool
+  proc stackTraceAvailable*(): bool
 
-when hasSomeStackTrace:
   proc rawWriteStackTrace(s: var string) =
-    when NimStackTrace:
+    when defined(nimStackTraceOverride):
+      add(s, "Traceback (most recent call last, using override)\n")
+      auxWriteStackTraceWithOverride(s)
+    elif NimStackTrace:
       if framePtr == nil:
-        add(s, "No stack traceback available\n")
+        add(s, noStacktraceAvailable)
       else:
         add(s, "Traceback (most recent call last)\n")
         auxWriteStackTrace(framePtr, s)
@@ -297,16 +342,20 @@ when hasSomeStackTrace:
       add(s, "Traceback from system (most recent call last)\n")
       auxWriteStackTraceWithBacktrace(s)
     else:
-      add(s, "No stack traceback available\n")
+      add(s, noStacktraceAvailable)
 
   proc rawWriteStackTrace(s: var seq[StackTraceEntry]) =
-    when NimStackTrace:
+    when defined(nimStackTraceOverride):
+      auxWriteStackTraceWithOverride(s)
+    elif NimStackTrace:
       auxWriteStackTrace(framePtr, s)
     else:
-      s = nil
+      s = @[]
 
   proc stackTraceAvailable(): bool =
-    when NimStackTrace:
+    when defined(nimStackTraceOverride):
+      result = true
+    elif NimStackTrace:
       if framePtr == nil:
         result = false
       else:
@@ -319,154 +368,272 @@ else:
   proc stackTraceAvailable*(): bool = result = false
 
 var onUnhandledException*: (proc (errorMsg: string) {.
-  nimcall.}) ## set this error \
+  nimcall, gcsafe.}) ## Set this error \
   ## handler to override the existing behaviour on an unhandled exception.
-  ## The default is to write a stacktrace to ``stderr`` and then call ``quit(1)``.
+  ##
+  ## The default is to write a stacktrace to `stderr` and then call `quit(1)`.
   ## Unstable API.
 
-template unhandled(buf, body) =
-  if onUnhandledException != nil:
-    onUnhandledException($buf)
+proc reportUnhandledErrorAux(e: ref Exception) {.nodestroy, gcsafe.} =
+  when hasSomeStackTrace:
+    var buf = newStringOfCap(2000)
+    if e.trace.len == 0:
+      rawWriteStackTrace(buf)
+    else:
+      var trace = $e.trace
+      add(buf, trace)
+      {.gcsafe.}:
+        `=destroy`(trace)
+    add(buf, "Error: unhandled exception: ")
+    add(buf, e.msg)
+    add(buf, " [")
+    add(buf, $e.name)
+    add(buf, "]\n")
+
+    if onUnhandledException != nil:
+      onUnhandledException(buf)
+    else:
+      showErrorMessage2(buf)
+    {.gcsafe.}:
+      `=destroy`(buf)
   else:
-    body
+    # ugly, but avoids heap allocations :-)
+    template xadd(buf, s, slen) =
+      if L + slen < high(buf):
+        copyMem(addr(buf[L]), (when s is cstring: s else: cstring(s)), slen)
+        inc L, slen
+    template add(buf, s) =
+      xadd(buf, s, s.len)
+    var buf: array[0..2000, char]
+    var L = 0
+    if e.trace.len != 0:
+      var trace = $e.trace
+      add(buf, trace)
+      {.gcsafe.}:
+        `=destroy`(trace)
+    add(buf, "Error: unhandled exception: ")
+    add(buf, e.msg)
+    add(buf, " [")
+    xadd(buf, e.name, e.name.len)
+    add(buf, "]\n")
+    if onUnhandledException != nil:
+      onUnhandledException($cast[cstring](buf.addr))
+    else:
+      showErrorMessage(cast[cstring](buf.addr), L)
+
+proc reportUnhandledError(e: ref Exception) {.nodestroy, gcsafe.} =
+  if unhandledExceptionHook != nil:
+    unhandledExceptionHook(e)
+  when hostOS != "any":
+    reportUnhandledErrorAux(e)
+
+when not gotoBasedExceptions:
+  proc nimLeaveFinally() {.compilerRtl.} =
+    when defined(cpp) and not defined(noCppExceptions) and not gotoBasedExceptions:
+      {.emit: "throw;".}
+    else:
+      if excHandler != nil:
+        c_longjmp(excHandler.context, 1)
+      else:
+        reportUnhandledError(currException)
+        rawQuit(1)
+
+when gotoBasedExceptions:
+  var nimInErrorMode {.threadvar.}: bool
+
+  proc nimErrorFlag(): ptr bool {.compilerRtl, inl.} =
+    result = addr(nimInErrorMode)
+
+  proc nimTestErrorFlag() {.compilerRtl.} =
+    ## This proc must be called before `currException` is destroyed.
+    ## It also must be called at the end of every thread to ensure no
+    ## error is swallowed.
+    if nimInErrorMode and currException != nil:
+      reportUnhandledError(currException)
+      currException = nil
+      rawQuit(1)
+
+proc raiseExceptionAux(e: sink(ref Exception)) {.nodestroy.} =
+  when defined(nimPanics):
+    if e of Defect:
+      reportUnhandledError(e)
+      rawQuit(1)
 
-proc raiseExceptionAux(e: ref Exception) =
   if localRaiseHook != nil:
     if not localRaiseHook(e): return
   if globalRaiseHook != nil:
     if not globalRaiseHook(e): return
-  when defined(cpp) and not defined(noCppExceptions):
-    if e[] of OutOfMemError:
-      showErrorMessage(e.name)
-      quitOrDebug()
+  when defined(cpp) and not defined(noCppExceptions) and not gotoBasedExceptions:
+    if e == currException:
+      {.emit: "throw;".}
     else:
       pushCurrentException(e)
-      raise_counter.inc
-      if raise_counter == 0:
-        raise_counter.inc # skip zero at overflow
-      e.raise_id = raise_counter
-      {.emit: "`e`->raise();".}
+      {.emit: "throw `e`;".}
+  elif quirkyExceptions or gotoBasedExceptions:
+    pushCurrentException(e)
+    when gotoBasedExceptions:
+      inc nimInErrorMode
   else:
     if excHandler != nil:
-      if not excHandler.hasRaiseAction or excHandler.raiseAction(e):
-        pushCurrentException(e)
-        c_longjmp(excHandler.context, 1)
-    elif e[] of OutOfMemError:
-      showErrorMessage(e.name)
-      quitOrDebug()
+      pushCurrentException(e)
+      c_longjmp(excHandler.context, 1)
     else:
-      when hasSomeStackTrace:
-        var buf = newStringOfCap(2000)
-        if isNil(e.trace): rawWriteStackTrace(buf)
-        else: add(buf, $e.trace)
-        add(buf, "Error: unhandled exception: ")
-        if not isNil(e.msg): add(buf, e.msg)
-        add(buf, " [")
-        add(buf, $e.name)
-        add(buf, "]\n")
-        unhandled(buf):
-          showErrorMessage(buf)
-          quitOrDebug()
-      else:
-        # ugly, but avoids heap allocations :-)
-        template xadd(buf, s, slen) =
-          if L + slen < high(buf):
-            copyMem(addr(buf[L]), cstring(s), slen)
-            inc L, slen
-        template add(buf, s) =
-          xadd(buf, s, s.len)
-        var buf: array[0..2000, char]
-        var L = 0
-        add(buf, "Error: unhandled exception: ")
-        if not isNil(e.msg): add(buf, e.msg)
-        add(buf, " [")
-        xadd(buf, e.name, e.name.len)
-        add(buf, "]\n")
-        when defined(nimNoArrayToCstringConversion):
-          template tbuf(): untyped = addr buf
-        else:
-          template tbuf(): untyped = buf
-        unhandled(tbuf()):
-          showErrorMessage(tbuf())
-          quitOrDebug()
+      reportUnhandledError(e)
+      rawQuit(1)
 
-proc raiseException(e: ref Exception, ename: cstring) {.compilerRtl.} =
+proc raiseExceptionEx(e: sink(ref Exception), ename, procname, filename: cstring,
+                      line: int) {.compilerRtl, nodestroy.} =
   if e.name.isNil: e.name = ename
   when hasSomeStackTrace:
-    if e.trace.isNil:
-      rawWriteStackTrace(e.trace)
-    elif framePtr != nil:
-      e.trace.add reraisedFrom(reraisedFromBegin)
-      auxWriteStackTrace(framePtr, e.trace)
-      e.trace.add reraisedFrom(reraisedFromEnd)
+    when defined(nimStackTraceOverride):
+      if e.trace.len == 0:
+        rawWriteStackTrace(e.trace)
+      else:
+        e.trace.add reraisedFrom(reraisedFromBegin)
+        auxWriteStackTraceWithOverride(e.trace)
+        e.trace.add reraisedFrom(reraisedFromEnd)
+    elif NimStackTrace:
+      if e.trace.len == 0:
+        rawWriteStackTrace(e.trace)
+      elif framePtr != nil:
+        e.trace.add reraisedFrom(reraisedFromBegin)
+        auxWriteStackTrace(framePtr, e.trace)
+        e.trace.add reraisedFrom(reraisedFromEnd)
+  else:
+    if procname != nil and filename != nil:
+      e.trace.add StackTraceEntry(procname: procname, filename: filename, line: line)
   raiseExceptionAux(e)
 
+proc raiseException(e: sink(ref Exception), ename: cstring) {.compilerRtl.} =
+  raiseExceptionEx(e, ename, nil, nil, 0)
+
 proc reraiseException() {.compilerRtl.} =
   if currException == nil:
-    sysFatal(ReraiseError, "no exception to reraise")
+    sysFatal(ReraiseDefect, "no exception to reraise")
   else:
-    raiseExceptionAux(currException)
+    when gotoBasedExceptions:
+      inc nimInErrorMode
+    else:
+      raiseExceptionAux(currException)
+
+proc threadTrouble() =
+  # also forward declared, it is 'raises: []' hence the try-except.
+  try:
+    if currException != nil: reportUnhandledError(currException)
+  except:
+    discard
+  rawQuit 1
 
 proc writeStackTrace() =
   when hasSomeStackTrace:
     var s = ""
     rawWriteStackTrace(s)
-    cast[proc (s: cstring) {.noSideEffect, tags: [], nimcall.}](showErrorMessage)(s)
   else:
-    cast[proc (s: cstring) {.noSideEffect, tags: [], nimcall.}](showErrorMessage)("No stack traceback available\n")
+    let s = noStacktraceAvailable
+  cast[proc (s: string) {.noSideEffect, tags: [], nimcall, raises: [].}](showErrorMessage2)(s)
 
 proc getStackTrace(): string =
   when hasSomeStackTrace:
     result = ""
     rawWriteStackTrace(result)
   else:
-    result = "No stack traceback available\n"
+    result = noStacktraceAvailable
 
 proc getStackTrace(e: ref Exception): string =
-  if not isNil(e) and not isNil(e.trace):
+  if not isNil(e):
     result = $e.trace
   else:
     result = ""
 
-proc getStackTraceEntries*(e: ref Exception): seq[StackTraceEntry] =
-  ## Returns the attached stack trace to the exception ``e`` as
-  ## a ``seq``. This is not yet available for the JS backend.
-  shallowCopy(result, e.trace)
-
-when defined(nimRequiresNimFrame):
-  proc stackOverflow() {.noinline.} =
-    writeStackTrace()
-    showErrorMessage("Stack overflow\n")
-    quitOrDebug()
-
-  proc nimFrame(s: PFrame) {.compilerRtl, inl, exportc: "nimFrame".} =
-    s.calldepth = if framePtr == nil: 0 else: framePtr.calldepth+1
-    s.prev = framePtr
-    framePtr = s
-    if s.calldepth == 2000: stackOverflow()
-else:
-  proc pushFrame(s: PFrame) {.compilerRtl, inl, exportc: "nimFrame".} =
-    # XXX only for backwards compatibility
-    s.prev = framePtr
-    framePtr = s
+proc getStackTraceEntries*(e: ref Exception): lent seq[StackTraceEntry] =
+  ## Returns the attached stack trace to the exception `e` as
+  ## a `seq`. This is not yet available for the JS backend.
+  e.trace
 
-when defined(endb):
-  var
-    dbgAborting: bool # whether the debugger wants to abort
+proc getStackTraceEntries*(): seq[StackTraceEntry] =
+  ## Returns the stack trace entries for the current stack trace.
+  ## This is not yet available for the JS backend.
+  when hasSomeStackTrace:
+    rawWriteStackTrace(result)
+
+const nimCallDepthLimit {.intdefine.} = 2000
+
+proc callDepthLimitReached() {.noinline.} =
+  writeStackTrace()
+  let msg = "Error: call depth limit reached in a debug build (" &
+      $nimCallDepthLimit & " function calls). You can change it with " &
+      "-d:nimCallDepthLimit=<int> but really try to avoid deep " &
+      "recursions instead.\n"
+  showErrorMessage2(msg)
+  rawQuit(1)
+
+proc nimFrame(s: PFrame) {.compilerRtl, inl, raises: [].} =
+  if framePtr == nil:
+    s.calldepth = 0
+    when NimStackTraceMsgs: s.frameMsgLen = 0
+  else:
+    s.calldepth = framePtr.calldepth+1
+    when NimStackTraceMsgs: s.frameMsgLen = framePtr.frameMsgLen
+  s.prev = framePtr
+  framePtr = s
+  if s.calldepth == nimCallDepthLimit: callDepthLimitReached()
+
+when defined(cpp) and appType != "lib" and not gotoBasedExceptions and
+    not defined(js) and not defined(nimscript) and
+    hostOS != "standalone" and hostOS != "any" and not defined(noCppExceptions) and
+    not quirkyExceptions:
+
+  type
+    StdException {.importcpp: "std::exception", header: "<exception>".} = object
+
+  proc what(ex: StdException): cstring {.importcpp: "((char *)#.what())", nodecl.}
+
+  proc setTerminate(handler: proc() {.noconv.})
+    {.importc: "std::set_terminate", header: "<exception>".}
+
+  setTerminate proc() {.noconv.} =
+    # Remove ourself as a handler, reinstalling the default handler.
+    setTerminate(nil)
+
+    var msg = "Unknown error in unexpected exception handler"
+    try:
+      {.emit: "#if !defined(_MSC_VER) || (_MSC_VER >= 1923)".}
+      raise
+      {.emit: "#endif".}
+    except Exception:
+      msg = currException.getStackTrace() & "Error: unhandled exception: " &
+        currException.msg & " [" & $currException.name & "]"
+    except StdException as e:
+      msg = "Error: unhandled cpp exception: " & $e.what()
+    except:
+      msg = "Error: unhandled unknown cpp exception"
+
+    {.emit: "#if defined(_MSC_VER) && (_MSC_VER < 1923)".}
+    msg = "Error: unhandled unknown cpp exception"
+    {.emit: "#endif".}
+
+    when defined(genode):
+      # stderr not available by default, use the LOG session
+      echo msg
+    else:
+      writeToStdErr msg & "\n"
+
+    rawQuit 1
 
 when not defined(noSignalHandler) and not defined(useNimRtl):
+  type Sighandler = proc (a: cint) {.noconv, benign.}
+    # xxx factor with ansi_c.CSighandlerT, posix.Sighandler
+
   proc signalHandler(sign: cint) {.exportc: "signalHandler", noconv.} =
     template processSignal(s, action: untyped) {.dirty.} =
       if s == SIGINT: action("SIGINT: Interrupted by Ctrl-C.\n")
       elif s == SIGSEGV:
         action("SIGSEGV: Illegal storage access. (Attempt to read from nil?)\n")
       elif s == SIGABRT:
-        when defined(endb):
-          if dbgAborting: return # the debugger wants to abort
         action("SIGABRT: Abnormal termination.\n")
       elif s == SIGFPE: action("SIGFPE: Arithmetic error.\n")
       elif s == SIGILL: action("SIGILL: Illegal operation.\n")
-      elif s == SIGBUS:
+      elif (when declared(SIGBUS): s == SIGBUS else: false):
         action("SIGBUS: Illegal storage access. (Attempt to read from nil?)\n")
       else:
         block platformSpecificSignal:
@@ -480,34 +647,59 @@ when not defined(noSignalHandler) and not defined(useNimRtl):
     when defined(memtracker):
       logPendingOps()
     when hasSomeStackTrace:
-      GC_disable()
+      when not usesDestructors: GC_disable()
       var buf = newStringOfCap(2000)
       rawWriteStackTrace(buf)
       processSignal(sign, buf.add) # nice hu? currying a la Nim :-)
-      showErrorMessage(buf)
-      GC_enable()
+      showErrorMessage2(buf)
+      when not usesDestructors: GC_enable()
     else:
       var msg: cstring
       template asgn(y) =
         msg = y
       processSignal(sign, asgn)
-      showErrorMessage(msg)
-    when defined(endb): dbgAborting = true
-    quit(1) # always quit when SIGABRT
+      # xxx use string for msg instead of cstring, and here use showErrorMessage2(msg)
+      # unless there's a good reason to use cstring in signal handler to avoid
+      # using gc?
+      showErrorMessage(msg, msg.len)
+
+    when defined(posix):
+      # reset the signal handler to OS default
+      c_signal(sign, SIG_DFL)
+
+      # re-raise the signal, which will arrive once this handler exits.
+      # this lets the OS perform actions like core dumping and will
+      # also return the correct exit code to the shell.
+      discard c_raise(sign)
+    else:
+      rawQuit(1)
+
+  var SIG_IGN {.importc: "SIG_IGN", header: "<signal.h>".}: Sighandler
 
   proc registerSignalHandler() =
+    # xxx `signal` is deprecated and has many caveats, we should use `sigaction` instead, e.g.
+    # https://stackoverflow.com/questions/231912/what-is-the-difference-between-sigaction-and-signal
     c_signal(SIGINT, signalHandler)
     c_signal(SIGSEGV, signalHandler)
     c_signal(SIGABRT, signalHandler)
     c_signal(SIGFPE, signalHandler)
     c_signal(SIGILL, signalHandler)
-    c_signal(SIGBUS, signalHandler)
+    when declared(SIGBUS):
+      c_signal(SIGBUS, signalHandler)
     when declared(SIGPIPE):
-      c_signal(SIGPIPE, signalHandler)
+      when defined(nimLegacySigpipeHandler):
+        c_signal(SIGPIPE, signalHandler)
+      else:
+        c_signal(SIGPIPE, SIG_IGN)
 
   registerSignalHandler() # call it in initialization section
 
 proc setControlCHook(hook: proc () {.noconv.}) =
   # ugly cast, but should work on all architectures:
-  type SignalHandler = proc (sign: cint) {.noconv, benign.}
-  c_signal(SIGINT, cast[SignalHandler](hook))
+  when declared(Sighandler):
+    c_signal(SIGINT, cast[Sighandler](hook))
+
+when not defined(noSignalHandler) and not defined(useNimRtl):
+  proc unsetControlCHook() =
+    # proc to unset a hook set by setControlCHook
+    c_signal(SIGINT, signalHandler)
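The `onUnhandledException` hook documented above is a public (though unstable) API; below is a minimal usage sketch, not part of the patch, that redirects the final crash report before the runtime exits with code 1. The log-file name is illustrative. The new call-depth guard in this file can likewise be tuned from the command line with `-d:nimCallDepthLimit=<int>`.

```nim
import std/syncio

onUnhandledException = proc (errorMsg: string) {.nimcall, gcsafe.} =
  # errorMsg already holds the stack trace plus
  # "Error: unhandled exception: <msg> [<name>]".
  try:
    writeFile("crash.log", errorMsg)   # illustrative path
  except CatchableError:
    discard

proc boom() =
  raise newException(ValueError, "demo failure")

boom()   # the report lands in crash.log; the process still exits with code 1
```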
diff --git a/lib/system/fatal.nim b/lib/system/fatal.nim
new file mode 100644
index 000000000..25c05e52d
--- /dev/null
+++ b/lib/system/fatal.nim
@@ -0,0 +1,58 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2019 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+{.push profiler: off.}
+
+const
+  gotoBasedExceptions = compileOption("exceptions", "goto")
+  quirkyExceptions = compileOption("exceptions", "quirky")
+
+when hostOS == "standalone":
+  include "$projectpath/panicoverride"
+
+  func sysFatal(exceptn: typedesc[Defect], message: string) {.inline.} =
+    panic(message)
+
+  func sysFatal(exceptn: typedesc[Defect], message, arg: string) {.inline.} =
+    rawoutput(message)
+    panic(arg)
+
+elif quirkyExceptions and not defined(nimscript):
+  import ansi_c
+
+  func name(t: typedesc): string {.magic: "TypeTrait".}
+
+  func sysFatal(exceptn: typedesc[Defect], message, arg: string) {.inline, noreturn.} =
+    when nimvm:
+      # TODO when doAssertRaises works in CT, add a test for it
+      raise (ref exceptn)(msg: message & arg)
+    else:
+      {.noSideEffect.}:
+        writeStackTrace()
+        var buf = newStringOfCap(200)
+        add(buf, "Error: unhandled exception: ")
+        add(buf, message)
+        add(buf, arg)
+        add(buf, " [")
+        add(buf, name exceptn)
+        add(buf, "]\n")
+        cstderr.rawWrite buf
+      rawQuit 1
+
+  func sysFatal(exceptn: typedesc[Defect], message: string) {.inline, noreturn.} =
+    sysFatal(exceptn, message, "")
+
+else:
+  func sysFatal(exceptn: typedesc[Defect], message: string) {.inline, noreturn.} =
+    raise (ref exceptn)(msg: message)
+
+  func sysFatal(exceptn: typedesc[Defect], message, arg: string) {.inline, noreturn.} =
+    raise (ref exceptn)(msg: message & arg)
+
+{.pop.}
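For the `hostOS == "standalone"` branch above, the runtime includes a `panicoverride` file from the project directory and expects it to provide `rawoutput` and `panic`. A minimal sketch of such a file, not part of the patch; the output routine is a placeholder, and real targets would write to a serial port or similar:

```nim
# panicoverride.nim -- sketch for a standalone/embedded target
proc exit(code: cint) {.importc, header: "<stdlib.h>", noreturn.}

proc rawoutput(s: string) =
  discard               # placeholder: e.g. push bytes to a UART

proc panic(s: string) {.noreturn.} =
  rawoutput(s)
  exit(1)
```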
diff --git a/lib/system/formatfloat.nim b/lib/system/formatfloat.nim
new file mode 100644
index 000000000..70dd857d5
--- /dev/null
+++ b/lib/system/formatfloat.nim
@@ -0,0 +1,6 @@
+when not defined(nimPreviewSlimSystem):
+  import std/formatfloat
+  export formatfloat
+  {.deprecated: "use `std/formatfloat`".}
+else:
+  {.error: "use `std/formatfloat`".}
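The shim above only forwards to the real module (or rejects it under `-d:nimPreviewSlimSystem`), so user code should import it directly. A short migration sketch using `addFloat` from that module:

```nim
import std/formatfloat   # instead of relying on the system/formatfloat shim

var s = "pi ~= "
s.addFloat(3.14159)      # same float-to-string formatting that system's `$` uses
echo s
```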
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index 425963f3f..9289c7f55 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -12,14 +12,64 @@
 # Refcounting + Mark&Sweep. Complex algorithms avoided.
 # Been there, done that, didn't work.
 
+#[
+
+A *cell* is anything that is traced by the GC
+(sequences, refs, strings, closures).
+
+The basic algorithm is *Deferred Reference Counting* with cycle detection.
+References on the stack are not counted for better performance and easier C
+code generation.
+
+Each cell has a header consisting of an RC and a pointer to its type
+descriptor. However, the program does not know about these, so they are placed at
+negative offsets. In the GC code the type `PCell` denotes a pointer
+decremented by the right offset, so that the header can be accessed easily. It
+is extremely important that `pointer` is not confused with a `PCell`.
+
+In Nim the compiler cannot always know if a reference
+is stored on the stack or not. This is caused by var parameters.
+Consider this example:
+
+  ```Nim
+  proc setRef(r: var ref TNode) =
+    new(r)
+
+  proc usage =
+    var
+      r: ref TNode
+    setRef(r) # here we should not update the reference counts, because
+              # r is on the stack
+    setRef(r.left) # here we should update the refcounts!
+  ```
+
+We have to decide at runtime whether the reference is on the stack or not.
+The generated code looks roughly like this:
+
+  ```C
+  void setref(TNode** ref) {
+    unsureAsgnRef(ref, newObj(TNode_TI, sizeof(TNode)))
+  }
+  void usage(void) {
+    setRef(&r)
+    setRef(&r->left)
+  }
+  ```
+
+Note that for systems with a contiguous stack (which most systems have)
+the check whether the ref is on the stack is very cheap (only two
+comparisons).
+]#
+
 {.push profiler:off.}
 
 const
   CycleIncrease = 2 # is a multiplicative increase
-  InitialCycleThreshold = 4*1024*1024 # X MB because cycle checking is slow
-  ZctThreshold = 500  # we collect garbage if the ZCT's size
-                      # reaches this threshold
-                      # this seems to be a good value
+  InitialCycleThreshold = when defined(nimCycleBreaker): high(int)
+                          else: 4*1024*1024 # X MB because cycle checking is slow
+  InitialZctThreshold = 500  # we collect garbage if the ZCT's size
+                             # reaches this threshold
+                             # this seems to be a good value
   withRealTime = defined(useRealtimeGC)
 
 when withRealTime and not declared(getTicks):
@@ -28,7 +78,7 @@ when defined(memProfiler):
   proc nimProfile(requestedSize: int) {.benign.}
 
 when hasThreadSupport:
-  import sharedlist
+  import std/sharedlist
 
 const
   rcIncrement = 0b1000 # so that lowest 3 bits are not touched
@@ -46,7 +96,7 @@ type
     waZctDecRef, waPush
     #, waDebug
 
-  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
+  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign, raises: [].}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
 
@@ -78,6 +128,7 @@ type
     when nimCoroutines:
       activeStack: ptr GcStack    # current executing coroutine stack.
     cycleThreshold: int
+    zctThreshold: int
     when useCellIds:
       idGenerator: int
     zct: CellSeq             # the zero count table
@@ -100,23 +151,19 @@ var
 when not defined(useNimRtl):
   instantiateForRegion(gch.region)
 
-template acquire(gch: GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    acquireSys(HeapLock)
-
-template release(gch: GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    releaseSys(HeapLock)
-
 template gcAssert(cond: bool, msg: string) =
   when defined(useGcAssert):
     if not cond:
-      echo "[GCASSERT] ", msg
+      cstderr.rawWrite "[GCASSERT] "
+      cstderr.rawWrite msg
+      when defined(logGC):
+        cstderr.rawWrite "[GCASSERT] statistics:\L"
+        cstderr.rawWrite GC_getStatistics()
       GC_disable()
       writeStackTrace()
       #var x: ptr int
       #echo x[]
-      quit 1
+      rawQuit 1
 
 proc addZCT(s: var CellSeq, c: PCell) {.noinline.} =
   if (c.refcount and ZctFlag) == 0:
@@ -125,18 +172,18 @@ proc addZCT(s: var CellSeq, c: PCell) {.noinline.} =
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
+  result = cast[pointer](cast[int](cell)+%ByteAddress(sizeof(Cell)))
 
 proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
+  result = cast[PCell](cast[int](usr)-%ByteAddress(sizeof(Cell)))
 
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
   # used for code generation concerning debugging
   result = usrToCell(c).typ
 
 proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
-  result = int(usrToCell(p).refcount) shr rcShift
+  result = usrToCell(p).refcount shr rcShift
 
 # this has to equal zero, otherwise we have to round up UnitsPerPage:
 when BitsPerPage mod (sizeof(int)*8) != 0:
@@ -160,65 +207,52 @@ when defined(logGC):
           typName = c.typ.name
 
     when leakDetector:
-      c_fprintf(stdout, "[GC] %s: %p %d %s rc=%ld from %s(%ld)\n",
+      c_printf("[GC] %s: %p %d %s rc=%ld from %s(%ld)\n",
                 msg, c, kind, typName, c.refcount shr rcShift, c.filename, c.line)
     else:
-      c_fprintf(stdout, "[GC] %s: %p %d %s rc=%ld; thread=%ld\n",
+      c_printf("[GC] %s: %p %d %s rc=%ld; thread=%ld\n",
                 msg, c, kind, typName, c.refcount shr rcShift, gch.gcThreadId)
 
+template logCell(msg: cstring, c: PCell) =
+  when defined(logGC):
+    writeCell(msg, c)
+
 template gcTrace(cell, state: untyped) =
   when traceGC: traceCell(cell, state)
 
 # forward declarations:
-proc collectCT(gch: var GcHeap) {.benign.}
-proc isOnStack(p: pointer): bool {.noinline, benign.}
-proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
-proc doOperation(p: pointer, op: WalkOp) {.benign.}
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
+proc collectCT(gch: var GcHeap) {.benign, raises: [].}
+proc isOnStack(p: pointer): bool {.noinline, benign, raises: [].}
+proc forAllChildren(cell: PCell, op: WalkOp) {.benign, raises: [].}
+proc doOperation(p: pointer, op: WalkOp) {.benign, raises: [].}
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign, raises: [].}
 # we need the prototype here for debugging purposes
 
-when hasThreadSupport and hasSharedHeap:
-  template `--`(x: untyped): untyped = atomicDec(x, rcIncrement) <% rcIncrement
-  template `++`(x: untyped) = discard atomicInc(x, rcIncrement)
-else:
-  template `--`(x: untyped): untyped =
-    dec(x, rcIncrement)
-    x <% rcIncrement
-  template `++`(x: untyped) = inc(x, rcIncrement)
-
 proc incRef(c: PCell) {.inline.} =
   gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
   c.refcount = c.refcount +% rcIncrement
   # and not colorMask
-  #writeCell("incRef", c)
+  logCell("incRef", c)
 
-proc nimGCref(p: pointer) {.compilerProc.} =
+proc nimGCref(p: pointer) {.compilerproc.} =
   # we keep it from being collected by pretending it's not even allocated:
-  add(gch.additionalRoots, usrToCell(p))
-  incRef(usrToCell(p))
-
-proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
-  # we MUST access gch as a global here, because this crosses DLL boundaries!
-  when hasThreadSupport and hasSharedHeap:
-    acquireSys(HeapLock)
-  when hasThreadSupport and hasSharedHeap:
-    releaseSys(HeapLock)
+  let c = usrToCell(p)
+  add(gch.additionalRoots, c)
+  incRef(c)
 
 proc rtlAddZCT(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
-  when hasThreadSupport and hasSharedHeap:
-    acquireSys(HeapLock)
   addZCT(gch.zct, c)
-  when hasThreadSupport and hasSharedHeap:
-    releaseSys(HeapLock)
 
 proc decRef(c: PCell) {.inline.} =
   gcAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
   gcAssert(c.refcount >=% rcIncrement, "decRef")
-  if --c.refcount:
+  c.refcount = c.refcount -% rcIncrement
+  if c.refcount <% rcIncrement:
     rtlAddZCT(c)
+  logCell("decRef", c)
 
-proc nimGCunref(p: pointer) {.compilerProc.} =
+proc nimGCunref(p: pointer) {.compilerproc.} =
   let cell = usrToCell(p)
   var L = gch.additionalRoots.len-1
   var i = L
@@ -239,25 +273,15 @@ template beforeDealloc(gch: var GcHeap; c: PCell; msg: typed) =
       if gch.decStack.d[i] == c:
         sysAssert(false, msg)
 
-proc GC_addCycleRoot*[T](p: ref T) {.inline.} =
-  ## adds 'p' to the cycle candidate set for the cycle collector. It is
-  ## necessary if you used the 'acyclic' pragma for optimization
-  ## purposes and need to break cycles manually.
-  rtlAddCycleRoot(usrToCell(cast[pointer](p)))
-
-proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
+proc nimGCunrefNoCycle(p: pointer) {.compilerproc, inline.} =
   sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle")
-  var c = usrToCell(p)
-  gcAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
-  if --c.refcount:
-    rtlAddZCT(c)
-    sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2")
+  decRef(usrToCell(p))
   sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5")
 
-proc nimGCunrefRC1(p: pointer) {.compilerProc, inline.} =
+proc nimGCunrefRC1(p: pointer) {.compilerproc, inline.} =
   decRef(usrToCell(p))
 
-proc asgnRef(dest: PPointer, src: pointer) {.compilerProc, inline.} =
+proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
   # the code generator calls this proc!
   gcAssert(not isOnStack(dest), "asgnRef")
   # BUGFIX: first incRef then decRef!
@@ -265,21 +289,12 @@ proc asgnRef(dest: PPointer, src: pointer) {.compilerProc, inline.} =
   if dest[] != nil: decRef(usrToCell(dest[]))
   dest[] = src
 
-proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerProc, inline.} =
-  # the code generator calls this proc if it is known at compile time that no
-  # cycle is possible.
-  if src != nil:
-    var c = usrToCell(src)
-    ++c.refcount
-  if dest[] != nil:
-    var c = usrToCell(dest[])
-    if --c.refcount:
-      rtlAddZCT(c)
-  dest[] = src
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
+  deprecated: "old compiler compat".} = asgnRef(dest, src)
 
-proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerProc.} =
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc.} =
   # unsureAsgnRef updates the reference counters only if dest is not on the
-  # stack. It is used by the code generator if it cannot decide wether a
+  # stack. It is used by the code generator if it cannot decide whether a
   # reference is in the stack or not (this can happen for var parameters).
   if not isOnStack(dest):
     if src != nil: incRef(usrToCell(src))
@@ -298,6 +313,7 @@ proc initGC() =
     when traceGC:
       for i in low(CellState)..high(CellState): init(states[i])
     gch.cycleThreshold = InitialCycleThreshold
+    gch.zctThreshold = InitialZctThreshold
     gch.stat.stackScans = 0
     gch.stat.cycleCollections = 0
     gch.stat.maxThreshold = 0
@@ -322,14 +338,14 @@ proc cellsetReset(s: var CellSet) =
 {.push stacktrace:off.}
 
 proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
   of nkList:
     for i in 0..n.len-1:
       # inlined for speed
       if n.sons[i].kind == nkSlot:
-        if n.sons[i].typ.kind in {tyRef, tyOptAsRef, tyString, tySequence}:
+        if n.sons[i].typ.kind in {tyRef, tyString, tySequence}:
           doOperation(cast[PPointer](d +% n.sons[i].offset)[], op)
         else:
           forAllChildrenAux(cast[pointer](d +% n.sons[i].offset),
@@ -342,11 +358,11 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
   of nkNone: sysAssert(false, "forAllSlotsAux")
 
 proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
     case mt.kind
-    of tyRef, tyOptAsRef, tyString, tySequence: # leaf:
+    of tyRef, tyString, tySequence: # leaf:
       doOperation(cast[PPointer](d)[], op)
     of tyObject, tyTuple:
       forAllSlotsAux(dest, mt.node, op)
@@ -359,21 +375,20 @@ proc forAllChildren(cell: PCell, op: WalkOp) =
   gcAssert(cell != nil, "forAllChildren: cell is nil")
   gcAssert(isAllocatedPtr(gch.region, cell), "forAllChildren: pointer not part of the heap")
   gcAssert(cell.typ != nil, "forAllChildren: cell.typ is nil")
-  gcAssert cell.typ.kind in {tyRef, tyOptAsRef, tySequence, tyString}, "forAllChildren: unknown GC'ed type"
+  gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: unknown GC'ed type"
   let marker = cell.typ.marker
   if marker != nil:
     marker(cellToUsr(cell), op.int)
   else:
     case cell.typ.kind
-    of tyRef, tyOptAsRef: # common case
+    of tyRef: # common case
       forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
     of tySequence:
-      var d = cast[ByteAddress](cellToUsr(cell))
+      var d = cast[int](cellToUsr(cell))
       var s = cast[PGenericSeq](d)
       if s != nil:
         for i in 0..s.len-1:
-          forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +%
-            GenericSeqSize), cell.typ.base, op)
+          forAllChildrenAux(cast[pointer](d +% align(GenericSeqSize, cell.typ.base.align) +% i *% cell.typ.base.size), cell.typ.base, op)
     else: discard
 
 proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} =
@@ -440,12 +455,11 @@ proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   # generates a new object and sets its reference counter to 0
   incTypeSize typ, size
   sysAssert(allocInv(gch.region), "rawNewObj begin")
-  acquire(gch)
-  gcAssert(typ.kind in {tyRef, tyOptAsRef, tyString, tySequence}, "newObj: 1")
+  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch)
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
   #gcAssert typ.kind in {tyString, tySequence} or size >= typ.base.size, "size too small"
-  gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  gcAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
   setFrameInfo(res)
@@ -454,10 +468,9 @@ proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
   # its refcount is zero, so add it to the ZCT:
   addNewObjToZCT(res, gch)
-  when logGC: writeCell("new cell", res)
+  logCell("new cell", res)
   track("rawNewObj", res, size)
   gcTrace(res, csAllocated)
-  release(gch)
   when useCellIds:
     inc gch.idGenerator
     res.id = gch.idGenerator * 1000_000 + gch.gcThreadId
@@ -471,40 +484,40 @@ proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
   result = rawNewObj(typ, size, gch)
   when defined(memProfiler): nimProfile(size)
 
-proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
+proc newObj(typ: PNimType, size: int): pointer {.compilerRtl, noinline.} =
   result = rawNewObj(typ, size, gch)
   zeroMem(result, size)
   when defined(memProfiler): nimProfile(size)
 
+{.push overflowChecks: on.}
 proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
   # `newObj` already uses locks, so no need for them here.
-  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
+  let size = align(GenericSeqSize, typ.base.align) + len * typ.base.size
   result = newObj(typ, size)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
   when defined(memProfiler): nimProfile(size)
+{.pop.}
 
-proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
+proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl, noinline.} =
   # generates a new object and sets its reference counter to 1
   incTypeSize typ, size
   sysAssert(allocInv(gch.region), "newObjRC1 begin")
-  acquire(gch)
-  gcAssert(typ.kind in {tyRef, tyOptAsRef, tyString, tySequence}, "newObj: 1")
+  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch)
   sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
 
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
   sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  sysAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
   setFrameInfo(res)
   res.refcount = rcIncrement # refcount is 1
   sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-  when logGC: writeCell("new cell", res)
+  logCell("new cell", res)
   track("newObjRC1", res, size)
   gcTrace(res, csAllocated)
-  release(gch)
   when useCellIds:
     inc gch.idGenerator
     res.id = gch.idGenerator * 1000_000 + gch.gcThreadId
@@ -513,15 +526,16 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
   sysAssert(allocInv(gch.region), "newObjRC1 end")
   when defined(memProfiler): nimProfile(size)
 
+{.push overflowChecks: on.}
 proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
+  let size = align(GenericSeqSize, typ.base.align) + len * typ.base.size
   result = newObjRC1(typ, size)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
   when defined(memProfiler): nimProfile(size)
+{.pop.}
 
 proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
-  acquire(gch)
   collectCT(gch)
   var ol = usrToCell(old)
   sysAssert(ol.typ != nil, "growObj: 1")
@@ -529,52 +543,28 @@ proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
   sysAssert(allocInv(gch.region), "growObj begin")
 
   var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
-  var elemSize = 1
-  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
+  var elemSize, elemAlign = 1
+  if ol.typ.kind != tyString:
+    elemSize = ol.typ.base.size
+    elemAlign = ol.typ.base.align
   incTypeSize ol.typ, newsize
 
-  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
+  var oldsize = align(GenericSeqSize, elemAlign) + cast[PGenericSeq](old).len * elemSize
   copyMem(res, ol, oldsize + sizeof(Cell))
-  zeroMem(cast[pointer](cast[ByteAddress](res) +% oldsize +% sizeof(Cell)),
+  zeroMem(cast[pointer](cast[int](res) +% oldsize +% sizeof(Cell)),
           newsize-oldsize)
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
+  sysAssert((cast[int](res) and (MemAlign-1)) == 0, "growObj: 3")
   # This can be wrong for intermediate temps that are nevertheless on the
   # heap because of lambda lifting:
   #gcAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
-  when logGC:
-    writeCell("growObj old cell", ol)
-    writeCell("growObj new cell", res)
+  logCell("growObj old cell", ol)
+  logCell("growObj new cell", res)
   gcTrace(ol, csZctFreed)
   gcTrace(res, csAllocated)
   track("growObj old", ol, 0)
   track("growObj new", res, newsize)
-  when reallyDealloc:
-    sysAssert(allocInv(gch.region), "growObj before dealloc")
-    if ol.refcount shr rcShift <=% 1:
-      # free immediately to save space:
-      if (ol.refcount and ZctFlag) != 0:
-        var j = gch.zct.len-1
-        var d = gch.zct.d
-        while j >= 0:
-          if d[j] == ol:
-            d[j] = res
-            break
-          dec(j)
-      beforeDealloc(gch, ol, "growObj stack trash")
-      decTypeSize(ol, ol.typ)
-      rawDealloc(gch.region, ol)
-    else:
-      # we split the old refcount in 2 parts. XXX This is still not entirely
-      # correct if the pointer that receives growObj's result is on the stack.
-      # A better fix would be to emit the location specific write barrier for
-      # 'growObj', but this is lots of more work and who knows what new problems
-      # this would create.
-      res.refcount = rcIncrement
-      decRef(ol)
-  else:
-    sysAssert(ol.typ != nil, "growObj: 5")
-    zeroMem(ol, sizeof(Cell))
-  release(gch)
+  # since we steal the old seq's contents, we set the old length to 0.
+  cast[PGenericSeq](old).len = 0
   when useCellIds:
     inc gch.idGenerator
     res.id = gch.idGenerator * 1000_000 + gch.gcThreadId
@@ -593,7 +583,7 @@ proc freeCyclicCell(gch: var GcHeap, c: PCell) =
   prepareDealloc(c)
   gcTrace(c, csCycFreed)
   track("cycle collector dealloc cell", c, 0)
-  when logGC: writeCell("cycle collector dealloc cell", c)
+  logCell("cycle collector dealloc cell", c)
   when reallyDealloc:
     sysAssert(allocInv(gch.region), "free cyclic cell")
     beforeDealloc(gch, c, "freeCyclicCell: stack trash")
@@ -621,7 +611,7 @@ proc markS(gch: var GcHeap, c: PCell) =
     if not containsOrIncl(gch.marked, d):
       forAllChildren(d, waMarkPrecise)
 
-proc markGlobals(gch: var GcHeap) =
+proc markGlobals(gch: var GcHeap) {.raises: [].} =
   if gch.gcThreadId == 0:
     for i in 0 .. globalMarkersLen-1: globalMarkers[i]()
   for i in 0 .. threadLocalMarkersLen-1: threadLocalMarkers[i]()
@@ -638,7 +628,7 @@ when logGC:
       if cycleCheckA[i] == c: return true
     if cycleCheckALen == len(cycleCheckA):
       gcAssert(false, "cycle detection overflow")
-      quit 1
+      rawQuit 1
     cycleCheckA[cycleCheckALen] = c
     inc cycleCheckALen
 
@@ -648,7 +638,7 @@ when logGC:
     else:
       writeCell("cell {", s)
       forAllChildren(s, waDebug)
-      c_fprintf(stdout, "}\n")
+      c_printf("}\n")
 
 proc doOperation(p: pointer, op: WalkOp) =
   if p == nil: return
@@ -659,14 +649,12 @@ proc doOperation(p: pointer, op: WalkOp) =
   case op
   of waZctDecRef:
     #if not isAllocatedPtr(gch.region, c):
-    #  c_fprintf(stdout, "[GC] decref bug: %p", c)
+    #  c_printf("[GC] decref bug: %p", c)
     gcAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
     gcAssert(c.refcount >=% rcIncrement, "doOperation 2")
-    #c.refcount = c.refcount -% rcIncrement
-    when logGC: writeCell("decref (from doOperation)", c)
+    logCell("decref (from doOperation)", c)
     track("waZctDecref", p, 0)
     decRef(c)
-    #if c.refcount <% rcIncrement: addZCT(gch.zct, c)
   of waPush:
     add(gch.tempStack, c)
   of waMarkGlobal:
@@ -678,9 +666,9 @@ proc doOperation(p: pointer, op: WalkOp) =
 proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
   doOperation(d, WalkOp(op))
 
-proc collectZCT(gch: var GcHeap): bool {.benign.}
+proc collectZCT(gch: var GcHeap): bool {.benign, raises: [].}
 
-proc collectCycles(gch: var GcHeap) =
+proc collectCycles(gch: var GcHeap) {.raises: [].} =
   when hasThreadSupport:
     for c in gch.toDispose:
       nimGCunref(c)
@@ -697,20 +685,20 @@ proc collectCycles(gch: var GcHeap) =
 proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
   sysAssert(allocInv(gch.region), "gcMark begin")
-  var cell = usrToCell(p)
-  var c = cast[ByteAddress](cell)
+  var c = cast[int](p)
   if c >% PageSize:
     # fast check: does it look like a cell?
-    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
+    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, p))
     if objStart != nil:
       # mark the cell:
-      objStart.refcount = objStart.refcount +% rcIncrement
+      incRef(objStart)
       add(gch.decStack, objStart)
     when false:
+      let cell = usrToCell(p)
       if isAllocatedPtr(gch.region, cell):
         sysAssert false, "allocated pointer but not interior?"
         # mark the cell:
-        cell.refcount = cell.refcount +% rcIncrement
+        incRef(cell)
         add(gch.decStack, cell)
   sysAssert(allocInv(gch.region), "gcMark end")
 
@@ -720,7 +708,8 @@ proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   garbage collection that is used by Nim. For more information, please see the documentation of
   `CLANG_NO_SANITIZE_ADDRESS` in `lib/nimbase.h`.
  ]#
-proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl, codegenDecl: "CLANG_NO_SANITIZE_ADDRESS $# $#$#".} =
+proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl,
+    codegenDecl: "CLANG_NO_SANITIZE_ADDRESS N_LIB_PRIVATE $# $#$#".} =
   forEachStackSlot(gch, gcMark)
 
 proc collectZCT(gch: var GcHeap): bool =
@@ -752,7 +741,7 @@ proc collectZCT(gch: var GcHeap): bool =
       # as this might be too slow.
       # In any case, it should be removed from the ZCT. But not
       # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
-      when logGC: writeCell("zct dealloc cell", c)
+      logCell("zct dealloc cell", c)
       track("zct dealloc cell", c, 0)
       gcTrace(c, csZctFreed)
       # We are about to free the object, call the finalizer BEFORE its
@@ -784,14 +773,9 @@ proc unmarkStackAndRegisters(gch: var GcHeap) =
   for i in 0..gch.decStack.len-1:
     sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters"
     decRef(d[i])
-    #var c = d[i]
-    # XXX no need for an atomic dec here:
-    #if --c.refcount:
-    #  addZCT(gch.zct, c)
-    #sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
   gch.decStack.len = 0
 
-proc collectCTBody(gch: var GcHeap) =
+proc collectCTBody(gch: var GcHeap) {.raises: [].} =
   when withRealTime:
     let t0 = getticks()
   sysAssert(allocInv(gch.region), "collectCT: begin")
@@ -823,14 +807,10 @@ proc collectCTBody(gch: var GcHeap) =
     gch.stat.maxPause = max(gch.stat.maxPause, duration)
     when defined(reportMissedDeadlines):
       if gch.maxPause > 0 and duration > gch.maxPause:
-        c_fprintf(stdout, "[GC] missed deadline: %ld\n", duration)
+        c_printf("[GC] missed deadline: %ld\n", duration)
 
 proc collectCT(gch: var GcHeap) =
-  # stackMarkCosts prevents some pathological behaviour: Stack marking
-  # becomes more expensive with large stacks and large stacks mean that
-  # cells with RC=0 are more likely to be kept alive by the stack.
-  let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
-  if (gch.zct.len >= stackMarkCosts or (cycleGC and
+  if (gch.zct.len >= gch.zctThreshold or (cycleGC and
       getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
       gch.recGcLock == 0:
     when false:
@@ -838,6 +818,13 @@ proc collectCT(gch: var GcHeap) =
       cellsetReset(gch.marked)
       markForDebug(gch)
     collectCTBody(gch)
+    gch.zctThreshold = max(InitialZctThreshold, gch.zct.len * CycleIncrease)
+
+proc GC_collectZct*() =
+  ## Collect the ZCT (zero count table). Unstable, experimental API for
+  ## testing purposes.
+  ## DO NOT USE!
+  collectCTBody(gch)
 
 when withRealTime:
   proc toNano(x: int): Nanos {.inline.} =
@@ -847,13 +834,12 @@ when withRealTime:
     gch.maxPause = MaxPauseInUs.toNano
 
   proc GC_step(gch: var GcHeap, us: int, strongAdvice: bool) =
-    acquire(gch)
     gch.maxPause = us.toNano
-    if (gch.zct.len >= ZctThreshold or (cycleGC and
+    if (gch.zct.len >= gch.zctThreshold or (cycleGC and
         getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or
         strongAdvice:
       collectCTBody(gch)
-    release(gch)
+      gch.zctThreshold = max(InitialZctThreshold, gch.zct.len * CycleIncrease)
 
   proc GC_step*(us: int, strongAdvice = false, stackSize = -1) {.noinline.} =
     if stackSize >= 0:
@@ -864,10 +850,10 @@ when withRealTime:
         stack.bottomSaved = stack.bottom
         when stackIncreases:
           stack.bottom = cast[pointer](
-            cast[ByteAddress](stack.pos) - sizeof(pointer) * 6 - stackSize)
+            cast[int](stack.pos) - sizeof(pointer) * 6 - stackSize)
         else:
           stack.bottom = cast[pointer](
-            cast[ByteAddress](stack.pos) + sizeof(pointer) * 6 + stackSize)
+            cast[int](stack.pos) + sizeof(pointer) * 6 + stackSize)
 
     GC_step(gch, us, strongAdvice)
 
@@ -877,18 +863,13 @@ when withRealTime:
 
 when not defined(useNimRtl):
   proc GC_disable() =
-    when hasThreadSupport and hasSharedHeap:
-      discard atomicInc(gch.recGcLock, 1)
-    else:
-      inc(gch.recGcLock)
+    inc(gch.recGcLock)
   proc GC_enable() =
-    if gch.recGcLock <= 0:
-      raise newException(AssertionError,
-          "API usage error: GC_enable called but GC is already enabled")
-    when hasThreadSupport and hasSharedHeap:
-      discard atomicDec(gch.recGcLock, 1)
-    else:
-      dec(gch.recGcLock)
+    when defined(nimDoesntTrackDefects):
+      if gch.recGcLock <= 0:
+        raise newException(AssertionDefect,
+            "API usage error: GC_enable called but GC is already enabled")
+    dec(gch.recGcLock)
 
   proc GC_setStrategy(strategy: GC_Strategy) =
     discard
@@ -897,16 +878,14 @@ when not defined(useNimRtl):
     gch.cycleThreshold = InitialCycleThreshold
 
   proc GC_disableMarkAndSweep() =
-    gch.cycleThreshold = high(gch.cycleThreshold)-1
+    gch.cycleThreshold = high(typeof(gch.cycleThreshold))-1
     # set to the max value to suppress the cycle detector
 
   proc GC_fullCollect() =
-    acquire(gch)
     var oldThreshold = gch.cycleThreshold
     gch.cycleThreshold = 0 # forces cycle collection
     collectCT(gch)
     gch.cycleThreshold = oldThreshold
-    release(gch)
 
   proc GC_getStatistics(): string =
     result = "[GC] total memory: " & $(getTotalMem()) & "\n" &
@@ -923,6 +902,10 @@ when not defined(useNimRtl):
       for stack in items(gch.stack):
         result.add "[GC]   stack " & stack.bottom.repr & "[GC]     max stack size " & cast[pointer](stack.maxStackSize).repr & "\n"
     else:
+      # this caused memory leaks, see #10488 ; find a way without `repr`
+      # maybe using a local copy of strutils.toHex or snprintf
+      when defined(logGC):
+        result.add "[GC] stack bottom: " & gch.stack.bottom.repr
       result.add "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
 
 {.pop.} # profiler: off, stackTrace: off
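The gc.nim hunks above replace the fixed ZctThreshold with an adaptive gch.zctThreshold and add the unstable GC_collectZct entry point. The following is only a usage sketch, not part of the patch: it assumes the refc GC, the pumpEvents name is illustrative, and GC_setMaxPause/GC_step exist only in builds compiled with -d:useRealtimeGC.

  # Illustrative only: exercise the new ZCT collection entry point and the
  # realtime stepper. GC_collectZct is explicitly marked unstable above.
  proc pumpEvents() =
    for _ in 0 ..< 1000:
      discard newSeq[int](16)   # create short-lived garbage whose RC drops to 0
    GC_collectZct()             # drain the zero count table right away

  pumpEvents()

  when defined(useRealtimeGC):
    GC_setMaxPause(300)         # aim for at most ~300 microseconds per step
    GC_step(300)                # run one bounded collection step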
diff --git a/lib/system/gc2.nim b/lib/system/gc2.nim
deleted file mode 100644
index 283919503..000000000
--- a/lib/system/gc2.nim
+++ /dev/null
@@ -1,777 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2017 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-#            Garbage Collector
-#
-# The basic algorithm is an incremental mark
-# and sweep GC to free cycles. It is hard realtime in that if you play
-# according to its rules, no deadline will ever be missed.
-# Since this kind of collector is very bad at recycling dead objects
-# early, Nim's codegen emits ``nimEscape`` calls at strategic
-# places. For this to work even 'unsureAsgnRef' needs to mark things
-# so that only return values need to be considered in ``nimEscape``.
-
-{.push profiler:off.}
-
-const
-  CycleIncrease = 2 # is a multiplicative increase
-  InitialCycleThreshold = 512*1024 # start collecting after 500KB
-  ZctThreshold = 500  # we collect garbage if the ZCT's size
-                      # reaches this threshold
-                      # this seems to be a good value
-  withRealTime = defined(useRealtimeGC)
-
-when withRealTime and not declared(getTicks):
-  include "system/timers"
-when defined(memProfiler):
-  proc nimProfile(requestedSize: int) {.benign.}
-
-when hasThreadSupport:
-  include sharedlist
-
-type
-  ObjectSpaceIter = object
-    state: range[-1..0]
-
-iterToProc(allObjects, ptr ObjectSpaceIter, allObjectsAsProc)
-
-const
-  escapedBit = 0b1000 # so that lowest 3 bits are not touched
-  rcBlackOrig = 0b000
-  rcWhiteOrig = 0b001
-  rcGrey = 0b010   # traditional color for incremental mark&sweep
-  rcUnused = 0b011
-  colorMask = 0b011
-type
-  WalkOp = enum
-    waMarkGlobal,    # part of the backup mark&sweep
-    waMarkGrey,
-    waZctDecRef,
-    waDebug
-
-  Phase {.pure.} = enum
-    None, Marking, Sweeping
-  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
-    # A ref type can have a finalizer that is called before the object's
-    # storage is freed.
-
-  GcStat = object
-    stackScans: int          # number of performed stack scans (for statistics)
-    completedCollections: int    # number of performed full collections
-    maxThreshold: int        # max threshold that has been set
-    maxStackSize: int        # max stack size
-    maxStackCells: int       # max stack cells in ``decStack``
-    cycleTableSize: int      # max entries in cycle table
-    maxPause: int64          # max measured GC pause in nanoseconds
-
-  GcStack {.final, pure.} = object
-    when nimCoroutines:
-      prev: ptr GcStack
-      next: ptr GcStack
-      maxStackSize: int      # Used to track statistics because we can not use
-                             # GcStat.maxStackSize when multiple stacks exist.
-    bottom: pointer
-
-    when withRealTime or nimCoroutines:
-      pos: pointer           # Used with `withRealTime` only for code clarity, see GC_Step().
-    when withRealTime:
-      bottomSaved: pointer
-
-  GcHeap = object # this contains the zero count and
-                  # non-zero count table
-    black, red: int # either 0 or 1.
-    stack: GcStack
-    when nimCoroutines:
-      activeStack: ptr GcStack    # current executing coroutine stack.
-    phase: Phase
-    cycleThreshold: int
-    when useCellIds:
-      idGenerator: int
-    greyStack: CellSeq
-    recGcLock: int           # prevent recursion via finalizers; no thread lock
-    when withRealTime:
-      maxPause: Nanos        # max allowed pause in nanoseconds; active if > 0
-    region: MemRegion        # garbage collected region
-    stat: GcStat
-    additionalRoots: CellSeq # explicit roots for GC_ref/unref
-    spaceIter: ObjectSpaceIter
-    pDumpHeapFile: pointer # File that is used for GC_dumpHeap
-    when hasThreadSupport:
-      toDispose: SharedList[pointer]
-    gcThreadId: int
-
-var
-  gch {.rtlThreadVar.}: GcHeap
-
-when not defined(useNimRtl):
-  instantiateForRegion(gch.region)
-
-template acquire(gch: GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    acquireSys(HeapLock)
-
-template release(gch: GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    releaseSys(HeapLock)
-
-# Which color to use for new objects is tricky: When we're marking,
-# they have to be *white* so that everything is marked that is only
-# reachable from them. However, when we are sweeping, they have to
-# be black, so that we don't free them prematurely. In order to save
-# a comparison gch.phase == Phase.Marking, we use the pseudo-color
-# 'red' for new objects.
-template allocColor(): untyped = gch.red
-
-template gcAssert(cond: bool, msg: string) =
-  when defined(useGcAssert):
-    if not cond:
-      echo "[GCASSERT] ", msg
-      GC_disable()
-      writeStackTrace()
-      quit 1
-
-proc cellToUsr(cell: PCell): pointer {.inline.} =
-  # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
-
-proc usrToCell(usr: pointer): PCell {.inline.} =
-  # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
-
-proc canBeCycleRoot(c: PCell): bool {.inline.} =
-  result = ntfAcyclic notin c.typ.flags
-
-proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
-  # used for code generation concerning debugging
-  result = usrToCell(c).typ
-
-proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
-  result = 0
-
-# this has to equal zero, otherwise we have to round up UnitsPerPage:
-when BitsPerPage mod (sizeof(int)*8) != 0:
-  {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
-
-template color(c): untyped = c.refCount and colorMask
-template setColor(c, col) =
-  c.refcount = c.refcount and not colorMask or col
-
-template markAsEscaped(c: PCell) =
-  c.refcount = c.refcount or escapedBit
-
-template didEscape(c: PCell): bool =
-  (c.refCount and escapedBit) != 0
-
-proc writeCell(file: File; msg: cstring, c: PCell) =
-  var kind = -1
-  if c.typ != nil: kind = ord(c.typ.kind)
-  let col = if c.color == rcGrey: 'g'
-            elif c.color == gch.black: 'b'
-            else: 'w'
-  when useCellIds:
-    let id = c.id
-  else:
-    let id = c
-  when defined(nimTypeNames):
-    c_fprintf(file, "%s %p %d escaped=%ld color=%c of type %s\n",
-              msg, id, kind, didEscape(c), col, c.typ.name)
-  elif leakDetector:
-    c_fprintf(file, "%s %p %d escaped=%ld color=%c from %s(%ld)\n",
-              msg, id, kind, didEscape(c), col, c.filename, c.line)
-  else:
-    c_fprintf(file, "%s %p %d escaped=%ld color=%c\n",
-              msg, id, kind, didEscape(c), col)
-
-proc writeCell(msg: cstring, c: PCell) =
-  stdout.writeCell(msg, c)
-
-proc myastToStr[T](x: T): string {.magic: "AstToStr", noSideEffect.}
-
-template gcTrace(cell, state: untyped) =
-  when traceGC: writeCell(myastToStr(state), cell)
-
-# forward declarations:
-proc collectCT(gch: var GcHeap) {.benign.}
-proc isOnStack(p: pointer): bool {.noinline, benign.}
-proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
-proc doOperation(p: pointer, op: WalkOp) {.benign.}
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
-# we need the prototype here for debugging purposes
-
-proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
-  # we MUST access gch as a global here, because this crosses DLL boundaries!
-  discard
-
-proc nimGCref(p: pointer) {.compilerProc.} =
-  let cell = usrToCell(p)
-  markAsEscaped(cell)
-  add(gch.additionalRoots, cell)
-
-proc nimGCunref(p: pointer) {.compilerProc.} =
-  let cell = usrToCell(p)
-  var L = gch.additionalRoots.len-1
-  var i = L
-  let d = gch.additionalRoots.d
-  while i >= 0:
-    if d[i] == cell:
-      d[i] = d[L]
-      dec gch.additionalRoots.len
-      break
-    dec(i)
-
-proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
-  discard "can we do some freeing here?"
-
-proc nimGCunrefRC1(p: pointer) {.compilerProc, inline.} =
-  discard "can we do some freeing here?"
-
-template markGrey(x: PCell) =
-  if x.color != 1-gch.black and gch.phase == Phase.Marking:
-    if not isAllocatedPtr(gch.region, x):
-      c_fprintf(stdout, "[GC] markGrey proc: %p\n", x)
-      #GC_dumpHeap()
-      sysAssert(false, "wtf")
-    x.setColor(rcGrey)
-    add(gch.greyStack, x)
-
-proc GC_addCycleRoot*[T](p: ref T) {.inline.} =
-  ## adds 'p' to the cycle candidate set for the cycle collector. It is
-  ## necessary if you used the 'acyclic' pragma for optimization
-  ## purposes and need to break cycles manually.
-  discard
-
-template asgnRefImpl =
-  gcAssert(not isOnStack(dest), "asgnRef")
-  # BUGFIX: first incRef then decRef!
-  if src != nil:
-    let s = usrToCell(src)
-    markAsEscaped(s)
-    markGrey(s)
-  dest[] = src
-
-proc asgnRef(dest: PPointer, src: pointer) {.compilerProc, inline.} =
-  # the code generator calls this proc!
-  asgnRefImpl()
-
-proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerProc, inline.} =
-  asgnRefImpl()
-
-proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerProc.} =
-  # unsureAsgnRef marks 'src' as grey only if dest is not on the
-  # stack. It is used by the code generator if it cannot decide whether a
-  # reference is in the stack or not (this can happen for var parameters).
-  if src != nil:
-    let s = usrToCell(src)
-    markAsEscaped(s)
-    if not isOnStack(dest): markGrey(s)
-  dest[] = src
-
-proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
-  var d = cast[ByteAddress](dest)
-  case n.kind
-  of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
-  of nkList:
-    for i in 0..n.len-1:
-      forAllSlotsAux(dest, n.sons[i], op)
-  of nkCase:
-    var m = selectBranch(dest, n)
-    if m != nil: forAllSlotsAux(dest, m, op)
-  of nkNone: sysAssert(false, "forAllSlotsAux")
-
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
-  var d = cast[ByteAddress](dest)
-  if dest == nil: return # nothing to do
-  if ntfNoRefs notin mt.flags:
-    case mt.kind
-    of tyRef, tyOptAsRef, tyString, tySequence: # leaf:
-      doOperation(cast[PPointer](d)[], op)
-    of tyObject, tyTuple:
-      forAllSlotsAux(dest, mt.node, op)
-    of tyArray, tyArrayConstr, tyOpenArray:
-      for i in 0..(mt.size div mt.base.size)-1:
-        forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
-    else: discard
-
-proc forAllChildren(cell: PCell, op: WalkOp) =
-  gcAssert(cell != nil, "forAllChildren: 1")
-  gcAssert(isAllocatedPtr(gch.region, cell), "forAllChildren: 2")
-  gcAssert(cell.typ != nil, "forAllChildren: 3")
-  gcAssert cell.typ.kind in {tyRef, tyOptAsRef, tySequence, tyString}, "forAllChildren: 4"
-  let marker = cell.typ.marker
-  if marker != nil:
-    marker(cellToUsr(cell), op.int)
-  else:
-    case cell.typ.kind
-    of tyRef, tyOptAsRef: # common case
-      forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
-    of tySequence:
-      var d = cast[ByteAddress](cellToUsr(cell))
-      var s = cast[PGenericSeq](d)
-      if s != nil:
-        for i in 0..s.len-1:
-          forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +%
-            GenericSeqSize), cell.typ.base, op)
-    else: discard
-
-{.push stackTrace: off, profiler:off.}
-proc gcInvariant*() =
-  sysAssert(allocInv(gch.region), "injected")
-  when declared(markForDebug):
-    markForDebug(gch)
-{.pop.}
-
-include gc_common
-
-proc initGC() =
-  when not defined(useNimRtl):
-    gch.red = (1-gch.black)
-    gch.cycleThreshold = InitialCycleThreshold
-    gch.stat.stackScans = 0
-    gch.stat.completedCollections = 0
-    gch.stat.maxThreshold = 0
-    gch.stat.maxStackSize = 0
-    gch.stat.maxStackCells = 0
-    gch.stat.cycleTableSize = 0
-    # init the rt
-    init(gch.additionalRoots)
-    init(gch.greyStack)
-    when hasThreadSupport:
-      init(gch.toDispose)
-    gch.gcThreadId = atomicInc(gHeapidGenerator) - 1
-    gcAssert(gch.gcThreadId >= 0, "invalid computed thread ID")
-
-proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
-  # generates a new object and sets its reference counter to 0
-  sysAssert(allocInv(gch.region), "rawNewObj begin")
-  gcAssert(typ.kind in {tyRef, tyOptAsRef, tyString, tySequence}, "newObj: 1")
-  collectCT(gch)
-  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
-  gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
-  # now it is buffered in the ZCT
-  res.typ = typ
-  when leakDetector and not hasThreadSupport:
-    if framePtr != nil and framePtr.prev != nil:
-      res.filename = framePtr.prev.filename
-      res.line = framePtr.prev.line
-  # refcount is zero, color is black, but mark it to be in the ZCT
-  res.refcount = allocColor()
-  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-  when logGC: writeCell("new cell", res)
-  gcTrace(res, csAllocated)
-  when useCellIds:
-    inc gch.idGenerator
-    res.id = gch.idGenerator
-  result = cellToUsr(res)
-  sysAssert(allocInv(gch.region), "rawNewObj end")
-
-{.pop.}
-
-proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = rawNewObj(typ, size, gch)
-  when defined(memProfiler): nimProfile(size)
-
-proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = rawNewObj(typ, size, gch)
-  zeroMem(result, size)
-  when defined(memProfiler): nimProfile(size)
-
-proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  # `newObj` already uses locks, so no need for them here.
-  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
-  result = newObj(typ, size)
-  cast[PGenericSeq](result).len = len
-  cast[PGenericSeq](result).reserved = len
-  when defined(memProfiler): nimProfile(size)
-
-proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = newObj(typ, size)
-
-proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  result = newSeq(typ, len)
-
-proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
-  acquire(gch)
-  collectCT(gch)
-  var ol = usrToCell(old)
-  sysAssert(ol.typ != nil, "growObj: 1")
-  gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
-
-  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
-  var elemSize = 1
-  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
-  incTypeSize ol.typ, newsize
-
-  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
-  copyMem(res, ol, oldsize + sizeof(Cell))
-  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
-          newsize-oldsize)
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
-  when false:
-    # this is wrong since seqs can be shared via 'shallow':
-    when reallyDealloc: rawDealloc(gch.region, ol)
-    else:
-      zeroMem(ol, sizeof(Cell))
-  when useCellIds:
-    inc gch.idGenerator
-    res.id = gch.idGenerator
-  release(gch)
-  result = cellToUsr(res)
-  when defined(memProfiler): nimProfile(newsize-oldsize)
-
-proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
-  result = growObj(old, newsize, gch)
-
-{.push profiler:off.}
-
-
-template takeStartTime(workPackageSize) {.dirty.} =
-  const workPackage = workPackageSize
-  var debugticker = 1000
-  when withRealTime:
-    var steps = workPackage
-    var t0: Ticks
-    if gch.maxPause > 0: t0 = getticks()
-
-template takeTime {.dirty.} =
-  when withRealTime: dec steps
-  dec debugticker
-
-template checkTime {.dirty.} =
-  if debugticker <= 0:
-    #echo "in loop"
-    debugticker = 1000
-  when withRealTime:
-    if steps == 0:
-      steps = workPackage
-      if gch.maxPause > 0:
-        let duration = getticks() - t0
-        # the GC's measuring is not accurate and needs some cleanup actions
-        # (stack unmarking), so subtract some short amount of time in
-        # order to miss deadlines less often:
-        if duration >= gch.maxPause - 50_000:
-          return false
-
-# ---------------- dump heap ----------------
-
-template dumpHeapFile(gch: var GcHeap): File =
-  cast[File](gch.pDumpHeapFile)
-
-proc debugGraph(s: PCell) =
-  c_fprintf(gch.dumpHeapFile, "child %p\n", s)
-
-proc dumpRoot(gch: var GcHeap; s: PCell) =
-  if isAllocatedPtr(gch.region, s):
-    c_fprintf(gch.dumpHeapFile, "global_root %p\n", s)
-  else:
-    c_fprintf(gch.dumpHeapFile, "global_root_invalid %p\n", s)
-
-proc GC_dumpHeap*(file: File) =
-  ## Dumps the GCed heap's content to a file. Can be useful for
-  ## debugging. Produces an undocumented text file format that
-  ## can be translated into "dot" syntax via the "heapdump2dot" tool.
-  gch.pDumpHeapFile = file
-  var spaceIter: ObjectSpaceIter
-  when false:
-    var d = gch.decStack.d
-    for i in 0 .. gch.decStack.len-1:
-      if isAllocatedPtr(gch.region, d[i]):
-        c_fprintf(file, "onstack %p\n", d[i])
-      else:
-        c_fprintf(file, "onstack_invalid %p\n", d[i])
-  if gch.gcThreadId == 0:
-    for i in 0 .. globalMarkersLen-1: globalMarkers[i]()
-  for i in 0 .. threadLocalMarkersLen-1: threadLocalMarkers[i]()
-  while true:
-    let x = allObjectsAsProc(gch.region, addr spaceIter)
-    if spaceIter.state < 0: break
-    if isCell(x):
-      # cast to PCell is correct here:
-      var c = cast[PCell](x)
-      writeCell(file, "cell ", c)
-      forAllChildren(c, waDebug)
-      c_fprintf(file, "end\n")
-  gch.pDumpHeapFile = nil
-
-proc GC_dumpHeap() =
-  var f: File
-  if open(f, "heap.txt", fmWrite):
-    GC_dumpHeap(f)
-    f.close()
-  else:
-    c_fprintf(stdout, "cannot write heap.txt")
-
-# ---------------- cycle collector -------------------------------------------
-
-proc freeCyclicCell(gch: var GcHeap, c: PCell) =
-  gcAssert(isAllocatedPtr(gch.region, c), "freeCyclicCell: freed pointer?")
-  prepareDealloc(c)
-  gcTrace(c, csCycFreed)
-  when logGC: writeCell("cycle collector dealloc cell", c)
-  when reallyDealloc:
-    sysAssert(allocInv(gch.region), "free cyclic cell")
-    rawDealloc(gch.region, c)
-  else:
-    gcAssert(c.typ != nil, "freeCyclicCell")
-    zeroMem(c, sizeof(Cell))
-
-proc sweep(gch: var GcHeap): bool =
-  takeStartTime(100)
-  #echo "loop start"
-  let white = 1-gch.black
-  #c_fprintf(stdout, "black is %d\n", black)
-  while true:
-    let x = allObjectsAsProc(gch.region, addr gch.spaceIter)
-    if gch.spaceIter.state < 0: break
-    takeTime()
-    if isCell(x):
-      # cast to PCell is correct here:
-      var c = cast[PCell](x)
-      gcAssert c.color != rcGrey, "cell is still grey?"
-      if c.color == white: freeCyclicCell(gch, c)
-      # Since this is incremental, we MUST not set the object to 'white' here.
-      # We could set all the remaining objects to white after the 'sweep'
-      # completed but instead we flip the meaning of black/white to save one
-      # traversal over the heap!
-    checkTime()
-  # prepare for next iteration:
-  #echo "loop end"
-  gch.spaceIter = ObjectSpaceIter()
-  result = true
-
-proc markRoot(gch: var GcHeap, c: PCell) {.inline.} =
-  if c.color == 1-gch.black:
-    c.setColor(rcGrey)
-    add(gch.greyStack, c)
-
-proc markIncremental(gch: var GcHeap): bool =
-  var L = addr(gch.greyStack.len)
-  takeStartTime(100)
-  while L[] > 0:
-    var c = gch.greyStack.d[0]
-    if not isAllocatedPtr(gch.region, c):
-      c_fprintf(stdout, "[GC] not allocated anymore: %p\n", c)
-      #GC_dumpHeap()
-      sysAssert(false, "wtf")
-
-    #sysAssert(isAllocatedPtr(gch.region, c), "markIncremental: isAllocatedPtr")
-    gch.greyStack.d[0] = gch.greyStack.d[L[] - 1]
-    dec(L[])
-    takeTime()
-    if c.color == rcGrey:
-      c.setColor(gch.black)
-      forAllChildren(c, waMarkGrey)
-    elif c.color == (1-gch.black):
-      gcAssert false, "wtf why are there white objects in the greystack?"
-    checkTime()
-  gcAssert gch.greyStack.len == 0, "markIncremental: greystack not empty "
-  result = true
-
-proc markGlobals(gch: var GcHeap) =
-  if gch.gcThreadId == 0:
-    for i in 0 .. globalMarkersLen-1: globalMarkers[i]()
-  for i in 0 .. threadLocalMarkersLen-1: threadLocalMarkers[i]()
-
-proc doOperation(p: pointer, op: WalkOp) =
-  if p == nil: return
-  var c: PCell = usrToCell(p)
-  gcAssert(c != nil, "doOperation: 1")
-  # the 'case' should be faster than function pointers because of easy
-  # prediction:
-  case op
-  of waZctDecRef:
-    #if not isAllocatedPtr(gch.region, c):
-    #  c_fprintf(stdout, "[GC] decref bug: %p", c)
-    gcAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
-    discard "use me for nimEscape?"
-  of waMarkGlobal:
-    template handleRoot =
-      if gch.dumpHeapFile.isNil:
-        markRoot(gch, c)
-      else:
-        dumpRoot(gch, c)
-    handleRoot()
-    discard allocInv(gch.region)
-  of waMarkGrey:
-    when false:
-      if not isAllocatedPtr(gch.region, c):
-        c_fprintf(stdout, "[GC] not allocated anymore: MarkGrey %p\n", c)
-        #GC_dumpHeap()
-        sysAssert(false, "wtf")
-    if c.color == 1-gch.black:
-      c.setColor(rcGrey)
-      add(gch.greyStack, c)
-  of waDebug: debugGraph(c)
-
-proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
-  doOperation(d, WalkOp(op))
-
-proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
-  # the addresses are not as cells on the stack, so turn them to cells:
-  sysAssert(allocInv(gch.region), "gcMark begin")
-  var cell = usrToCell(p)
-  var c = cast[ByteAddress](cell)
-  if c >% PageSize:
-    # fast check: does it look like a cell?
-    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
-    if objStart != nil:
-      # mark the cell:
-      markRoot(gch, objStart)
-  sysAssert(allocInv(gch.region), "gcMark end")
-
-proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-  forEachStackSlot(gch, gcMark)
-
-proc collectALittle(gch: var GcHeap): bool =
-  case gch.phase
-  of Phase.None:
-    if getOccupiedMem(gch.region) >= gch.cycleThreshold:
-      gch.phase = Phase.Marking
-      markGlobals(gch)
-      result = collectALittle(gch)
-      #when false: c_fprintf(stdout, "collectALittle: introduced bug E %ld\n", gch.phase)
-      #discard allocInv(gch.region)
-  of Phase.Marking:
-    when hasThreadSupport:
-      for c in gch.toDispose:
-        nimGCunref(c)
-    prepareForInteriorPointerChecking(gch.region)
-    markStackAndRegisters(gch)
-    inc(gch.stat.stackScans)
-    if markIncremental(gch):
-      gch.phase = Phase.Sweeping
-      gch.red = 1 - gch.red
-  of Phase.Sweeping:
-    gcAssert gch.greyStack.len == 0, "greystack not empty"
-    when hasThreadSupport:
-      for c in gch.toDispose:
-        nimGCunref(c)
-    if sweep(gch):
-      gch.phase = Phase.None
-      # flip black/white meanings:
-      gch.black = 1 - gch.black
-      gcAssert gch.red == 1 - gch.black, "red color is wrong"
-      inc(gch.stat.completedCollections)
-      result = true
-
-proc collectCTBody(gch: var GcHeap) =
-  when withRealTime:
-    let t0 = getticks()
-  sysAssert(allocInv(gch.region), "collectCT: begin")
-
-  when not nimCoroutines:
-    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
-  #gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
-  if collectALittle(gch):
-    gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
-                              CycleIncrease)
-    gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
-  sysAssert(allocInv(gch.region), "collectCT: end")
-  when withRealTime:
-    let duration = getticks() - t0
-    gch.stat.maxPause = max(gch.stat.maxPause, duration)
-    when defined(reportMissedDeadlines):
-      if gch.maxPause > 0 and duration > gch.maxPause:
-        c_fprintf(stdout, "[GC] missed deadline: %ld\n", duration)
-
-when nimCoroutines:
-  proc currentStackSizes(): int =
-    for stack in items(gch.stack):
-      result = result + stack.stackSize()
-
-proc collectCT(gch: var GcHeap) =
-  # stackMarkCosts prevents some pathological behaviour: Stack marking
-  # becomes more expensive with large stacks and large stacks mean that
-  # cells with RC=0 are more likely to be kept alive by the stack.
-  when nimCoroutines:
-    let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold)
-  else:
-    let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
-  if (gch.greyStack.len >= stackMarkCosts or (cycleGC and
-      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
-      gch.recGcLock == 0:
-    collectCTBody(gch)
-
-when withRealTime:
-  proc toNano(x: int): Nanos {.inline.} =
-    result = x * 1000
-
-  proc GC_setMaxPause*(MaxPauseInUs: int) =
-    gch.maxPause = MaxPauseInUs.toNano
-
-  proc GC_step(gch: var GcHeap, us: int, strongAdvice: bool) =
-    gch.maxPause = us.toNano
-    #if (getOccupiedMem(gch.region)>=gch.cycleThreshold) or
-    #    alwaysGC or strongAdvice:
-    collectCTBody(gch)
-
-  proc GC_step*(us: int, strongAdvice = false, stackSize = -1) {.noinline.} =
-    if stackSize >= 0:
-      var stackTop {.volatile.}: pointer
-      gch.getActiveStack().pos = addr(stackTop)
-
-      for stack in gch.stack.items():
-        stack.bottomSaved = stack.bottom
-        when stackIncreases:
-          stack.bottom = cast[pointer](
-            cast[ByteAddress](stack.pos) - sizeof(pointer) * 6 - stackSize)
-        else:
-          stack.bottom = cast[pointer](
-            cast[ByteAddress](stack.pos) + sizeof(pointer) * 6 + stackSize)
-
-    GC_step(gch, us, strongAdvice)
-
-    if stackSize >= 0:
-      for stack in gch.stack.items():
-        stack.bottom = stack.bottomSaved
-
-when not defined(useNimRtl):
-  proc GC_disable() =
-    when hasThreadSupport and hasSharedHeap:
-      discard atomicInc(gch.recGcLock, 1)
-    else:
-      inc(gch.recGcLock)
-  proc GC_enable() =
-    if gch.recGcLock > 0:
-      when hasThreadSupport and hasSharedHeap:
-        discard atomicDec(gch.recGcLock, 1)
-      else:
-        dec(gch.recGcLock)
-
-  proc GC_setStrategy(strategy: GC_Strategy) =
-    discard
-
-  proc GC_enableMarkAndSweep() = discard
-  proc GC_disableMarkAndSweep() = discard
-
-  proc GC_fullCollect() =
-    var oldThreshold = gch.cycleThreshold
-    gch.cycleThreshold = 0 # forces cycle collection
-    collectCT(gch)
-    gch.cycleThreshold = oldThreshold
-
-  proc GC_getStatistics(): string =
-    GC_disable()
-    result = "[GC] total memory: " & $(getTotalMem()) & "\n" &
-             "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" &
-             "[GC] stack scans: " & $gch.stat.stackScans & "\n" &
-             "[GC] stack cells: " & $gch.stat.maxStackCells & "\n" &
-             "[GC] completed collections: " & $gch.stat.completedCollections & "\n" &
-             "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
-             "[GC] grey stack capacity: " & $gch.greyStack.cap & "\n" &
-             "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) & "\n"
-    when nimCoroutines:
-      result.add "[GC] number of stacks: " & $gch.stack.len & "\n"
-      for stack in items(gch.stack):
-        result.add "[GC]   stack " & stack.bottom.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
-    else:
-      result.add "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
-    GC_enable()
-
-{.pop.}
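The deleted gc2.nim packed the tri-color mark state plus an "escaped" flag into the low bits of each cell's refcount field. Below is a self-contained sketch of that bit packing, using a stand-in Cell type rather than the runtime's cell layout; the constants mirror the ones removed above.

  # Standalone illustration of the color/escape bit packing the removed
  # collector used; Cell here is a hypothetical stand-in with only a refcount.
  const
    escapedBit = 0b1000       # kept above the two color bits
    colorMask  = 0b011
    rcGrey     = 0b010

  type Cell = object
    refcount: int

  template color(c: Cell): int = c.refcount and colorMask
  template setColor(c: var Cell, col: int) =
    c.refcount = c.refcount and not colorMask or col
  template markAsEscaped(c: var Cell) =
    c.refcount = c.refcount or escapedBit

  var c = Cell(refcount: 0)
  c.setColor rcGrey
  c.markAsEscaped
  doAssert c.color == rcGrey and (c.refcount and escapedBit) != 0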
diff --git a/lib/system/gc_common.nim b/lib/system/gc_common.nim
index 939776a58..eb0884560 100644
--- a/lib/system/gc_common.nim
+++ b/lib/system/gc_common.nim
@@ -37,50 +37,61 @@ when defined(nimTypeNames):
         a[j] = v
       if h == 1: break
 
-  proc dumpNumberOfInstances* =
-    # also add the allocated strings to the list of known types:
+  iterator dumpHeapInstances*(): tuple[name: cstring; count: int; sizes: int] =
+    ## Iterate over summaries of types on heaps.
+    ## This data may be inaccurate if allocations
+    ## are made by the iterator body.
     if strDesc.nextType == nil:
       strDesc.nextType = nimTypeRoot
       strDesc.name = "string"
       nimTypeRoot = addr strDesc
+    var it = nimTypeRoot
+    while it != nil:
+      if (it.instances > 0 or it.sizes != 0):
+        yield (it.name, it.instances, it.sizes)
+      it = it.nextType
+
+  proc dumpNumberOfInstances* =
     var a: InstancesInfo
     var n = 0
-    var it = nimTypeRoot
     var totalAllocated = 0
-    while it != nil:
-      if (it.instances > 0 or it.sizes != 0) and n < a.len:
-        a[n] = (it.name, it.instances, it.sizes)
-        inc n
+    for it in dumpHeapInstances():
+      a[n] = it
+      inc n
       inc totalAllocated, it.sizes
-      it = it.nextType
     sortInstances(a, n)
     for i in 0 .. n-1:
-      c_fprintf(stdout, "[Heap] %s: #%ld; bytes: %ld\n", a[i][0], a[i][1], a[i][2])
-    c_fprintf(stdout, "[Heap] total number of bytes: %ld\n", totalAllocated)
+      c_fprintf(cstdout, "[Heap] %s: #%ld; bytes: %ld\n", a[i][0], a[i][1], a[i][2])
+    c_fprintf(cstdout, "[Heap] total number of bytes: %ld\n", totalAllocated)
+    when defined(nimTypeNames):
+      let (allocs, deallocs) = getMemCounters()
+      c_fprintf(cstdout, "[Heap] allocs/deallocs: %ld/%ld\n", allocs, deallocs)
 
   when defined(nimGcRefLeak):
     proc oomhandler() =
-      c_fprintf(stdout, "[Heap] ROOTS: #%ld\n", gch.additionalRoots.len)
+      c_fprintf(cstdout, "[Heap] ROOTS: #%ld\n", gch.additionalRoots.len)
       writeLeaks()
 
     outOfMemHook = oomhandler
 
 template decTypeSize(cell, t) =
-  # XXX this needs to use atomics for multithreaded apps!
   when defined(nimTypeNames):
     if t.kind in {tyString, tySequence}:
       let cap = cast[PGenericSeq](cellToUsr(cell)).space
-      let size = if t.kind == tyString: cap+1+GenericSeqSize
-                 else: addInt(mulInt(cap, t.base.size), GenericSeqSize)
-      dec t.sizes, size+sizeof(Cell)
+      let size =
+        if t.kind == tyString:
+          cap + 1 + GenericSeqSize
+        else:
+          align(GenericSeqSize, t.base.align) + cap * t.base.size
+      atomicDec t.sizes, size+sizeof(Cell)
     else:
-      dec t.sizes, t.base.size+sizeof(Cell)
-    dec t.instances
+      atomicDec t.sizes, t.base.size+sizeof(Cell)
+    atomicDec t.instances
 
 template incTypeSize(typ, size) =
   when defined(nimTypeNames):
-    inc typ.instances
-    inc typ.sizes, size+sizeof(Cell)
+    atomicInc typ.instances
+    atomicInc typ.sizes, size+sizeof(Cell)
 
 proc dispose*(x: ForeignCell) =
   when hasThreadSupport:
@@ -148,69 +159,34 @@ else:
   iterator items(first: var GcStack): ptr GcStack = yield addr(first)
   proc len(stack: var GcStack): int = 1
 
-proc stackSize(stack: ptr GcStack): int {.noinline.} =
-  when nimCoroutines:
-    var pos = stack.pos
-  else:
-    var pos {.volatile.}: pointer
-    pos = addr(pos)
-
-  if pos != nil:
-    when defined(stackIncreases):
-      result = cast[ByteAddress](pos) -% cast[ByteAddress](stack.bottom)
-    else:
-      result = cast[ByteAddress](stack.bottom) -% cast[ByteAddress](pos)
-  else:
-    result = 0
-
-proc stackSize(): int {.noinline.} =
-  for stack in gch.stack.items():
-    result = result + stack.stackSize()
-
-when nimCoroutines:
-  proc setPosition(stack: ptr GcStack, position: pointer) =
-    stack.pos = position
-    stack.maxStackSize = max(stack.maxStackSize, stack.stackSize())
-
-  proc setPosition(stack: var GcStack, position: pointer) =
-    setPosition(addr(stack), position)
-
-  proc getActiveStack(gch: var GcHeap): ptr GcStack =
-    return gch.activeStack
-
-  proc isActiveStack(stack: ptr GcStack): bool =
-    return gch.activeStack == stack
-else:
-  # Stack positions do not need to be tracked if coroutines are not used.
-  proc setPosition(stack: ptr GcStack, position: pointer) = discard
-  proc setPosition(stack: var GcStack, position: pointer) = discard
-  # There is just one stack - main stack of the thread. It is active always.
-  proc getActiveStack(gch: var GcHeap): ptr GcStack = addr(gch.stack)
-  proc isActiveStack(stack: ptr GcStack): bool = true
-
-when declared(threadType):
+when defined(nimdoc):
   proc setupForeignThreadGc*() {.gcsafe.} =
     ## Call this if you registered a callback that will be run from a thread not
     ## under your control. This has a cheap thread-local guard, so the GC for
     ## this thread will only be initialized once per thread, no matter how often
     ## it is called.
     ##
-    ## This function is available only when ``--threads:on`` and ``--tlsEmulation:off``
+    ## This function is available only when `--threads:on` and `--tlsEmulation:off`
+    ## switches are used
+    discard
+
+  proc tearDownForeignThreadGc*() {.gcsafe.} =
+    ## Call this to tear down the GC, previously initialized by `setupForeignThreadGc`.
+    ## If GC has not been previously initialized, or has already been torn down, the
+    ## call does nothing.
+    ##
+    ## This function is available only when `--threads:on` and `--tlsEmulation:off`
     ## switches are used
+    discard
+elif declared(threadType):
+  proc setupForeignThreadGc*() {.gcsafe.} =
     if threadType == ThreadType.None:
-      initAllocator()
       var stackTop {.volatile.}: pointer
       nimGC_setStackBottom(addr(stackTop))
       initGC()
       threadType = ThreadType.ForeignThread
 
   proc tearDownForeignThreadGc*() {.gcsafe.} =
-    ## Call this to tear down the GC, previously initialized by ``setupForeignThreadGc``.
-    ## If GC has not been previously initialized, or has already been torn down, the
-    ## call does nothing.
-    ##
-    ## This function is available only when ``--threads:on`` and ``--tlsEmulation:off``
-    ## switches are used
     if threadType != ThreadType.ForeignThread:
       return
     when declared(deallocOsPages): deallocOsPages()
@@ -227,7 +203,7 @@ else:
 # ----------------- stack management --------------------------------------
 #  inspired from Smart Eiffel
 
-when defined(emscripten):
+when defined(emscripten) or defined(wasm):
   const stackIncreases = true
 elif defined(sparc):
   const stackIncreases = false
@@ -237,25 +213,69 @@ elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
 else:
   const stackIncreases = false
 
+proc stackSize(stack: ptr GcStack): int {.noinline.} =
+  when nimCoroutines:
+    var pos = stack.pos
+  else:
+    var pos {.volatile, noinit.}: pointer
+    pos = addr(pos)
+
+  if pos != nil:
+    when stackIncreases:
+      result = cast[int](pos) -% cast[int](stack.bottom)
+    else:
+      result = cast[int](stack.bottom) -% cast[int](pos)
+  else:
+    result = 0
+
+proc stackSize(): int {.noinline.} =
+  result = 0
+  for stack in gch.stack.items():
+    result = result + stack.stackSize()
+
+when nimCoroutines:
+  proc setPosition(stack: ptr GcStack, position: pointer) =
+    stack.pos = position
+    stack.maxStackSize = max(stack.maxStackSize, stack.stackSize())
+
+  proc setPosition(stack: var GcStack, position: pointer) =
+    setPosition(addr(stack), position)
+
+  proc getActiveStack(gch: var GcHeap): ptr GcStack =
+    return gch.activeStack
+
+  proc isActiveStack(stack: ptr GcStack): bool =
+    return gch.activeStack == stack
+else:
+  # Stack positions do not need to be tracked if coroutines are not used.
+  proc setPosition(stack: ptr GcStack, position: pointer) = discard
+  proc setPosition(stack: var GcStack, position: pointer) = discard
+  # There is just one stack - main stack of the thread. It is active always.
+  proc getActiveStack(gch: var GcHeap): ptr GcStack = addr(gch.stack)
+  proc isActiveStack(stack: ptr GcStack): bool = true
+
 {.push stack_trace: off.}
 when nimCoroutines:
-  proc GC_addStack(bottom: pointer) {.cdecl, exportc.} =
+  proc GC_addStack(bottom: pointer) {.cdecl, dynlib, exportc.} =
     # c_fprintf(stdout, "GC_addStack: %p;\n", bottom)
     var stack = gch.stack.append()
     stack.bottom = bottom
     stack.setPosition(bottom)
 
-  proc GC_removeStack(bottom: pointer) {.cdecl, exportc.} =
+  proc GC_removeStack(bottom: pointer) {.cdecl, dynlib, exportc.} =
     # c_fprintf(stdout, "GC_removeStack: %p;\n", bottom)
     gch.stack.find(bottom).remove()
 
-  proc GC_setActiveStack(bottom: pointer) {.cdecl, exportc.} =
+  proc GC_setActiveStack(bottom: pointer) {.cdecl, dynlib, exportc.} =
     ## Sets active stack and updates current stack position.
     # c_fprintf(stdout, "GC_setActiveStack: %p;\n", bottom)
     var sp {.volatile.}: pointer
     gch.activeStack = gch.stack.find(bottom)
     gch.activeStack.setPosition(addr(sp))
 
+  proc GC_getActiveStack() : pointer {.cdecl, exportc.} =
+    return gch.activeStack.bottom
+
 when not defined(useNimRtl):
   proc nimGC_setStackBottom(theStackBottom: pointer) =
     # Initializes main stack of the thread.
@@ -275,25 +295,28 @@ when not defined(useNimRtl):
       # the first init must be the one that defines the stack bottom:
       gch.stack.bottom = theStackBottom
     elif theStackBottom != gch.stack.bottom:
-      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[ByteAddress](gch.stack.bottom)
+      var a = cast[int](theStackBottom) # and not PageMask - PageSize*2
+      var b = cast[int](gch.stack.bottom)
       #c_fprintf(stdout, "old: %p new: %p;\n",gch.stack.bottom,theStackBottom)
       when stackIncreases:
         gch.stack.bottom = cast[pointer](min(a, b))
       else:
         gch.stack.bottom = cast[pointer](max(a, b))
 
+    when nimCoroutines:
+      if theStackBottom != nil: gch.stack.bottom = theStackBottom
+
     gch.stack.setPosition(theStackBottom)
 {.pop.}
 
 proc isOnStack(p: pointer): bool =
-  var stackTop {.volatile.}: pointer
+  var stackTop {.volatile, noinit.}: pointer
   stackTop = addr(stackTop)
-  var a = cast[ByteAddress](gch.getActiveStack().bottom)
-  var b = cast[ByteAddress](stackTop)
+  var a = cast[int](gch.getActiveStack().bottom)
+  var b = cast[int](stackTop)
   when not stackIncreases:
     swap(a, b)
-  var x = cast[ByteAddress](p)
+  var x = cast[int](p)
   result = a <=% x and x <=% b
 
 when defined(sparc): # For SPARC architecture.
@@ -314,7 +337,7 @@ when defined(sparc): # For SPARC architecture.
     # Addresses decrease as the stack grows.
     while sp <= max:
       gcMark(gch, sp[])
-      sp = cast[PPointer](cast[ByteAddress](sp) +% sizeof(pointer))
+      sp = cast[PPointer](cast[int](sp) +% sizeof(pointer))
 
 elif defined(ELATE):
   {.error: "stack marking code is to be written for this architecture".}
@@ -323,21 +346,28 @@ elif stackIncreases:
   # ---------------------------------------------------------------------------
   # Generic code for architectures where addresses increase as the stack grows.
   # ---------------------------------------------------------------------------
-  var
-    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-      # a little hack to get the size of a JmpBuf in the generated C code
-      # in a platform independent way
+  when defined(emscripten) or defined(wasm):
+    var
+      jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
+        # a little hack to get the size of a JmpBuf in the generated C code
+        # in a platform independent way
+
+  template forEachStackSlotAux(gch, gcMark: untyped) {.dirty.} =
+    for stack in gch.stack.items():
+      var max = cast[int](gch.stack.bottom)
+      var sp = cast[int](addr(registers)) -% sizeof(pointer)
+      while sp >=% max:
+        gcMark(gch, cast[PPointer](sp)[])
+        sp = sp -% sizeof(pointer)
 
   template forEachStackSlot(gch, gcMark: untyped) {.dirty.} =
-    var registers {.noinit.}: C_JmpBuf
-
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      for stack in gch.stack.items():
-        var max = cast[ByteAddress](gch.stack.bottom)
-        var sp = cast[ByteAddress](addr(registers)) -% sizeof(pointer)
-        while sp >=% max:
-          gcMark(gch, cast[PPointer](sp)[])
-          sp = sp -% sizeof(pointer)
+    when defined(emscripten) or defined(wasm):
+      var registers: cint
+      forEachStackSlotAux(gch, gcMark)
+    else:
+      var registers {.noinit.}: C_JmpBuf
+      if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+        forEachStackSlotAux(gch, gcMark)
 
 else:
   # ---------------------------------------------------------------------------
@@ -353,15 +383,14 @@ else:
     gch.getActiveStack().setPosition(addr(registers))
     if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
       for stack in gch.stack.items():
-        var max = cast[ByteAddress](stack.bottom)
-        var sp = cast[ByteAddress](addr(registers))
+        var max = cast[int](stack.bottom)
+        var sp = cast[int](addr(registers))
         when defined(amd64):
           if stack.isActiveStack():
             # words within the jmp_buf structure may not be properly aligned.
             let regEnd = sp +% sizeof(registers)
             while sp <% regEnd:
               gcMark(gch, cast[PPointer](sp)[])
-              gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
               sp = sp +% sizeof(pointer)
         # Make sure sp is word-aligned
         sp = sp and not (sizeof(pointer) - 1)
@@ -385,7 +414,7 @@ else:
 # end of non-portable code
 # ----------------------------------------------------------------------------
 
-proc prepareDealloc(cell: PCell) =
+proc prepareDealloc(cell: PCell) {.raises: [].} =
   when declared(useMarkForDebug):
     when useMarkForDebug:
       gcAssert(cell notin gch.marked, "Cell still alive!")
@@ -394,7 +423,7 @@ proc prepareDealloc(cell: PCell) =
     # the finalizer could invoke something that
     # allocates memory; this could trigger a garbage
     # collection. Since we are already collecting we
-    # prevend recursive entering here by a lock.
+    # prevent recursive entering here by a lock.
     # XXX: we should set the cell's children to nil!
     inc(gch.recGcLock)
     (cast[Finalizer](t.finalizer))(cellToUsr(cell))
@@ -402,10 +431,10 @@ proc prepareDealloc(cell: PCell) =
   decTypeSize(cell, t)
 
 proc deallocHeap*(runFinalizers = true; allowGcAfterwards = true) =
-  ## Frees the thread local heap. Runs every finalizer if ``runFinalizers```
-  ## is true. If ``allowGcAfterwards`` is true, a minimal amount of allocation
+  ## Frees the thread local heap. Runs every finalizer if `runFinalizers`
+  ## is true. If `allowGcAfterwards` is true, a minimal amount of allocation
   ## happens to ensure the GC can continue to work after the call
-  ## to ``deallocHeap``.
+  ## to `deallocHeap`.
   template deallocCell(x) =
     if isCell(x):
       # cast to PCell is correct here:
@@ -428,26 +457,26 @@ proc deallocHeap*(runFinalizers = true; allowGcAfterwards = true) =
     initGC()
 
 type
-  GlobalMarkerProc = proc () {.nimcall, benign.}
+  GlobalMarkerProc = proc () {.nimcall, benign, raises: [].}
 var
-  globalMarkersLen: int
-  globalMarkers: array[0.. 3499, GlobalMarkerProc]
-  threadLocalMarkersLen: int
-  threadLocalMarkers: array[0.. 3499, GlobalMarkerProc]
+  globalMarkersLen {.exportc.}: int
+  globalMarkers {.exportc.}: array[0..3499, GlobalMarkerProc]
+  threadLocalMarkersLen {.exportc.}: int
+  threadLocalMarkers {.exportc.}: array[0..3499, GlobalMarkerProc]
   gHeapidGenerator: int
 
-proc nimRegisterGlobalMarker(markerProc: GlobalMarkerProc) {.compilerProc.} =
+proc nimRegisterGlobalMarker(markerProc: GlobalMarkerProc) {.compilerproc.} =
   if globalMarkersLen <= high(globalMarkers):
     globalMarkers[globalMarkersLen] = markerProc
     inc globalMarkersLen
   else:
-    echo "[GC] cannot register global variable; too many global variables"
-    quit 1
+    cstderr.rawWrite("[GC] cannot register global variable; too many global variables")
+    rawQuit 1
 
-proc nimRegisterThreadLocalMarker(markerProc: GlobalMarkerProc) {.compilerProc.} =
+proc nimRegisterThreadLocalMarker(markerProc: GlobalMarkerProc) {.compilerproc.} =
   if threadLocalMarkersLen <= high(threadLocalMarkers):
     threadLocalMarkers[threadLocalMarkersLen] = markerProc
     inc threadLocalMarkersLen
   else:
-    echo "[GC] cannot register thread local variable; too many thread local variables"
-    quit 1
+    cstderr.rawWrite("[GC] cannot register thread local variable; too many thread local variables")
+    rawQuit 1
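gc_common.nim now exposes the dumpHeapInstances iterator and switches the per-type instance/size counters to atomic updates. A small consumer sketch, only meaningful when the program is compiled with -d:nimTypeNames under a refc-based GC; reportHeapTypes is an illustrative name.

  when defined(nimTypeNames):
    proc reportHeapTypes() =
      ## prints one line per tracked type plus a total, via the iterator above
      var total = 0
      for name, count, sizes in dumpHeapInstances():
        echo name, ": ", count, " instances, ", sizes, " bytes"
        inc total, sizes
      echo "[Heap] total tracked bytes: ", total

    reportHeapTypes()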
diff --git a/lib/system/gc_hooks.nim b/lib/system/gc_hooks.nim
new file mode 100644
index 000000000..ace62eea0
--- /dev/null
+++ b/lib/system/gc_hooks.nim
@@ -0,0 +1,53 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2019 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Hooks for memory management. Can be used to implement custom garbage
+## collectors etc.
+
+type
+  GlobalMarkerProc = proc () {.nimcall, benign, raises: [], tags: [].}
+var
+  globalMarkersLen: int
+  globalMarkers: array[0..3499, GlobalMarkerProc]
+  threadLocalMarkersLen: int
+  threadLocalMarkers: array[0..3499, GlobalMarkerProc]
+
+proc nimRegisterGlobalMarker(markerProc: GlobalMarkerProc) {.compilerproc.} =
+  if globalMarkersLen <= high(globalMarkers):
+    globalMarkers[globalMarkersLen] = markerProc
+    inc globalMarkersLen
+  else:
+    cstderr.rawWrite("[GC] cannot register global variable; too many global variables")
+    rawQuit 1
+
+proc nimRegisterThreadLocalMarker(markerProc: GlobalMarkerProc) {.compilerproc.} =
+  if threadLocalMarkersLen <= high(threadLocalMarkers):
+    threadLocalMarkers[threadLocalMarkersLen] = markerProc
+    inc threadLocalMarkersLen
+  else:
+    cstderr.rawWrite("[GC] cannot register thread local variable; too many thread local variables")
+    rawQuit 1
+
+proc traverseGlobals*() =
+  for i in 0..globalMarkersLen-1:
+    globalMarkers[i]()
+
+proc traverseThreadLocals*() =
+  for i in 0..threadLocalMarkersLen-1:
+    threadLocalMarkers[i]()
+
+var
+  newObjHook*: proc (typ: PNimType, size: int): pointer {.nimcall, tags: [], raises: [], gcsafe.}
+  traverseObjHook*: proc (p: pointer, op: int) {.nimcall, tags: [], raises: [], gcsafe.}
+
+proc nimGCvisit(p: pointer, op: int) {.inl, compilerRtl.} =
+  traverseObjHook(p, op)
+
+proc newObj(typ: PNimType, size: int): pointer {.inl, compilerRtl.} =
+  result = newObjHook(typ, size)
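The new gc_hooks.nim routes the compiler-emitted newObj and nimGCvisit entry points through the settable newObjHook/traverseObjHook proc variables, so an external collector can be plugged in. A standalone illustration of that indirection pattern follows; AllocHook, allocHook, runtimeAlloc and cHeapAlloc are hypothetical names for the sketch and not part of the runtime.

  # Sketch of the hook-variable pattern: the caller only knows the hook,
  # never the concrete allocator behind it.
  type AllocHook = proc (size: int): pointer {.nimcall, gcsafe, raises: [].}

  var allocHook: AllocHook        # whoever provides the collector assigns this

  proc runtimeAlloc(size: int): pointer =
    # the "runtime" side only ever calls through the hook
    result = allocHook(size)

  proc cHeapAlloc(size: int): pointer {.nimcall, gcsafe, raises: [].} =
    result = alloc0(size)         # stand-in implementation: plain C heap

  allocHook = cHeapAlloc
  doAssert runtimeAlloc(64) != nil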
diff --git a/lib/system/gc_interface.nim b/lib/system/gc_interface.nim
new file mode 100644
index 000000000..84145f33a
--- /dev/null
+++ b/lib/system/gc_interface.nim
@@ -0,0 +1,100 @@
+# ----------------- GC interface ---------------------------------------------
+const
+  usesDestructors = defined(gcDestructors) or defined(gcHooks)
+
+when not usesDestructors:
+  {.pragma: nodestroy.}
+
+when hasAlloc:
+  type
+    GC_Strategy* = enum  ## The strategy the GC should use for the application.
+      gcThroughput,      ## optimize for throughput
+      gcResponsiveness,  ## optimize for responsiveness (default)
+      gcOptimizeTime,    ## optimize for speed
+      gcOptimizeSpace    ## optimize for memory footprint
+
+when hasAlloc and not defined(js) and not usesDestructors:
+  proc GC_disable*() {.rtl, inl, benign, raises: [].}
+    ## Disables the GC. If called `n` times, `n` calls to `GC_enable`
+    ## are needed to reactivate the GC.
+    ##
+    ## Note that in most circumstances one should only disable
+    ## the mark and sweep phase with
+    ## `GC_disableMarkAndSweep <#GC_disableMarkAndSweep>`_.
+
+  proc GC_enable*() {.rtl, inl, benign, raises: [].}
+    ## Enables the GC again.
+
+  proc GC_fullCollect*() {.rtl, benign.}
+    ## Forces a full garbage collection pass.
+    ## Ordinary code does not need to call this (and should not).
+
+  proc GC_enableMarkAndSweep*() {.rtl, benign.}
+  proc GC_disableMarkAndSweep*() {.rtl, benign.}
+    ## The current implementation uses a reference counting garbage collector
+    ## with a seldom-run mark and sweep phase to free cycles. The mark and
+    ## sweep phase may take a long time and is not needed if the application
+    ## does not create cycles. Thus the mark and sweep phase can be deactivated
+    ## and activated separately from the rest of the GC.
+
+  proc GC_getStatistics*(): string {.rtl, benign.}
+    ## Returns an informative string about the GC's activity. This may be useful
+    ## for tweaking.
+
+  proc GC_ref*[T](x: ref T) {.magic: "GCref", benign.}
+  proc GC_ref*[T](x: seq[T]) {.magic: "GCref", benign.}
+  proc GC_ref*(x: string) {.magic: "GCref", benign.}
+    ## Marks the object `x` as referenced, so that it will not be freed until
+    ## it is unmarked via `GC_unref`.
+    ## If called n-times for the same object `x`,
+    ## n calls to `GC_unref` are needed to unmark `x`.
+
+  proc GC_unref*[T](x: ref T) {.magic: "GCunref", benign.}
+  proc GC_unref*[T](x: seq[T]) {.magic: "GCunref", benign.}
+  proc GC_unref*(x: string) {.magic: "GCunref", benign.}
+    ## See the documentation of `GC_ref <#GC_ref,string>`_.
+
+  proc nimGC_setStackBottom*(theStackBottom: pointer) {.compilerRtl, noinline, benign, raises: [].}
+    ## Expands the GC's operating stack range to `theStackBottom`. Does nothing
+    ## if the current stack bottom is already lower than `theStackBottom`.
+
+when hasAlloc and defined(js):
+  template GC_disable* =
+    {.warning: "GC_disable is a no-op in JavaScript".}
+
+  template GC_enable* =
+    {.warning: "GC_enable is a no-op in JavaScript".}
+
+  template GC_fullCollect* =
+    {.warning: "GC_fullCollect is a no-op in JavaScript".}
+
+  template GC_setStrategy* =
+    {.warning: "GC_setStrategy is a no-op in JavaScript".}
+
+  template GC_enableMarkAndSweep* =
+    {.warning: "GC_enableMarkAndSweep is a no-op in JavaScript".}
+
+  template GC_disableMarkAndSweep* =
+    {.warning: "GC_disableMarkAndSweep is a no-op in JavaScript".}
+
+  template GC_ref*[T](x: ref T) =
+    {.warning: "GC_ref is a no-op in JavaScript".}
+
+  template GC_ref*[T](x: seq[T]) =
+    {.warning: "GC_ref is a no-op in JavaScript".}
+
+  template GC_ref*(x: string) =
+    {.warning: "GC_ref is a no-op in JavaScript".}
+
+  template GC_unref*[T](x: ref T) =
+    {.warning: "GC_unref is a no-op in JavaScript".}
+
+  template GC_unref*[T](x: seq[T]) =
+    {.warning: "GC_unref is a no-op in JavaScript".}
+
+  template GC_unref*(x: string) =
+    {.warning: "GC_unref is a no-op in JavaScript".}
+
+  template GC_getStatistics*(): string =
+    {.warning: "GC_getStatistics is a no-op in JavaScript".}
+    ""
diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim
index 75f9c6749..c885a6893 100644
--- a/lib/system/gc_ms.nim
+++ b/lib/system/gc_ms.nim
@@ -27,7 +27,7 @@ when defined(memProfiler):
   proc nimProfile(requestedSize: int)
 
 when hasThreadSupport:
-  import sharedlist
+  import std/sharedlist
 
 type
   WalkOp = enum
@@ -36,7 +36,7 @@ type
                    # local
     waMarkPrecise  # fast precise marking
 
-  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
+  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign, raises: [].}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
 
@@ -85,36 +85,26 @@ var
 when not defined(useNimRtl):
   instantiateForRegion(gch.region)
 
-template acquire(gch: GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    acquireSys(HeapLock)
-
-template release(gch: GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    releaseSys(HeapLock)
-
 template gcAssert(cond: bool, msg: string) =
   when defined(useGcAssert):
     if not cond:
-      echo "[GCASSERT] ", msg
-      quit 1
+      cstderr.rawWrite "[GCASSERT] "
+      cstderr.rawWrite msg
+      rawQuit 1
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
+  result = cast[pointer](cast[int](cell)+%ByteAddress(sizeof(Cell)))
 
 proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
-
-proc canbeCycleRoot(c: PCell): bool {.inline.} =
-  result = ntfAcyclic notin c.typ.flags
+  result = cast[PCell](cast[int](usr)-%ByteAddress(sizeof(Cell)))
 
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
   # used for code generation concerning debugging
   result = usrToCell(c).typ
 
-proc unsureAsgnRef(dest: PPointer, src: pointer) {.inline.} =
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.inline, compilerproc.} =
   dest[] = src
 
 proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
@@ -125,10 +115,10 @@ when BitsPerPage mod (sizeof(int)*8) != 0:
   {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
 
 # forward declarations:
-proc collectCT(gch: var GcHeap; size: int) {.benign.}
-proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
-proc doOperation(p: pointer, op: WalkOp) {.benign.}
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
+proc collectCT(gch: var GcHeap; size: int) {.benign, raises: [].}
+proc forAllChildren(cell: PCell, op: WalkOp) {.benign, raises: [].}
+proc doOperation(p: pointer, op: WalkOp) {.benign, raises: [].}
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign, raises: [].}
 # we need the prototype here for debugging purposes
 
 when defined(nimGcRefLeak):
@@ -172,7 +162,7 @@ when defined(nimGcRefLeak):
 
   var ax: array[10_000, GcStackTrace]
 
-proc nimGCref(p: pointer) {.compilerProc.} =
+proc nimGCref(p: pointer) {.compilerproc.} =
   # we keep it from being collected by pretending it's not even allocated:
   when false:
     when withBitvectors: excl(gch.allocated, usrToCell(p))
@@ -181,7 +171,7 @@ proc nimGCref(p: pointer) {.compilerProc.} =
     captureStackTrace(framePtr, ax[gch.additionalRoots.len])
   add(gch.additionalRoots, usrToCell(p))
 
-proc nimGCunref(p: pointer) {.compilerProc.} =
+proc nimGCunref(p: pointer) {.compilerproc.} =
   let cell = usrToCell(p)
   var L = gch.additionalRoots.len-1
   var i = L
@@ -227,7 +217,7 @@ proc initGC() =
     gcAssert(gch.gcThreadId >= 0, "invalid computed thread ID")
 
 proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
   of nkList:
@@ -239,11 +229,11 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
   of nkNone: sysAssert(false, "forAllSlotsAux")
 
 proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
     case mt.kind
-    of tyRef, tyOptAsRef, tyString, tySequence: # leaf:
+    of tyRef, tyString, tySequence: # leaf:
       doOperation(cast[PPointer](d)[], op)
     of tyObject, tyTuple:
       forAllSlotsAux(dest, mt.node, op)
@@ -255,31 +245,30 @@ proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
 proc forAllChildren(cell: PCell, op: WalkOp) =
   gcAssert(cell != nil, "forAllChildren: 1")
   gcAssert(cell.typ != nil, "forAllChildren: 2")
-  gcAssert cell.typ.kind in {tyRef, tyOptAsRef, tySequence, tyString}, "forAllChildren: 3"
+  gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
   let marker = cell.typ.marker
   if marker != nil:
     marker(cellToUsr(cell), op.int)
   else:
     case cell.typ.kind
-    of tyRef, tyOptAsRef: # common case
+    of tyRef: # common case
       forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
     of tySequence:
-      var d = cast[ByteAddress](cellToUsr(cell))
-      var s = cast[PGenericSeq](d)
-      if s != nil:
-        for i in 0..s.len-1:
-          forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +%
-            GenericSeqSize), cell.typ.base, op)
+      when not defined(nimSeqsV2):
+        var d = cast[int](cellToUsr(cell))
+        var s = cast[PGenericSeq](d)
+        if s != nil:
+          for i in 0..s.len-1:
+            forAllChildrenAux(cast[pointer](d +% align(GenericSeqSize, cell.typ.base.align) +% i *% cell.typ.base.size), cell.typ.base, op)
     else: discard
 
 proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   # generates a new object and sets its reference counter to 0
   incTypeSize typ, size
-  acquire(gch)
-  gcAssert(typ.kind in {tyRef, tyOptAsRef, tyString, tySequence}, "newObj: 1")
+  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch, size + sizeof(Cell))
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
-  gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  gcAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
   when leakDetector and not hasThreadSupport:
@@ -287,7 +276,6 @@ proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
       res.filename = framePtr.prev.filename
       res.line = framePtr.prev.line
   res.refcount = 0
-  release(gch)
   when withBitvectors: incl(gch.allocated, res)
   when useCellIds:
     inc gch.idGenerator
@@ -310,53 +298,56 @@ proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
   result = rawNewObj(typ, size, gch)
   when defined(memProfiler): nimProfile(size)
 
-proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  # `newObj` already uses locks, so no need for them here.
-  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
-  result = newObj(typ, size)
-  cast[PGenericSeq](result).len = len
-  cast[PGenericSeq](result).reserved = len
-  when defined(memProfiler): nimProfile(size)
-
 proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
   result = rawNewObj(typ, size, gch)
   zeroMem(result, size)
   when defined(memProfiler): nimProfile(size)
 
-proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
-  result = newObj(typ, size)
-  cast[PGenericSeq](result).len = len
-  cast[PGenericSeq](result).reserved = len
-  when defined(memProfiler): nimProfile(size)
-
-proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
-  acquire(gch)
-  collectCT(gch, newsize + sizeof(Cell))
-  var ol = usrToCell(old)
-  sysAssert(ol.typ != nil, "growObj: 1")
-  gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
-
-  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
-  var elemSize = 1
-  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
-  incTypeSize ol.typ, newsize
-
-  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
-  copyMem(res, ol, oldsize + sizeof(Cell))
-  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
-          newsize-oldsize)
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
-  when withBitvectors: incl(gch.allocated, res)
-  when useCellIds:
-    inc gch.idGenerator
-    res.id = gch.idGenerator
-  release(gch)
-  result = cellToUsr(res)
-  when defined(memProfiler): nimProfile(newsize-oldsize)
+when not defined(nimSeqsV2):
+  {.push overflowChecks: on.}
+  proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
+    # `newObj` already uses locks, so no need for them here.
+    let size = align(GenericSeqSize, typ.base.align) + len * typ.base.size
+    result = newObj(typ, size)
+    cast[PGenericSeq](result).len = len
+    cast[PGenericSeq](result).reserved = len
+    when defined(memProfiler): nimProfile(size)
+
+  proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
+    let size = align(GenericSeqSize, typ.base.align) + len * typ.base.size
+    result = newObj(typ, size)
+    cast[PGenericSeq](result).len = len
+    cast[PGenericSeq](result).reserved = len
+    when defined(memProfiler): nimProfile(size)
+  {.pop.}
+
+  proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
+    collectCT(gch, newsize + sizeof(Cell))
+    var ol = usrToCell(old)
+    sysAssert(ol.typ != nil, "growObj: 1")
+    gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
+
+    var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
+    var elemSize, elemAlign = 1
+    if ol.typ.kind != tyString:
+      elemSize = ol.typ.base.size
+      elemAlign = ol.typ.base.align
+    incTypeSize ol.typ, newsize
+
+    var oldsize = align(GenericSeqSize, elemAlign) + cast[PGenericSeq](old).len*elemSize
+    copyMem(res, ol, oldsize + sizeof(Cell))
+    zeroMem(cast[pointer](cast[int](res)+% oldsize +% sizeof(Cell)),
+            newsize-oldsize)
+    sysAssert((cast[int](res) and (MemAlign-1)) == 0, "growObj: 3")
+    when withBitvectors: incl(gch.allocated, res)
+    when useCellIds:
+      inc gch.idGenerator
+      res.id = gch.idGenerator
+    result = cellToUsr(res)
+    when defined(memProfiler): nimProfile(newsize-oldsize)
 
-proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
-  result = growObj(old, newsize, gch)
+  proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
+    result = growObj(old, newsize, gch)
 
 {.push profiler:off.}
 
@@ -384,14 +375,14 @@ proc mark(gch: var GcHeap, c: PCell) =
                   c, c.typ.name)
         inc gch.indentation, 2
 
-    c.refCount = rcBlack
+    c.refcount = rcBlack
     gcAssert gch.tempStack.len == 0, "stack not empty!"
     forAllChildren(c, waMarkPrecise)
     while gch.tempStack.len > 0:
       dec gch.tempStack.len
       var d = gch.tempStack.d[gch.tempStack.len]
       if d.refcount == rcWhite:
-        d.refCount = rcBlack
+        d.refcount = rcBlack
         forAllChildren(d, waMarkPrecise)
 
     when defined(nimTracing):
@@ -437,15 +428,6 @@ proc sweep(gch: var GcHeap) =
         if c.refcount == rcBlack: c.refcount = rcWhite
         else: freeCyclicCell(gch, c)
 
-when false:
-  proc newGcInvariant*() =
-    for x in allObjects(gch.region):
-      if isCell(x):
-        var c = cast[PCell](x)
-        if c.typ == nil:
-          writeStackTrace()
-          quit 1
-
 proc markGlobals(gch: var GcHeap) =
   if gch.gcThreadId == 0:
     when defined(nimTracing):
@@ -464,11 +446,10 @@ proc markGlobals(gch: var GcHeap) =
 
 proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
-  var cell = usrToCell(p)
-  var c = cast[ByteAddress](cell)
+  var c = cast[int](p)
   if c >% PageSize:
     # fast check: does it look like a cell?
-    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
+    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, p))
     if objStart != nil:
       mark(gch, objStart)
 
@@ -495,24 +476,20 @@ proc collectCTBody(gch: var GcHeap) =
   sysAssert(allocInv(gch.region), "collectCT: end")
 
 proc collectCT(gch: var GcHeap; size: int) =
+  let fmem = getFreeMem(gch.region)
   if (getOccupiedMem(gch.region) >= gch.cycleThreshold or
-      size > getFreeMem(gch.region)) and gch.recGcLock == 0:
+      size > fmem and fmem > InitialThreshold) and gch.recGcLock == 0:
     collectCTBody(gch)
 
 when not defined(useNimRtl):
   proc GC_disable() =
-    when hasThreadSupport and hasSharedHeap:
-      atomicInc(gch.recGcLock, 1)
-    else:
-      inc(gch.recGcLock)
+    inc(gch.recGcLock)
   proc GC_enable() =
-    if gch.recGcLock <= 0:
-      raise newException(AssertionError,
-          "API usage error: GC_enable called but GC is already enabled")
-    when hasThreadSupport and hasSharedHeap:
-      atomicDec(gch.recGcLock, 1)
-    else:
-      dec(gch.recGcLock)
+    when defined(nimDoesntTrackDefects):
+      if gch.recGcLock <= 0:
+        raise newException(AssertionDefect,
+            "API usage error: GC_enable called but GC is already enabled")
+    dec(gch.recGcLock)
 
   proc GC_setStrategy(strategy: GC_Strategy) = discard
 
@@ -520,7 +497,7 @@ when not defined(useNimRtl):
     gch.cycleThreshold = InitialThreshold
 
   proc GC_disableMarkAndSweep() =
-    gch.cycleThreshold = high(gch.cycleThreshold)-1
+    gch.cycleThreshold = high(typeof(gch.cycleThreshold))-1
     # set to the max value to suppress the cycle detector
 
   when defined(nimTracing):
@@ -528,12 +505,10 @@ when not defined(useNimRtl):
       gch.tracing = true
 
   proc GC_fullCollect() =
-    acquire(gch)
-    var oldThreshold = gch.cycleThreshold
+    let oldThreshold = gch.cycleThreshold
     gch.cycleThreshold = 0 # forces cycle collection
     collectCT(gch, 0)
     gch.cycleThreshold = oldThreshold
-    release(gch)
 
   proc GC_getStatistics(): string =
     result = "[GC] total memory: " & $getTotalMem() & "\n" &
diff --git a/lib/system/gc_regions.nim b/lib/system/gc_regions.nim
index 06fded86b..d96de7eac 100644
--- a/lib/system/gc_regions.nim
+++ b/lib/system/gc_regions.nim
@@ -7,12 +7,16 @@
 #
 
 # "Stack GC" for embedded devices or ultra performance requirements.
+import std/private/syslocks
 
-when defined(nimphpext):
+when defined(memProfiler):
+  proc nimProfile(requestedSize: int) {.benign.}
+
+when defined(useMalloc):
   proc roundup(x, v: int): int {.inline.} =
     result = (x + (v-1)) and not (v-1)
-  proc emalloc(size: int): pointer {.importc: "_emalloc".}
-  proc efree(mem: pointer) {.importc: "_efree".}
+  proc emalloc(size: int): pointer {.importc: "malloc", header: "<stdlib.h>".}
+  proc efree(mem: pointer) {.importc: "free", header: "<stdlib.h>".}
 
   proc osAllocPages(size: int): pointer {.inline.} =
     emalloc(size)
@@ -84,16 +88,16 @@ type
     region: ptr MemRegion
 
 var
-  tlRegion {.threadVar.}: MemRegion
-#  tempStrRegion {.threadVar.}: MemRegion  # not yet used
+  tlRegion {.threadvar.}: MemRegion
+#  tempStrRegion {.threadvar.}: MemRegion  # not yet used
 
-template withRegion*(r: MemRegion; body: untyped) =
+template withRegion*(r: var MemRegion; body: untyped) =
   let oldRegion = tlRegion
   tlRegion = r
   try:
     body
   finally:
-    #r = tlRegion
+    r = tlRegion
     tlRegion = oldRegion
 
 template inc(p: pointer, s: int) =
@@ -108,6 +112,8 @@ template `+!`(p: pointer, s: int): pointer =
 template `-!`(p: pointer, s: int): pointer =
   cast[pointer](cast[int](p) -% s)
 
+const nimMinHeapPages {.intdefine.} = 4
+
 proc allocSlowPath(r: var MemRegion; size: int) =
   # we need to ensure that the underlying linked list
   # stays small. Say we want to grab 16GB of RAM with some
@@ -116,9 +122,8 @@ proc allocSlowPath(r: var MemRegion; size: int) =
   # 8MB, 16MB, 32MB, 64MB, 128MB, 512MB, 1GB, 2GB, 4GB, 8GB,
   # 16GB --> list contains only 20 elements! That's reasonable.
   if (r.totalSize and 1) == 0:
-    r.nextChunkSize =
-      if r.totalSize < 64 * 1024: PageSize*4
-      else: r.nextChunkSize*2
+    r.nextChunkSize = if r.totalSize < 64 * 1024: PageSize*nimMinHeapPages
+                      else: r.nextChunkSize*2
   var s = roundup(size+sizeof(BaseChunk), PageSize)
   var fresh: Chunk
   if s > r.nextChunkSize:
@@ -179,12 +184,25 @@ proc runFinalizers(c: Chunk) =
       (cast[Finalizer](it.typ.finalizer))(it+!sizeof(ObjHeader))
     it = it.nextFinal
 
+proc runFinalizers(c: Chunk; newbump: pointer) =
+  var it = c.head
+  var prev: ptr ObjHeader = nil
+  while it != nil:
+    let nxt = it.nextFinal
+    if it >= newbump:
+      if it.typ != nil and it.typ.finalizer != nil:
+        (cast[Finalizer](it.typ.finalizer))(it+!sizeof(ObjHeader))
+    elif prev != nil:
+      prev.nextFinal = nil
+    prev = it
+    it = nxt
+
 proc dealloc(r: var MemRegion; p: pointer; size: int) =
   let it = cast[ptr ObjHeader](p-!sizeof(ObjHeader))
   if it.typ != nil and it.typ.finalizer != nil:
     (cast[Finalizer](it.typ.finalizer))(p)
   it.typ = nil
-  # it is benefitial to not use the free lists here:
+  # it is beneficial to not use the free lists here:
   if r.bump -! size == p:
     dec r.bump, size
   when false:
@@ -221,16 +239,15 @@ template computeRemaining(r): untyped =
 
 proc setObstackPtr*(r: var MemRegion; sp: StackPtr) =
   # free everything after 'sp':
-  if sp.current.next != nil:
+  if sp.current != nil and sp.current.next != nil:
     deallocAll(r, sp.current.next)
     sp.current.next = nil
     when false:
       # better leak this memory than be sorry:
       for i in 0..high(r.freeLists): r.freeLists[i] = nil
       r.holes = nil
-  #else:
-  #  deallocAll(r, r.head)
-  #  r.head = nil
+  if r.tail != nil: runFinalizers(r.tail, sp.bump)
+
   r.bump = sp.bump
   r.tail = sp.current
   r.remaining = sp.remaining
@@ -241,15 +258,21 @@ proc deallocAll*() = tlRegion.deallocAll()
 
 proc deallocOsPages(r: var MemRegion) = r.deallocAll()
 
+when false:
+  let obs = obstackPtr()
+  try:
+    body
+  finally:
+    setObstackPtr(obs)
+
 template withScratchRegion*(body: untyped) =
-  var scratch: MemRegion
   let oldRegion = tlRegion
-  tlRegion = scratch
+  tlRegion = MemRegion()
   try:
     body
   finally:
+    deallocAll()
     tlRegion = oldRegion
-    deallocAll(scratch)
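With the rewritten `withScratchRegion`, the temporary region is built in place and fully deallocated before the old region is restored. A hypothetical usage sketch under the region GC (e.g. `--gc:regions`); only plain value results should escape the block, since all heap storage allocated inside it is freed on exit:

```nim
proc totalLen(words: seq[string]): int =
  withScratchRegion:
    var joined = ""          # string storage lives in the scratch region
    for w in words:
      joined.add w
    result = joined.len      # an int copy is safe to return

echo totalLen(@["ab", "cde"])   # 5
```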
 
 when false:
   proc joinRegion*(dest: var MemRegion; src: MemRegion) =
@@ -299,20 +322,21 @@ proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
   result = rawNewObj(tlRegion, typ, size)
   when defined(memProfiler): nimProfile(size)
 
+{.push overflowChecks: on.}
 proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  let size = roundup(addInt(mulInt(len, typ.base.size), GenericSeqSize),
-                     MemAlign)
+  let size = roundup(align(GenericSeqSize, typ.base.align) + len * typ.base.size, MemAlign)
   result = rawNewSeq(tlRegion, typ, size)
   zeroMem(result, size)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
 
 proc newStr(typ: PNimType, len: int; init: bool): pointer {.compilerRtl.} =
-  let size = roundup(addInt(len, GenericSeqSize), MemAlign)
+  let size = roundup(len + GenericSeqSize, MemAlign)
   result = rawNewSeq(tlRegion, typ, size)
   if init: zeroMem(result, size)
   cast[PGenericSeq](result).len = 0
   cast[PGenericSeq](result).reserved = len
+{.pop.}
 
 proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
   result = rawNewObj(tlRegion, typ, size)
@@ -327,7 +351,8 @@ proc growObj(regionUnused: var MemRegion; old: pointer, newsize: int): pointer =
   result = rawNewSeq(sh.region[], typ,
                      roundup(newsize, MemAlign))
   let elemSize = if typ.kind == tyString: 1 else: typ.base.size
-  let oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
+  let elemAlign = if typ.kind == tyString: 1 else: typ.base.align
+  let oldsize = align(GenericSeqSize, elemAlign) + cast[PGenericSeq](old).len*elemSize
   zeroMem(result +! oldsize, newsize-oldsize)
   copyMem(result, old, oldsize)
   dealloc(sh.region[], old, roundup(oldsize, MemAlign))
@@ -339,19 +364,24 @@ proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
   dest[] = src
 proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
   dest[] = src
-proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-  dest[] = src
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
+  deprecated: "old compiler compat".} = asgnRef(dest, src)
 
-proc alloc(size: Natural): pointer =
-  result = c_malloc(size)
+proc allocImpl(size: Natural): pointer =
+  result = c_malloc(cast[csize_t](size))
   if result == nil: raiseOutOfMem()
-proc alloc0(size: Natural): pointer =
+proc alloc0Impl(size: Natural): pointer =
   result = alloc(size)
   zeroMem(result, size)
-proc realloc(p: pointer, newsize: Natural): pointer =
-  result = c_realloc(p, newsize)
+proc reallocImpl(p: pointer, newsize: Natural): pointer =
+  result = c_realloc(p, cast[csize_t](newsize))
+  if result == nil: raiseOutOfMem()
+proc realloc0Impl(p: pointer, oldsize, newsize: Natural): pointer =
+  result = c_realloc(p, cast[csize_t](newsize))
   if result == nil: raiseOutOfMem()
-proc dealloc(p: pointer) = c_free(p)
+  if newsize > oldsize:
+    zeroMem(cast[pointer](cast[int](result) + oldsize), newsize - oldsize)
+proc deallocImpl(p: pointer) = c_free(p)
 
 proc alloc0(r: var MemRegion; size: Natural): pointer =
   # ignore the region. That is correct for the channels module
@@ -365,16 +395,21 @@ proc alloc(r: var MemRegion; size: Natural): pointer =
 
 proc dealloc(r: var MemRegion; p: pointer) = dealloc(p)
 
-proc allocShared(size: Natural): pointer =
-  result = c_malloc(size)
+proc allocSharedImpl(size: Natural): pointer =
+  result = c_malloc(cast[csize_t](size))
   if result == nil: raiseOutOfMem()
-proc allocShared0(size: Natural): pointer =
+proc allocShared0Impl(size: Natural): pointer =
   result = alloc(size)
   zeroMem(result, size)
-proc reallocShared(p: pointer, newsize: Natural): pointer =
-  result = c_realloc(p, newsize)
+proc reallocSharedImpl(p: pointer, newsize: Natural): pointer =
+  result = c_realloc(p, cast[csize_t](newsize))
+  if result == nil: raiseOutOfMem()
+proc reallocShared0Impl(p: pointer, oldsize, newsize: Natural): pointer =
+  result = c_realloc(p, cast[csize_t](newsize))
   if result == nil: raiseOutOfMem()
-proc deallocShared(p: pointer) = c_free(p)
+  if newsize > oldsize:
+    zeroMem(cast[pointer](cast[int](result) + oldsize), newsize - oldsize)
+proc deallocSharedImpl(p: pointer) = c_free(p)
 
 when hasThreadSupport:
   proc getFreeSharedMem(): int = 0
@@ -403,5 +438,5 @@ proc getTotalMem*(r: MemRegion): int =
 
 proc nimGC_setStackBottom(theStackBottom: pointer) = discard
 
-proc nimGCref(x: pointer) {.compilerProc.} = discard
-proc nimGCunref(x: pointer) {.compilerProc.} = discard
+proc nimGCref(x: pointer) {.compilerproc.} = discard
+proc nimGCunref(x: pointer) {.compilerproc.} = discard
diff --git a/lib/system/hti.nim b/lib/system/hti.nim
index 45b1d1cd3..a26aff982 100644
--- a/lib/system/hti.nim
+++ b/lib/system/hti.nim
@@ -7,12 +7,6 @@
 #    distribution, for details about the copyright.
 #
 
-when declared(NimString):
-  # we are in system module:
-  {.pragma: codegenType, compilerproc.}
-else:
-  {.pragma: codegenType, importc.}
-
 type
   # This should be the same as ast.TTypeKind
   # many enum fields are not used at runtime
@@ -23,8 +17,8 @@ type
     tyEmpty,
     tyArrayConstr,
     tyNil,
-    tyExpr,
-    tyStmt,
+    tyUntyped,
+    tyTyped,
     tyTypeDesc,
     tyGenericInvocation, # ``T[a, b]`` for types to invoke
     tyGenericBody,       # ``T[a, b, body]`` last parameter is the body
@@ -46,7 +40,7 @@ type
     tyPointer,
     tyOpenArray,
     tyString,
-    tyCString,
+    tyCstring,
     tyForward,
     tyInt,
     tyInt8,
@@ -62,10 +56,10 @@ type
     tyUInt16,
     tyUInt32,
     tyUInt64,
-    tyOptAsRef, tyUnused1, tyUnused2,
+    tyOwned, tyUnused1, tyUnused2,
     tyVarargsHidden,
-    tyUnusedHidden,
-    tyProxyHidden,
+    tyUncheckedArray,
+    tyErrorHidden,
     tyBuiltInTypeClassHidden,
     tyUserTypeClassHidden,
     tyUserTypeClassInstHidden,
@@ -75,11 +69,11 @@ type
     tyAnythingHidden,
     tyStaticHidden,
     tyFromExprHidden,
-    tyOpt,
+    tyOptDeprecated,
     tyVoidHidden
 
   TNimNodeKind = enum nkNone, nkSlot, nkList, nkCase
-  TNimNode {.codegenType.} = object
+  TNimNode {.compilerproc.} = object
     kind: TNimNodeKind
     offset: int
     typ: ptr TNimType
@@ -92,23 +86,38 @@ type
     ntfAcyclic = 1,    # type cannot form a cycle
     ntfEnumHole = 2    # enum has holes and thus `$` for them needs the slow
                        # version
-  TNimType {.codegenType.} = object
-    size: int
+  TNimType {.compilerproc.} = object
+    when defined(gcHooks):
+      head*: pointer
+    size*: int
+    align*: int
     kind: TNimKind
     flags: set[TNimTypeFlag]
-    base: ptr TNimType
+    base*: ptr TNimType
     node: ptr TNimNode # valid for tyRecord, tyObject, tyTuple, tyEnum
-    finalizer: pointer # the finalizer for the type
-    marker: proc (p: pointer, op: int) {.nimcall, benign.} # marker proc for GC
-    deepcopy: proc (p: pointer): pointer {.nimcall, benign.}
+    finalizer*: pointer # the finalizer for the type
+    marker*: proc (p: pointer, op: int) {.nimcall, benign, tags: [], raises: [].} # marker proc for GC
+    deepcopy: proc (p: pointer): pointer {.nimcall, benign, tags: [], raises: [].}
+    when defined(nimSeqsV2):
+      typeInfoV2*: pointer
     when defined(nimTypeNames):
       name: cstring
       nextType: ptr TNimType
       instances: int # count the number of instances
       sizes: int # sizes of all instances in bytes
-  PNimType = ptr TNimType
+
+when defined(gcHooks):
+  type
+    PNimType* = ptr TNimType
+else:
+  type
+    PNimType = ptr TNimType
 
 when defined(nimTypeNames):
-  var nimTypeRoot {.codegenType.}: PNimType
+  # Declare this variable only once in system.nim
+  when declared(ThisIsSystem):
+    var nimTypeRoot {.compilerproc.}: PNimType
+  else:
+    var nimTypeRoot {.importc.}: PNimType
 
 # node.len may be the ``first`` element of a set
diff --git a/lib/system/inclrtl.nim b/lib/system/inclrtl.nim
index f9e6754ef..3bf0b9893 100644
--- a/lib/system/inclrtl.nim
+++ b/lib/system/inclrtl.nim
@@ -19,11 +19,6 @@
 when not defined(nimNewShared):
   {.pragma: gcsafe.}
 
-when not defined(nimImmediateDeprecated):
-  {.pragma: oldimmediate, immediate.}
-else:
-  {.pragma: oldimmediate.}
-
 when defined(createNimRtl):
   when defined(useNimRtl):
     {.error: "Cannot create and use nimrtl at the same time!".}
@@ -35,12 +30,13 @@ when defined(createNimRtl):
   {.pragma: inl.}
   {.pragma: compilerRtl, compilerproc, exportc: "nimrtl_$1", dynlib.}
 elif defined(useNimRtl):
-  when defined(windows):
-    const nimrtl* = "nimrtl.dll"
-  elif defined(macosx):
-    const nimrtl* = "libnimrtl.dylib"
-  else:
-    const nimrtl* = "libnimrtl.so"
+  #[
+  `{.rtl.}` should only be used for non-generic procs.
+  ]#
+  const nimrtl* =
+    when defined(windows): "nimrtl.dll"
+    elif defined(macosx): "libnimrtl.dylib"
+    else: "libnimrtl.so"
   {.pragma: rtl, importc: "nimrtl_$1", dynlib: nimrtl, gcsafe.}
   {.pragma: inl.}
   {.pragma: compilerRtl, compilerproc, importc: "nimrtl_$1", dynlib: nimrtl.}
@@ -49,15 +45,6 @@ else:
   {.pragma: inl, inline.}
   {.pragma: compilerRtl, compilerproc.}
 
-when not defined(nimsuperops):
-  {.pragma: operator.}
+{.pragma: benign, gcsafe.}
 
-when defined(nimlocks):
-  {.pragma: benign, gcsafe, locks: 0.}
-else:
-  {.pragma: benign, gcsafe.}
-
-when defined(nimTableGet):
-  {.pragma: deprecatedGet, deprecated.}
-else:
-  {.pragma: deprecatedGet.}
+{.push sinkInference: on.}
diff --git a/lib/system/indexerrors.nim b/lib/system/indexerrors.nim
new file mode 100644
index 000000000..6a8cb8a0a
--- /dev/null
+++ b/lib/system/indexerrors.nim
@@ -0,0 +1,15 @@
+# imported by other modules, unlike helpers.nim which is included
+# xxx this is now included instead of imported, we should import instead
+
+template formatErrorIndexBound*[T](i, a, b: T): string =
+  when defined(standalone):
+    "indexOutOfBounds"
+  else:
+    if b < a: "index out of bounds, the container is empty"
+    else: "index " & $i & " not in " & $a & " .. " & $b
+
+template formatErrorIndexBound*[T](i, n: T): string =
+  formatErrorIndexBound(i, 0, n)
+
+template formatFieldDefect*(f, discVal): string =
+  f & discVal & "'"
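In practice these templates supply the message of the `IndexDefect` raised by bounds checks; a small illustration, assuming a non-standalone build and catchable defects (the default `--panics:off`):

```nim
let s = @[10, 20, 30]
try:
  discard s[5]
except IndexDefect as e:
  echo e.msg        # e.g. "index 5 not in 0 .. 2"
```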
diff --git a/lib/system/indices.nim b/lib/system/indices.nim
new file mode 100644
index 000000000..f2bad2528
--- /dev/null
+++ b/lib/system/indices.nim
@@ -0,0 +1,164 @@
+when not defined(nimHasSystemRaisesDefect):
+  {.pragma: systemRaisesDefect.}
+
+type
+  BackwardsIndex* = distinct int ## Type that is constructed by `^` for
+                                 ## reversed array accesses.
+                                 ## (See `^ template <#^.t,int>`_)
+
+template `^`*(x: int): BackwardsIndex = BackwardsIndex(x)
+  ## Builtin `roof`:idx: operator that can be used for convenient array access.
+  ## `a[^x]` is a shortcut for `a[a.len-x]`.
+  ##
+  ##   ```nim
+  ##   let
+  ##     a = [1, 3, 5, 7, 9]
+  ##     b = "abcdefgh"
+  ##
+  ##   echo a[^1] # => 9
+  ##   echo b[^2] # => g
+  ##   ```
+
+proc `[]`*[T](s: openArray[T]; i: BackwardsIndex): T {.inline, systemRaisesDefect.} =
+  system.`[]`(s, s.len - int(i))
+
+proc `[]`*[Idx, T](a: array[Idx, T]; i: BackwardsIndex): T {.inline, systemRaisesDefect.} =
+  a[Idx(a.len - int(i) + int low(a))]
+proc `[]`*(s: string; i: BackwardsIndex): char {.inline, systemRaisesDefect.} = s[s.len - int(i)]
+
+proc `[]`*[T](s: var openArray[T]; i: BackwardsIndex): var T {.inline, systemRaisesDefect.} =
+  system.`[]`(s, s.len - int(i))
+proc `[]`*[Idx, T](a: var array[Idx, T]; i: BackwardsIndex): var T {.inline, systemRaisesDefect.} =
+  a[Idx(a.len - int(i) + int low(a))]
+proc `[]`*(s: var string; i: BackwardsIndex): var char {.inline, systemRaisesDefect.} = s[s.len - int(i)]
+
+proc `[]=`*[T](s: var openArray[T]; i: BackwardsIndex; x: T) {.inline, systemRaisesDefect.} =
+  system.`[]=`(s, s.len - int(i), x)
+proc `[]=`*[Idx, T](a: var array[Idx, T]; i: BackwardsIndex; x: T) {.inline, systemRaisesDefect.} =
+  a[Idx(a.len - int(i) + int low(a))] = x
+proc `[]=`*(s: var string; i: BackwardsIndex; x: char) {.inline, systemRaisesDefect.} =
+  s[s.len - int(i)] = x
+
+template `..^`*(a, b: untyped): untyped =
+  ## A shortcut for `.. ^` to avoid the common gotcha that a space between
+  ## '..' and '^' is required.
+  a .. ^b
+
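A quick demonstration of the gotcha this shortcut avoids: without `..^` defined, the space-free spelling would be parsed as a single unknown operator, so `a[1 .. ^2]` would require the space. With the template, both spellings work:

```nim
let s = "abcdefgh"
assert s[1 .. ^2] == "bcdefg"   # `^2` builds a BackwardsIndex
assert s[1..^2] == "bcdefg"     # same slice via the `..^` shortcut
assert s[0 .. ^1] == s          # `^1` addresses the last element
```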
+template `..<`*(a, b: untyped): untyped =
+  ## A shortcut for `a .. pred(b)`.
+  ##   ```nim
+  ##   for i in 5 ..< 9:
+  ##     echo i # => 5; 6; 7; 8
+  ##   ```
+  a .. (when b is BackwardsIndex: succ(b) else: pred(b))
+
+template `[]`*(s: string; i: int): char = arrGet(s, i)
+template `[]=`*(s: string; i: int; val: char) = arrPut(s, i, val)
+
+template `^^`(s, i: untyped): untyped =
+  (when i is BackwardsIndex: s.len - int(i) else: int(i))
+
+template spliceImpl(s, a, L, b: typed): untyped =
+  # make room for additional elements or cut:
+  var shift = b.len - max(0,L)  # ignore negative slice size
+  var newLen = s.len + shift
+  if shift > 0:
+    # enlarge:
+    setLen(s, newLen)
+    for i in countdown(newLen-1, a+b.len): movingCopy(s[i], s[i-shift])
+  else:
+    for i in countup(a+b.len, newLen-1): movingCopy(s[i], s[i-shift])
+    # cut down:
+    setLen(s, newLen)
+  # fill the hole:
+  for i in 0 ..< b.len: s[a+i] = b[i]
+
+proc `[]`*[T, U: Ordinal](s: string, x: HSlice[T, U]): string {.inline, systemRaisesDefect.} =
+  ## Slice operation for strings.
+  ## Returns the inclusive range `[s[x.a], s[x.b]]`:
+  ##   ```nim
+  ##   var s = "abcdef"
+  ##   assert s[1..3] == "bcd"
+  ##   ```
+  let a = s ^^ x.a
+  let L = (s ^^ x.b) - a + 1
+  result = newString(L)
+  for i in 0 ..< L: result[i] = s[i + a]
+
+proc `[]=`*[T, U: Ordinal](s: var string, x: HSlice[T, U], b: string) {.systemRaisesDefect.} =
+  ## Slice assignment for strings.
+  ##
+  ## If `b.len` is not exactly the number of elements that are referred to
+  ## by `x`, a `splice`:idx: is performed:
+  ##
+  runnableExamples:
+    var s = "abcdefgh"
+    s[1 .. ^2] = "xyz"
+    assert s == "axyzh"
+
+  var a = s ^^ x.a
+  var L = (s ^^ x.b) - a + 1
+  if L == b.len:
+    for i in 0..<L: s[i+a] = b[i]
+  else:
+    spliceImpl(s, a, L, b)
+
+proc `[]`*[Idx, T; U, V: Ordinal](a: array[Idx, T], x: HSlice[U, V]): seq[T] {.systemRaisesDefect.} =
+  ## Slice operation for arrays.
+  ## Returns the inclusive range `[a[x.a], a[x.b]]`:
+  ##   ```nim
+  ##   var a = [1, 2, 3, 4]
+  ##   assert a[0..2] == @[1, 2, 3]
+  ##   ```
+  ##
+  ## See also:
+  ## * `toOpenArray(array[I, T];I,I) <#toOpenArray,array[I,T],I,I>`_
+  let xa = a ^^ x.a
+  let L = (a ^^ x.b) - xa + 1
+  result = newSeq[T](L)
+  for i in 0..<L: result[i] = a[Idx(i + xa)]
+
+proc `[]=`*[Idx, T; U, V: Ordinal](a: var array[Idx, T], x: HSlice[U, V], b: openArray[T]) {.systemRaisesDefect.} =
+  ## Slice assignment for arrays.
+  ##   ```nim
+  ##   var a = [10, 20, 30, 40, 50]
+  ##   a[1..2] = @[99, 88]
+  ##   assert a == [10, 99, 88, 40, 50]
+  ##   ```
+  let xa = a ^^ x.a
+  let L = (a ^^ x.b) - xa + 1
+  if L == b.len:
+    for i in 0..<L: a[Idx(i + xa)] = b[i]
+  else:
+    sysFatal(RangeDefect, "different lengths for slice assignment")
+
+proc `[]`*[T; U, V: Ordinal](s: openArray[T], x: HSlice[U, V]): seq[T] {.systemRaisesDefect.} =
+  ## Slice operation for sequences.
+  ## Returns the inclusive range `[s[x.a], s[x.b]]`:
+  ##   ```nim
+  ##   var s = @[1, 2, 3, 4]
+  ##   assert s[0..2] == @[1, 2, 3]
+  ##   ```
+  ##
+  ## See also:
+  ## * `toOpenArray(openArray[T];int,int) <#toOpenArray,openArray[T],int,int>`_
+  let a = s ^^ x.a
+  let L = (s ^^ x.b) - a + 1
+  newSeq(result, L)
+  for i in 0 ..< L: result[i] = s[i + a]
+
+proc `[]=`*[T; U, V: Ordinal](s: var seq[T], x: HSlice[U, V], b: openArray[T]) {.systemRaisesDefect.} =
+  ## Slice assignment for sequences.
+  ##
+  ## If `b.len` is not exactly the number of elements that are referred to
+  ## by `x`, a `splice`:idx: is performed.
+  runnableExamples:
+    var s = @"abcdefgh"
+    s[1 .. ^2] = @"xyz"
+    assert s == @"axyzh"
+  let a = s ^^ x.a
+  let L = (s ^^ x.b) - a + 1
+  if L == b.len:
+    for i in 0 ..< L: s[i+a] = b[i]
+  else:
+    spliceImpl(s, a, L, b)
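To make the difference between the container kinds concrete, here is a short sketch (assuming catchable defects, i.e. the default `--panics:off`): strings and seqs are spliced when the lengths differ, while fixed-size arrays raise `RangeDefect`:

```nim
var s = @[1, 2, 3, 4, 5]
s[1..3] = @[9]                 # lengths differ: the seq is spliced (shrinks)
assert s == @[1, 9, 5]

var a = [10, 20, 30, 40, 50]
a[1..2] = [99, 88]             # lengths match: element-wise assignment
assert a == [10, 99, 88, 40, 50]

var raised = false
try:
  a[1..2] = [7]                # arrays cannot be resized
except RangeDefect:
  raised = true
assert raised
```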
diff --git a/lib/system/integerops.nim b/lib/system/integerops.nim
new file mode 100644
index 000000000..4ef3594f1
--- /dev/null
+++ b/lib/system/integerops.nim
@@ -0,0 +1,132 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2020 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Integer arithmetic with overflow checking. Uses
+# intrinsics or inline assembler.
+
+proc raiseOverflow {.compilerproc, noinline.} =
+  # a single proc to reduce code size to a minimum
+  sysFatal(OverflowDefect, "over- or underflow")
+
+proc raiseDivByZero {.compilerproc, noinline.} =
+  sysFatal(DivByZeroDefect, "division by zero")
+
+{.pragma: nimbaseH, importc, nodecl, noSideEffect, compilerproc.}
+
+when not defined(nimEmulateOverflowChecks):
+  # take the #define from nimbase.h
+
+  proc nimAddInt(a, b: int, res: ptr int): bool {.nimbaseH.}
+  proc nimSubInt(a, b: int, res: ptr int): bool {.nimbaseH.}
+  proc nimMulInt(a, b: int, res: ptr int): bool {.nimbaseH.}
+
+  proc nimAddInt64(a, b: int64; res: ptr int64): bool {.nimbaseH.}
+  proc nimSubInt64(a, b: int64; res: ptr int64): bool {.nimbaseH.}
+  proc nimMulInt64(a, b: int64; res: ptr int64): bool {.nimbaseH.}
+
+# unary minus and 'abs' not required here anymore and are directly handled
+# in the code generator.
+# 'nimModInt' does exist in nimbase.h without check as we moved the
+# check for 0 to the codegen.
+proc nimModInt(a, b: int; res: ptr int): bool {.nimbaseH.}
+
+proc nimModInt64(a, b: int64; res: ptr int64): bool {.nimbaseH.}
+
+# Platform independent versions.
+
+template addImplFallback(name, T, U) {.dirty.} =
+  when not declared(name):
+    proc name(a, b: T; res: ptr T): bool {.compilerproc, inline.} =
+      let r = cast[T](cast[U](a) + cast[U](b))
+      if (r xor a) >= T(0) or (r xor b) >= T(0):
+        res[] = r
+      else:
+        result = true
+
+addImplFallback(nimAddInt, int, uint)
+addImplFallback(nimAddInt64, int64, uint64)
+
+template subImplFallback(name, T, U) {.dirty.} =
+  when not declared(name):
+    proc name(a, b: T; res: ptr T): bool {.compilerproc, inline.} =
+      let r = cast[T](cast[U](a) - cast[U](b))
+      if (r xor a) >= 0 or (r xor not b) >= 0:
+        res[] = r
+      else:
+        result = true
+
+subImplFallback(nimSubInt, int, uint)
+subImplFallback(nimSubInt64, int64, uint64)
+
+template mulImplFallback(name, T, U, conv) {.dirty.} =
+  #
+  # This code has been inspired by Python's source code.
+  # The native int product x*y is either exactly right or *way* off, being
+  # just the last n bits of the true product, where n is the number of bits
+  # in an int (the delivered product is the true product plus i*2**n for
+  # some integer i).
+  #
+  # The native float64 product x*y is subject to three
+  # rounding errors: on a sizeof(int)==8 box, each cast to double can lose
+  # info, and even on a sizeof(int)==4 box, the multiplication can lose info.
+  # But, unlike the native int product, it's not in *range* trouble:  even
+  # if sizeof(int)==32 (256-bit ints), the product easily fits in the
+  # dynamic range of a float64. So the leading 50 (or so) bits of the float64
+  # product are correct.
+  #
+  # We check these two ways against each other, and declare victory if
+  # they're approximately the same. Else, because the native int product is
+  # the only one that can lose catastrophic amounts of information, it's the
+  # native int product that must have overflowed.
+  #
+  when not declared(name):
+    proc name(a, b: T; res: ptr T): bool {.compilerproc, inline.} =
+      let r = cast[T](cast[U](a) * cast[U](b))
+      let floatProd = conv(a) * conv(b)
+      let resAsFloat = conv(r)
+      # Fast path for normal case: small multiplicands, and no info
+      # is lost in either method.
+      if resAsFloat == floatProd:
+        res[] = r
+      else:
+        # Somebody somewhere lost info. Close enough, or way off? Note
+        # that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
+        # The difference either is or isn't significant compared to the
+        # true value (of which floatProd is a good approximation).
+
+        # abs(diff)/abs(prod) <= 1/32 iff
+        #   32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
+        if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
+          res[] = r
+        else:
+          result = true
+
+mulImplFallback(nimMulInt, int, uint, toFloat)
+mulImplFallback(nimMulInt64, int64, uint64, toBiggestFloat)
+
+
+template divImplFallback(name, T) {.dirty.} =
+  proc name(a, b: T; res: ptr T): bool {.compilerproc, inline.} =
+    # we moved the b == 0 case out into the codegen.
+    if a == low(T) and b == T(-1):
+      result = true
+    else:
+      res[] = a div b
+
+divImplFallback(nimDivInt, int)
+divImplFallback(nimDivInt64, int64)
+
+proc raiseFloatInvalidOp {.compilerproc, noinline.} =
+  sysFatal(FloatInvalidOpDefect, "FPU operation caused a NaN result")
+
+proc raiseFloatOverflow(x: float64) {.compilerproc, noinline.} =
+  if x > 0.0:
+    sysFatal(FloatOverflowDefect, "FPU operation caused an overflow")
+  else:
+    sysFatal(FloatUnderflowDefect, "FPU operation caused an underflow")
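The float-comparison trick described in `mulImplFallback` can be reproduced outside the runtime; the following standalone sketch uses an illustrative name (`mulOverflows`) and the same "5 good bits" tolerance:

```nim
proc mulOverflows(a, b: int64): bool =
  let r = cast[int64](cast[uint64](a) * cast[uint64](b))  # wrapping product
  let floatProd = a.toBiggestFloat * b.toBiggestFloat     # ~50 correct bits
  let resAsFloat = r.toBiggestFloat
  if resAsFloat == floatProd:
    result = false                    # exact: no overflow
  elif 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
    result = false                    # off by rounding only: close enough
  else:
    result = true                     # way off: the int product wrapped

assert not mulOverflows(1_000_000, 1_000_000)
assert mulOverflows(high(int64), 2)
```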
diff --git a/lib/system/iterators.nim b/lib/system/iterators.nim
new file mode 100644
index 000000000..125bee98f
--- /dev/null
+++ b/lib/system/iterators.nim
@@ -0,0 +1,353 @@
+## Default iterators for some Nim types.
+
+when defined(nimPreviewSlimSystem):
+  import std/assertions
+
+when not defined(nimNoLentIterators):
+  template lent2(T): untyped = lent T
+else:
+  template lent2(T): untyped = T
+
+template unCheckedInc(x) =
+  {.push overflowChecks: off.}
+  inc(x)
+  {.pop.}
+
+iterator items*[T: not char](a: openArray[T]): lent2 T {.inline.} =
+  ## Iterates over each item of `a`.
+  var i = 0
+  while i < len(a):
+    yield a[i]
+    unCheckedInc(i)
+
+iterator items*[T: char](a: openArray[T]): T {.inline.} =
+  ## Iterates over each item of `a`.
+  # a VM bug currently prevents taking address of openArray[char]
+  # elements converted from a string (would fail in `tests/misc/thallo.nim`)
+  # in any case there's no performance advantage of returning char by address.
+  var i = 0
+  while i < len(a):
+    yield a[i]
+    unCheckedInc(i)
+
+iterator mitems*[T](a: var openArray[T]): var T {.inline.} =
+  ## Iterates over each item of `a` so that you can modify the yielded value.
+  var i = 0
+  while i < len(a):
+    yield a[i]
+    unCheckedInc(i)
+
+iterator items*[IX, T](a: array[IX, T]): T {.inline.} =
+  ## Iterates over each item of `a`.
+  when a.len > 0:
+    var i = low(IX)
+    while true:
+      yield a[i]
+      if i >= high(IX): break
+      unCheckedInc(i)
+
+iterator mitems*[IX, T](a: var array[IX, T]): var T {.inline.} =
+  ## Iterates over each item of `a` so that you can modify the yielded value.
+  when a.len > 0:
+    var i = low(IX)
+    while true:
+      yield a[i]
+      if i >= high(IX): break
+      unCheckedInc(i)
+
+iterator items*[T](a: set[T]): T {.inline.} =
+  ## Iterates over each element of `a`. `items` iterates only over the
+  ## elements that are really in the set (and not over the ones the set is
+  ## able to hold).
+  var i = low(T).int
+  while i <= high(T).int:
+    when T is enum and not defined(js):
+      if cast[T](i) in a: yield cast[T](i)
+    else:
+      if T(i) in a: yield T(i)
+    unCheckedInc(i)
+
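A tiny illustration of the set iterator above: only values actually stored in the set are yielded, in ascending ordinal order, rather than every value the element type can hold:

```nim
type Color = enum red, green, blue, yellow
let picked: set[Color] = {green, yellow}
var seen: seq[Color]
for c in picked:
  seen.add c
assert seen == @[green, yellow]
```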
+iterator items*(a: cstring): char {.inline.} =
+  ## Iterates over each item of `a`.
+  runnableExamples:
+    from std/sequtils import toSeq
+    assert toSeq("abc\0def".cstring) == @['a', 'b', 'c']
+    assert toSeq("abc".cstring) == @['a', 'b', 'c']
+  #[
+  assert toSeq(nil.cstring) == @[] # xxx fails with SIGSEGV
+  this fails with SIGSEGV; unclear whether we want to instead yield nothing
+  or pay a small price to check for `nil`; a benchmark is needed. Note that
+  other procs support `nil`.
+  ]#
+  template impl() =
+    var i = 0
+    let n = len(a)
+    while i < n:
+      yield a[i]
+      unCheckedInc(i)
+  when defined(js): impl()
+  else:
+    when nimvm:
+      # xxx `cstring` should behave like c backend instead.
+      impl()
+    else:
+      var i = 0
+      while a[i] != '\0':
+        yield a[i]
+        unCheckedInc(i)
+
+iterator mitems*(a: var cstring): var char {.inline.} =
+  ## Iterates over each item of `a` so that you can modify the yielded value.
+  # xxx this should give CT error in js RT.
+  runnableExamples:
+    from std/sugar import collect
+    var a = "abc\0def"
+    prepareMutation(a)
+    var b = a.cstring
+    let s = collect:
+      for bi in mitems(b):
+        if bi == 'b': bi = 'B'
+        bi
+    assert s == @['a', 'B', 'c']
+    assert b == "aBc"
+    assert a == "aBc\0def"
+
+  template impl() =
+    var i = 0
+    let n = len(a)
+    while i < n:
+      yield a[i]
+      unCheckedInc(i)
+  when defined(js): impl()
+  else:
+    when nimvm: impl()
+    else:
+      var i = 0
+      while a[i] != '\0':
+        yield a[i]
+        unCheckedInc(i)
+
+iterator items*[T: enum and Ordinal](E: typedesc[T]): T =
+  ## Iterates over the values of `E`.
+  ## See also `enumutils.items` for enums with holes.
+  runnableExamples:
+    type Goo = enum g0 = 2, g1, g2
+    from std/sequtils import toSeq
+    assert Goo.toSeq == [g0, g1, g2]
+  for v in low(E) .. high(E):
+    yield v
+
+iterator items*[T: Ordinal](s: Slice[T]): T =
+  ## Iterates over the slice `s`, yielding each value between `s.a` and `s.b`
+  ## (inclusively).
+  for x in s.a .. s.b:
+    yield x
+
+iterator pairs*[T](a: openArray[T]): tuple[key: int, val: T] {.inline.} =
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  var i = 0
+  while i < len(a):
+    yield (i, a[i])
+    unCheckedInc(i)
+
+iterator mpairs*[T](a: var openArray[T]): tuple[key: int, val: var T]{.inline.} =
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  ## `a[index]` can be modified.
+  var i = 0
+  while i < len(a):
+    yield (i, a[i])
+    unCheckedInc(i)
+
+iterator pairs*[IX, T](a: array[IX, T]): tuple[key: IX, val: T] {.inline.} =
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  when a.len > 0:
+    var i = low(IX)
+    while true:
+      yield (i, a[i])
+      if i >= high(IX): break
+      unCheckedInc(i)
+
+iterator mpairs*[IX, T](a: var array[IX, T]): tuple[key: IX, val: var T] {.inline.} =
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  ## `a[index]` can be modified.
+  when a.len > 0:
+    var i = low(IX)
+    while true:
+      yield (i, a[i])
+      if i >= high(IX): break
+      unCheckedInc(i)
+
+iterator pairs*[T](a: seq[T]): tuple[key: int, val: T] {.inline.} =
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  var i = 0
+  let L = len(a)
+  while i < L:
+    yield (i, a[i])
+    unCheckedInc(i)
+    assert(len(a) == L, "the length of the seq changed while iterating over it")
+
+iterator mpairs*[T](a: var seq[T]): tuple[key: int, val: var T] {.inline.} =
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  ## `a[index]` can be modified.
+  var i = 0
+  let L = len(a)
+  while i < L:
+    yield (i, a[i])
+    unCheckedInc(i)
+    assert(len(a) == L, "the length of the seq changed while iterating over it")
+
+iterator pairs*(a: string): tuple[key: int, val: char] {.inline.} =
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  var i = 0
+  let L = len(a)
+  while i < L:
+    yield (i, a[i])
+    unCheckedInc(i)
+    assert(len(a) == L, "the length of the string changed while iterating over it")
+
+iterator mpairs*(a: var string): tuple[key: int, val: var char] {.inline.} =
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  ## `a[index]` can be modified.
+  var i = 0
+  let L = len(a)
+  while i < L:
+    yield (i, a[i])
+    unCheckedInc(i)
+    assert(len(a) == L, "the length of the string changed while iterating over it")
+
+iterator pairs*(a: cstring): tuple[key: int, val: char] {.inline.} =
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  when defined(js):
+    var i = 0
+    var L = len(a)
+    while i < L:
+      yield (i, a[i])
+      unCheckedInc(i)
+  else:
+    var i = 0
+    while a[i] != '\0':
+      yield (i, a[i])
+      unCheckedInc(i)
+
+iterator mpairs*(a: var cstring): tuple[key: int, val: var char] {.inline.} =
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  ## `a[index]` can be modified.
+  when defined(js):
+    var i = 0
+    var L = len(a)
+    while i < L:
+      yield (i, a[i])
+      unCheckedInc(i)
+  else:
+    var i = 0
+    while a[i] != '\0':
+      yield (i, a[i])
+      unCheckedInc(i)
+
+iterator items*[T](a: seq[T]): lent2 T {.inline.} =
+  ## Iterates over each item of `a`.
+  var i = 0
+  let L = len(a)
+  while i < L:
+    yield a[i]
+    unCheckedInc(i)
+    assert(len(a) == L, "the length of the seq changed while iterating over it")
+
+iterator mitems*[T](a: var seq[T]): var T {.inline.} =
+  ## Iterates over each item of `a` so that you can modify the yielded value.
+  var i = 0
+  let L = len(a)
+  while i < L:
+    yield a[i]
+    unCheckedInc(i)
+    assert(len(a) == L, "the length of the seq changed while iterating over it")
+
+iterator items*(a: string): char {.inline.} =
+  ## Iterates over each item of `a`.
+  var i = 0
+  let L = len(a)
+  while i < L:
+    yield a[i]
+    unCheckedInc(i)
+    assert(len(a) == L, "the length of the string changed while iterating over it")
+
+iterator mitems*(a: var string): var char {.inline.} =
+  ## Iterates over each item of `a` so that you can modify the yielded value.
+  var i = 0
+  let L = len(a)
+  while i < L:
+    yield a[i]
+    unCheckedInc(i)
+    assert(len(a) == L, "the length of the string changed while iterating over it")
+
+
+iterator fields*[T: tuple|object](x: T): RootObj {.
+  magic: "Fields", noSideEffect.} =
+  ## Iterates over every field of `x`.
+  ##
+  ## .. warning:: This really transforms the 'for' and unrolls the loop.
+  ##   The current implementation also has a bug
+  ##   that affects symbol binding in the loop body.
+  runnableExamples:
+    var t = (1, "foo")
+    for v in fields(t): v = default(typeof(v))
+    doAssert t == (0, "")
+
+iterator fields*[S:tuple|object, T:tuple|object](x: S, y: T): tuple[key: string, val: RootObj] {.
+  magic: "Fields", noSideEffect.} =
+  ## Iterates over every field of `x` and `y`.
+  ##
+  ## .. warning:: This really transforms the 'for' and unrolls the loop.
+  ##   The current implementation also has a bug that affects symbol binding
+  ##   in the loop body.
+  runnableExamples:
+    var t1 = (1, "foo")
+    var t2 = default(typeof(t1))
+    for v1, v2 in fields(t1, t2): v2 = v1
+    doAssert t1 == t2
+
+iterator fieldPairs*[T: tuple|object](x: T): tuple[key: string, val: RootObj] {.
+  magic: "FieldPairs", noSideEffect.} =
+  ## Iterates over every field of `x` returning their name and value.
+  ##
+  ## When you iterate over objects with different field types you have to use
+  ## the compile time `when` instead of a runtime `if` to select the code
+  ## you want to run for each type. To perform the comparison use the `is
+  ## operator <manual.html#generics-is-operator>`_.
+  ## Another way to do the same without `when` is to leave the task of
+  ## picking the appropriate code to a secondary proc which you overload for
+  ## each field type and pass the `value` to.
+  ##
+  ## .. warning:: This really transforms the 'for' and unrolls the loop. The
+  ##   current implementation also has a bug that affects symbol binding in the
+  ##   loop body.
+  runnableExamples:
+    type
+      Custom = object
+        foo: string
+        bar: bool
+    proc `$`(x: Custom): string =
+      result = "Custom:"
+      for name, value in x.fieldPairs:
+        when value is bool:
+          result.add("\n\t" & name & " is " & $value)
+        else:
+          result.add("\n\t" & name & " '" & value & "'")
+
+iterator fieldPairs*[S: tuple|object, T: tuple|object](x: S, y: T): tuple[
+  key: string, a, b: RootObj] {.
+  magic: "FieldPairs", noSideEffect.} =
+  ## Iterates over every field of `x` and `y`.
+  ##
+  ## .. warning:: This really transforms the 'for' and unrolls the loop.
+  ##   The current implementation also has a bug that affects symbol binding
+  ##   in the loop body.
+  runnableExamples:
+    type Foo = object
+      x1: int
+      x2: string
+    var a1 = Foo(x1: 12, x2: "abc")
+    var a2: Foo
+    for name, v1, v2 in fieldPairs(a1, a2):
+      when name == "x2": v2 = v1
+    doAssert a2 == Foo(x1: 0, x2: "abc")
diff --git a/lib/system/iterators_1.nim b/lib/system/iterators_1.nim
new file mode 100644
index 000000000..d00e3f823
--- /dev/null
+++ b/lib/system/iterators_1.nim
@@ -0,0 +1,180 @@
+when sizeof(int) <= 2:
+  type IntLikeForCount = int|int8|int16|char|bool|uint8|enum
+else:
+  type IntLikeForCount = int|int8|int16|int32|char|bool|uint8|uint16|enum
+
+iterator countdown*[T](a, b: T, step: Positive = 1): T {.inline.} =
+  ## Counts from ordinal value `a` down to `b` (inclusive) with the given
+  ## step count.
+  ##
+  ## `T` may be any ordinal type, `step` may only be positive.
+  ##
+  ## **Note**: This fails to count to `low(int)` if T = int for
+  ## efficiency reasons.
+  runnableExamples:
+    import std/sugar
+    let x = collect(newSeq):
+      for i in countdown(7, 3):
+        i
+
+    assert x == @[7, 6, 5, 4, 3]
+
+    let y = collect(newSeq):
+      for i in countdown(9, 2, 3):
+        i
+    assert y == @[9, 6, 3]
+  when T is (uint|uint64):
+    var res = a
+    while res >= b:
+      yield res
+      if res == b: break
+      dec(res, step)
+  elif T is IntLikeForCount and T is Ordinal:
+    var res = int(a)
+    while res >= int(b):
+      when defined(nimHasCastExtendedVm):
+        yield cast[T](res)
+      else:
+        yield T(res)
+      dec(res, step)
+  else:
+    var res = a
+    while res >= b:
+      yield res
+      dec(res, step)
+
+iterator countup*[T](a, b: T, step: Positive = 1): T {.inline.} =
+  ## Counts from ordinal value `a` to `b` (inclusive) with the given
+  ## step count.
+  ##
+  ## `T` may be any ordinal type, `step` may only be positive.
+  ##
+  ## **Note**: This fails to count to `high(int)` if T = int for
+  ## efficiency reasons.
+  runnableExamples:
+    import std/sugar
+    let x = collect(newSeq):
+      for i in countup(3, 7):
+        i
+
+    assert x == @[3, 4, 5, 6, 7]
+
+    let y = collect(newSeq):
+      for i in countup(2, 9, 3):
+        i
+    assert y == @[2, 5, 8]
+  mixin inc
+  when T is IntLikeForCount and T is Ordinal:
+    var res = int(a)
+    while res <= int(b):
+      when defined(nimHasCastExtendedVm):
+        yield cast[T](res)
+      else:
+        yield T(res)
+      inc(res, step)
+  else:
+    var res = a
+    while res <= b:
+      yield res
+      inc(res, step)
+
+iterator `..`*[T](a, b: T): T {.inline.} =
+  ## An alias for `countup(a, b, 1)`.
+  ##
+  ## See also:
+  ## * [..<](#..<.i,T,T)
+  runnableExamples:
+    import std/sugar
+
+    let x = collect(newSeq):
+      for i in 3 .. 7:
+        i
+
+    assert x == @[3, 4, 5, 6, 7]
+  mixin inc
+  when T is IntLikeForCount and T is Ordinal:
+    var res = int(a)
+    while res <= int(b):
+      when defined(nimHasCastExtendedVm):
+        yield cast[T](res)
+      else:
+        yield T(res)
+      inc(res)
+  else:
+    var res = a
+    while res <= b:
+      yield res
+      inc(res)
+
+template dotdotImpl(t) {.dirty.} =
+  iterator `..`*(a, b: t): t {.inline.} =
+    ## A type specialized version of `..` for convenience so that
+    ## mixing integer types works better.
+    ##
+    ## See also:
+    ## * [..<](#..<.i,T,T)
+    var res = a
+    while res <= b:
+      yield res
+      inc(res)
+
+dotdotImpl(int64)
+dotdotImpl(int32)
+dotdotImpl(uint64)
+dotdotImpl(uint32)
+
+iterator `..<`*[T](a, b: T): T {.inline.} =
+  mixin inc
+  var i = a
+  while i < b:
+    yield i
+    inc i
+
+template dotdotLessImpl(t) {.dirty.} =
+  iterator `..<`*(a, b: t): t {.inline.} =
+    ## A type specialized version of `..<` for convenience so that
+    ## mixing integer types works better.
+    var res = a
+    while res < b:
+      yield res
+      inc(res)
+
+dotdotLessImpl(int64)
+dotdotLessImpl(int32)
+dotdotLessImpl(uint64)
+dotdotLessImpl(uint32)
+
+iterator `||`*[S, T](a: S, b: T, annotation: static string = "parallel for"): T {.
+  inline, magic: "OmpParFor", sideEffect.} =
+  ## OpenMP parallel loop iterator. Same as `..` but the loop may run in parallel.
+  ##
+  ## `annotation` is an additional annotation for the code generator to use.
+  ## The default annotation is `parallel for`.
+  ## Please refer to the `OpenMP Syntax Reference
+  ## <https://www.openmp.org/wp-content/uploads/OpenMP-4.5-1115-CPP-web.pdf>`_
+  ## for further information.
+  ##
+  ## Note that the compiler maps that to
+  ## the `#pragma omp parallel for` construct of `OpenMP`:idx: and as
+  ## such isn't aware of the parallelism in your code! Be careful! Later
+  ## versions of `||` will get proper support by Nim's code generator
+  ## and GC.
+  discard
+
+iterator `||`*[S, T](a: S, b: T, step: Positive, annotation: static string = "parallel for"): T {.
+  inline, magic: "OmpParFor", sideEffect.} =
+  ## OpenMP parallel loop iterator with stepping.
+  ## Same as `countup` but the loop may run in parallel.
+  ##
+  ## `annotation` is an additional annotation for the code generator to use.
+  ## The default annotation is `parallel for`.
+  ## Please refer to the `OpenMP Syntax Reference
+  ## <https://www.openmp.org/wp-content/uploads/OpenMP-4.5-1115-CPP-web.pdf>`_
+  ## for further information.
+  ##
+  ## Note that the compiler maps that to
+  ## the `#pragma omp parallel for` construct of `OpenMP`:idx: and as
+  ## such isn't aware of the parallelism in your code! Be careful! Later
+  ## versions of `||` will get proper support by Nim's code generator
+  ## and GC.
+  discard
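A usage sketch for these iterators (assumes the C backend with OpenMP enabled, e.g. `--passC:-fopenmp --passL:-fopenmp`; the buffer is illustrative):

  var buf: array[0..99, float]
  for i in 0 || 99:            # same bounds as 0..99, emitted as an OpenMP parallel for
    buf[i] = float(i) * 0.5
  for i in `||`(0, 99, 4):     # stepped variant, analogous to countup(0, 99, 4)
    buf[i] = 0.0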
diff --git a/lib/system/jssys.nim b/lib/system/jssys.nim
index e12bab184..5599240fd 100644
--- a/lib/system/jssys.nim
+++ b/lib/system/jssys.nim
@@ -7,6 +7,9 @@
 #    distribution, for details about the copyright.
 #
 
+include system/indexerrors
+import std/private/miscdollars
+
 proc log*(s: cstring) {.importc: "console.log", varargs, nodecl.}
 
 type
@@ -31,8 +34,6 @@ type
 
   JSRef = ref RootObj # Fake type.
 
-{.deprecated: [TSafePoint: SafePoint, TCallFrame: CallFrame].}
-
 var
   framePtr {.importc, nodecl, volatile.}: PCallFrame
   excHandler {.importc, nodecl, volatile.}: int = 0
@@ -48,7 +49,7 @@ proc nimCharToStr(x: char): string {.compilerproc.} =
   result[0] = x
 
 proc isNimException(): bool {.asmNoStackFrame.} =
-  asm "return `lastJSError`.m_type;"
+  {.emit: "return `lastJSError` && `lastJSError`.m_type;".}
 
 proc getCurrentException*(): ref Exception {.compilerRtl, benign.} =
   if isNimException(): result = cast[ref Exception](lastJSError)
@@ -68,9 +69,16 @@ proc getCurrentExceptionMsg*(): string =
         return $msg
   return ""
 
+proc setCurrentException*(exc: ref Exception) =
+  lastJSError = cast[PJSError](exc)
+
+proc closureIterSetupExc(e: ref Exception) {.compilerproc, inline.} =
+  ## Used to set up exception handling for closure iterators
+  setCurrentException(e)
+
 proc auxWriteStackTrace(f: PCallFrame): string =
   type
-    TempFrame = tuple[procname: cstring, line: int]
+    TempFrame = tuple[procname: cstring, line: int, filename: cstring]
   var
     it = f
     i = 0
@@ -79,6 +87,7 @@ proc auxWriteStackTrace(f: PCallFrame): string =
   while it != nil and i <= high(tempFrames):
     tempFrames[i].procname = it.procname
     tempFrames[i].line = it.line
+    tempFrames[i].filename = it.filename
     inc(i)
     inc(total)
     it = it.prev
@@ -92,10 +101,9 @@ proc auxWriteStackTrace(f: PCallFrame): string =
     add(result, $(total-i))
     add(result, " calls omitted) ...\n")
   for j in countdown(i-1, 0):
+    result.toLocation($tempFrames[j].filename, tempFrames[j].line, 0)
+    add(result, " at ")
     add(result, tempFrames[j].procname)
-    if tempFrames[j].line > 0:
-      add(result, ", line: ")
-      add(result, $tempFrames[j].line)
     add(result, "\n")
 
 proc rawWriteStackTrace(): string =
@@ -104,13 +112,18 @@ proc rawWriteStackTrace(): string =
   else:
     result = "No stack traceback available\n"
 
+proc writeStackTrace() =
+  var trace = rawWriteStackTrace()
+  trace.setLen(trace.len - 1)
+  echo trace
+
 proc getStackTrace*(): string = rawWriteStackTrace()
 proc getStackTrace*(e: ref Exception): string = e.trace
 
 proc unhandledException(e: ref Exception) {.
     compilerproc, asmNoStackFrame.} =
   var buf = ""
-  if e.msg != nil and e.msg[0] != '\0':
+  if e.msg.len != 0:
     add(buf, "Error: unhandled exception: ")
     add(buf, e.msg)
   else:
@@ -120,8 +133,9 @@ proc unhandledException(e: ref Exception) {.
   add(buf, "]\n")
   when NimStackTrace:
     add(buf, rawWriteStackTrace())
-  let cbuf : cstring = buf
-  framePtr = nil
+  let cbuf = cstring(buf)
+  when NimStackTrace:
+    framePtr = nil
   {.emit: """
   if (typeof(Error) !== "undefined") {
     throw new Error(`cbuf`);
@@ -138,35 +152,35 @@ proc raiseException(e: ref Exception, ename: cstring) {.
     unhandledException(e)
   when NimStackTrace:
     e.trace = rawWriteStackTrace()
-  asm "throw `e`;"
+  {.emit: "throw `e`;".}
 
 proc reraiseException() {.compilerproc, asmNoStackFrame.} =
   if lastJSError == nil:
-    raise newException(ReraiseError, "no exception to reraise")
+    raise newException(ReraiseDefect, "no exception to reraise")
   else:
     if excHandler == 0:
       if isNimException():
         unhandledException(cast[ref Exception](lastJSError))
 
-    asm "throw lastJSError;"
+    {.emit: "throw lastJSError;".}
 
-proc raiseOverflow {.exportc: "raiseOverflow", noreturn, compilerProc.} =
-  raise newException(OverflowError, "over- or underflow")
+proc raiseOverflow {.exportc: "raiseOverflow", noreturn, compilerproc.} =
+  raise newException(OverflowDefect, "over- or underflow")
 
-proc raiseDivByZero {.exportc: "raiseDivByZero", noreturn, compilerProc.} =
-  raise newException(DivByZeroError, "division by zero")
+proc raiseDivByZero {.exportc: "raiseDivByZero", noreturn, compilerproc.} =
+  raise newException(DivByZeroDefect, "division by zero")
 
 proc raiseRangeError() {.compilerproc, noreturn.} =
-  raise newException(RangeError, "value out of range")
+  raise newException(RangeDefect, "value out of range")
 
-proc raiseIndexError() {.compilerproc, noreturn.} =
-  raise newException(IndexError, "index out of bounds")
+proc raiseIndexError(i, a, b: int) {.compilerproc, noreturn.} =
+  raise newException(IndexDefect, formatErrorIndexBound(int(i), int(a), int(b)))
 
-proc raiseFieldError(f: string) {.compilerproc, noreturn.} =
-  raise newException(FieldError, f & " is not accessible")
+proc raiseFieldError2(f: string, discVal: string) {.compilerproc, noreturn.} =
+  raise newException(FieldDefect, formatFieldDefect(f, discVal))
 
 proc setConstr() {.varargs, asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     var result = {};
     for (var i = 0; i < arguments.length; ++i) {
       var x = arguments[i];
@@ -179,17 +193,14 @@ proc setConstr() {.varargs, asmNoStackFrame, compilerproc.} =
       }
     }
     return result;
-  """
+  """.}
 
 proc makeNimstrLit(c: cstring): string {.asmNoStackFrame, compilerproc.} =
   {.emit: """
-  var ln = `c`.length;
-  var result = new Array(ln + 1);
-  var i = 0;
-  for (; i < ln; ++i) {
+  var result = [];
+  for (var i = 0; i < `c`.length; ++i) {
     result[i] = `c`.charCodeAt(i);
   }
-  result[i] = 0; // terminating zero
   return result;
   """.}
 
@@ -227,146 +238,159 @@ proc cstrToNimstr(c: cstring): string {.asmNoStackFrame, compilerproc.} =
     }
     ++r;
   }
-  result[r] = 0; // terminating zero
   return result;
   """.}
 
-proc toJSStr(s: string): cstring {.asmNoStackFrame, compilerproc.} =
-  asm """
-  var len = `s`.length-1;
-  var asciiPart = new Array(len);
-  var fcc = String.fromCharCode;
-  var nonAsciiPart = null;
-  var nonAsciiOffset = 0;
-  for (var i = 0; i < len; ++i) {
-    if (nonAsciiPart !== null) {
-      var offset = (i - nonAsciiOffset) * 2;
-      var code = `s`[i].toString(16);
-      if (code.length == 1) {
-        code = "0"+code;
-      }
-      nonAsciiPart[offset] = "%";
-      nonAsciiPart[offset + 1] = code;
-    }
-    else if (`s`[i] < 128)
-      asciiPart[i] = fcc(`s`[i]);
-    else {
-      asciiPart.length = i;
-      nonAsciiOffset = i;
-      nonAsciiPart = new Array((len - i) * 2);
-      --i;
-    }
-  }
-  asciiPart = asciiPart.join("");
-  return (nonAsciiPart === null) ?
-      asciiPart : asciiPart + decodeURIComponent(nonAsciiPart.join(""));
-  """
+proc toJSStr(s: string): cstring {.compilerproc.} =
+  proc fromCharCode(c: char): cstring {.importc: "String.fromCharCode".}
+  proc join(x: openArray[cstring]; d = cstring""): cstring {.
+    importcpp: "#.join(@)".}
+  proc decodeURIComponent(x: cstring): cstring {.
+    importc: "decodeURIComponent".}
+
+  proc toHexString(c: char; d = 16): cstring {.importcpp: "#.toString(@)".}
+
+  proc log(x: cstring) {.importc: "console.log".}
+
+  var res = newSeq[cstring](s.len)
+  var i = 0
+  var j = 0
+  while i < s.len:
+    var c = s[i]
+    if c < '\128':
+      res[j] = fromCharCode(c)
+      inc i
+    else:
+      var helper = newSeq[cstring]()
+      while true:
+        let code = toHexString(c)
+        if code.len == 1:
+          helper.add cstring"%0"
+        else:
+          helper.add cstring"%"
+        helper.add code
+        inc i
+        if i >= s.len or s[i] < '\128': break
+        c = s[i]
+      try:
+        res[j] = decodeURIComponent join(helper)
+      except:
+        res[j] = join(helper)
+    inc j
+  setLen(res, j)
+  result = join(res)
 
 proc mnewString(len: int): string {.asmNoStackFrame, compilerproc.} =
-  asm """
-    var result = new Array(`len`+1);
-    result[0] = 0;
-    result[`len`] = 0;
+  {.emit: """
+    var result = new Array(`len`);
+    for (var i = 0; i < `len`; i++) {result[i] = 0;}
     return result;
-  """
+  """.}
 
 proc SetCard(a: int): int {.compilerproc, asmNoStackFrame.} =
   # argument type is a fake
-  asm """
+  {.emit: """
     var result = 0;
     for (var elem in `a`) { ++result; }
     return result;
-  """
+  """.}
 
 proc SetEq(a, b: int): bool {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     for (var elem in `a`) { if (!`b`[elem]) return false; }
     for (var elem in `b`) { if (!`a`[elem]) return false; }
     return true;
-  """
+  """.}
 
 proc SetLe(a, b: int): bool {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     for (var elem in `a`) { if (!`b`[elem]) return false; }
     return true;
-  """
+  """.}
 
 proc SetLt(a, b: int): bool {.compilerproc.} =
   result = SetLe(a, b) and not SetEq(a, b)
 
 proc SetMul(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     var result = {};
     for (var elem in `a`) {
       if (`b`[elem]) { result[elem] = true; }
     }
     return result;
-  """
+  """.}
 
 proc SetPlus(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     var result = {};
     for (var elem in `a`) { result[elem] = true; }
     for (var elem in `b`) { result[elem] = true; }
     return result;
-  """
+  """.}
 
 proc SetMinus(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     var result = {};
     for (var elem in `a`) {
       if (!`b`[elem]) { result[elem] = true; }
     }
     return result;
-  """
+  """.}
 
-proc cmpStrings(a, b: string): int {.asmNoStackFrame, compilerProc.} =
-  asm """
+proc cmpStrings(a, b: string): int {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     if (`a` == `b`) return 0;
     if (!`a`) return -1;
     if (!`b`) return 1;
-    for (var i = 0; i < `a`.length - 1 && i < `b`.length - 1; i++) {
+    for (var i = 0; i < `a`.length && i < `b`.length; i++) {
       var result = `a`[i] - `b`[i];
       if (result != 0) return result;
     }
     return `a`.length - `b`.length;
-  """
+  """.}
 
 proc cmp(x, y: string): int =
-  return cmpStrings(x, y)
+  when nimvm:
+    if x == y: result = 0
+    elif x < y: result = -1
+    else: result = 1
+  else:
+    result = cmpStrings(x, y)
 
-proc eqStrings(a, b: string): bool {.asmNoStackFrame, compilerProc.} =
-  asm """
+proc eqStrings(a, b: string): bool {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     if (`a` == `b`) return true;
+    if (`a` === null && `b`.length == 0) return true;
+    if (`b` === null && `a`.length == 0) return true;
     if ((!`a`) || (!`b`)) return false;
     var alen = `a`.length;
     if (alen != `b`.length) return false;
     for (var i = 0; i < alen; ++i)
       if (`a`[i] != `b`[i]) return false;
     return true;
-  """
+  """.}
 
 when defined(kwin):
   proc rawEcho {.compilerproc, asmNoStackFrame.} =
-    asm """
+    {.emit: """
       var buf = "";
       for (var i = 0; i < arguments.length; ++i) {
         buf += `toJSStr`(arguments[i]);
       }
       print(buf);
-    """
+    """.}
 
 elif not defined(nimOldEcho):
   proc ewriteln(x: cstring) = log(x)
 
   proc rawEcho {.compilerproc, asmNoStackFrame.} =
-    asm """
+    {.emit: """
       var buf = "";
       for (var i = 0; i < arguments.length; ++i) {
         buf += `toJSStr`(arguments[i]);
       }
       console.log(buf);
-    """
+    """.}
 
 else:
   proc ewriteln(x: cstring) =
@@ -393,78 +417,85 @@ else:
     """.}
 
 # Arithmetic:
+proc checkOverflowInt(a: int) {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    if (`a` > 2147483647 || `a` < -2147483648) `raiseOverflow`();
+  """.}
+
 proc addInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     var result = `a` + `b`;
-    if (result > 2147483647 || result < -2147483648) `raiseOverflow`();
+    `checkOverflowInt`(result);
     return result;
-  """
+  """.}
 
 proc subInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     var result = `a` - `b`;
-    if (result > 2147483647 || result < -2147483648) `raiseOverflow`();
+    `checkOverflowInt`(result);
     return result;
-  """
+  """.}
 
 proc mulInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     var result = `a` * `b`;
-    if (result > 2147483647 || result < -2147483648) `raiseOverflow`();
+    `checkOverflowInt`(result);
     return result;
-  """
+  """.}
 
 proc divInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     if (`b` == 0) `raiseDivByZero`();
     if (`b` == -1 && `a` == 2147483647) `raiseOverflow`();
     return Math.trunc(`a` / `b`);
-  """
+  """.}
 
 proc modInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     if (`b` == 0) `raiseDivByZero`();
     if (`b` == -1 && `a` == 2147483647) `raiseOverflow`();
     return Math.trunc(`a` % `b`);
-  """
+  """.}
 
-proc addInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+proc checkOverflowInt64(a: int64) {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    if (`a` > 9223372036854775807n || `a` < -9223372036854775808n) `raiseOverflow`();
+  """.}
+
+proc addInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` + `b`;
-    if (result > 9223372036854775807
-    || result < -9223372036854775808) `raiseOverflow`();
+    `checkOverflowInt64`(result);
     return result;
-  """
+  """.}
 
-proc subInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+proc subInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` - `b`;
-    if (result > 9223372036854775807
-    || result < -9223372036854775808) `raiseOverflow`();
+    `checkOverflowInt64`(result);
     return result;
-  """
+  """.}
 
-proc mulInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+proc mulInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` * `b`;
-    if (result > 9223372036854775807
-    || result < -9223372036854775808) `raiseOverflow`();
+    `checkOverflowInt64`(result);
     return result;
-  """
+  """.}
 
-proc divInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
-    if (`b` == 0) `raiseDivByZero`();
-    if (`b` == -1 && `a` == 9223372036854775807) `raiseOverflow`();
-    return Math.trunc(`a` / `b`);
-  """
+proc divInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    if (`b` == 0n) `raiseDivByZero`();
+    if (`b` == -1n && `a` == 9223372036854775807n) `raiseOverflow`();
+    return `a` / `b`;
+  """.}
 
-proc modInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
-    if (`b` == 0) `raiseDivByZero`();
-    if (`b` == -1 && `a` == 9223372036854775807) `raiseOverflow`();
-    return Math.trunc(`a` % `b`);
-  """
+proc modInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    if (`b` == 0n) `raiseDivByZero`();
+    if (`b` == -1n && `a` == 9223372036854775807n) `raiseOverflow`();
+    return `a` % `b`;
+  """.}
 
 proc negInt(a: int): int {.compilerproc.} =
   result = a*(-1)
@@ -478,35 +509,13 @@ proc absInt(a: int): int {.compilerproc.} =
 proc absInt64(a: int64): int64 {.compilerproc.} =
   result = if a < 0: a*(-1) else: a
 
-proc ze*(a: int): int {.compilerproc.} =
-  result = a
-
-proc ze64*(a: int64): int64 {.compilerproc.} =
-  result = a
-
-proc toU8*(a: int): int8 {.asmNoStackFrame, compilerproc.} =
-  asm """
-    return `a`;
-  """
-
-proc toU16*(a: int): int16 {.asmNoStackFrame, compilerproc.} =
-  asm """
-    return `a`;
-  """
-
-proc toU32*(a: int64): int32 {.asmNoStackFrame, compilerproc.} =
-  asm """
-    return `a`;
-  """
-
 proc nimMin(a, b: int): int {.compilerproc.} = return if a <= b: a else: b
 proc nimMax(a, b: int): int {.compilerproc.} = return if a >= b: a else: b
 
-proc chckNilDisp(p: pointer) {.compilerproc.} =
+proc chckNilDisp(p: JSRef) {.compilerproc.} =
   if p == nil:
-    sysFatal(NilAccessError, "cannot dispatch; dispatcher is nil")
+    sysFatal(NilAccessDefect, "cannot dispatch; dispatcher is nil")
 
-type NimString = string # hack for hti.nim
 include "system/hti"
 
 proc isFatPointer(ti: PNimType): bool =
@@ -521,19 +530,22 @@ proc nimCopyAux(dest, src: JSRef, n: ptr TNimNode) {.compilerproc.} =
   case n.kind
   of nkNone: sysAssert(false, "nimCopyAux")
   of nkSlot:
-    asm """
+    {.emit: """
       `dest`[`n`.offset] = nimCopy(`dest`[`n`.offset], `src`[`n`.offset], `n`.typ);
-    """
+    """.}
   of nkList:
-    for i in 0..n.len-1:
-      nimCopyAux(dest, src, n.sons[i])
+    {.emit: """
+    for (var i = 0; i < `n`.sons.length; i++) {
+      nimCopyAux(`dest`, `src`, `n`.sons[i]);
+    }
+    """.}
   of nkCase:
-    asm """
+    {.emit: """
       `dest`[`n`.offset] = nimCopy(`dest`[`n`.offset], `src`[`n`.offset], `n`.typ);
       for (var i = 0; i < `n`.sons.length; ++i) {
         nimCopyAux(`dest`, `src`, `n`.sons[i][1]);
       }
-    """
+    """.}
 
 proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
   case ti.kind
@@ -541,9 +553,9 @@ proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
     if not isFatPointer(ti):
       result = src
     else:
-      asm "`result` = [`src`[0], `src`[1]];"
+      {.emit: "`result` = [`src`[0], `src`[1]];".}
   of tySet:
-    asm """
+    {.emit: """
       if (`dest` === null || `dest` === undefined) {
         `dest` = {};
       }
@@ -552,84 +564,76 @@ proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
       }
       for (var key in `src`) { `dest`[key] = `src`[key]; }
       `result` = `dest`;
-    """
+    """.}
   of tyTuple, tyObject:
     if ti.base != nil: result = nimCopy(dest, src, ti.base)
     elif ti.kind == tyObject:
-      asm "`result` = (`dest` === null || `dest` === undefined) ? {m_type: `ti`} : `dest`;"
+      {.emit: "`result` = (`dest` === null || `dest` === undefined) ? {m_type: `ti`} : `dest`;".}
     else:
-      asm "`result` = (`dest` === null || `dest` === undefined) ? {} : `dest`;"
+      {.emit: "`result` = (`dest` === null || `dest` === undefined) ? {} : `dest`;".}
     nimCopyAux(result, src, ti.node)
-  of tySequence, tyArrayConstr, tyOpenArray, tyArray:
-    asm """
+  of tyArrayConstr, tyArray:
+    # In order to prevent a type change (TypedArray -> Array) and to have better copying performance,
+    # array constructors are handled separately
+    {.emit: """
+      if(ArrayBuffer.isView(`src`)) { 
+        if(`dest` === null || `dest` === undefined || `dest`.length != `src`.length) {
+          `dest` = new `src`.constructor(`src`);
+        } else {
+          `dest`.set(`src`, 0);
+        }
+        `result` = `dest`;
+      } else {
+        if (`src` === null) {
+          `result` = null;
+        }
+        else {
+          if (`dest` === null || `dest` === undefined || `dest`.length != `src`.length) {
+            `dest` = new Array(`src`.length);
+          }
+          `result` = `dest`;
+          for (var i = 0; i < `src`.length; ++i) {
+            `result`[i] = nimCopy(`result`[i], `src`[i], `ti`.base);
+          }
+        }
+      }
+    """.}
+  of tySequence, tyOpenArray:
+    {.emit: """
       if (`src` === null) {
         `result` = null;
       }
       else {
-        if (`dest` === null || `dest` === undefined) {
+        if (`dest` === null || `dest` === undefined || `dest`.length != `src`.length) {
           `dest` = new Array(`src`.length);
         }
-        else {
-          `dest`.length = `src`.length;
-        }
         `result` = `dest`;
         for (var i = 0; i < `src`.length; ++i) {
           `result`[i] = nimCopy(`result`[i], `src`[i], `ti`.base);
         }
       }
-    """
+    """.}
   of tyString:
-    asm """
+    {.emit: """
       if (`src` !== null) {
         `result` = `src`.slice(0);
       }
-    """
+    """.}
   else:
     result = src
 
-proc genericReset(x: JSRef, ti: PNimType): JSRef {.compilerproc.} =
-  asm "`result` = null;"
-  case ti.kind
-  of tyPtr, tyRef, tyVar, tyNil:
-    if isFatPointer(ti):
-      asm """
-        `result` = [null, 0];
-      """
-  of tySet:
-    asm """
-      `result` = {};
-    """
-  of tyTuple, tyObject:
-    if ti.kind == tyObject:
-      asm "`result` = {m_type: `ti`};"
-    else:
-      asm "`result` = {};"
-  of tySequence, tyOpenArray:
-    asm """
-      `result` = [];
-    """
-  of tyArrayConstr, tyArray:
-    asm """
-      `result` = new Array(`x`.length);
-      for (var i = 0; i < `x`.length; ++i) {
-        `result`[i] = genericReset(`x`[i], `ti`.base);
-      }
-    """
-  else:
-    discard
-
 proc arrayConstr(len: int, value: JSRef, typ: PNimType): JSRef {.
                 asmNoStackFrame, compilerproc.} =
   # types are fake
-  asm """
+  {.emit: """
     var result = new Array(`len`);
     for (var i = 0; i < `len`; ++i) result[i] = nimCopy(null, `value`, `typ`);
     return result;
-  """
+  """.}
 
 proc chckIndx(i, a, b: int): int {.compilerproc.} =
   if i >= a and i <= b: return i
-  else: raiseIndexError()
+  else: raiseIndexError(i, a, b)
 
 proc chckRange(i, a, b: int): int {.compilerproc.} =
   if i >= a and i <= b: return i
@@ -641,7 +645,7 @@ proc chckObj(obj, subclass: PNimType) {.compilerproc.} =
   if x == subclass: return # optimized fast path
   while x != subclass:
     if x == nil:
-      raise newException(ObjectConversionError, "invalid object conversion")
+      raise newException(ObjectConversionDefect, "invalid object conversion")
     x = x.base
 
 proc isObj(obj, subclass: PNimType): bool {.compilerproc.} =
@@ -654,13 +658,12 @@ proc isObj(obj, subclass: PNimType): bool {.compilerproc.} =
   return true
 
 proc addChar(x: string, c: char) {.compilerproc, asmNoStackFrame.} =
-  asm """
-    `x`[`x`.length-1] = `c`; `x`.push(0);
-  """
+  {.emit: "`x`.push(`c`);".}
 
 {.pop.}
 
 proc tenToThePowerOf(b: int): BiggestFloat =
+  # xxx deadcode
   var b = b
   var a = 10.0
   result = 1.0
@@ -674,92 +677,92 @@ proc tenToThePowerOf(b: int): BiggestFloat =
 const
   IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
 
-# XXX use JS's native way here
-proc nimParseBiggestFloat(s: string, number: var BiggestFloat, start = 0): int {.
-                          compilerProc.} =
-  var
-    esign = 1.0
-    sign = 1.0
-    i = start
-    exponent: int
-    flags: int
-  number = 0.0
+
+proc parseFloatNative(a: openArray[char]): float =
+  var str = ""
+  for x in a:
+    str.add x
+
+  let cstr = cstring str
+
+  {.emit: """
+  `result` = Number(`cstr`);
+  """.}
+
+proc nimParseBiggestFloat(s: openArray[char], number: var BiggestFloat): int {.compilerproc.} =
+  var sign: bool
+  var i = 0
   if s[i] == '+': inc(i)
   elif s[i] == '-':
-    sign = -1.0
+    sign = true
     inc(i)
   if s[i] == 'N' or s[i] == 'n':
     if s[i+1] == 'A' or s[i+1] == 'a':
       if s[i+2] == 'N' or s[i+2] == 'n':
         if s[i+3] notin IdentChars:
           number = NaN
-          return i+3 - start
+          return i+3
     return 0
   if s[i] == 'I' or s[i] == 'i':
     if s[i+1] == 'N' or s[i+1] == 'n':
       if s[i+2] == 'F' or s[i+2] == 'f':
         if s[i+3] notin IdentChars:
-          number = Inf*sign
-          return i+3 - start
+          number = if sign: -Inf else: Inf
+          return i+3
     return 0
-  while s[i] in {'0'..'9'}:
-    # Read integer part
-    flags = flags or 1
-    number = number * 10.0 + toFloat(ord(s[i]) - ord('0'))
+
+  var buf: string
+    # we could also use an `array[N, char]` buffer to avoid reallocs, or
+    # use a 2-pass algorithm that first computes the length.
+  if sign: buf.add '-'
+  template addInc =
+    buf.add s[i]
     inc(i)
+  template eatUnderscores =
     while s[i] == '_': inc(i)
-  # Decimal?
-  if s[i] == '.':
-    var hd = 1.0
+  while s[i] in {'0'..'9'}: # Read integer part
+    buf.add s[i]
     inc(i)
-    while s[i] in {'0'..'9'}:
-      # Read fractional part
-      flags = flags or 2
-      number = number * 10.0 + toFloat(ord(s[i]) - ord('0'))
-      hd = hd * 10.0
-      inc(i)
-      while s[i] == '_': inc(i)
-    number = number / hd # this complicated way preserves precision
+    eatUnderscores()
+  if s[i] == '.': # Decimal?
+    addInc()
+    while s[i] in {'0'..'9'}: # Read fractional part
+      addInc()
+      eatUnderscores()
   # Again, read integer and fractional part
-  if flags == 0: return 0
-  # Exponent?
-  if s[i] in {'e', 'E'}:
-    inc(i)
-    if s[i] == '+':
-      inc(i)
-    elif s[i] == '-':
-      esign = -1.0
-      inc(i)
-    if s[i] notin {'0'..'9'}:
-      return 0
+  if buf.len == ord(sign): return 0
+  if s[i] in {'e', 'E'}: # Exponent?
+    addInc()
+    if s[i] == '+': inc(i)
+    elif s[i] == '-': addInc()
+    if s[i] notin {'0'..'9'}: return 0
     while s[i] in {'0'..'9'}:
-      exponent = exponent * 10 + ord(s[i]) - ord('0')
-      inc(i)
-      while s[i] == '_': inc(i)
-  # Calculate Exponent
-  let hd = tenToThePowerOf(exponent)
-  if esign > 0.0: number = number * hd
-  else:           number = number / hd
-  # evaluate sign
-  number = number * sign
-  result = i - start
-
-when defined(nodejs):
-  # Deprecated. Use `alert` defined in dom.nim
-  proc alert*(s: cstring) {.importc: "console.log", nodecl, deprecated.}
-else:
-  # Deprecated. Use `alert` defined in dom.nim
-  proc alert*(s: cstring) {.importc, nodecl, deprecated.}
+      addInc()
+      eatUnderscores()
+  number = parseFloatNative(buf)
+  result = i
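A sketch of the intended contract (the proc itself is an internal compilerproc reached through the standard float-parsing routines; the literals are illustrative):

  var f: BiggestFloat
  assert nimParseBiggestFloat("3.25e2#", f) == 6   # consumes "3.25e2", stops at '#'
  assert f == 325.0
  assert nimParseBiggestFloat("abc", f) == 0       # no float found, nothing consumed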
 
 # Workaround for IE, IE up to version 11 lacks 'Math.trunc'. We produce
 # 'Math.trunc' for Nim's ``div`` and ``mod`` operators:
-when not defined(nodejs):
+when defined(nimJsMathTruncPolyfill):
+  {.emit: """
+if (!Math.trunc) {
+  Math.trunc = function(v) {
+    v = +v;
+    if (!isFinite(v)) return v;
+    return (v - v % 1) || (v < 0 ? -0 : v === 0 ? v : 0);
+  };
+}
+""".}
+
+proc cmpClosures(a, b: JSRef): bool {.compilerproc, asmNoStackFrame.} =
+  # Both `a` and `b` need to be a closure
   {.emit: """
-  if (!Math.trunc) {
-    Math.trunc = function(v) {
-      v = +v;
-      if (!isFinite(v)) return v;
-
-      return (v - v % 1)   ||   (v < 0 ? -0 : v === 0 ? v : 0);
-    };
-  }""".}
+    if (`a` !== null && `a`.ClP_0 !== undefined &&
+        `b` !== null && `b`.ClP_0 !== undefined) {
+      return `a`.ClP_0 == `b`.ClP_0 && `a`.ClE_0 == `b`.ClE_0;
+    } else {
+      return `a` == `b`;
+    }
+  """
+  .}
diff --git a/lib/system/memalloc.nim b/lib/system/memalloc.nim
new file mode 100644
index 000000000..a94d0995c
--- /dev/null
+++ b/lib/system/memalloc.nim
@@ -0,0 +1,449 @@
+when notJSnotNims:
+  proc zeroMem*(p: pointer, size: Natural) {.inline, noSideEffect,
+    tags: [], raises: [].}
+    ## Overwrites the contents of the memory at `p` with the value 0.
+    ##
+    ## Exactly `size` bytes will be overwritten. Like any procedure
+    ## dealing with raw memory this is **unsafe**.
+
+  proc copyMem*(dest, source: pointer, size: Natural) {.inline, benign,
+    tags: [], raises: [].}
+    ## Copies the contents from the memory at `source` to the memory
+    ## at `dest`.
+    ## Exactly `size` bytes will be copied. The memory
+    ## regions may not overlap. Like any procedure dealing with raw
+    ## memory this is **unsafe**.
+
+  proc moveMem*(dest, source: pointer, size: Natural) {.inline, benign,
+    tags: [], raises: [].}
+    ## Copies the contents from the memory at `source` to the memory
+    ## at `dest`.
+    ##
+    ## Exactly `size` bytes will be copied. The memory
+    ## regions may overlap, `moveMem` handles this case appropriately
+    ## and is thus somewhat safer than `copyMem`. Like any procedure
+    ## dealing with raw memory this is still **unsafe**, though.
+
+  proc equalMem*(a, b: pointer, size: Natural): bool {.inline, noSideEffect,
+    tags: [], raises: [].}
+    ## Compares the memory blocks `a` and `b`. `size` bytes will
+    ## be compared.
+    ##
+    ## If the blocks are equal, `true` is returned, `false`
+    ## otherwise. Like any procedure dealing with raw memory this is
+    ## **unsafe**.
+
+  proc cmpMem*(a, b: pointer, size: Natural): int {.inline, noSideEffect,
+    tags: [], raises: [].}
+    ## Compares the memory blocks `a` and `b`. `size` bytes will
+    ## be compared.
+    ##
+    ## Returns:
+    ## * a value less than zero, if `a < b`
+    ## * a value greater than zero, if `a > b`
+    ## * zero, if `a == b`
+    ##
+    ## Like any procedure dealing with raw memory this is
+    ## **unsafe**.
+
+when hasAlloc and not defined(js):
+
+  proc allocImpl*(size: Natural): pointer {.noconv, rtl, tags: [], benign, raises: [].}
+  proc alloc0Impl*(size: Natural): pointer {.noconv, rtl, tags: [], benign, raises: [].}
+  proc deallocImpl*(p: pointer) {.noconv, rtl, tags: [], benign, raises: [].}
+  proc reallocImpl*(p: pointer, newSize: Natural): pointer {.noconv, rtl, tags: [], benign, raises: [].}
+  proc realloc0Impl*(p: pointer, oldSize, newSize: Natural): pointer {.noconv, rtl, tags: [], benign, raises: [].}
+
+  proc allocSharedImpl*(size: Natural): pointer {.noconv, compilerproc, rtl, benign, raises: [], tags: [].}
+  proc allocShared0Impl*(size: Natural): pointer {.noconv, rtl, benign, raises: [], tags: [].}
+  proc deallocSharedImpl*(p: pointer) {.noconv, rtl, benign, raises: [], tags: [].}
+  proc reallocSharedImpl*(p: pointer, newSize: Natural): pointer {.noconv, rtl, tags: [], benign, raises: [].}
+  proc reallocShared0Impl*(p: pointer, oldSize, newSize: Natural): pointer {.noconv, rtl, tags: [], benign, raises: [].}
+
+  # Allocator statistics for memory leak tests
+
+  {.push stackTrace: off.}
+
+  type AllocStats* = object
+    allocCount: int
+    deallocCount: int
+
+  proc `-`*(a, b: AllocStats): AllocStats =
+    result.allocCount = a.allocCount - b.allocCount
+    result.deallocCount = a.deallocCount - b.deallocCount
+
+  template dumpAllocstats*(code: untyped) =
+    let stats1 = getAllocStats()
+    code
+    let stats2 = getAllocStats()
+    echo $(stats2 - stats1)
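A usage sketch (meaningful counts require compiling with `-d:nimAllocStats`; the allocation is illustrative):

  dumpAllocstats:
    let p = alloc(64)
    dealloc(p)
  # prints something like: (allocCount: 1, deallocCount: 1)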
+
+  when defined(nimAllocStats):
+    var stats: AllocStats
+    template incStat(what: untyped) = atomicInc stats.what
+    proc getAllocStats*(): AllocStats = stats
+
+  else:
+    template incStat(what: untyped) = discard
+    proc getAllocStats*(): AllocStats = discard
+
+  template alloc*(size: Natural): pointer =
+    ## Allocates a new memory block with at least `size` bytes.
+    ##
+    ## The block has to be freed with `realloc(block, 0) <#realloc.t,pointer,Natural>`_
+    ## or `dealloc(block) <#dealloc,pointer>`_.
+    ## The block is not initialized, so reading
+    ## from it before writing to it is undefined behaviour!
+    ##
+    ## The allocated memory belongs to its allocating thread!
+    ## Use `allocShared <#allocShared.t,Natural>`_ to allocate from a shared heap.
+    ##
+    ## See also:
+    ## * `alloc0 <#alloc0.t,Natural>`_
+    incStat(allocCount)
+    allocImpl(size)
+
+  proc createU*(T: typedesc, size = 1.Positive): ptr T {.inline, benign, raises: [].} =
+    ## Allocates a new memory block with at least `T.sizeof * size` bytes.
+    ##
+    ## The block has to be freed with `resize(block, 0) <#resize,ptr.T,Natural>`_
+    ## or `dealloc(block) <#dealloc,pointer>`_.
+    ## The block is not initialized, so reading
+    ## from it before writing to it is undefined behaviour!
+    ##
+    ## The allocated memory belongs to its allocating thread!
+    ## Use `createSharedU <#createSharedU,typedesc>`_ to allocate from a shared heap.
+    ##
+    ## See also:
+    ## * `create <#create,typedesc>`_
+    cast[ptr T](alloc(T.sizeof * size))
+
+  template alloc0*(size: Natural): pointer =
+    ## Allocates a new memory block with at least `size` bytes.
+    ##
+    ## The block has to be freed with `realloc(block, 0) <#realloc.t,pointer,Natural>`_
+    ## or `dealloc(block) <#dealloc,pointer>`_.
+    ## The block is initialized with all bytes containing zero, so it is
+    ## somewhat safer than `alloc <#alloc.t,Natural>`_.
+    ##
+    ## The allocated memory belongs to its allocating thread!
+    ## Use `allocShared0 <#allocShared0.t,Natural>`_ to allocate from a shared heap.
+    incStat(allocCount)
+    alloc0Impl(size)
+
+  proc create*(T: typedesc, size = 1.Positive): ptr T {.inline, benign, raises: [].} =
+    ## Allocates a new memory block with at least `T.sizeof * size` bytes.
+    ##
+    ## The block has to be freed with `resize(block, 0) <#resize,ptr.T,Natural>`_
+    ## or `dealloc(block) <#dealloc,pointer>`_.
+    ## The block is initialized with all bytes containing zero, so it is
+    ## somewhat safer than `createU <#createU,typedesc>`_.
+    ##
+    ## The allocated memory belongs to its allocating thread!
+    ## Use `createShared <#createShared,typedesc>`_ to allocate from a shared heap.
+    cast[ptr T](alloc0(sizeof(T) * size))
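A brief usage sketch (the object type is illustrative):

  type Vec = object
    x, y: float
  let v = create(Vec)          # zero-initialized block holding one Vec
  v.x = 1.5
  assert v.y == 0.0
  dealloc(v)                   # must be freed by the thread that allocated it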
+
+  template realloc*(p: pointer, newSize: Natural): pointer =
+    ## Grows or shrinks a given memory block.
+    ##
+    ## If `p` is **nil** then a new memory block is returned.
+    ## In either way the block has at least `newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `realloc` calls `dealloc(p)`.
+    ## In other cases the block has to be freed with
+    ## `dealloc(block) <#dealloc,pointer>`_.
+    ##
+    ## The allocated memory belongs to its allocating thread!
+    ## Use `reallocShared <#reallocShared.t,pointer,Natural>`_ to reallocate
+    ## from a shared heap.
+    reallocImpl(p, newSize)
+
+  template realloc0*(p: pointer, oldSize, newSize: Natural): pointer =
+    ## Grows or shrinks a given memory block.
+    ##
+    ## If `p` is **nil** then a new memory block is returned.
+    ## In either way the block has at least `newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `realloc` calls `dealloc(p)`.
+    ## In other cases the block has to be freed with
+    ## `dealloc(block) <#dealloc,pointer>`_.
+    ##
+    ## The block is initialized with all bytes containing zero, so it is
+    ## somewhat safer than `realloc`.
+    ##
+    ## The allocated memory belongs to its allocating thread!
+    ## Use `reallocShared <#reallocShared.t,pointer,Natural>`_ to reallocate
+    ## from a shared heap.
+    realloc0Impl(p, oldSize, newSize)
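A sketch of growing a raw buffer (sizes are illustrative):

  var p = alloc0(16)           # 16 zeroed bytes
  p = realloc(p, 32)           # grow; the first 16 bytes keep their contents
  p = realloc0(p, 32, 64)      # grow again, zeroing the newly added bytes
  dealloc(p)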
+
+  proc resize*[T](p: ptr T, newSize: Natural): ptr T {.inline, benign, raises: [].} =
+    ## Grows or shrinks a given memory block.
+    ##
+    ## If `p` is **nil** then a new memory block is returned.
+    ## In either way the block has at least `T.sizeof * newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `resize` calls `dealloc(p)`.
+    ## In other cases the block has to be freed with `free`.
+    ##
+    ## The allocated memory belongs to its allocating thread!
+    ## Use `resizeShared <#resizeShared,ptr.T,Natural>`_ to reallocate
+    ## from a shared heap.
+    cast[ptr T](realloc(p, T.sizeof * newSize))
+
+  proc dealloc*(p: pointer) {.noconv, compilerproc, rtl, benign, raises: [], tags: [].} =
+    ## Frees the memory allocated with `alloc`, `alloc0`,
+    ## `realloc`, `create` or `createU`.
+    ##
+    ## **This procedure is dangerous!**
+    ## If one forgets to free the memory a leak occurs; if one tries to
+    ## access freed memory (or just freeing it twice!) a core dump may happen
+    ## or other memory may be corrupted.
+    ##
+    ## The freed memory must belong to its allocating thread!
+    ## Use `deallocShared <#deallocShared,pointer>`_ to deallocate from a shared heap.
+    incStat(deallocCount)
+    deallocImpl(p)
+
+  template allocShared*(size: Natural): pointer =
+    ## Allocates a new memory block on the shared heap with at
+    ## least `size` bytes.
+    ##
+    ## The block has to be freed with
+    ## `reallocShared(block, 0) <#reallocShared.t,pointer,Natural>`_
+    ## or `deallocShared(block) <#deallocShared,pointer>`_.
+    ##
+    ## The block is not initialized, so reading from it before writing
+    ## to it is undefined behaviour!
+    ##
+    ## See also:
+    ## * `allocShared0 <#allocShared0.t,Natural>`_.
+    incStat(allocCount)
+    allocSharedImpl(size)
+
+  proc createSharedU*(T: typedesc, size = 1.Positive): ptr T {.inline, tags: [],
+                                                               benign, raises: [].} =
+    ## Allocates a new memory block on the shared heap with at
+    ## least `T.sizeof * size` bytes.
+    ##
+    ## The block has to be freed with
+    ## `resizeShared(block, 0) <#resizeShared,ptr.T,Natural>`_ or
+    ## `freeShared(block) <#freeShared,ptr.T>`_.
+    ##
+    ## The block is not initialized, so reading from it before writing
+    ## to it is undefined behaviour!
+    ##
+    ## See also:
+    ## * `createShared <#createShared,typedesc>`_
+    cast[ptr T](allocShared(T.sizeof * size))
+
+  template allocShared0*(size: Natural): pointer =
+    ## Allocates a new memory block on the shared heap with at
+    ## least `size` bytes.
+    ##
+    ## The block has to be freed with
+    ## `reallocShared(block, 0) <#reallocShared.t,pointer,Natural>`_
+    ## or `deallocShared(block) <#deallocShared,pointer>`_.
+    ##
+    ## The block is initialized with all bytes
+    ## containing zero, so it is somewhat safer than
+    ## `allocShared <#allocShared.t,Natural>`_.
+    incStat(allocCount)
+    allocShared0Impl(size)
+
+  proc createShared*(T: typedesc, size = 1.Positive): ptr T {.inline.} =
+    ## Allocates a new memory block on the shared heap with at
+    ## least `T.sizeof * size` bytes.
+    ##
+    ## The block has to be freed with
+    ## `resizeShared(block, 0) <#resizeShared,ptr.T,Natural>`_ or
+    ## `freeShared(block) <#freeShared,ptr.T>`_.
+    ##
+    ## The block is initialized with all bytes
+    ## containing zero, so it is somewhat safer than
+    ## `createSharedU <#createSharedU,typedesc>`_.
+    cast[ptr T](allocShared0(T.sizeof * size))
+
+  template reallocShared*(p: pointer, newSize: Natural): pointer =
+    ## Grows or shrinks a given memory block on the heap.
+    ##
+    ## If `p` is **nil** then a new memory block is returned.
+    ## In either way the block has at least `newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `reallocShared` calls
+    ## `deallocShared(p)`.
+    ## In other cases the block has to be freed with
+    ## `deallocShared <#deallocShared,pointer>`_.
+    reallocSharedImpl(p, newSize)
+
+  template reallocShared0*(p: pointer, oldSize, newSize: Natural): pointer =
+    ## Grows or shrinks a given memory block on the heap.
+    ##
+    ## When growing, the newly added bytes of the block are initialized to zero,
+    ## so it is somewhat safer than `reallocShared`.
+    ##
+    ## If `p` is **nil** then a new memory block is returned.
+    ## In either way the block has at least `newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `reallocShared` calls
+    ## `deallocShared(p)`.
+    ## In other cases the block has to be freed with
+    ## `deallocShared <#deallocShared,pointer>`_.
+    reallocShared0Impl(p, oldSize, newSize)
+
+  proc resizeShared*[T](p: ptr T, newSize: Natural): ptr T {.inline, raises: [].} =
+    ## Grows or shrinks a given memory block on the heap.
+    ##
+    ## If `p` is **nil** then a new memory block is returned.
+    ## In either way the block has at least `T.sizeof * newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `resizeShared` calls
+    ## `freeShared(p)`.
+    ## In other cases the block has to be freed with
+    ## `freeShared <#freeShared,ptr.T>`_.
+    cast[ptr T](reallocShared(p, T.sizeof * newSize))
+
+  proc deallocShared*(p: pointer) {.noconv, compilerproc, rtl, benign, raises: [], tags: [].} =
+    ## Frees the memory allocated with `allocShared`, `allocShared0` or
+    ## `reallocShared`.
+    ##
+    ## **This procedure is dangerous!**
+    ## If one forgets to free the memory a leak occurs; if one tries to
+    ## access freed memory (or just freeing it twice!) a core dump may happen
+    ## or other memory may be corrupted.
+    incStat(deallocCount)
+    deallocSharedImpl(p)
+
+  proc freeShared*[T](p: ptr T) {.inline, benign, raises: [].} =
+    ## Frees the memory allocated with `createShared`, `createSharedU` or
+    ## `resizeShared`.
+    ##
+    ## **This procedure is dangerous!**
+    ## If one forgets to free the memory a leak occurs; if one tries to
+    ## access freed memory (or just freeing it twice!) a core dump may happen
+    ## or other memory may be corrupted.
+    deallocShared(p)
+
+  include bitmasks
+
+  template `+!`(p: pointer, s: SomeInteger): pointer =
+    cast[pointer](cast[int](p) +% int(s))
+
+  template `-!`(p: pointer, s: SomeInteger): pointer =
+    cast[pointer](cast[int](p) -% int(s))
+
+  proc alignedAlloc(size, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = allocShared(size)
+      else:
+        result = alloc(size)
+    else:
+      # allocate (size + align - 1) necessary for alignment,
+      # plus 2 bytes to store offset
+      when compileOption("threads"):
+        let base = allocShared(size + align - 1 + sizeof(uint16))
+      else:
+        let base = alloc(size + align - 1 + sizeof(uint16))
+      # memory layout: padding + offset (2 bytes) + user_data
+      # in order to deallocate: read offset at user_data - 2 bytes,
+      # then deallocate user_data - offset
+      let offset = align - (cast[int](base) and (align - 1))
+      cast[ptr uint16](base +! (offset - sizeof(uint16)))[] = uint16(offset)
+      result = base +! offset
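A worked example of the offset computation (the raw address is illustrative):

  let align = 64
  let base = 0x1002                        # pretend address returned by the underlying alloc
  let offset = align - (base and (align - 1))
  assert offset == 62
  assert (base + offset) mod align == 0    # the user pointer is 64-byte aligned
  # the offset is stored as a uint16 just below the user pointer so that
  # alignedDealloc can recover `base` and free it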
+
+  proc alignedAlloc0(size, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = allocShared0(size)
+      else:
+        result = alloc0(size)
+    else:
+      # see comments for alignedAlloc
+      when compileOption("threads"):
+        let base = allocShared0(size + align - 1 + sizeof(uint16))
+      else:
+        let base = alloc0(size + align - 1 + sizeof(uint16))
+      let offset = align - (cast[int](base) and (align - 1))
+      cast[ptr uint16](base +! (offset - sizeof(uint16)))[] = uint16(offset)
+      result = base +! offset
+
+  proc alignedDealloc(p: pointer, align: int) {.compilerproc.} =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        deallocShared(p)
+      else:
+        dealloc(p)
+    else:
+      # read offset at p - 2 bytes, then deallocate (p - offset) pointer
+      let offset = cast[ptr uint16](p -! sizeof(uint16))[]
+      when compileOption("threads"):
+        deallocShared(p -! offset)
+      else:
+        dealloc(p -! offset)
+
+  proc alignedRealloc(p: pointer, oldSize, newSize, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = reallocShared(p, newSize)
+      else:
+        result = realloc(p, newSize)
+    else:
+      result = alignedAlloc(newSize, align)
+      copyMem(result, p, oldSize)
+      alignedDealloc(p, align)
+
+  proc alignedRealloc0(p: pointer, oldSize, newSize, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = reallocShared0(p, oldSize, newSize)
+      else:
+        result = realloc0(p, oldSize, newSize)
+    else:
+      result = alignedAlloc(newSize, align)
+      copyMem(result, p, oldSize)
+      zeroMem(result +! oldSize, newSize - oldSize)
+      alignedDealloc(p, align)
+
+  {.pop.}
+
+# GC interface:
+
+when hasAlloc:
+  proc getOccupiedMem*(): int {.rtl.}
+    ## Returns the number of bytes that are owned by the process and hold data.
+
+  proc getFreeMem*(): int {.rtl.}
+    ## Returns the number of bytes that are owned by the process, but do not
+    ## hold any meaningful data.
+
+  proc getTotalMem*(): int {.rtl.}
+    ## Returns the number of bytes that are owned by the process.
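A quick sketch of reading these counters:

  echo "occupied: ", getOccupiedMem(), " free: ", getFreeMem(), " total: ", getTotalMem()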
+
+
+when defined(js):
+  # Stubs:
+  proc getOccupiedMem(): int = return -1
+  proc getFreeMem(): int = return -1
+  proc getTotalMem(): int = return -1
+
+  proc dealloc(p: pointer) = discard
+  proc alloc(size: Natural): pointer = discard
+  proc alloc0(size: Natural): pointer = discard
+  proc realloc(p: pointer, newsize: Natural): pointer = discard
+  proc realloc0(p: pointer, oldsize, newsize: Natural): pointer = discard
+
+  proc allocShared(size: Natural): pointer = discard
+  proc allocShared0(size: Natural): pointer = discard
+  proc deallocShared(p: pointer) = discard
+  proc reallocShared(p: pointer, newsize: Natural): pointer = discard
+  proc reallocShared0(p: pointer, oldsize, newsize: Natural): pointer = discard
+
+
+when hasAlloc and hasThreadSupport and not defined(useMalloc):
+  proc getOccupiedSharedMem*(): int {.rtl.}
+    ## Returns the number of bytes that are owned by the process
+    ## on the shared heap and hold data. This is only available when
+    ## threads are enabled.
+
+  proc getFreeSharedMem*(): int {.rtl.}
+    ## Returns the number of bytes that are owned by the
+    ## process on the shared heap, but do not hold any meaningful data.
+    ## This is only available when threads are enabled.
+
+  proc getTotalSharedMem*(): int {.rtl.}
+    ## Returns the number of bytes on the shared heap that are owned by the
+    ## process. This is only available when threads are enabled.
diff --git a/lib/system/memory.nim b/lib/system/memory.nim
new file mode 100644
index 000000000..156773c48
--- /dev/null
+++ b/lib/system/memory.nim
@@ -0,0 +1,55 @@
+{.push stack_trace: off.}
+
+const useLibC = not defined(nimNoLibc)
+
+when useLibC:
+  import ansi_c
+
+proc nimCopyMem*(dest, source: pointer, size: Natural) {.nonReloadable, compilerproc, inline.} =
+  when useLibC:
+    c_memcpy(dest, source, cast[csize_t](size))
+  else:
+    let d = cast[ptr UncheckedArray[byte]](dest)
+    let s = cast[ptr UncheckedArray[byte]](source)
+    var i = 0
+    while i < size:
+      d[i] = s[i]
+      inc i
+
+proc nimSetMem*(a: pointer, v: cint, size: Natural) {.nonReloadable, inline.} =
+  when useLibC:
+    c_memset(a, v, cast[csize_t](size))
+  else:
+    let a = cast[ptr UncheckedArray[byte]](a)
+    var i = 0
+    let v = cast[byte](v)
+    while i < size:
+      a[i] = v
+      inc i
+
+proc nimZeroMem*(p: pointer, size: Natural) {.compilerproc, nonReloadable, inline.} =
+  nimSetMem(p, 0, size)
+
+proc nimCmpMem*(a, b: pointer, size: Natural): cint {.compilerproc, nonReloadable, inline.} =
+  when useLibC:
+    c_memcmp(a, b, cast[csize_t](size))
+  else:
+    let a = cast[ptr UncheckedArray[byte]](a)
+    let b = cast[ptr UncheckedArray[byte]](b)
+    var i = 0
+    while i < size:
+      let d = a[i].cint - b[i].cint
+      if d != 0: return d
+      inc i
+
+proc nimCStrLen*(a: cstring): int {.compilerproc, nonReloadable, inline.} =
+  if a.isNil: return 0
+  when useLibC:
+    cast[int](c_strlen(a))
+  else:
+    var a = cast[ptr byte](a)
+    while a[] != 0:
+      a = cast[ptr byte](cast[uint](a) + 1)
+      inc result
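A minimal check of the expected behaviour (illustrative; these helpers back the C-string handling elsewhere in the system module):

  assert nimCStrLen(cstring"hello") == 5
  assert nimCStrLen(nil) == 0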
+
+{.pop.}
diff --git a/lib/system/memtracker.nim b/lib/system/memtracker.nim
index ae0297438..289f4e024 100644
--- a/lib/system/memtracker.nim
+++ b/lib/system/memtracker.nim
@@ -35,7 +35,7 @@ type
     count*: int
     disabled: bool
     data*: array[400, LogEntry]
-  TrackLogger* = proc (log: TrackLog) {.nimcall, tags: [], locks: 0, gcsafe.}
+  TrackLogger* = proc (log: TrackLog) {.nimcall, tags: [], gcsafe.}
 
 var
   gLog*: TrackLog
@@ -70,22 +70,22 @@ proc addEntry(entry: LogEntry) =
     if interesting:
       gLog.disabled = true
       cprintf("interesting %s:%ld %s\n", entry.file, entry.line, entry.op)
-      let x = cast[proc() {.nimcall, tags: [], gcsafe, locks: 0.}](writeStackTrace)
+      let x = cast[proc() {.nimcall, tags: [], gcsafe, raises: [].}](writeStackTrace)
       x()
-      quit 1
-      if gLog.count > high(gLog.data):
-        gLogger(gLog)
-        gLog.count = 0
-      gLog.data[gLog.count] = entry
-      inc gLog.count
-      gLog.disabled = false
-
-proc memTrackerWrite(address: pointer; size: int; file: cstring; line: int) {.compilerProc.} =
+      rawQuit 1
+      #if gLog.count > high(gLog.data):
+      #  gLogger(gLog)
+      #  gLog.count = 0
+      #gLog.data[gLog.count] = entry
+      #inc gLog.count
+      #gLog.disabled = false
+
+proc memTrackerWrite(address: pointer; size: int; file: cstring; line: int) {.compilerproc.} =
   addEntry LogEntry(op: "write", address: address,
       size: size, file: file, line: line, thread: myThreadId())
 
 proc memTrackerOp*(op: cstring; address: pointer; size: int) {.tags: [],
-         locks: 0, gcsafe.} =
+         gcsafe.} =
   addEntry LogEntry(op: op, address: address, size: size,
       file: "", line: 0, thread: myThreadId())
 
@@ -100,6 +100,7 @@ proc logPendingOps() {.noconv.} =
   gLogger(gLog)
   gLog.count = 0
 
-addQuitProc logPendingOps
+import std/exitprocs
+addExitProc logPendingOps
 
 {.pop.}
diff --git a/lib/system/mm/boehm.nim b/lib/system/mm/boehm.nim
new file mode 100644
index 000000000..362d2d470
--- /dev/null
+++ b/lib/system/mm/boehm.nim
@@ -0,0 +1,140 @@
+
+
+
+proc boehmGCinit {.importc: "GC_init", boehmGC.}
+proc boehmGC_disable {.importc: "GC_disable", boehmGC.}
+proc boehmGC_enable {.importc: "GC_enable", boehmGC.}
+proc boehmGCincremental {.
+  importc: "GC_enable_incremental", boehmGC.}
+proc boehmGCfullCollect {.importc: "GC_gcollect", boehmGC.}
+proc boehmGC_set_all_interior_pointers(flag: cint) {.
+  importc: "GC_set_all_interior_pointers", boehmGC.}
+proc boehmAlloc(size: int): pointer {.importc: "GC_malloc", boehmGC.}
+proc boehmAllocAtomic(size: int): pointer {.
+  importc: "GC_malloc_atomic", boehmGC.}
+proc boehmRealloc(p: pointer, size: int): pointer {.
+  importc: "GC_realloc", boehmGC.}
+proc boehmDealloc(p: pointer) {.importc: "GC_free", boehmGC.}
+when hasThreadSupport:
+  proc boehmGC_allow_register_threads {.
+    importc: "GC_allow_register_threads", boehmGC.}
+
+proc boehmGetHeapSize: int {.importc: "GC_get_heap_size", boehmGC.}
+  ## Return the number of bytes in the heap.  Excludes collector private
+  ## data structures. Includes empty blocks and fragmentation loss.
+  ## Includes some pages that were allocated but never written.
+
+proc boehmGetFreeBytes: int {.importc: "GC_get_free_bytes", boehmGC.}
+  ## Return a lower bound on the number of free bytes in the heap.
+
+proc boehmGetBytesSinceGC: int {.importc: "GC_get_bytes_since_gc", boehmGC.}
+  ## Return the number of bytes allocated since the last collection.
+
+proc boehmGetTotalBytes: int {.importc: "GC_get_total_bytes", boehmGC.}
+  ## Return the total number of bytes allocated in this process.
+  ## Never decreases.
+
+proc boehmRegisterFinalizer(obj, ff, cd, off, ocd: pointer) {.importc: "GC_register_finalizer", boehmGC.}
+
+proc allocAtomic(size: int): pointer =
+  result = boehmAllocAtomic(size)
+  zeroMem(result, size)
+
+when not defined(useNimRtl):
+
+  proc allocImpl(size: Natural): pointer =
+    result = boehmAlloc(size)
+    if result == nil: raiseOutOfMem()
+  proc alloc0Impl(size: Natural): pointer =
+    result = alloc(size)
+  proc reallocImpl(p: pointer, newSize: Natural): pointer =
+    result = boehmRealloc(p, newSize)
+    if result == nil: raiseOutOfMem()
+  proc realloc0Impl(p: pointer, oldSize, newSize: Natural): pointer =
+    result = boehmRealloc(p, newSize)
+    if result == nil: raiseOutOfMem()
+    if newSize > oldSize:
+      zeroMem(cast[pointer](cast[int](result) + oldSize), newSize - oldSize)
+  proc deallocImpl(p: pointer) = boehmDealloc(p)
+
+  proc allocSharedImpl(size: Natural): pointer = allocImpl(size)
+  proc allocShared0Impl(size: Natural): pointer = alloc0Impl(size)
+  proc reallocSharedImpl(p: pointer, newSize: Natural): pointer = reallocImpl(p, newSize)
+  proc reallocShared0Impl(p: pointer, oldSize, newSize: Natural): pointer = realloc0Impl(p, oldSize, newSize)
+  proc deallocSharedImpl(p: pointer) = deallocImpl(p)
+
+  when hasThreadSupport:
+    proc getFreeSharedMem(): int =
+      boehmGetFreeBytes()
+    proc getTotalSharedMem(): int =
+      boehmGetHeapSize()
+    proc getOccupiedSharedMem(): int =
+      getTotalSharedMem() - getFreeSharedMem()
+
+  #boehmGCincremental()
+
+  proc GC_disable() = boehmGC_disable()
+  proc GC_enable() = boehmGC_enable()
+  proc GC_fullCollect() = boehmGCfullCollect()
+  proc GC_setStrategy(strategy: GC_Strategy) = discard
+  proc GC_enableMarkAndSweep() = discard
+  proc GC_disableMarkAndSweep() = discard
+  proc GC_getStatistics(): string = return ""
+
+  proc getOccupiedMem(): int = return boehmGetHeapSize()-boehmGetFreeBytes()
+  proc getFreeMem(): int = return boehmGetFreeBytes()
+  proc getTotalMem(): int = return boehmGetHeapSize()
+
+  proc nimGC_setStackBottom(theStackBottom: pointer) = discard
+
+proc initGC() =
+  when defined(boehmNoIntPtr):
+    # See #12286
+    boehmGC_set_all_interior_pointers(0)
+  boehmGCinit()
+  when hasThreadSupport:
+    boehmGC_allow_register_threads()
+
+proc boehmgc_finalizer(obj: pointer, typedFinalizer: (proc(x: pointer) {.cdecl.})) =
+  typedFinalizer(obj)
+
+
+proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
+  if ntfNoRefs in typ.flags: result = allocAtomic(size)
+  else: result = alloc(size)
+  if typ.finalizer != nil:
+    boehmRegisterFinalizer(result, boehmgc_finalizer, typ.finalizer, nil, nil)
+{.push overflowChecks: on.}
+proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
+  result = newObj(typ, align(GenericSeqSize, typ.base.align) + len * typ.base.size)
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+{.pop.}
+
+proc growObj(old: pointer, newsize: int): pointer =
+  result = realloc(old, newsize)
+
+proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
+
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
+  deprecated: "old compiler compat".} = asgnRef(dest, src)
+
+type
+  MemRegion = object
+
+proc alloc(r: var MemRegion, size: int): pointer =
+  result = boehmAlloc(size)
+  if result == nil: raiseOutOfMem()
+proc alloc0(r: var MemRegion, size: int): pointer =
+  result = alloc(size)
+  zeroMem(result, size)
+proc dealloc(r: var MemRegion, p: pointer) = boehmDealloc(p)
+proc deallocOsPages(r: var MemRegion) {.inline.} = discard
+proc deallocOsPages() {.inline.} = discard
+
+include "system/cellsets"
diff --git a/lib/system/mm/go.nim b/lib/system/mm/go.nim
new file mode 100644
index 000000000..8f3aeb964
--- /dev/null
+++ b/lib/system/mm/go.nim
@@ -0,0 +1,153 @@
+
+when defined(windows):
+  const goLib = "libgo.dll"
+elif defined(macosx):
+  const goLib = "libgo.dylib"
+else:
+  const goLib = "libgo.so"
+
+proc initGC() = discard
+proc GC_disable() = discard
+proc GC_enable() = discard
+proc go_gc() {.importc: "go_gc", dynlib: goLib.}
+proc GC_fullCollect() = go_gc()
+proc GC_setStrategy(strategy: GC_Strategy) = discard
+proc GC_enableMarkAndSweep() = discard
+proc GC_disableMarkAndSweep() = discard
+
+const
+  goNumSizeClasses = 67
+
+type
+  goMStats = object
+    alloc: uint64          # bytes allocated and still in use
+    total_alloc: uint64    # bytes allocated (even if freed)
+    sys: uint64            # bytes obtained from system
+    nlookup: uint64        # number of pointer lookups
+    nmalloc: uint64        # number of mallocs
+    nfree: uint64          # number of frees
+    heap_objects: uint64   # total number of allocated objects
+    pause_total_ns: uint64 # cumulative nanoseconds in GC stop-the-world pauses since the program started
+    numgc: uint32          # number of completed GC cycles
+
+proc goMemStats(): goMStats {.importc: "go_mem_stats", dynlib: goLib.}
+proc goMalloc(size: uint): pointer {.importc: "go_malloc", dynlib: goLib.}
+proc goSetFinalizer(obj: pointer, f: pointer) {.importc: "set_finalizer", codegenDecl:"$1 $2$3 __asm__ (\"main.Set_finalizer\");\n$1 $2$3", dynlib: goLib.}
+proc writebarrierptr(dest: PPointer, src: pointer) {.importc: "writebarrierptr", codegenDecl:"$1 $2$3 __asm__ (\"main.Atomic_store_pointer\");\n$1 $2$3", dynlib: goLib.}
+
+proc GC_getStatistics(): string =
+  var mstats = goMemStats()
+  result = "[GC] total allocated memory: " & $(mstats.total_alloc) & "\n" &
+           "[GC] total memory obtained from system: " & $(mstats.sys) & "\n" &
+           "[GC] occupied memory: " & $(mstats.alloc) & "\n" &
+           "[GC] number of pointer lookups: " & $(mstats.nlookup) & "\n" &
+           "[GC] number of mallocs: " & $(mstats.nmalloc) & "\n" &
+           "[GC] number of frees: " & $(mstats.nfree) & "\n" &
+           "[GC] heap objects: " & $(mstats.heap_objects) & "\n" &
+           "[GC] number of completed GC cycles: " & $(mstats.numgc) & "\n" &
+           "[GC] total GC pause time [ms]: " & $(mstats.pause_total_ns div 1000_000)
+
+proc getOccupiedMem(): int =
+  var mstats = goMemStats()
+  result = int(mstats.alloc)
+
+proc getFreeMem(): int =
+  var mstats = goMemStats()
+  result = int(mstats.sys - mstats.alloc)
+
+proc getTotalMem(): int =
+  var mstats = goMemStats()
+  result = int(mstats.sys)
+
+proc nimGC_setStackBottom(theStackBottom: pointer) = discard
+
+proc allocImpl(size: Natural): pointer =
+  result = goMalloc(size.uint)
+
+proc alloc0Impl(size: Natural): pointer =
+  result = goMalloc(size.uint)
+
+proc reallocImpl(p: pointer, newsize: Natural): pointer =
+  doAssert false, "not implemented"
+
+proc realloc0Impl(p: pointer, oldsize, newsize: Natural): pointer =
+  doAssert false, "not implemented"
+
+proc deallocImpl(p: pointer) =
+  discard
+
+proc allocSharedImpl(size: Natural): pointer = allocImpl(size)
+proc allocShared0Impl(size: Natural): pointer = alloc0Impl(size)
+proc reallocSharedImpl(p: pointer, newsize: Natural): pointer = reallocImpl(p, newsize)
+proc reallocShared0Impl(p: pointer, oldsize, newsize: Natural): pointer = realloc0Impl(p, oldsize, newsize)
+proc deallocSharedImpl(p: pointer) = deallocImpl(p)
+
+when hasThreadSupport:
+  proc getFreeSharedMem(): int = discard
+  proc getTotalSharedMem(): int = discard
+  proc getOccupiedSharedMem(): int = discard
+
+proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
+  writebarrierptr(addr(result), goMalloc(size.uint))
+  if typ.finalizer != nil:
+    goSetFinalizer(result, typ.finalizer)
+
+proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  writebarrierptr(addr(result), newObj(typ, size))
+
+proc newObjNoInit(typ: PNimType, size: int): pointer =
+  writebarrierptr(addr(result), newObj(typ, size))
+
+proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
+  writebarrierptr(addr(result), newObj(typ, align(GenericSeqSize, typ.base.align) + len * typ.base.size))
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+  cast[PGenericSeq](result).elemSize = typ.base.size
+  cast[PGenericSeq](result).elemAlign = typ.base.align
+
+proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
+  writebarrierptr(addr(result), newSeq(typ, len))
+
+proc nimNewSeqOfCap(typ: PNimType, cap: int): pointer {.compilerproc.} =
+  result = newObjNoInit(typ, align(GenericSeqSize, typ.base.align) + cap * typ.base.size)
+  cast[PGenericSeq](result).len = 0
+  cast[PGenericSeq](result).reserved = cap
+  cast[PGenericSeq](result).elemSize = typ.base.size
+  cast[PGenericSeq](result).elemAlign = typ.base.align
+
+proc typedMemMove(dest: pointer, src: pointer, size: uint) {.importc: "typedmemmove", dynlib: goLib.}
+
+proc growObj(old: pointer, newsize: int): pointer =
+  # the Go GC doesn't have a realloc
+  let old = cast[PGenericSeq](old)
+  var metadataOld = cast[PGenericSeq](old)
+  if metadataOld.elemSize == 0:
+    metadataOld.elemSize = 1
+
+  let oldsize = align(GenericSeqSize, old.elemAlign) + old.len * old.elemSize
+  writebarrierptr(addr(result), goMalloc(newsize.uint))
+  typedMemMove(result, old, oldsize.uint)
+
+proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCunrefNoCycle(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCunrefRC1(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} = discard
+
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  writebarrierptr(dest, src)
+proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  writebarrierptr(dest, src)
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
+  deprecated: "old compiler compat".} = asgnRef(dest, src)
+
+type
+  MemRegion = object
+
+proc alloc(r: var MemRegion, size: int): pointer =
+  result = alloc(size)
+proc alloc0(r: var MemRegion, size: int): pointer =
+  result = alloc0Impl(size)
+proc dealloc(r: var MemRegion, p: pointer) = dealloc(p)
+proc deallocOsPages(r: var MemRegion) {.inline.} = discard
+proc deallocOsPages() {.inline.} = discard
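`growObj` above cannot call a realloc because the Go collector only exposes malloc-style allocation; it recovers the old payload size from the seq header and moves that many bytes into a fresh block instead. A hedged stand-alone sketch of the same pattern over Nim's plain `alloc0`/`dealloc` (the explicit `dealloc` appears only because this sketch has no collector to reclaim the old block):

```nim
# Sketch: emulating realloc on an allocator that only offers alloc-and-copy.
# The live payload size must be recoverable from metadata, just as growObj
# recovers it from the PGenericSeq header fields.
proc growEmulated(old: pointer; oldPayloadBytes, newSize: int): pointer =
  result = alloc0(newSize)                       # fresh, zero-initialised block
  if old != nil:
    copyMem(result, old, min(oldPayloadBytes, newSize))
    dealloc(old)                                 # a tracing GC would skip this

let p = alloc0(32)
let q = growEmulated(p, 32, 128)
dealloc(q)
```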
diff --git a/lib/system/mm/malloc.nim b/lib/system/mm/malloc.nim
new file mode 100644
index 000000000..47f1a95ae
--- /dev/null
+++ b/lib/system/mm/malloc.nim
@@ -0,0 +1,97 @@
+
+{.push stackTrace: off.}
+
+proc allocImpl(size: Natural): pointer =
+  result = c_malloc(size.csize_t)
+  when defined(zephyr):
+    if result == nil:
+      raiseOutOfMem()
+
+proc alloc0Impl(size: Natural): pointer =
+  result = c_calloc(size.csize_t, 1)
+  when defined(zephyr):
+    if result == nil:
+      raiseOutOfMem()
+
+proc reallocImpl(p: pointer, newSize: Natural): pointer =
+  result = c_realloc(p, newSize.csize_t)
+  when defined(zephyr):
+    if result == nil:
+      raiseOutOfMem()
+
+proc realloc0Impl(p: pointer, oldsize, newSize: Natural): pointer =
+  result = realloc(p, newSize.csize_t)
+  if newSize > oldSize:
+    zeroMem(cast[pointer](cast[uint](result) + uint(oldSize)), newSize - oldSize)
+
+proc deallocImpl(p: pointer) =
+  c_free(p)
+
+
+# The shared allocators map on the regular ones
+
+proc allocSharedImpl(size: Natural): pointer =
+  allocImpl(size)
+
+proc allocShared0Impl(size: Natural): pointer =
+  alloc0Impl(size)
+
+proc reallocSharedImpl(p: pointer, newSize: Natural): pointer =
+  reallocImpl(p, newSize)
+
+proc reallocShared0Impl(p: pointer, oldsize, newSize: Natural): pointer =
+  realloc0Impl(p, oldSize, newSize)
+
+proc deallocSharedImpl(p: pointer) = deallocImpl(p)
+
+
+# Empty stubs for the GC
+
+proc GC_disable() = discard
+proc GC_enable() = discard
+
+when not defined(gcOrc):
+  proc GC_fullCollect() = discard
+  proc GC_enableMarkAndSweep() = discard
+  proc GC_disableMarkAndSweep() = discard
+
+proc GC_setStrategy(strategy: GC_Strategy) = discard
+
+proc getOccupiedMem(): int = discard
+proc getFreeMem(): int = discard
+proc getTotalMem(): int = discard
+
+proc nimGC_setStackBottom(theStackBottom: pointer) = discard
+
+proc initGC() = discard
+
+proc newObjNoInit(typ: PNimType, size: int): pointer =
+  result = alloc(size)
+
+proc growObj(old: pointer, newsize: int): pointer =
+  result = realloc(old, newsize)
+
+proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
+
+when not defined(gcDestructors):
+  proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+    dest[] = src
+
+proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
+  deprecated: "old compiler compat".} = asgnRef(dest, src)
+
+type
+  MemRegion = object
+
+proc alloc(r: var MemRegion, size: int): pointer =
+  result = alloc(size)
+proc alloc0(r: var MemRegion, size: int): pointer =
+  result = alloc0Impl(size)
+proc dealloc(r: var MemRegion, p: pointer) = dealloc(p)
+proc deallocOsPages(r: var MemRegion) = discard
+proc deallocOsPages() = discard
+
+{.pop.}
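`realloc0Impl` above relies on C's `realloc` preserving the old contents while leaving the grown tail uninitialised, so the wrapper zeroes the bytes in `[oldSize, newSize)` itself. A self-contained sketch of that tail-zeroing pattern (the `importc` bindings are spelled out so the snippet compiles on its own; the runtime uses its internal `c_realloc` and `raiseOutOfMem` instead):

```nim
# Sketch: a zeroing realloc on top of C's realloc, mirroring realloc0Impl above.
proc cRealloc(p: pointer; size: csize_t): pointer {.importc: "realloc", header: "<stdlib.h>".}
proc cFree(p: pointer) {.importc: "free", header: "<stdlib.h>".}

proc reallocZeroed(p: pointer; oldSize, newSize: Natural): pointer =
  result = cRealloc(p, newSize.csize_t)
  if result == nil and newSize > 0:
    quit("out of memory")
  if newSize > oldSize:
    # realloc keeps the first oldSize bytes; only the new tail needs clearing.
    zeroMem(cast[pointer](cast[uint](result) + uint(oldSize)), newSize - oldSize)

var buf = reallocZeroed(nil, 0, 64)   # realloc(nil, n) behaves like malloc(n)
buf = reallocZeroed(buf, 64, 256)     # bytes 64..255 are now guaranteed zero
cFree(buf)
```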
diff --git a/lib/system/mm/none.nim b/lib/system/mm/none.nim
new file mode 100644
index 000000000..7818a0805
--- /dev/null
+++ b/lib/system/mm/none.nim
@@ -0,0 +1,46 @@
+
+when appType == "lib":
+  {.warning: "nogc in a library context may not work".}
+
+include "system/alloc"
+
+proc initGC() = discard
+proc GC_disable() = discard
+proc GC_enable() = discard
+proc GC_fullCollect() = discard
+proc GC_setStrategy(strategy: GC_Strategy) = discard
+proc GC_enableMarkAndSweep() = discard
+proc GC_disableMarkAndSweep() = discard
+proc GC_getStatistics(): string = return ""
+
+proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
+  result = alloc0Impl(size)
+
+proc newObjNoInit(typ: PNimType, size: int): pointer =
+  result = alloc(size)
+
+{.push overflowChecks: on.}
+proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
+  result = newObj(typ, align(GenericSeqSize, typ.align) + len * typ.base.size)
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+{.pop.}
+
+proc growObj(old: pointer, newsize: int): pointer =
+  result = realloc(old, newsize)
+
+proc nimGC_setStackBottom(theStackBottom: pointer) = discard
+proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
+
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
+  deprecated: "old compiler compat".} = asgnRef(dest, src)
+
+var allocator {.rtlThreadVar.}: MemRegion
+instantiateForRegion(allocator)
+
+include "system/cellsets"
diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim
index b33ca93f2..26f2f0bbf 100644
--- a/lib/system/mmdisp.nim
+++ b/lib/system/mmdisp.nim
@@ -17,10 +17,10 @@ const
   debugGC = false # we wish to debug the GC...
   logGC = false
   traceGC = false # extensive debugging
-  alwaysCycleGC = defined(smokeCycles)
-  alwaysGC = defined(fulldebug) # collect after every memory
+  alwaysCycleGC = defined(nimSmokeCycles)
+  alwaysGC = defined(nimFulldebug) # collect after every memory
                                 # allocation (for debugging)
-  leakDetector = defined(leakDetector)
+  leakDetector = defined(nimLeakDetector)
   overwriteFree = defined(nimBurnFree) # overwrite memory with 0xFF before free
   trackAllocationSource = leakDetector
 
@@ -30,548 +30,84 @@ const
   coalescRight = true
   coalescLeft = true
   logAlloc = false
-  useCellIds = defined(corruption)
+  useCellIds = defined(nimCorruption)
 
 type
   PPointer = ptr pointer
   ByteArray = UncheckedArray[byte]
   PByte = ptr ByteArray
   PString = ptr string
-{.deprecated: [TByteArray: ByteArray].}
 
-# Page size of the system; in most cases 4096 bytes. For exotic OS or
-# CPU this needs to be changed:
-const
-  PageShift = when defined(cpu16): 8 else: 12 # \
-    # my tests showed no improvments for using larger page sizes.
-  PageSize = 1 shl PageShift
-  PageMask = PageSize-1
-
-  MemAlign = 8 # also minimal allocatable memory block
-
-  BitsPerPage = PageSize div MemAlign
-  UnitsPerPage = BitsPerPage div (sizeof(int)*8)
-    # how many ints do we need to describe a page:
-    # on 32 bit systems this is only 16 (!)
-
-  TrunkShift = 9
-  BitsPerTrunk = 1 shl TrunkShift # needs to be power of 2 and divisible by 64
-  TrunkMask = BitsPerTrunk - 1
-  IntsPerTrunk = BitsPerTrunk div (sizeof(int)*8)
-  IntShift = 5 + ord(sizeof(int) == 8) # 5 or 6, depending on int width
-  IntMask = 1 shl IntShift - 1
+when declared(IntsPerTrunk):
+  discard
+else:
+  include bitmasks
 
 proc raiseOutOfMem() {.noinline.} =
   if outOfMemHook != nil: outOfMemHook()
-  echo("out of memory")
-  quit(1)
+  cstderr.rawWrite("out of memory\n")
+  rawQuit(1)
 
 when defined(boehmgc):
-  proc boehmGCinit {.importc: "GC_init", boehmGC.}
-  proc boehmGC_disable {.importc: "GC_disable", boehmGC.}
-  proc boehmGC_enable {.importc: "GC_enable", boehmGC.}
-  proc boehmGCincremental {.
-    importc: "GC_enable_incremental", boehmGC.}
-  proc boehmGCfullCollect {.importc: "GC_gcollect", boehmGC.}
-  proc boehmAlloc(size: int): pointer {.importc: "GC_malloc", boehmGC.}
-  proc boehmAllocAtomic(size: int): pointer {.
-    importc: "GC_malloc_atomic", boehmGC.}
-  proc boehmRealloc(p: pointer, size: int): pointer {.
-    importc: "GC_realloc", boehmGC.}
-  proc boehmDealloc(p: pointer) {.importc: "GC_free", boehmGC.}
-  when hasThreadSupport:
-    proc boehmGC_allow_register_threads {.
-      importc: "GC_allow_register_threads", boehmGC.}
-
-  proc boehmGetHeapSize: int {.importc: "GC_get_heap_size", boehmGC.}
-    ## Return the number of bytes in the heap.  Excludes collector private
-    ## data structures. Includes empty blocks and fragmentation loss.
-    ## Includes some pages that were allocated but never written.
-
-  proc boehmGetFreeBytes: int {.importc: "GC_get_free_bytes", boehmGC.}
-    ## Return a lower bound on the number of free bytes in the heap.
-
-  proc boehmGetBytesSinceGC: int {.importc: "GC_get_bytes_since_gc", boehmGC.}
-    ## Return the number of bytes allocated since the last collection.
-
-  proc boehmGetTotalBytes: int {.importc: "GC_get_total_bytes", boehmGC.}
-    ## Return the total number of bytes allocated in this process.
-    ## Never decreases.
-
-  proc allocAtomic(size: int): pointer =
-    result = boehmAllocAtomic(size)
-    zeroMem(result, size)
-
-  when not defined(useNimRtl):
-
-    proc alloc(size: Natural): pointer =
-      result = boehmAlloc(size)
-      if result == nil: raiseOutOfMem()
-    proc alloc0(size: Natural): pointer =
-      result = alloc(size)
-    proc realloc(p: pointer, newsize: Natural): pointer =
-      result = boehmRealloc(p, newsize)
-      if result == nil: raiseOutOfMem()
-    proc dealloc(p: pointer) = boehmDealloc(p)
-
-    proc allocShared(size: Natural): pointer =
-      result = boehmAlloc(size)
-      if result == nil: raiseOutOfMem()
-    proc allocShared0(size: Natural): pointer =
-      result = allocShared(size)
-    proc reallocShared(p: pointer, newsize: Natural): pointer =
-      result = boehmRealloc(p, newsize)
-      if result == nil: raiseOutOfMem()
-    proc deallocShared(p: pointer) = boehmDealloc(p)
-
-    when hasThreadSupport:
-      proc getFreeSharedMem(): int =
-        boehmGetFreeBytes()
-      proc getTotalSharedMem(): int =
-        boehmGetHeapSize()
-      proc getOccupiedSharedMem(): int =
-        getTotalSharedMem() - getFreeSharedMem()
-
-    #boehmGCincremental()
-
-    proc GC_disable() = boehmGC_disable()
-    proc GC_enable() = boehmGC_enable()
-    proc GC_fullCollect() = boehmGCfullCollect()
-    proc GC_setStrategy(strategy: GC_Strategy) = discard
-    proc GC_enableMarkAndSweep() = discard
-    proc GC_disableMarkAndSweep() = discard
-    proc GC_getStatistics(): string = return ""
-
-    proc getOccupiedMem(): int = return boehmGetHeapSize()-boehmGetFreeBytes()
-    proc getFreeMem(): int = return boehmGetFreeBytes()
-    proc getTotalMem(): int = return boehmGetHeapSize()
-
-    proc nimGC_setStackBottom(theStackBottom: pointer) = discard
-
-  proc initGC() =
-    boehmGCinit()
-    when hasThreadSupport:
-      boehmGC_allow_register_threads()
-
-  proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
-    if ntfNoRefs in typ.flags: result = allocAtomic(size)
-    else: result = alloc(size)
-  proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
-    result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
-    cast[PGenericSeq](result).len = len
-    cast[PGenericSeq](result).reserved = len
-
-  proc growObj(old: pointer, newsize: int): pointer =
-    result = realloc(old, newsize)
-
-  proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
-  proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
-
-  proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-  proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-  proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-
-  type
-    MemRegion = object {.final, pure.}
-  {.deprecated: [TMemRegion: MemRegion].}
-
-  proc alloc(r: var MemRegion, size: int): pointer =
-    result = boehmAlloc(size)
-    if result == nil: raiseOutOfMem()
-  proc alloc0(r: var MemRegion, size: int): pointer =
-    result = alloc(size)
-    zeroMem(result, size)
-  proc dealloc(r: var MemRegion, p: pointer) = boehmDealloc(p)
-  proc deallocOsPages(r: var MemRegion) {.inline.} = discard
-  proc deallocOsPages() {.inline.} = discard
-
-  include "system/cellsets"
+  include system / mm / boehm
 
 elif defined(gogc):
-  when defined(windows):
-    const goLib = "libgo.dll"
-  elif defined(macosx):
-    const goLib = "libgo.dylib"
-  else:
-    const goLib = "libgo.so"
-
-  proc roundup(x, v: int): int {.inline.} =
-    result = (x + (v-1)) and not (v-1)
-
-  proc initGC() = discard
-  # runtime_setgcpercent is only available in GCC 5
-  proc GC_disable() = discard
-  proc GC_enable() = discard
-  proc goRuntimeGC(force: int32) {.importc: "runtime_gc", dynlib: goLib.}
-  proc GC_fullCollect() = goRuntimeGC(2)
-  proc GC_setStrategy(strategy: GC_Strategy) = discard
-  proc GC_enableMarkAndSweep() = discard
-  proc GC_disableMarkAndSweep() = discard
-
-  const
-    goNumSizeClasses = 67
-
-  type
-    cbool {.importc: "_Bool", nodecl.} = bool
-
-    goMStats_inner_struct = object
-        size: uint32
-        nmalloc: uint64
-        nfree: uint64
-
-    goMStats = object
-        # General statistics.
-        alloc: uint64            # bytes allocated and still in use
-        total_alloc: uint64      # bytes allocated (even if freed)
-        sys: uint64              # bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
-        nlookup: uint64          # number of pointer lookups
-        nmalloc: uint64          # number of mallocs
-        nfree: uint64            # number of frees
-        # Statistics about malloc heap.
-        # protected by mheap.Lock
-        heap_alloc: uint64       # bytes allocated and still in use
-        heap_sys: uint64         # bytes obtained from system
-        heap_idle: uint64        # bytes in idle spans
-        heap_inuse: uint64       # bytes in non-idle spans
-        heap_released: uint64    # bytes released to the OS
-        heap_objects: uint64 # total number of allocated objects
-        # Statistics about allocation of low-level fixed-size structures.
-        # Protected by FixAlloc locks.
-        stacks_inuse: uint64     # bootstrap stacks
-        stacks_sys: uint64
-        mspan_inuse: uint64      # MSpan structures
-        mspan_sys: uint64
-        mcache_inuse: uint64     # MCache structures
-        mcache_sys: uint64
-        buckhash_sys: uint64     # profiling bucket hash table
-        gc_sys: uint64
-        other_sys: uint64
-        # Statistics about garbage collector.
-        # Protected by mheap or stopping the world during GC.
-        next_gc: uint64          # next GC (in heap_alloc time)
-        last_gc: uint64          # last GC (in absolute time)
-        pause_total_ns: uint64
-        pause_ns: array[256, uint64] # circular buffer of recent gc pause lengths
-        pause_end: array[256, uint64] # circular buffer of recent gc end times (nanoseconds since 1970)
-        numgc: uint32
-        numforcedgc: uint32      # number of user-forced GCs
-        gc_cpu_fraction: float64 # fraction of CPU time used by GC
-        enablegc: cbool
-        debuggc: cbool
-        # Statistics about allocation size classes.
-        by_size: array[goNumSizeClasses, goMStats_inner_struct]
-        # Statistics below here are not exported to MemStats directly.
-        tinyallocs: uint64       # number of tiny allocations that didn't cause actual allocation; not exported to go directly
-        gc_trigger: uint64
-        heap_live: uint64
-        heap_scan: uint64
-        heap_marked: uint64
-
-  proc goRuntime_ReadMemStats(a2: ptr goMStats) {.cdecl,
-    importc: "runtime_ReadMemStats",
-    codegenDecl: "$1 $2$3 __asm__ (\"runtime.ReadMemStats\");\n$1 $2$3",
-    dynlib: goLib.}
-
-  proc GC_getStatistics(): string =
-    var mstats: goMStats
-    goRuntime_ReadMemStats(addr mstats)
-    result = "[GC] total allocated memory: " & $(mstats.total_alloc) & "\n" &
-             "[GC] total memory obtained from system: " & $(mstats.sys) & "\n" &
-             "[GC] occupied memory: " & $(mstats.alloc) & "\n" &
-             "[GC] number of pointer lookups: " & $(mstats.nlookup) & "\n" &
-             "[GC] number of mallocs: " & $(mstats.nmalloc) & "\n" &
-             "[GC] number of frees: " & $(mstats.nfree) & "\n" &
-             "[GC] heap objects: " & $(mstats.heap_objects) & "\n" &
-             "[GC] numgc: " & $(mstats.numgc) & "\n" &
-             "[GC] enablegc: " & $(mstats.enablegc) & "\n" &
-             "[GC] debuggc: " & $(mstats.debuggc) & "\n" &
-             "[GC] total pause time [ms]: " & $(mstats.pause_total_ns div 1000_000)
-
-  proc getOccupiedMem(): int =
-    var mstats: goMStats
-    goRuntime_ReadMemStats(addr mstats)
-    result = int(mstats.alloc)
-
-  proc getFreeMem(): int =
-    var mstats: goMStats
-    goRuntime_ReadMemStats(addr mstats)
-    result = int(mstats.sys - mstats.alloc)
-
-  proc getTotalMem(): int =
-    var mstats: goMStats
-    goRuntime_ReadMemStats(addr mstats)
-    result = int(mstats.sys)
-
-  proc nimGC_setStackBottom(theStackBottom: pointer) = discard
-
-  proc alloc(size: Natural): pointer =
-    result = c_malloc(size)
-    if result == nil: raiseOutOfMem()
-
-  proc alloc0(size: Natural): pointer =
-    result = alloc(size)
-    zeroMem(result, size)
-
-  proc realloc(p: pointer, newsize: Natural): pointer =
-    result = c_realloc(p, newsize)
-    if result == nil: raiseOutOfMem()
-
-  proc dealloc(p: pointer) = c_free(p)
-
-  proc allocShared(size: Natural): pointer =
-    result = c_malloc(size)
-    if result == nil: raiseOutOfMem()
-
-  proc allocShared0(size: Natural): pointer =
-    result = alloc(size)
-    zeroMem(result, size)
+  include system / mm / go
 
-  proc reallocShared(p: pointer, newsize: Natural): pointer =
-    result = c_realloc(p, newsize)
-    if result == nil: raiseOutOfMem()
+elif (defined(nogc) or defined(gcDestructors)) and defined(useMalloc):
+  include system / mm / malloc
 
-  proc deallocShared(p: pointer) = c_free(p)
+  when defined(nogc):
+    proc GC_getStatistics(): string = ""
+    proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
+      result = alloc0(size)
 
-  when hasThreadSupport:
-    proc getFreeSharedMem(): int = discard
-    proc getTotalSharedMem(): int = discard
-    proc getOccupiedSharedMem(): int = discard
-
-  const goFlagNoZero: uint32 = 1 shl 3
-  proc goRuntimeMallocGC(size: uint, typ: uint, flag: uint32): pointer {.importc: "runtime_mallocgc", dynlib: goLib.}
-
-  proc goSetFinalizer(obj: pointer, f: pointer) {.importc: "set_finalizer", codegenDecl:"$1 $2$3 __asm__ (\"main.Set_finalizer\");\n$1 $2$3", dynlib: goLib.}
-
-  proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
-    result = goRuntimeMallocGC(roundup(size, sizeof(pointer)).uint, 0.uint, 0.uint32)
-    if typ.finalizer != nil:
-      goSetFinalizer(result, typ.finalizer)
-
-  proc newObjNoInit(typ: PNimType, size: int): pointer =
-    result = goRuntimeMallocGC(roundup(size, sizeof(pointer)).uint, 0.uint, goFlagNoZero)
-    if typ.finalizer != nil:
-      goSetFinalizer(result, typ.finalizer)
-
-  proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
-    result = newObj(typ, len * typ.base.size + GenericSeqSize)
-    cast[PGenericSeq](result).len = len
-    cast[PGenericSeq](result).reserved = len
-    cast[PGenericSeq](result).elemSize = typ.base.size
-
-  proc nimNewSeqOfCap(typ: PNimType, cap: int): pointer {.compilerproc.} =
-    result = newObj(typ, cap * typ.base.size + GenericSeqSize)
-    cast[PGenericSeq](result).len = 0
-    cast[PGenericSeq](result).reserved = cap
-    cast[PGenericSeq](result).elemSize = typ.base.size
-
-  proc growObj(old: pointer, newsize: int): pointer =
-    # the Go GC doesn't have a realloc
-    var
-      oldsize = cast[PGenericSeq](old).len * cast[PGenericSeq](old).elemSize + GenericSeqSize
-    result = goRuntimeMallocGC(roundup(newsize, sizeof(pointer)).uint, 0.uint, goFlagNoZero)
-    copyMem(result, old, oldsize)
-    zeroMem(cast[pointer](cast[ByteAddress](result) +% oldsize), newsize - oldsize)
-
-  proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
-  proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
-
-  proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-  proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-  proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-
-  type
-    MemRegion = object {.final, pure.}
-  {.deprecated: [TMemRegion: MemRegion].}
-
-  proc alloc(r: var MemRegion, size: int): pointer =
-    result = alloc(size)
-  proc alloc0(r: var MemRegion, size: int): pointer =
-    result = alloc0(size)
-  proc dealloc(r: var MemRegion, p: pointer) = dealloc(p)
-  proc deallocOsPages(r: var MemRegion) {.inline.} = discard
-  proc deallocOsPages() {.inline.} = discard
-
-elif defined(nogc) and defined(useMalloc):
-
-  when not defined(useNimRtl):
-    proc alloc(size: Natural): pointer =
-      var x = c_malloc(size + sizeof(size))
-      if x == nil: raiseOutOfMem()
-
-      cast[ptr int](x)[] = size
-      result = cast[pointer](cast[int](x) + sizeof(size))
-
-    proc alloc0(size: Natural): pointer =
-      result = alloc(size)
-      zeroMem(result, size)
-    proc realloc(p: pointer, newsize: Natural): pointer =
-      var x = cast[pointer](cast[int](p) - sizeof(newsize))
-      let oldsize = cast[ptr int](x)[]
-
-      x = c_realloc(x, newsize + sizeof(newsize))
-
-      if x == nil: raiseOutOfMem()
-
-      cast[ptr int](x)[] = newsize
-      result = cast[pointer](cast[int](x) + sizeof(newsize))
-
-      if newsize > oldsize:
-        zeroMem(cast[pointer](cast[int](result) + oldsize), newsize - oldsize)
-
-    proc dealloc(p: pointer) = c_free(cast[pointer](cast[int](p) - sizeof(int)))
-
-    proc allocShared(size: Natural): pointer =
-      result = c_malloc(size)
-      if result == nil: raiseOutOfMem()
-    proc allocShared0(size: Natural): pointer =
-      result = alloc(size)
-      zeroMem(result, size)
-    proc reallocShared(p: pointer, newsize: Natural): pointer =
-      result = c_realloc(p, newsize)
-      if result == nil: raiseOutOfMem()
-    proc deallocShared(p: pointer) = c_free(p)
-
-    proc GC_disable() = discard
-    proc GC_enable() = discard
-    proc GC_fullCollect() = discard
-    proc GC_setStrategy(strategy: GC_Strategy) = discard
-    proc GC_enableMarkAndSweep() = discard
-    proc GC_disableMarkAndSweep() = discard
-    proc GC_getStatistics(): string = return ""
-
-    proc getOccupiedMem(): int = discard
-    proc getFreeMem(): int = discard
-    proc getTotalMem(): int = discard
-
-    proc nimGC_setStackBottom(theStackBottom: pointer) = discard
-
-  proc initGC() = discard
-
-  proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
-    result = alloc0(size)
-  proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
-    result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
-    cast[PGenericSeq](result).len = len
-    cast[PGenericSeq](result).reserved = len
-
-  proc newObjNoInit(typ: PNimType, size: int): pointer =
-    result = alloc(size)
-
-  proc growObj(old: pointer, newsize: int): pointer =
-    result = realloc(old, newsize)
-
-  proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
-  proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
-
-  proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-  proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-  proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-
-  type
-    MemRegion = object {.final, pure.}
-  {.deprecated: [TMemRegion: MemRegion].}
-
-  proc alloc(r: var MemRegion, size: int): pointer =
-    result = alloc(size)
-  proc alloc0(r: var MemRegion, size: int): pointer =
-    result = alloc0(size)
-  proc dealloc(r: var MemRegion, p: pointer) = dealloc(p)
-  proc deallocOsPages(r: var MemRegion) {.inline.} = discard
-  proc deallocOsPages() {.inline.} = discard
+    proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
+      result = newObj(typ, align(GenericSeqSize, typ.align) + len * typ.base.size)
+      cast[PGenericSeq](result).len = len
+      cast[PGenericSeq](result).reserved = len
 
 elif defined(nogc):
-  # Even though we don't want the GC, we cannot simply use C's memory manager
-  # because Nim's runtime wants ``realloc`` to zero out the additional
-  # space which C's ``realloc`` does not. And we cannot get the old size of an
-  # object, because C does not support this operation... Even though every
-  # possible implementation has to have a way to determine the object's size.
-  # C just sucks.
-  when appType == "lib":
-    {.warning: "nogc in a library context may not work".}
-
-  include "system/alloc"
-
-  proc initGC() = discard
-  proc GC_disable() = discard
-  proc GC_enable() = discard
-  proc GC_fullCollect() = discard
-  proc GC_setStrategy(strategy: GC_Strategy) = discard
-  proc GC_enableMarkAndSweep() = discard
-  proc GC_disableMarkAndSweep() = discard
-  proc GC_getStatistics(): string = return ""
-
-  proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
-    result = alloc0(size)
-
-  proc newObjNoInit(typ: PNimType, size: int): pointer =
-    result = alloc(size)
-
-  proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
-    result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
-    cast[PGenericSeq](result).len = len
-    cast[PGenericSeq](result).reserved = len
-
-  proc growObj(old: pointer, newsize: int): pointer =
-    result = realloc(old, newsize)
-
-  proc nimGC_setStackBottom(theStackBottom: pointer) = discard
-  proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
-  proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
-
-  proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-  proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-  proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-
-  var allocator {.rtlThreadVar.}: MemRegion
-  instantiateForRegion(allocator)
-
-  include "system/cellsets"
+  include system / mm / none
 
 else:
   when not defined(gcRegions):
     include "system/alloc"
 
-    include "system/cellsets"
-    when not leakDetector and not useCellIds:
+    when not usesDestructors:
+      include "system/cellsets"
+    when not leakDetector and not useCellIds and not defined(nimV2):
       sysAssert(sizeof(Cell) == sizeof(FreeCell), "sizeof FreeCell")
-  when compileOption("gc", "v2"):
-    include "system/gc2"
-  elif defined(gcRegions):
+  when defined(gcRegions):
     # XXX due to bootstrapping reasons, we cannot use  compileOption("gc", "stack") here
     include "system/gc_regions"
+  elif defined(nimV2) or usesDestructors:
+    when not defined(useNimRtl):
+      var allocator {.rtlThreadVar.}: MemRegion
+      instantiateForRegion(allocator)
+    when defined(gcHooks):
+      include "system/gc_hooks"
   elif defined(gcMarkAndSweep):
     # XXX use 'compileOption' here
     include "system/gc_ms"
-  elif defined(gcGenerational):
-    include "system/gc"
   else:
     include "system/gc"
 
-when not declared(nimNewSeqOfCap):
+when not declared(nimNewSeqOfCap) and not defined(nimSeqsV2):
+  {.push overflowChecks: on.}
   proc nimNewSeqOfCap(typ: PNimType, cap: int): pointer {.compilerproc.} =
     when defined(gcRegions):
-      let s = mulInt(cap, typ.base.size)  # newStr already adds GenericSeqSize
+      let s = cap * typ.base.size  # newStr already adds GenericSeqSize
       result = newStr(typ, s, ntfNoRefs notin typ.base.flags)
     else:
-      let s = addInt(mulInt(cap, typ.base.size), GenericSeqSize)
+      let s = align(GenericSeqSize, typ.base.align) + cap * typ.base.size
       when declared(newObjNoInit):
         result = if ntfNoRefs in typ.base.flags: newObjNoInit(typ, s) else: newObj(typ, s)
       else:
         result = newObj(typ, s)
       cast[PGenericSeq](result).len = 0
       cast[PGenericSeq](result).reserved = cap
+  {.pop.}
 
 {.pop.}
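The `{.push overflowChecks: on.}` around `nimNewSeqOfCap` above exists because `cap * typ.base.size` is size arithmetic on caller-supplied values: with checks off, a huge capacity silently wraps and the allocator is asked for a much smaller block than the caller expects. A tiny stand-alone illustration of the checked variant:

```nim
# Sketch: why the seq size computation is wrapped in overflow checks.
{.push overflowChecks: on.}
proc seqBytesChecked(cap, elemSize: int): int =
  ## Raises OverflowDefect instead of silently wrapping on huge requests.
  cap * elemSize
{.pop.}

echo seqBytesChecked(10, 8)   # 80
# seqBytesChecked(high(int) div 2, 4) would raise OverflowDefect rather than
# returning a small, wrapped value that the allocator would happily satisfy.
```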
 
diff --git a/lib/system/nimscript.nim b/lib/system/nimscript.nim
index 7671e5962..cf81f6d86 100644
--- a/lib/system/nimscript.nim
+++ b/lib/system/nimscript.nim
@@ -7,17 +7,18 @@
 #    distribution, for details about the copyright.
 #
 
+## To learn about scripting in Nim, see `NimScript <nims.html>`_.
 
 # Nim's configuration system now uses Nim for scripting. This module provides
 # a few things that are required for this to work.
 
 const
   buildOS* {.magic: "BuildOS".}: string = ""
-    ## The OS this build is running on. Can be different from ``system.hostOS``
+    ## The OS this build is running on. Can be different from `system.hostOS`
     ## for cross compilations.
 
   buildCPU* {.magic: "BuildCPU".}: string = ""
-    ## The CPU this build is running on. Can be different from ``system.hostCPU``
+    ## The CPU this build is running on. Can be different from `system.hostCPU`
     ## for cross compilations.
 
 template builtin = discard
@@ -25,14 +26,11 @@ template builtin = discard
 # We know the effects better than the compiler:
 {.push hint[XDeclaredButNotUsed]: off.}
 
-proc listDirs*(dir: string): seq[string] =
-  ## Lists all the subdirectories (non-recursively) in the directory `dir`.
-  builtin
-proc listFiles*(dir: string): seq[string] =
-  ## Lists all the files (non-recursively) in the directory `dir`.
-  builtin
-
-proc removeDir(dir: string){.
+proc listDirsImpl(dir: string): seq[string] {.
+  tags: [ReadIOEffect], raises: [OSError].} = builtin
+proc listFilesImpl(dir: string): seq[string] {.
+  tags: [ReadIOEffect], raises: [OSError].} = builtin
+proc removeDir(dir: string, checkDir = true) {.
   tags: [ReadIOEffect, WriteIOEffect], raises: [OSError].} = builtin
 proc removeFile(dir: string) {.
   tags: [ReadIOEffect, WriteIOEffect], raises: [OSError].} = builtin
@@ -46,7 +44,8 @@ proc copyDir(src, dest: string) {.
   tags: [ReadIOEffect, WriteIOEffect], raises: [OSError].} = builtin
 proc createDir(dir: string) {.tags: [WriteIOEffect], raises: [OSError].} =
   builtin
-proc getOsError: string = builtin
+
+proc getError: string = builtin
 proc setCurrentDir(dir: string) = builtin
 proc getCurrentDir*(): string =
   ## Retrieves the current working directory.
@@ -58,7 +57,7 @@ proc warningImpl(arg, orig: string) = discard
 proc hintImpl(arg, orig: string) = discard
 
 proc paramStr*(i: int): string =
-  ## Retrieves the ``i``'th command line parameter.
+  ## Retrieves the `i`'th command line parameter.
   builtin
 
 proc paramCount*(): int =
@@ -67,32 +66,32 @@ proc paramCount*(): int =
 
 proc switch*(key: string, val="") =
   ## Sets a Nim compiler command line switch, for
-  ## example ``switch("checks", "on")``.
+  ## example `switch("checks", "on")`.
   builtin
 
 proc warning*(name: string; val: bool) =
   ## Disables or enables a specific warning.
   let v = if val: "on" else: "off"
-  warningImpl(name & "]:" & v, "warning[" & name & "]:" & v)
+  warningImpl(name & ":" & v, "warning:" & name & ":" & v)
 
 proc hint*(name: string; val: bool) =
   ## Disables or enables a specific hint.
   let v = if val: "on" else: "off"
-  hintImpl(name & "]:" & v, "hint[" & name & "]:" & v)
+  hintImpl(name & ":" & v, "hint:" & name & ":" & v)
 
 proc patchFile*(package, filename, replacement: string) =
   ## Overrides the location of a given file belonging to the
   ## passed package.
-  ## If the ``replacement`` is not an absolute path, the path
+  ## If the `replacement` is not an absolute path, the path
   ## is interpreted to be local to the Nimscript file that contains
-  ## the call to ``patchFile``, Nim's ``--path`` is not used at all
+  ## the call to `patchFile`, Nim's `--path` is not used at all
   ## to resolve the filename!
+  ## The compiler also performs `path substitution <nimc.html#compiler-usage-commandminusline-switches>`_ on `replacement`.
   ##
   ## Example:
-  ##
-  ## .. code-block:: nim
-  ##
+  ##   ```nim
   ##   patchFile("stdlib", "asyncdispatch", "patches/replacement")
+  ##   ```
   discard
 
 proc getCommand*(): string =
@@ -121,7 +120,11 @@ proc existsEnv*(key: string): bool {.tags: [ReadIOEffect].} =
   builtin
 
 proc putEnv*(key, val: string) {.tags: [WriteIOEffect].} =
-  ## Sets the value of the environment variable named key to val.
+  ## Sets the value of the environment variable named `key` to `val`.
+  builtin
+
+proc delEnv*(key: string) {.tags: [WriteIOEffect].} =
+  ## Deletes the environment variable named `key`.
   builtin
 
 proc fileExists*(filename: string): bool {.tags: [ReadIOEffect].} =
@@ -133,15 +136,7 @@ proc dirExists*(dir: string): bool {.
   ## Checks if the directory `dir` exists.
   builtin
 
-proc existsFile*(filename: string): bool =
-  ## An alias for ``fileExists``.
-  fileExists(filename)
-
-proc existsDir*(dir: string): bool =
-  ## An alias for ``dirExists``.
-  dirExists(dir)
-
-proc selfExe*(): string =
+proc selfExe*(): string {.deprecated: "Deprecated since v1.7; Use getCurrentCompilerExe".} =
   ## Returns the currently running nim or nimble executable.
   builtin
 
@@ -155,16 +150,28 @@ proc toDll*(filename: string): string =
 
 proc strip(s: string): string =
   var i = 0
-  while s[i] in {' ', '\c', '\L'}: inc i
+  while s[i] in {' ', '\c', '\n'}: inc i
   result = s.substr(i)
+  if result[0] == '"' and result[^1] == '"':
+    result = result[1..^2]
 
 template `--`*(key, val: untyped) =
-  ## A shortcut for ``switch(astToStr(key), astToStr(val))``.
-  switch(astToStr(key), strip astToStr(val))
+  ## A shortcut for `switch <#switch,string,string>`_
+  ## Example:
+  ##   ```nim
+  ##   --path:somePath # same as switch("path", "somePath")
+  ##   --path:"someOtherPath" # same as switch("path", "someOtherPath")
+  ##   --hint:"[Conf]:off" # same as switch("hint", "[Conf]:off")
+  ##   ```
+  switch(strip(astToStr(key)), strip(astToStr(val)))
 
 template `--`*(key: untyped) =
-  ## A shortcut for ``switch(astToStr(key)``.
-  switch(astToStr(key), "")
+  ## A shortcut for `switch <#switch,string,string>`_
+  ## Example:
+  ##   ```nim
+  ##   --listCmd # same as switch("listCmd")
+  ##   ```
+  switch(strip(astToStr(key)))
 
 type
   ScriptMode* {.pure.} = enum ## Controls the behaviour of the script.
@@ -177,20 +184,33 @@ var
   mode*: ScriptMode ## Set this to influence how mkDir, rmDir, rmFile etc.
                     ## behave
 
+template checkError(exc: untyped): untyped =
+  let err = getError()
+  if err.len > 0: raise newException(exc, err)
+
 template checkOsError =
-  let err = getOsError()
-  if err.len > 0: raise newException(OSError, err)
+  checkError(OSError)
 
 template log(msg: string, body: untyped) =
-  if mode == ScriptMode.Verbose or mode == ScriptMode.Whatif:
+  if mode in {ScriptMode.Verbose, ScriptMode.Whatif}:
     echo "[NimScript] ", msg
-  if mode != ScriptMode.WhatIf:
+  if mode != ScriptMode.Whatif:
     body
 
-proc rmDir*(dir: string) {.raises: [OSError].} =
+proc listDirs*(dir: string): seq[string] =
+  ## Lists all the subdirectories (non-recursively) in the directory `dir`.
+  result = listDirsImpl(dir)
+  checkOsError()
+
+proc listFiles*(dir: string): seq[string] =
+  ## Lists all the files (non-recursively) in the directory `dir`.
+  result = listFilesImpl(dir)
+  checkOsError()
+
+proc rmDir*(dir: string, checkDir = false) {.raises: [OSError].} =
   ## Removes the directory `dir`.
   log "rmDir: " & dir:
-    removeDir dir
+    removeDir(dir, checkDir = checkDir)
     checkOsError()
 
 proc rmFile*(file: string) {.raises: [OSError].} =
@@ -230,22 +250,40 @@ proc cpDir*(`from`, to: string) {.raises: [OSError].} =
     copyDir `from`, to
     checkOsError()
 
-proc exec*(command: string) =
-  ## Executes an external process.
+proc exec*(command: string) {.
+  raises: [OSError], tags: [ExecIOEffect, WriteIOEffect].} =
+  ## Executes an external process. If the external process terminates with
+  ## a non-zero exit code, an OSError exception is raised. The command is
+  ## executed relative to the current source path.
+  ##
+  ## .. note:: If you need a version of `exec` that returns the exit code
+  ##   and text output of the command, you can use `system.gorgeEx
+  ##   <system.html#gorgeEx,string,string,string>`_.
   log "exec: " & command:
     if rawExec(command) != 0:
       raise newException(OSError, "FAILED: " & command)
     checkOsError()
 
 proc exec*(command: string, input: string, cache = "") {.
-  raises: [OSError], tags: [ExecIOEffect].} =
-  ## Executes an external process.
+  raises: [OSError], tags: [ExecIOEffect, WriteIOEffect].} =
+  ## Executes an external process. If the external process terminates with
+  ## a non-zero exit code, an OSError exception is raised.
+  ##
+  ## .. warning:: This version of `exec` is executed relative to the nimscript
+  ##   module path, which affects how the command resolves relative paths. Thus
+  ##   it is generally better to use `gorgeEx` directly when you need more
+  ##   control over the execution environment or when working with commands
+  ##   that deal with relative paths.
   log "exec: " & command:
-    echo staticExec(command, input, cache)
+    let (output, exitCode) = gorgeEx(command, input, cache)
+    echo output
+    if exitCode != 0:
+      raise newException(OSError, "FAILED: " & command)
 
-proc selfExec*(command: string) =
+proc selfExec*(command: string) {.
+  raises: [OSError], tags: [ExecIOEffect, WriteIOEffect].} =
   ## Executes an external command with the current nim/nimble executable.
-  ## ``Command`` must not contain the "nim " part.
+  ## `Command` must not contain the "nim " part.
   let c = selfExe() & " " & command
   log "exec: " & c:
     if rawExec(c) != 0:
@@ -269,17 +307,31 @@ proc nimcacheDir*(): string =
   ## Retrieves the location of 'nimcache'.
   builtin
 
+proc projectName*(): string =
+  ## Retrieves the name of the current project
+  builtin
+
+proc projectDir*(): string =
+  ## Retrieves the absolute directory of the current project
+  builtin
+
+proc projectPath*(): string =
+  ## Retrieves the absolute path of the current project
+  builtin
+
 proc thisDir*(): string =
-  ## Retrieves the location of the current ``nims`` script file.
+  ## Retrieves the directory of the current `nims` script file. Its path is
+  ## obtained via `currentSourcePath` (although, currently,
+  ## `currentSourcePath` resolves symlinks, unlike `thisDir`).
   builtin
 
 proc cd*(dir: string) {.raises: [OSError].} =
   ## Changes the current directory.
   ##
   ## The change is permanent for the rest of the execution, since this is just
-  ## a shortcut for `os.setCurrentDir()
-  ## <http://nim-lang.org/docs/os.html#setCurrentDir,string>`_ . Use the `withDir()
-  ## <#withDir>`_ template if you want to perform a temporary change only.
+  ## a shortcut for `os.setCurrentDir() <os.html#setCurrentDir,string>`_ . Use
+  ## the `withDir() <#withDir.t,string,untyped>`_ template if you want to
+  ## perform a temporary change only.
   setCurrentDir(dir)
   checkOsError()
 
@@ -292,50 +344,88 @@ proc findExe*(bin: string): string =
 template withDir*(dir: string; body: untyped): untyped =
   ## Changes the current directory temporarily.
   ##
-  ## If you need a permanent change, use the `cd() <#cd>`_ proc. Usage example:
-  ##
-  ## .. code-block:: nim
+  ## If you need a permanent change, use the `cd() <#cd,string>`_ proc.
+  ## Usage example:
+  ##   ```nim
+  ##   # inside /some/path/
   ##   withDir "foo":
-  ##     # inside foo
-  ##   #back to last dir
-  var curDir = getCurrentDir()
+  ##     # move to /some/path/foo/
+  ##   # back in /some/path/
+  ##   ```
+  let curDir = getCurrentDir()
   try:
     cd(dir)
     body
   finally:
     cd(curDir)
 
-template `==?`(a, b: string): bool = cmpIgnoreStyle(a, b) == 0
-
 proc writeTask(name, desc: string) =
   if desc.len > 0:
     var spaces = " "
     for i in 0 ..< 20 - name.len: spaces.add ' '
     echo name, spaces, desc
 
-template task*(name: untyped; description: string; body: untyped): untyped =
-  ## Defines a task. Hidden tasks are supported via an empty description.
-  ## Example:
-  ##
-  ## .. code-block:: nim
-  ##  task build, "default build is via the C backend":
-  ##    setCommand "c"
-  proc `name Task`*() = body
-
-  let cmd = getCommand()
-  if cmd.len == 0 or cmd ==? "help":
-    setCommand "help"
-    writeTask(astToStr(name), description)
-  elif cmd ==? astToStr(name):
-    setCommand "nop"
-    `name Task`()
-
 proc cppDefine*(define: string) =
-  ## tell Nim that ``define`` is a C preprocessor ``#define`` and so always
+  ## tell Nim that `define` is a C preprocessor `#define` and so always
   ## needs to be mangled.
   builtin
 
+proc stdinReadLine(): string {.
+  tags: [ReadIOEffect], raises: [IOError].} =
+  builtin
+
+proc stdinReadAll(): string {.
+  tags: [ReadIOEffect], raises: [IOError].} =
+  builtin
+
+proc readLineFromStdin*(): string {.raises: [IOError].} =
+  ## Reads a line of data from stdin; blocks until \n or until EOF (which occurs when stdin is closed).
+  log "readLineFromStdin":
+    result = stdinReadLine()
+    checkError(EOFError)
+
+proc readAllFromStdin*(): string {.raises: [IOError].} =
+  ## Reads all data from stdin; blocks until EOF (which occurs when stdin is closed).
+  log "readAllFromStdin":
+    result = stdinReadAll()
+    checkError(EOFError)
+
 when not defined(nimble):
+  template `==?`(a, b: string): bool = cmpIgnoreStyle(a, b) == 0
+  template task*(name: untyped; description: string; body: untyped): untyped =
+    ## Defines a task. Hidden tasks are supported via an empty description.
+    ##
+    ## Example:
+    ##   ```nim
+    ##   task build, "default build is via the C backend":
+    ##     setCommand "c"
+    ##   ```
+    ##
+    ## For a task named `foo`, this template generates a `proc` named
+    ## `fooTask`.  This is useful if you need to call one task in
+    ## another in your Nimscript.
+    ##
+    ## Example:
+    ##
+    ##   ```nim
+    ##   task foo, "foo":        # > nim foo
+    ##     echo "Running foo"    # Running foo
+    ##
+    ##   task bar, "bar":        # > nim bar
+    ##     echo "Running bar"    # Running bar
+    ##     fooTask()             # Running foo
+    ##   ```
+    proc `name Task`*() =
+      setCommand "nop"
+      body
+
+    let cmd = getCommand()
+    if cmd.len == 0 or cmd ==? "help":
+      setCommand "help"
+      writeTask(astToStr(name), description)
+    elif cmd ==? astToStr(name):
+      `name Task`()
+
   # nimble has its own implementation for these things.
   var
     packageName* = ""    ## Nimble support: Set this to the package name. It
diff --git a/lib/system/orc.nim b/lib/system/orc.nim
new file mode 100644
index 000000000..c02a24989
--- /dev/null
+++ b/lib/system/orc.nim
@@ -0,0 +1,543 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2020 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Cycle collector based on
+# https://www.cs.purdue.edu/homes/hosking/690M/Bacon01Concurrent.pdf
+# and ideas from Lins' 2008 notion of "critical links"; see
+# "Cyclic reference counting" by Rafael Dueire Lins
+# R.D. Lins / Information Processing Letters 109 (2008) 71–78
+#
+
+include cellseqs_v2
+
+const
+  colBlack = 0b000
+  colGray = 0b001
+  colWhite = 0b010
+  maybeCycle = 0b100 # possibly part of a cycle; this has to be a "sticky" bit
+  jumpStackFlag = 0b1000
+  colorMask = 0b011
+
+  logOrc = defined(nimArcIds)
+
+type
+  TraceProc = proc (p, env: pointer) {.nimcall, benign.}
+  DisposeProc = proc (p: pointer) {.nimcall, benign.}
+
+template color(c): untyped = c.rc and colorMask
+template setColor(c, col) =
+  when col == colBlack:
+    c.rc = c.rc and not colorMask
+  else:
+    c.rc = c.rc and not colorMask or col
+
+const
+  optimizedOrc = false # not defined(nimOldOrc)
+# XXX Still incorrect, see tests/arc/tdestroy_in_loopcond
+
+proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
+  let h = head(p)
+  inc h.rc, rcIncrement
+  when optimizedOrc:
+    if cyclic:
+      h.rc = h.rc or maybeCycle
+
+proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} =
+  when optimizedOrc:
+    if p != nil:
+      let h = head(p)
+      h.rc = h.rc or maybeCycle
+
+proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
+  # This is only used by the old RTTI mechanism and we know
+  # that 'dest[]' is nil and needs no destruction. Which is really handy
+  # as we cannot destroy the object reliably if it's an object of unknown
+  # compile-time type.
+  dest[] = src
+  if src != nil: nimIncRefCyclic(src, true)
+
+const
+  useJumpStack = false # for thavlak the jump stack doesn't improve the performance at all
+
+type
+  GcEnv = object
+    traceStack: CellSeq[ptr pointer]
+    when useJumpStack:
+      jumpStack: CellSeq[ptr pointer]   # Lins' jump stack in order to speed up traversals
+    toFree: CellSeq[Cell]
+    freed, touched, edges, rcSum: int
+    keepThreshold: bool
+
+proc trace(s: Cell; desc: PNimTypeV2; j: var GcEnv) {.inline.} =
+  if desc.traceImpl != nil:
+    var p = s +! sizeof(RefHeader)
+    cast[TraceProc](desc.traceImpl)(p, addr(j))
+
+include threadids
+
+when logOrc or orcLeakDetector:
+  proc writeCell(msg: cstring; s: Cell; desc: PNimTypeV2) =
+    when orcLeakDetector:
+      cfprintf(cstderr, "%s %s file: %s:%ld; color: %ld; thread: %ld\n",
+        msg, desc.name, s.filename, s.line, s.color, getThreadId())
+    else:
+      cfprintf(cstderr, "%s %s %ld root index: %ld; RC: %ld; color: %ld; thread: %ld\n",
+        msg, desc.name, s.refId, s.rootIdx, s.rc shr rcShift, s.color, getThreadId())
+
+proc free(s: Cell; desc: PNimTypeV2) {.inline.} =
+  when traceCollector:
+    cprintf("[From ] %p rc %ld color %ld\n", s, s.rc shr rcShift, s.color)
+  let p = s +! sizeof(RefHeader)
+
+  when logOrc: writeCell("free", s, desc)
+
+  if desc.destructor != nil:
+    cast[DestructorProc](desc.destructor)(p)
+
+  when false:
+    cstderr.rawWrite desc.name
+    cstderr.rawWrite " "
+    if desc.destructor == nil:
+      cstderr.rawWrite "lacks dispose"
+      if desc.traceImpl != nil:
+        cstderr.rawWrite ", but has trace\n"
+      else:
+        cstderr.rawWrite ", and lacks trace\n"
+    else:
+      cstderr.rawWrite "has dispose!\n"
+
+  nimRawDispose(p, desc.align)
+
+template orcAssert(cond, msg) =
+  when logOrc:
+    if not cond:
+      cfprintf(cstderr, "[Bug!] %s\n", msg)
+      rawQuit 1
+
+when logOrc:
+  proc strstr(s, sub: cstring): cstring {.header: "<string.h>", importc.}
+
+proc nimTraceRef(q: pointer; desc: PNimTypeV2; env: pointer) {.compilerRtl, inl.} =
+  let p = cast[ptr pointer](q)
+  if p[] != nil:
+
+    orcAssert strstr(desc.name, "TType") == nil, "following a TType but it's acyclic!"
+
+    var j = cast[ptr GcEnv](env)
+    j.traceStack.add(p, desc)
+
+proc nimTraceRefDyn(q: pointer; env: pointer) {.compilerRtl, inl.} =
+  let p = cast[ptr pointer](q)
+  if p[] != nil:
+    var j = cast[ptr GcEnv](env)
+    j.traceStack.add(p, cast[ptr PNimTypeV2](p[])[])
+
+var
+  roots {.threadvar.}: CellSeq[Cell]
+
+proc unregisterCycle(s: Cell) =
+  # swap with the last element. O(1)
+  let idx = s.rootIdx-1
+  when false:
+    if idx >= roots.len or idx < 0:
+      cprintf("[Bug!] %ld %ld\n", idx, roots.len)
+      rawQuit 1
+  roots.d[idx] = roots.d[roots.len-1]
+  roots.d[idx][0].rootIdx = idx+1
+  dec roots.len
+  s.rootIdx = 0
+
+proc scanBlack(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
+  #[
+  proc scanBlack(s: Cell) =
+    setColor(s, colBlack)
+    for t in sons(s):
+      t.rc = t.rc + rcIncrement
+      if t.color != colBlack:
+        scanBlack(t)
+  ]#
+  s.setColor colBlack
+  let until = j.traceStack.len
+  trace(s, desc, j)
+  when logOrc: writeCell("root still alive", s, desc)
+  while j.traceStack.len > until:
+    let (entry, desc) = j.traceStack.pop()
+    let t = head entry[]
+    inc t.rc, rcIncrement
+    if t.color != colBlack:
+      t.setColor colBlack
+      trace(t, desc, j)
+      when logOrc: writeCell("child still alive", t, desc)
+
+proc markGray(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
+  #[
+  proc markGray(s: Cell) =
+    if s.color != colGray:
+      setColor(s, colGray)
+      for t in sons(s):
+        t.rc = t.rc - rcIncrement
+        if t.color != colGray:
+          markGray(t)
+  ]#
+  if s.color != colGray:
+    s.setColor colGray
+    inc j.touched
+    # keep in mind that refcounts are zero based so add 1 here:
+    inc j.rcSum, (s.rc shr rcShift) + 1
+    orcAssert(j.traceStack.len == 0, "markGray: trace stack not empty")
+    trace(s, desc, j)
+    while j.traceStack.len > 0:
+      let (entry, desc) = j.traceStack.pop()
+      let t = head entry[]
+      dec t.rc, rcIncrement
+      inc j.edges
+      when useJumpStack:
+        if (t.rc shr rcShift) >= 0 and (t.rc and jumpStackFlag) == 0:
+          t.rc = t.rc or jumpStackFlag
+          when traceCollector:
+            cprintf("[Now in jumpstack] %p %ld color %ld in jumpstack %ld\n", t, t.rc shr rcShift, t.color, t.rc and jumpStackFlag)
+          j.jumpStack.add(entry, desc)
+      if t.color != colGray:
+        t.setColor colGray
+        inc j.touched
+        # we already decremented its refcount so account for that:
+        inc j.rcSum, (t.rc shr rcShift) + 2
+        trace(t, desc, j)
+
+proc scan(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
+  #[
+  proc scan(s: Cell) =
+    if s.color == colGray:
+      if s.rc > 0:
+        scanBlack(s)
+      else:
+        s.setColor(colWhite)
+        for t in sons(s): scan(t)
+  ]#
+  if s.color == colGray:
+    if (s.rc shr rcShift) >= 0:
+      scanBlack(s, desc, j)
+      # XXX this should be done according to Lins' paper but currently breaks
+      #when useJumpStack:
+      #  s.setColor colPurple
+    else:
+      when useJumpStack:
+        # first we have to repair all the nodes we have seen
+        # that are still alive; we also need to mark what they
+        # refer to as alive:
+        while j.jumpStack.len > 0:
+          let (entry, desc) = j.jumpStack.pop
+          let t = head entry[]
+          # not in jump stack anymore!
+          t.rc = t.rc and not jumpStackFlag
+          if t.color == colGray and (t.rc shr rcShift) >= 0:
+            scanBlack(t, desc, j)
+            # XXX this should be done according to Lins' paper but currently breaks
+            #t.setColor colPurple
+            when traceCollector:
+              cprintf("[jump stack] %p %ld\n", t, t.rc shr rcShift)
+
+      orcAssert(j.traceStack.len == 0, "scan: trace stack not empty")
+      s.setColor(colWhite)
+      trace(s, desc, j)
+      while j.traceStack.len > 0:
+        let (entry, desc) = j.traceStack.pop()
+        let t = head entry[]
+        if t.color == colGray:
+          if (t.rc shr rcShift) >= 0:
+            scanBlack(t, desc, j)
+          else:
+            when useJumpStack:
+              # first we have to repair all the nodes we have seen
+              # that are still alive; we also need to mark what they
+              # refer to as alive:
+              while j.jumpStack.len > 0:
+                let (entry, desc) = j.jumpStack.pop
+                let t = head entry[]
+                # not in jump stack anymore!
+                t.rc = t.rc and not jumpStackFlag
+                if t.color == colGray and (t.rc shr rcShift) >= 0:
+                  scanBlack(t, desc, j)
+                  # XXX this should be done according to Lins' paper but currently breaks
+                  #t.setColor colPurple
+                  when traceCollector:
+                    cprintf("[jump stack] %p %ld\n", t, t.rc shr rcShift)
+
+            t.setColor(colWhite)
+            trace(t, desc, j)
+
+when false:
+  proc writeCell(msg: cstring; s: Cell) =
+    cfprintf(cstderr, "%s %p root index: %ld; RC: %ld; color: %ld\n",
+      msg, s, s.rootIdx, s.rc shr rcShift, s.color)
+
+proc collectColor(s: Cell; desc: PNimTypeV2; col: int; j: var GcEnv) =
+  #[
+    was: 'collectWhite'.
+
+  proc collectWhite(s: Cell) =
+    if s.color == colWhite and not buffered(s):
+      s.setColor(colBlack)
+      for t in sons(s):
+        collectWhite(t)
+      free(s) # watch out, a bug here!
+  ]#
+  if s.color == col and s.rootIdx == 0:
+    orcAssert(j.traceStack.len == 0, "collectWhite: trace stack not empty")
+
+    s.setColor(colBlack)
+    j.toFree.add(s, desc)
+    trace(s, desc, j)
+    while j.traceStack.len > 0:
+      let (entry, desc) = j.traceStack.pop()
+      let t = head entry[]
+      entry[] = nil # ensure that the destructor does not touch moribund objects!
+      if t.color == col and t.rootIdx == 0:
+        j.toFree.add(t, desc)
+        t.setColor(colBlack)
+        trace(t, desc, j)
+
+const
+  defaultThreshold = when defined(nimFixedOrc): 10_000 else: 128
+
+when defined(nimStressOrc):
+  const rootsThreshold = 10 # broken with -d:nimStressOrc: 10 and for havlak iterations 1..8
+else:
+  var rootsThreshold {.threadvar.}: int
+
+proc collectCyclesBacon(j: var GcEnv; lowMark: int) =
+  # pretty direct translation from
+  # https://researcher.watson.ibm.com/researcher/files/us-bacon/Bacon01Concurrent.pdf
+  # Fig. 2. Synchronous Cycle Collection
+  #[
+    for s in roots:
+      markGray(s)
+    for s in roots:
+      scan(s)
+    for s in roots:
+      remove s from roots
+      s.buffered = false
+      collectWhite(s)
+  ]#
+  let last = roots.len - 1
+  when logOrc:
+    for i in countdown(last, lowMark):
+      writeCell("root", roots.d[i][0], roots.d[i][1])
+
+  for i in countdown(last, lowMark):
+    markGray(roots.d[i][0], roots.d[i][1], j)
+
+  var colToCollect = colWhite
+  if j.rcSum == j.edges:
+    # short-cut: we know everything is garbage:
+    colToCollect = colGray
+    # remember the fact that we got so lucky:
+    j.keepThreshold = true
+  else:
+    for i in countdown(last, lowMark):
+      scan(roots.d[i][0], roots.d[i][1], j)
+
+  init j.toFree
+  for i in 0 ..< roots.len:
+    let s = roots.d[i][0]
+    s.rootIdx = 0
+    collectColor(s, roots.d[i][1], colToCollect, j)
+
+  # Bug #22927: `free` calls destructors which can append to `roots`.
+  # We protect against this here by setting `roots.len` to 0 and also
+  # setting the threshold so high that no cycle collection can be triggered
+  # until we are out of this critical section:
+  when not defined(nimStressOrc):
+    let oldThreshold = rootsThreshold
+    rootsThreshold = high(int)
+  roots.len = 0
+
+  for i in 0 ..< j.toFree.len:
+    when orcLeakDetector:
+      writeCell("CYCLIC OBJECT FREED", j.toFree.d[i][0], j.toFree.d[i][1])
+    free(j.toFree.d[i][0], j.toFree.d[i][1])
+
+  when not defined(nimStressOrc):
+    rootsThreshold = oldThreshold
+
+  inc j.freed, j.toFree.len
+  deinit j.toFree
+
+when defined(nimOrcStats):
+  var freedCyclicObjects {.threadvar.}: int
+
+proc partialCollect(lowMark: int) =
+  when false:
+    if roots.len < 10 + lowMark: return
+  when logOrc:
+    cfprintf(cstderr, "[partialCollect] begin\n")
+  var j: GcEnv
+  init j.traceStack
+  collectCyclesBacon(j, lowMark)
+  when logOrc:
+    cfprintf(cstderr, "[partialCollect] end; freed %ld touched: %ld work: %ld\n", j.freed, j.touched,
+      roots.len - lowMark)
+  roots.len = lowMark
+  deinit j.traceStack
+  when defined(nimOrcStats):
+    inc freedCyclicObjects, j.freed
+
+proc collectCycles() =
+  ## Collect cycles.
+  when logOrc:
+    cfprintf(cstderr, "[collectCycles] begin\n")
+
+  var j: GcEnv
+  init j.traceStack
+  when useJumpStack:
+    init j.jumpStack
+    collectCyclesBacon(j, 0)
+    while j.jumpStack.len > 0:
+      let (t, desc) = j.jumpStack.pop
+      # not in jump stack anymore!
+      t.rc = t.rc and not jumpStackFlag
+    deinit j.jumpStack
+  else:
+    collectCyclesBacon(j, 0)
+
+  deinit j.traceStack
+  if roots.len == 0:
+    deinit roots
+
+  when not defined(nimStressOrc):
+    # compute the threshold based on the previous history
+    # of the cycle collector's effectiveness:
+    # we're effective when we collected 50% or more of the nodes
+    # we touched. If we're effective, we can reset the threshold:
+    if j.keepThreshold:
+      discard
+    elif j.freed * 2 >= j.touched:
+      when not defined(nimFixedOrc):
+        rootsThreshold = max(rootsThreshold div 3 * 2, 16)
+      else:
+        rootsThreshold = 0
+      #cfprintf(cstderr, "[collectCycles] freed %ld, touched %ld new threshold %ld\n", j.freed, j.touched, rootsThreshold)
+    elif rootsThreshold < high(int) div 4:
+      rootsThreshold = (if rootsThreshold <= 0: defaultThreshold else: rootsThreshold) * 3 div 2
+  when logOrc:
+    cfprintf(cstderr, "[collectCycles] end; freed %ld new threshold %ld touched: %ld mem: %ld rcSum: %ld edges: %ld\n", j.freed, rootsThreshold, j.touched,
+      getOccupiedMem(), j.rcSum, j.edges)
+  when defined(nimOrcStats):
+    inc freedCyclicObjects, j.freed
+
+when defined(nimOrcStats):
+  type
+    OrcStats* = object ## Statistics of the cycle collector subsystem.
+      freedCyclicObjects*: int ## Number of freed cyclic objects.
+  proc GC_orcStats*(): OrcStats =
+    ## Returns the statistics of the cycle collector subsystem.
+    result = OrcStats(freedCyclicObjects: freedCyclicObjects)
+
+proc registerCycle(s: Cell; desc: PNimTypeV2) =
+  s.rootIdx = roots.len+1
+  if roots.d == nil: init(roots)
+  add(roots, s, desc)
+
+  if roots.len - defaultThreshold >= rootsThreshold:
+    collectCycles()
+  when logOrc:
+    writeCell("[added root]", s, desc)
+
+  orcAssert strstr(desc.name, "TType") == nil, "added a TType as a root!"
+
+proc GC_runOrc* =
+  ## Forces a cycle collection pass.
+  collectCycles()
+  orcAssert roots.len == 0, "roots not empty!"
+
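A minimal sketch of how the collector entry points above get exercised, assuming the program is built with `--mm:orc`; `Node` and `makeCycle` are illustrative names only, not part of this module:

```nim
type Node = ref object
  next: Node

proc makeCycle() =
  let a = Node()
  a.next = a          # a self-referential cycle; plain reference counting
                      # alone can never free it

makeCycle()
GC_runOrc()           # forces the Bacon-style pass above to reclaim the cycle
```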
+proc GC_enableOrc*() =
+  ## Enables the cycle collector subsystem of `--mm:orc`. This is a `--mm:orc`
+  ## specific API. Check with `when defined(gcOrc)` for its existence.
+  when not defined(nimStressOrc):
+    rootsThreshold = 0
+
+proc GC_disableOrc*() =
+  ## Disables the cycle collector subsystem of `--mm:orc`. This is a `--mm:orc`
+  ## specific API. Check with `when defined(gcOrc)` for its existence.
+  when not defined(nimStressOrc):
+    rootsThreshold = high(int)
+
+proc GC_prepareOrc*(): int {.inline.} = roots.len
+
+proc GC_partialCollect*(limit: int) =
+  partialCollect(limit)
+
+proc GC_fullCollect* =
+  ## Forces a full garbage collection pass. With `--mm:orc` triggers the cycle
+  ## collector. This is an alias for `GC_runOrc`.
+  collectCycles()
+
+proc GC_enableMarkAndSweep*() =
+  ## For `--mm:orc` an alias for `GC_enableOrc`.
+  GC_enableOrc()
+
+proc GC_disableMarkAndSweep*() =
+  ## For `--mm:orc` an alias for `GC_disableOrc`.
+  GC_disableOrc()
+
+const
+  acyclicFlag = 1 # see also cggtypes.nim, proc genTypeInfoV2Impl
+
+when optimizedOrc:
+  template markedAsCyclic(s: Cell; desc: PNimTypeV2): bool =
+    (desc.flags and acyclicFlag) == 0 and (s.rc and maybeCycle) != 0
+else:
+  template markedAsCyclic(s: Cell; desc: PNimTypeV2): bool =
+    (desc.flags and acyclicFlag) == 0
+
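For types annotated with the `{.acyclic.}` pragma the compiler sets `acyclicFlag` in the generated type descriptor (see the `cggtypes.nim` reference above), so `markedAsCyclic` returns false and such cells are never registered as cycle-root candidates. A small sketch:

```nim
type
  LinearNode {.acyclic.} = ref object  # promise: instances never form a ref cycle
    next: LinearNode
    value: int
```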
+proc rememberCycle(isDestroyAction: bool; s: Cell; desc: PNimTypeV2) {.noinline.} =
+  if isDestroyAction:
+    if s.rootIdx > 0:
+      unregisterCycle(s)
+  else:
+    # do not call 'rememberCycle' again unless this cell
+    # got an 'incRef' event:
+    if s.rootIdx == 0 and markedAsCyclic(s, desc):
+      s.setColor colBlack
+      registerCycle(s, desc)
+
+proc nimDecRefIsLastCyclicDyn(p: pointer): bool {.compilerRtl, inl.} =
+  if p != nil:
+    var cell = head(p)
+    if (cell.rc and not rcMask) == 0:
+      result = true
+      #cprintf("[DESTROY] %p\n", p)
+    else:
+      dec cell.rc, rcIncrement
+    #if cell.color == colPurple:
+    rememberCycle(result, cell, cast[ptr PNimTypeV2](p)[])
+
+proc nimDecRefIsLastDyn(p: pointer): bool {.compilerRtl, inl.} =
+  if p != nil:
+    var cell = head(p)
+    if (cell.rc and not rcMask) == 0:
+      result = true
+      #cprintf("[DESTROY] %p\n", p)
+    else:
+      dec cell.rc, rcIncrement
+    #if cell.color == colPurple:
+    if result:
+      if cell.rootIdx > 0:
+        unregisterCycle(cell)
+
+proc nimDecRefIsLastCyclicStatic(p: pointer; desc: PNimTypeV2): bool {.compilerRtl, inl.} =
+  if p != nil:
+    var cell = head(p)
+    if (cell.rc and not rcMask) == 0:
+      result = true
+      #cprintf("[DESTROY] %p %s\n", p, desc.name)
+    else:
+      dec cell.rc, rcIncrement
+    #if cell.color == colPurple:
+    rememberCycle(result, cell, desc)
diff --git a/lib/system/osalloc.nim b/lib/system/osalloc.nim
index a63eadf8e..5509d0070 100644
--- a/lib/system/osalloc.nim
+++ b/lib/system/osalloc.nim
@@ -28,7 +28,30 @@ const doNotUnmap = not (defined(amd64) or defined(i386)) or
                    defined(windows) or defined(nimAllocNoUnmap)
 
 
-when defined(emscripten):
+when defined(nimAllocPagesViaMalloc):
+  when not defined(gcArc) and not defined(gcOrc) and not defined(gcAtomicArc):
+    {.error: "-d:nimAllocPagesViaMalloc is only supported with --mm:arc or --mm:atomicArc or --mm:orc".}
+
+  proc osTryAllocPages(size: int): pointer {.inline.} =
+    let base = c_malloc(csize_t size + PageSize - 1 + sizeof(uint32))
+    if base == nil: raiseOutOfMem()
+    # memory layout: padding + offset (4 bytes) + user_data
+    # in order to deallocate: read offset at user_data - 4 bytes,
+    # then deallocate user_data - offset
+    let offset = PageSize - (cast[int](base) and (PageSize - 1))
+    cast[ptr uint32](base +! (offset - sizeof(uint32)))[] = uint32(offset)
+    result = base +! offset
+
+  proc osAllocPages(size: int): pointer {.inline.} =
+    result = osTryAllocPages(size)
+    if result == nil: raiseOutOfMem()
+
+  proc osDeallocPages(p: pointer, size: int) {.inline.} =
+    # read offset at p - 4 bytes, then deallocate (p - offset) pointer
+    let offset = cast[ptr uint32](p -! sizeof(uint32))[]
+    c_free(p -! offset)
+
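A worked example of the padding/offset bookkeeping above; the base address is made up for illustration:

```nim
const PageSize = 4096
let base = 0x10_0010                               # pretend c_malloc returned this
let offset = PageSize - (base and (PageSize - 1))  # 4096 - 16 = 4080
let user = base + offset                           # 0x10_1000, page aligned
assert (user and (PageSize - 1)) == 0
# osDeallocPages reads the uint32 stored at user - 4 (value 4080)
# and hands base = user - offset back to c_free:
assert user - offset == base
```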
+elif defined(emscripten) and not defined(StandaloneHeapSize):
   const
     PROT_READ  = 1             # page can be read
     PROT_WRITE = 2             # page can be written
@@ -57,12 +80,12 @@ when defined(emscripten):
     let pos = cast[int](result)
 
     # Convert pointer to PageSize correct one.
-    var new_pos = cast[ByteAddress](pos) +% (PageSize - (pos %% PageSize))
+    var new_pos = cast[int](pos) +% (PageSize - (pos %% PageSize))
     if (new_pos-pos) < sizeof(EmscriptenMMapBlock):
       new_pos = new_pos +% PageSize
     result = cast[pointer](new_pos)
 
-    var mmapDescrPos = cast[ByteAddress](result) -% sizeof(EmscriptenMMapBlock)
+    var mmapDescrPos = cast[int](result) -% sizeof(EmscriptenMMapBlock)
 
     var mmapDescr = cast[EmscriptenMMapBlock](mmapDescrPos)
     mmapDescr.realSize = realSize
@@ -73,19 +96,25 @@ when defined(emscripten):
   proc osTryAllocPages(size: int): pointer = osAllocPages(size)
 
   proc osDeallocPages(p: pointer, size: int) {.inline.} =
-    var mmapDescrPos = cast[ByteAddress](p) -% sizeof(EmscriptenMMapBlock)
+    var mmapDescrPos = cast[int](p) -% sizeof(EmscriptenMMapBlock)
     var mmapDescr = cast[EmscriptenMMapBlock](mmapDescrPos)
     munmap(mmapDescr.realPointer, mmapDescr.realSize)
 
-elif defined(genode):
+elif defined(genode) and not defined(StandaloneHeapSize):
   include genode/alloc # osAllocPages, osTryAllocPages, osDeallocPages
 
-elif defined(posix):
+elif defined(posix) and not defined(StandaloneHeapSize):
   const
     PROT_READ  = 1             # page can be read
     PROT_WRITE = 2             # page can be written
 
-  when defined(macosx) or defined(bsd):
+  when defined(netbsd) or defined(openbsd):
+    # NetBSD/OpenBSD: stacks used for setjmp/longjmp coroutines must be mapped with MAP_STACK
+    var MAP_STACK {.importc: "MAP_STACK", header: "<sys/mman.h>".}: cint
+  else:
+    const MAP_STACK = 0             # avoid sideeffects
+
+  when defined(macosx) or defined(freebsd):
     const MAP_ANONYMOUS = 0x1000
     const MAP_PRIVATE = 0x02        # Changes are private
   elif defined(solaris):
@@ -96,31 +125,34 @@ elif defined(posix):
     # some arches like mips and alpha use different values
     const MAP_ANONYMOUS = 0x20
     const MAP_PRIVATE = 0x02        # Changes are private
-  else:
+  elif defined(haiku):
+    const MAP_ANONYMOUS = 0x08
+    const MAP_PRIVATE = 0x02
+  else:  # posix including netbsd or openbsd
     var
       MAP_ANONYMOUS {.importc: "MAP_ANONYMOUS", header: "<sys/mman.h>".}: cint
       MAP_PRIVATE {.importc: "MAP_PRIVATE", header: "<sys/mman.h>".}: cint
 
-  proc mmap(adr: pointer, len: csize, prot, flags, fildes: cint,
+  proc mmap(adr: pointer, len: csize_t, prot, flags, fildes: cint,
             off: int): pointer {.header: "<sys/mman.h>".}
 
-  proc munmap(adr: pointer, len: csize): cint {.header: "<sys/mman.h>".}
+  proc munmap(adr: pointer, len: csize_t): cint {.header: "<sys/mman.h>".}
 
   proc osAllocPages(size: int): pointer {.inline.} =
-    result = mmap(nil, size, PROT_READ or PROT_WRITE,
-                             MAP_PRIVATE or MAP_ANONYMOUS, -1, 0)
+    result = mmap(nil, cast[csize_t](size), PROT_READ or PROT_WRITE,
+                             MAP_ANONYMOUS or MAP_PRIVATE or MAP_STACK, -1, 0)
     if result == nil or result == cast[pointer](-1):
       raiseOutOfMem()
 
   proc osTryAllocPages(size: int): pointer {.inline.} =
-    result = mmap(nil, size, PROT_READ or PROT_WRITE,
-                             MAP_PRIVATE or MAP_ANONYMOUS, -1, 0)
+    result = mmap(nil, cast[csize_t](size), PROT_READ or PROT_WRITE,
+                             MAP_ANONYMOUS or MAP_PRIVATE or MAP_STACK, -1, 0)
     if result == cast[pointer](-1): result = nil
 
   proc osDeallocPages(p: pointer, size: int) {.inline.} =
-    when reallyOsDealloc: discard munmap(p, size)
+    when reallyOsDealloc: discard munmap(p, cast[csize_t](size))
 
-elif defined(windows):
+elif defined(windows) and not defined(StandaloneHeapSize):
   const
     MEM_RESERVE = 0x2000
     MEM_COMMIT = 0x1000
@@ -157,13 +189,13 @@ elif defined(windows):
     when reallyOsDealloc:
       if virtualFree(p, 0, MEM_RELEASE) == 0:
         cprintf "virtualFree failing!"
-        quit 1
+        rawQuit 1
     #VirtualFree(p, size, MEM_DECOMMIT)
 
-elif hostOS == "standalone":
+elif hostOS == "standalone" or defined(StandaloneHeapSize):
   const StandaloneHeapSize {.intdefine.}: int = 1024 * PageSize
   var
-    theHeap: array[StandaloneHeapSize, float64] # 'float64' for alignment
+    theHeap: array[StandaloneHeapSize div sizeof(float64), float64] # 'float64' for alignment
     bumpPointer = cast[int](addr theHeap)
 
   proc osAllocPages(size: int): pointer {.inline.} =
@@ -181,5 +213,6 @@ elif hostOS == "standalone":
   proc osDeallocPages(p: pointer, size: int) {.inline.} =
     if bumpPointer-size == cast[int](p):
       dec bumpPointer, size
+
 else:
   {.error: "Port memory manager to your platform".}
diff --git a/lib/system/platforms.nim b/lib/system/platforms.nim
index b561cd3ba..0619f3fca 100644
--- a/lib/system/platforms.nim
+++ b/lib/system/platforms.nim
@@ -10,6 +10,8 @@
 ## Platform detection for NimScript. This module is included by the system module!
 ## Do not import it directly!
 
+# CPU architectures have alias names mapped in tools/niminst/makefile.nimf
+
 type
   CpuPlatform* {.pure.} = enum ## the CPU this program will run on.
     none,                      ## unknown CPU
@@ -20,6 +22,8 @@ type
     powerpc64,                 ## 64 bit PowerPC
     powerpc64el,               ## Little Endian 64 bit PowerPC
     sparc,                     ## Sparc based processor
+    sparc64,                   ## 64-bit Sparc based processor
+    hppa,                      ## HP PA-RISC
     ia64,                      ## Intel Itanium
     amd64,                     ## x86_64 (AMD64); 64 bit x86 compatible CPU
     mips,                      ## Mips based processor
@@ -31,13 +35,17 @@ type
     vm,                        ## Some Virtual machine: Nim's VM or JavaScript
     avr,                       ## AVR based processor
     msp430,                    ## TI MSP430 microcontroller
-    riscv64                    ## RISC-V 64-bit processor
+    riscv32,                   ## RISC-V 32-bit processor
+    riscv64,                   ## RISC-V 64-bit processor
+    wasm32,                    ## WASM, 32-bit
+    e2k,                       ## MCST Elbrus 2000
+    loongarch64,               ## LoongArch 64-bit processor
+    s390x                      ## IBM Z
 
   OsPlatform* {.pure.} = enum ## the OS this program will run on.
     none, dos, windows, os2, linux, morphos, skyos, solaris,
     irix, netbsd, freebsd, openbsd, aix, palmos, qnx, amiga,
-    atari, netware, macos, macosx, haiku, android, js, nimVM,
-    standalone
+    atari, netware, macos, macosx, haiku, android, js, standalone, nintendoswitch
 
 const
   targetOS* = when defined(windows): OsPlatform.windows
@@ -62,8 +70,8 @@ const
               elif defined(haiku): OsPlatform.haiku
               elif defined(android): OsPlatform.android
               elif defined(js): OsPlatform.js
-              elif defined(nimVM): OsPlatform.nimVM
               elif defined(standalone): OsPlatform.standalone
+              elif defined(nintendoswitch): OsPlatform.nintendoswitch
               else: OsPlatform.none
     ## the OS this program will run on.
 
@@ -74,6 +82,8 @@ const
                elif defined(powerpc64): CpuPlatform.powerpc64
                elif defined(powerpc64el): CpuPlatform.powerpc64el
                elif defined(sparc): CpuPlatform.sparc
+               elif defined(sparc64): CpuPlatform.sparc64
+               elif defined(hppa): CpuPlatform.hppa
                elif defined(ia64): CpuPlatform.ia64
                elif defined(amd64): CpuPlatform.amd64
                elif defined(mips): CpuPlatform.mips
@@ -85,6 +95,11 @@ const
                elif defined(vm): CpuPlatform.vm
                elif defined(avr): CpuPlatform.avr
                elif defined(msp430): CpuPlatform.msp430
+               elif defined(riscv32): CpuPlatform.riscv32
                elif defined(riscv64): CpuPlatform.riscv64
+               elif defined(wasm32): CpuPlatform.wasm32
+               elif defined(e2k): CpuPlatform.e2k
+               elif defined(loongarch64): CpuPlatform.loongarch64
+               elif defined(s390x): CpuPlatform.s390x
                else: CpuPlatform.none
     ## the CPU this program will run on.
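Since this module is included into `system` for NimScript, the constants above can be queried directly from a `.nims` file; a small sketch:

```nim
when targetCPU == CpuPlatform.amd64 and targetOS == OsPlatform.linux:
  echo "configuring for x86_64 Linux"
elif targetCPU in {CpuPlatform.riscv32, CpuPlatform.riscv64}:
  echo "configuring for a RISC-V target"
```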
diff --git a/lib/system/profiler.nim b/lib/system/profiler.nim
index 7146500d9..e7eb6ac82 100644
--- a/lib/system/profiler.nim
+++ b/lib/system/profiler.nim
@@ -13,6 +13,9 @@
 # (except perhaps loops that have no side-effects). At every Nth call a
 # stack trace is taken. A stack trace is a list of cstrings.
 
+when defined(profiler) and defined(memProfiler):
+  {.error: "profiler and memProfiler cannot be defined at the same time (See Embedded Stack Trace Profiler (ESTP) User Guide) for more details".}
+
 {.push profiler: off.}
 
 const
@@ -23,7 +26,6 @@ type
     lines*: array[0..MaxTraceLen-1, cstring]
     files*: array[0..MaxTraceLen-1, cstring]
   ProfilerHook* = proc (st: StackTrace) {.nimcall.}
-{.deprecated: [TStackTrace: StackTrace, TProfilerHook: ProfilerHook].}
 
 proc `[]`*(st: StackTrace, i: int): cstring = st.lines[i]
 
@@ -58,13 +60,13 @@ proc captureStackTrace(f: PFrame, st: var StackTrace) =
     b = b.prev
 
 var
-  profilingRequestedHook*: proc (): bool {.nimcall, benign.}
+  profilingRequestedHook*: proc (): bool {.nimcall, gcsafe.}
     ## set this variable to provide a procedure that implements a profiler in
     ## user space. See the `nimprof` module for a reference implementation.
 
 when defined(memProfiler):
   type
-    MemProfilerHook* = proc (st: StackTrace, requestedSize: int) {.nimcall, benign.}
+    MemProfilerHook* = proc (st: StackTrace, requestedSize: int) {.nimcall, gcsafe.}
 
   var
     profilerHook*: MemProfilerHook
@@ -88,9 +90,10 @@ else:
   proc callProfilerHook(hook: ProfilerHook) {.noinline.} =
     # 'noinline' so that 'nimProfile' does not perform the stack allocation
     # in the common case.
-    var st: StackTrace
-    captureStackTrace(framePtr, st)
-    hook(st)
+    when not defined(nimdoc):
+      var st: StackTrace
+      captureStackTrace(framePtr, st)
+      hook(st)
 
   proc nimProfile() =
     ## This is invoked by the compiler in every loop and on every proc entry!
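A minimal sketch of a user-space sampling policy via `profilingRequestedHook`; it assumes compiling with `--profiler:on`, the counter name is illustrative, and the `nimprof` module remains the full reference implementation (it also installs the hook that consumes the captured `StackTrace`):

```nim
var calls = 0

profilingRequestedHook = proc (): bool {.nimcall, gcsafe.} =
  inc calls
  calls mod 1000 == 0      # take a stack trace on roughly every 1000th call
```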
diff --git a/lib/system/rawquits.nim b/lib/system/rawquits.nim
new file mode 100644
index 000000000..f0ead10c6
--- /dev/null
+++ b/lib/system/rawquits.nim
@@ -0,0 +1,27 @@
+import system/ctypes
+
+when defined(nimNoQuit):
+  proc rawQuit(errorcode: int = QuitSuccess) = discard "ignoring quit"
+
+elif defined(genode):
+  import genode/env
+
+  var systemEnv {.exportc: runtimeEnvSym.}: GenodeEnvPtr
+
+  type GenodeEnv = GenodeEnvPtr
+    ## Opaque type representing Genode environment.
+
+  proc rawQuit(env: GenodeEnv; errorcode: int) {.magic: "Exit", noreturn,
+    importcpp: "#->parent().exit(@); Genode::sleep_forever()", header: "<base/sleep.h>".}
+
+  proc rawQuit(errorcode: int = QuitSuccess) {.inline, noreturn.} =
+    systemEnv.rawQuit(errorcode)
+
+
+elif defined(js) and defined(nodejs) and not defined(nimscript):
+  proc rawQuit(errorcode: int = QuitSuccess) {.magic: "Exit",
+    importc: "process.exit", noreturn.}
+
+else:
+  proc rawQuit(errorcode: cint) {.
+    magic: "Exit", importc: "exit", header: "<stdlib.h>", noreturn.}
\ No newline at end of file
diff --git a/lib/system/repr.nim b/lib/system/repr.nim
index 982b07467..13118e40b 100644
--- a/lib/system/repr.nim
+++ b/lib/system/repr.nim
@@ -16,38 +16,17 @@ proc reprInt(x: int64): string {.compilerproc.} = return $x
 proc reprFloat(x: float): string {.compilerproc.} = return $x
 
 proc reprPointer(x: pointer): string {.compilerproc.} =
-  when defined(nimNoArrayToCstringConversion):
-    result = newString(60)
-    let n = c_sprintf(addr result[0], "%p", x)
-    setLen(result, n)
-  else:
-    var buf: array[0..59, char]
-    discard c_sprintf(buf, "%p", x)
-    return $buf
-
-proc `$`(x: uint64): string =
-  if x == 0:
-    result = "0"
-  else:
-    result = newString(60)
-    var i = 0
-    var n = x
-    while n != 0:
-      let nn = n div 10'u64
-      result[i] = char(n - 10'u64 * nn + ord('0'))
-      inc i
-      n = nn
-    result.setLen i
-
-    let half = i div 2
-    # Reverse
-    for t in 0 .. half-1: swap(result[t], result[i-t-1])
+  result = newString(60)
+  let n = c_snprintf(cast[cstring](addr result[0]), csize_t(60), "%p", x)
+  setLen(result, n)
 
 proc reprStrAux(result: var string, s: cstring; len: int) =
   if cast[pointer](s) == nil:
     add result, "nil"
     return
-  add result, reprPointer(cast[pointer](s)) & "\""
+  if len > 0:
+    add result, reprPointer(cast[pointer](s))
+  add result, "\""
   for i in 0 .. pred(len):
     let c = s[i]
     case c
@@ -93,8 +72,10 @@ proc reprEnum(e: int, typ: PNimType): string {.compilerRtl.} =
 
   result = $e & " (invalid data!)"
 
+include system/repr_impl
+
 type
-  PByteArray = ptr array[0xffff, int8]
+  PByteArray = ptr UncheckedArray[byte] # array[0xffff, byte]
 
 proc addSetElem(result: var string, elem: int, typ: PNimType) {.benign.} =
   case typ.kind
@@ -111,22 +92,23 @@ proc reprSetAux(result: var string, p: pointer, typ: PNimType) =
   var elemCounter = 0  # we need this flag for adding the comma at
                        # the right places
   add result, "{"
-  var u: int64
+  var u: uint64
   case typ.size
-  of 1: u = ze64(cast[ptr int8](p)[])
-  of 2: u = ze64(cast[ptr int16](p)[])
-  of 4: u = ze64(cast[ptr int32](p)[])
-  of 8: u = cast[ptr int64](p)[]
+  of 1: u = cast[ptr uint8](p)[]
+  of 2: u = cast[ptr uint16](p)[]
+  of 4: u = cast[ptr uint32](p)[]
+  of 8: u = cast[ptr uint64](p)[]
   else:
+    u = uint64(0)
     var a = cast[PByteArray](p)
     for i in 0 .. typ.size*8-1:
-      if (ze(a[i div 8]) and (1 shl (i mod 8))) != 0:
+      if (uint(a[i shr 3]) and (1'u shl (i and 7))) != 0:
         if elemCounter > 0: add result, ", "
         addSetElem(result, i+typ.node.len, typ.base)
         inc(elemCounter)
   if typ.size <= 8:
     for i in 0..sizeof(int64)*8-1:
-      if (u and (1'i64 shl int64(i))) != 0'i64:
+      if (u and (1'u64 shl uint64(i))) != 0'u64:
         if elemCounter > 0: add result, ", "
         addSetElem(result, i+typ.node.len, typ.base)
         inc(elemCounter)
@@ -143,7 +125,6 @@ type
       marked: CellSet
     recdepth: int       # do not recurse endlessly
     indent: int         # indentation
-{.deprecated: [TReprClosure: ReprClosure].}
 
 when not defined(useNimRtl):
   proc initReprClosure(cl: var ReprClosure) =
@@ -174,19 +155,35 @@ when not defined(useNimRtl):
     var bs = typ.base.size
     for i in 0..typ.size div bs - 1:
       if i > 0: add result, ", "
-      reprAux(result, cast[pointer](cast[ByteAddress](p) + i*bs), typ.base, cl)
+      reprAux(result, cast[pointer](cast[int](p) + i*bs), typ.base, cl)
     add result, "]"
 
+  when defined(nimSeqsV2):
+    type
+      GenericSeq = object
+        len: int
+        p: pointer
+      PGenericSeq = ptr GenericSeq
+    const payloadOffset = sizeof(int) + sizeof(pointer)
+      # see seqs.nim:    cap: int
+      #                  region: Allocator
+
+    template payloadPtr(x: untyped): untyped = cast[PGenericSeq](x).p
+  else:
+    const payloadOffset = GenericSeqSize ## the payload offset always depends on the alignment of the member type.
+    template payloadPtr(x: untyped): untyped = x
+
   proc reprSequence(result: var string, p: pointer, typ: PNimType,
                     cl: var ReprClosure) =
     if p == nil:
-      add result, "nil"
+      add result, "[]"
       return
-    result.add(reprPointer(p) & "[")
+    result.add(reprPointer(p))
+    result.add "@["
     var bs = typ.base.size
     for i in 0..cast[PGenericSeq](p).len-1:
       if i > 0: add result, ", "
-      reprAux(result, cast[pointer](cast[ByteAddress](p) + GenericSeqSize + i*bs),
+      reprAux(result, cast[pointer](cast[int](payloadPtr(p)) + align(payloadOffset, typ.align) + i*bs),
               typ.base, cl)
     add result, "]"
 
@@ -197,14 +194,14 @@ when not defined(useNimRtl):
     of nkSlot:
       add result, $n.name
       add result, " = "
-      reprAux(result, cast[pointer](cast[ByteAddress](p) + n.offset), n.typ, cl)
+      reprAux(result, cast[pointer](cast[int](p) + n.offset), n.typ, cl)
     of nkList:
       for i in 0..n.len-1:
         if i > 0: add result, ",\n"
         reprRecordAux(result, p, n.sons[i], cl)
     of nkCase:
       var m = selectBranch(p, n)
-      reprAux(result, cast[pointer](cast[ByteAddress](p) + n.offset), n.typ, cl)
+      reprAux(result, cast[pointer](cast[int](p) + n.offset), n.typ, cl)
       if m != nil: reprRecordAux(result, p, m, cl)
 
   proc reprRecord(result: var string, p: pointer, typ: PNimType,
@@ -227,11 +224,12 @@ when not defined(useNimRtl):
                cl: var ReprClosure) =
     # we know that p is not nil here:
     when declared(CellSet):
-      when defined(boehmGC) or defined(gogc) or defined(nogc):
+      when defined(boehmGC) or defined(gogc) or defined(nogc) or usesDestructors:
         var cell = cast[PCell](p)
       else:
         var cell = usrToCell(p)
-      add result, "ref " & reprPointer(p)
+      add result, if typ.kind == tyPtr: "ptr " else: "ref "
+      add result, reprPointer(p)
       if cell notin cl.marked:
         # only the address is shown:
         incl(cl.marked, cell)
@@ -284,8 +282,8 @@ when not defined(useNimRtl):
     of tyChar: add result, reprChar(cast[ptr char](p)[])
     of tyString:
       let sp = cast[ptr string](p)
-      reprStrAux(result, if sp[].isNil: nil else: sp[].cstring, sp[].len)
-    of tyCString:
+      reprStrAux(result, sp[].cstring, sp[].len)
+    of tyCstring:
       let cs = cast[ptr cstring](p)[]
       if cs.isNil: add result, "nil"
       else: reprStrAux(result, cs, cs.len)
@@ -293,22 +291,25 @@ when not defined(useNimRtl):
     of tyProc, tyPointer:
       if cast[PPointer](p)[] == nil: add result, "nil"
       else: add result, reprPointer(cast[PPointer](p)[])
+    of tyUncheckedArray:
+      add result, "[...]"
     else:
       add result, "(invalid data!)"
     inc(cl.recdepth)
 
-proc reprOpenArray(p: pointer, length: int, elemtyp: PNimType): string {.
-                   compilerRtl.} =
-  var
-    cl: ReprClosure
-  initReprClosure(cl)
-  result = "["
-  var bs = elemtyp.size
-  for i in 0..length - 1:
-    if i > 0: add result, ", "
-    reprAux(result, cast[pointer](cast[ByteAddress](p) + i*bs), elemtyp, cl)
-  add result, "]"
-  deinitReprClosure(cl)
+when not defined(useNimRtl):
+  proc reprOpenArray(p: pointer, length: int, elemtyp: PNimType): string {.
+                     compilerRtl.} =
+    var
+      cl: ReprClosure
+    initReprClosure(cl)
+    result = "["
+    var bs = elemtyp.size
+    for i in 0..length - 1:
+      if i > 0: add result, ", "
+      reprAux(result, cast[pointer](cast[int](p) + i*bs), elemtyp, cl)
+    add result, "]"
+    deinitReprClosure(cl)
 
 when not defined(useNimRtl):
   proc reprAny(p: pointer, typ: PNimType): string =
@@ -321,5 +322,6 @@ when not defined(useNimRtl):
     else:
       var p = p
       reprAux(result, addr(p), typ, cl)
-    add result, "\n"
+    when defined(nimLegacyReprWithNewline): # see PR #16034
+      add result, "\n"
     deinitReprClosure(cl)
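A behavioural note on the `nimLegacyReprWithNewline` switch above (see PR #16034): on compilers that include this change, `repr` no longer appends a trailing newline unless that define is passed. A quick check:

```nim
import std/strutils

let r = repr(@[1, 2, 3])
assert not r.endsWith("\n")   # the historical "\n" only returns with
                              # -d:nimLegacyReprWithNewline
```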
diff --git a/lib/system/repr_impl.nim b/lib/system/repr_impl.nim
new file mode 100644
index 000000000..b9ec1890f
--- /dev/null
+++ b/lib/system/repr_impl.nim
@@ -0,0 +1,15 @@
+#[
+other APIs common to system/repr and system/reprjs could be refactored here, eg:
+* reprChar
+* reprBool
+* reprStr
+
+Another possibility in future work would be to have a single include file instead
+of system/repr and system/reprjs, and use `when defined(js)` inside it.
+]#
+
+proc reprDiscriminant*(e: int, typ: PNimType): string {.compilerRtl.} =
+  case typ.kind
+  of tyEnum: reprEnum(e, typ)
+  of tyBool: $(e != 0)
+  else: $e
diff --git a/lib/system/repr_v2.nim b/lib/system/repr_v2.nim
new file mode 100644
index 000000000..d2aef536c
--- /dev/null
+++ b/lib/system/repr_v2.nim
@@ -0,0 +1,194 @@
+include system/inclrtl
+
+when defined(nimPreviewSlimSystem):
+  import std/formatfloat
+
+proc isNamedTuple(T: typedesc): bool {.magic: "TypeTrait".}
+  ## imported from typetraits
+
+proc distinctBase(T: typedesc, recursive: static bool = true): typedesc {.magic: "TypeTrait".}
+  ## imported from typetraits
+
+proc rangeBase(T: typedesc): typedesc {.magic: "TypeTrait".}
+  # skip one level of range; return the base type of a range type
+
+proc repr*(x: NimNode): string {.magic: "Repr", noSideEffect.}
+
+proc repr*(x: int): string =
+  ## Same as $x
+  $x
+
+proc repr*(x: int64): string =
+  ## Same as $x
+  $x
+
+proc repr*(x: uint64): string {.noSideEffect.} =
+  ## Same as $x
+  $x
+
+proc repr*(x: float): string =
+  ## Same as $x
+  $x
+
+proc repr*(x: bool): string {.magic: "BoolToStr", noSideEffect.}
+  ## repr for a boolean argument. Returns `x`
+  ## converted to the string "false" or "true".
+
+proc repr*(x: char): string {.noSideEffect, raises: [].} =
+  ## repr for a character argument. Returns `x`
+  ## converted to an escaped string.
+  ##
+  ##   ```Nim
+  ##   assert repr('c') == "'c'"
+  ##   ```
+  result = "'"
+  # Elides string creations if not needed
+  if x in {'\\', '\0'..'\31', '\127'..'\255'}:
+    result.add '\\'
+  if x in {'\0'..'\31', '\127'..'\255'}:
+    result.add $x.uint8
+  else:
+    result.add x
+  result.add '\''
+
+proc repr*(x: string | cstring): string {.noSideEffect, raises: [].} =
+  ## repr for a string argument. Returns `x`
+  ## converted to a quoted and escaped string.
+  result = "\""
+  for i in 0..<x.len:
+    if x[i] in {'"', '\\', '\0'..'\31', '\127'..'\255'}:
+      result.add '\\'
+    case x[i]:
+    of '\n':
+      result.add "n\n"
+    of '\0'..'\9', '\11'..'\31', '\127'..'\255':
+      result.add $x[i].uint8
+    else:
+      result.add x[i]
+  result.add '\"'
+
+proc repr*[Enum: enum](x: Enum): string {.magic: "EnumToStr", noSideEffect, raises: [].}
+  ## repr for an enumeration argument. This works for
+  ## any enumeration type thanks to compiler magic.
+  ##
+  ## If a `repr` operator for a concrete enumeration is provided, this is
+  ## used instead. (In other words: *overriding* is possible.)
+
+proc reprDiscriminant*(e: int): string {.compilerproc.} =
+  # repr and reprjs can use `PNimType` to symbolize `e`; making this work here
+  # would require a way to pass the set of enum stringified values to cgen.
+  $e
+
+proc repr*(p: pointer): string =
+  ## repr of pointer as its hexadecimal value
+  if p == nil:
+    result = "nil"
+  else:
+    when nimvm:
+      result = "ptr"
+    else:
+      const HexChars = "0123456789ABCDEF"
+      const len = sizeof(pointer) * 2
+      var n = cast[uint](p)
+      result = newString(len)
+      for j in countdown(len-1, 0):
+        result[j] = HexChars[n and 0xF]
+        n = n shr 4
+
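The loop above formats the address as fixed-width uppercase hexadecimal; a sketch of the resulting shape at runtime, assuming a 64-bit target and `--mm:arc`/`--mm:orc` (where this `repr` overload is used):

```nim
let p = cast[pointer](0x1A)
assert repr(p) == "000000000000001A"   # sizeof(pointer) * 2 = 16 hex digits

var q: pointer = nil
assert repr(q) == "nil"
```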
+proc repr*(p: proc | iterator {.closure.}): string =
+  ## repr of a proc as its address
+  repr(cast[ptr pointer](unsafeAddr p)[])
+
+template repr*[T: distinct|(range and not enum)](x: T): string =
+  when T is range: # add a branch to handle range
+    repr(rangeBase(typeof(x))(x))
+  elif T is distinct:
+    repr(distinctBase(typeof(x))(x))
+  else:
+    {.error: "cannot happen".}
+
+template repr*(t: typedesc): string = $t
+
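A sketch of the unwrapping performed by the `distinct`/`range` template above (`Meters` is an illustrative type, not part of the library):

```nim
type Meters = distinct int
assert repr(Meters(5)) == "5"    # unwrapped via distinctBase, then repr(int)

var digit: range[0..9] = 7
assert repr(digit) == "7"        # unwrapped via rangeBase
```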
+proc reprObject[T: tuple|object](res: var string, x: T) {.noSideEffect, raises: [].} =
+  res.add '('
+  var firstElement = true
+  const isNamed = T is object or isNamedTuple(T)
+  when not isNamed:
+    var count = 0
+  for name, value in fieldPairs(x):
+    if not firstElement: res.add(", ")
+    when isNamed:
+      res.add(name)
+      res.add(": ")
+    else:
+      count.inc
+    res.add repr(value)
+    firstElement = false
+  when not isNamed:
+    if count == 1:
+      res.add(',') # $(1,) should print as the semantically legal (1,)
+  res.add(')')
+
+
+proc repr*[T: tuple|object](x: T): string {.noSideEffect, raises: [].} =
+  ## Generic `repr` operator for tuples that is lifted from the components
+  ## of `x`. Example:
+  ##   ```Nim
+  ##   $(23, 45) == "(23, 45)"
+  ##   $(a: 23, b: 45) == "(a: 23, b: 45)"
+  ##   $() == "()"
+  ##   ```
+  when T is object:
+    result = $typeof(x)
+  reprObject(result, x)
+
+proc repr*[T](x: ref T | ptr T): string {.noSideEffect, raises: [].} =
+  if isNil(x): return "nil"
+  when T is object:
+    result = $typeof(x)
+    reprObject(result, x[])
+  else:
+    result = when typeof(x) is ref: "ref " else: "ptr "
+    result.add repr(x[])
+
+proc collectionToRepr[T](x: T, prefix, separator, suffix: string): string {.noSideEffect, raises: [].} =
+  result = prefix
+  var firstElement = true
+  for value in items(x):
+    if firstElement:
+      firstElement = false
+    else:
+      result.add(separator)
+    result.add repr(value)
+  result.add(suffix)
+
+proc repr*[T](x: set[T]): string =
+  ## Generic `repr` operator for sets that is lifted from the components
+  ## of `x`. Example:
+  ##   ```Nim
+  ##   ${23, 45} == "{23, 45}"
+  ##   ```
+  collectionToRepr(x, "{", ", ", "}")
+
+proc repr*[T](x: seq[T]): string =
+  ## Generic `repr` operator for seqs that is lifted from the components
+  ## of `x`. Example:
+  ##   ```Nim
+  ##   $(@[23, 45]) == "@[23, 45]"
+  ##   ```
+  collectionToRepr(x, "@[", ", ", "]")
+
+proc repr*[T, IDX](x: array[IDX, T]): string =
+  ## Generic `repr` operator for arrays that is lifted from the components.
+  collectionToRepr(x, "[", ", ", "]")
+
+proc repr*[T](x: openArray[T]): string =
+  ## Generic `repr` operator for openarrays that is lifted from the components
+  ## of `x`. Example:
+  ##   ```Nim
+  ##   $(@[23, 45].toOpenArray(0, 1)) == "[23, 45]"
+  ##   ```
+  collectionToRepr(x, "[", ", ", "]")
+
+proc repr*[T](x: UncheckedArray[T]): string =
+  "[...]"
diff --git a/lib/system/reprjs.nim b/lib/system/reprjs.nim
index d04d6e12b..761d66aec 100644
--- a/lib/system/reprjs.nim
+++ b/lib/system/reprjs.nim
@@ -8,47 +8,45 @@
 #
 # The generic ``repr`` procedure for the javascript backend.
 
-proc reprInt(x: int64): string {.compilerproc.} = return $x
-proc reprFloat(x: float): string {.compilerproc.} =
-  # Js toString doesn't differentiate between 1.0 and 1,
-  # but we do.
-  if $x == $(x.int): $x & ".0"
-  else: $x
+when defined(nimPreviewSlimSystem):
+  import std/formatfloat
+
+proc reprInt(x: int64): string {.compilerproc.} = $x
+proc reprInt(x: uint64): string {.compilerproc.} = $x
+proc reprInt(x: int): string {.compilerproc.} = $x
+proc reprFloat(x: float): string {.compilerproc.} = $x
 
 proc reprPointer(p: pointer): string {.compilerproc.} =
   # Do we need to generate the full 8 bytes? In js a pointer is an int anyway
   var tmp: int
-  {. emit: """
-    if (`p`_Idx == null) {
-      `tmp` = 0;
-    } else {
-      `tmp` = `p`_Idx;
-    }
-  """ .}
+  {.emit: "`tmp` = `p`_Idx || 0;".}
   result = $tmp
 
 proc reprBool(x: bool): string {.compilerRtl.} =
   if x: result = "true"
   else: result = "false"
 
-proc isUndefined[T](x: T): bool {.inline.} = {.emit: "`result` = `x` === undefined;"}
-
 proc reprEnum(e: int, typ: PNimType): string {.compilerRtl.} =
-  if not typ.node.sons[e].isUndefined:
-    result = makeNimstrLit(typ.node.sons[e].name)
+  var tmp: bool
+  let item = typ.node.sons[e]
+  {.emit: "`tmp` = `item` !== undefined;".}
+  if tmp:
+    result = makeNimstrLit(item.name)
   else:
     result = $e & " (invalid data!)"
 
+include system/repr_impl
+
 proc reprChar(x: char): string {.compilerRtl.} =
   result = "\'"
   case x
   of '"': add(result, "\\\"")
   of '\\': add(result, "\\\\")
-  of '\127'..'\255', '\0'..'\31': add( result, "\\" & reprInt(ord(x)) )
+  of '\127'..'\255', '\0'..'\31': add(result, "\\" & reprInt(ord(x)))
   else: add(result, x)
   add(result, "\'")
 
-proc reprStrAux(result: var string, s: cstring, len: int) =
+proc reprStrAux(result: var string, s: cstring | string, len: int) =
   add(result, "\"")
   for i in 0 .. len-1:
     let c = s[i]
@@ -63,15 +61,7 @@ proc reprStrAux(result: var string, s: cstring, len: int) =
   add(result, "\"")
 
 proc reprStr(s: string): string {.compilerRtl.} =
-  result = ""
-  if cast[pointer](s).isNil:
-    # Handle nil strings here because they don't have a length field in js
-    # TODO: check for null/undefined before generating call to length in js?
-    # Also: c backend repr of a nil string is <pointer>"", but repr of an
-    # array of string that is not initialized is [nil, nil, ...] ??
-    add(result, "nil")
-  else:
-    reprStrAux(result, s, s.len)
+  reprStrAux(result, s, s.len)
 
 proc addSetElem(result: var string, elem: int, typ: PNimType) =
   # Dispatch each set element to the correct repr<Type> proc
@@ -112,7 +102,6 @@ proc reprSetAux(result: var string, s: int, typ: PNimType) =
   add(result, "}")
 
 proc reprSet(e: int, typ: PNimType): string {.compilerRtl.} =
-  result = ""
   reprSetAux(result, e, typ)
 
 type
@@ -128,24 +117,9 @@ proc reprAux(result: var string, p: pointer, typ: PNimType, cl: var ReprClosure)
 
 proc reprArray(a: pointer, typ: PNimType,
               cl: var ReprClosure): string {.compilerRtl.} =
-  var isNilArrayOrSeq: bool
-  # isnil is not enough here as it would try to deref `a` without knowing what's inside
-  {. emit: """
-    if (`a` == null) {
-      `isNilArrayOrSeq` = true;
-    } else if (`a`[0] == null) {
-      `isNilArrayOrSeq` = true;
-    } else {
-      `isNilArrayOrSeq` = false;
-    };
-    """ .}
-  if typ.kind == tySequence and isNilArrayOrSeq:
-    return "nil"
-
   # We prepend @ to seq, the C backend prepends the pointer to the seq.
   result = if typ.kind == tySequence: "@[" else: "["
   var len: int = 0
-  var i: int = 0
 
   {. emit: "`len` = `a`.length;\n" .}
   var dereffed: pointer = a
@@ -161,15 +135,15 @@ proc reprArray(a: pointer, typ: PNimType,
 
   add(result, "]")
 
-proc isPointedToNil(p: pointer): bool {.inline.}=
-  {. emit: "if (`p` === null) {`result` = true};\n" .}
+proc isPointedToNil(p: pointer): bool =
+  {. emit: "if (`p` === null) {`result` = true;}\n" .}
 
 proc reprRef(result: var string, p: pointer, typ: PNimType,
           cl: var ReprClosure) =
   if p.isPointedToNil:
-    add(result , "nil")
+    add(result, "nil")
     return
-  add( result, "ref " & reprPointer(p) )
+  add(result, "ref " & reprPointer(p))
   add(result, " --> ")
   if typ.base.kind != tyArray:
     {. emit: """
@@ -182,8 +156,8 @@ proc reprRef(result: var string, p: pointer, typ: PNimType,
 proc reprRecordAux(result: var string, o: pointer, typ: PNimType, cl: var ReprClosure) =
   add(result, "[")
 
-  var first: bool = true
-  var val: pointer = o
+  var first = true
+  var val = o
   if typ.node.len == 0:
     # if the object has only one field, len is 0  and sons is nil, the field is in node
     let key: cstring = typ.node.name
@@ -203,11 +177,10 @@ proc reprRecordAux(result: var string, o: pointer, typ: PNimType, cl: var ReprCl
   add(result, "]")
 
 proc reprRecord(o: pointer, typ: PNimType, cl: var ReprClosure): string {.compilerRtl.} =
-  result = ""
-  reprRecordAux(result, o, typ,cl)
+  reprRecordAux(result, o, typ, cl)
 
 
-proc reprJSONStringify(p: int): string {.compilerRtl.} =
+proc reprJsonStringify(p: int): string {.compilerRtl.} =
   # As a last resort, use stringify
   # We use this for tyOpenArray, tyVarargs while genTypeInfo is not implemented
   var tmp: cstring
@@ -221,22 +194,23 @@ proc reprAux(result: var string, p: pointer, typ: PNimType,
     return
   dec(cl.recDepth)
   case typ.kind
-  of tyInt..tyInt64, tyUInt..tyUInt64:
-    add( result, reprInt(cast[int](p)) )
+  of tyInt..tyInt32, tyUInt..tyUInt32:
+    add(result, reprInt(cast[int](p)))
+  of tyInt64:
+    add(result, reprInt(cast[int64](p)))
+  of tyUInt64:
+    add(result, reprInt(cast[uint64](p)))
   of tyChar:
-    add( result, reprChar(cast[char](p)) )
+    add(result, reprChar(cast[char](p)))
   of tyBool:
-    add( result, reprBool(cast[bool](p)) )
+    add(result, reprBool(cast[bool](p)))
   of tyFloat..tyFloat128:
-    add( result, reprFloat(cast[float](p)) )
+    add(result, reprFloat(cast[float](p)))
   of tyString:
     var fp: int
     {. emit: "`fp` = `p`;\n" .}
-    if cast[string](fp).isNil:
-      add(result, "nil")
-    else:
-      add( result, reprStr(cast[string](p)) )
-  of tyCString:
+    add(result, reprStr(cast[string](p)))
+  of tyCstring:
     var fp: cstring
     {. emit: "`fp` = `p`;\n" .}
     if fp.isNil:
@@ -266,12 +240,12 @@ proc reprAux(result: var string, p: pointer, typ: PNimType,
     else:
       add(result, reprPointer(p))
   else:
-    add( result, "(invalid data!)" & reprJsonStringify(cast[int](p)) )
+    add(result, "(invalid data!)" & reprJsonStringify(cast[int](p)))
   inc(cl.recDepth)
 
 proc reprAny(p: pointer, typ: PNimType): string {.compilerRtl.} =
   var cl: ReprClosure
   initReprClosure(cl)
-  result = ""
   reprAux(result, p, typ, cl)
-  add(result, "\n")
\ No newline at end of file
+  when defined(nimLegacyReprWithNewline): # see PR #16034
+    add result, "\n"
diff --git a/lib/system/seqs_v2.nim b/lib/system/seqs_v2.nim
new file mode 100644
index 000000000..572e77408
--- /dev/null
+++ b/lib/system/seqs_v2.nim
@@ -0,0 +1,227 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2017 Nim contributors
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+
+# import std/typetraits
+# strs already imported allocators for us.
+
+
+# Some optimizations here deliberately skip empty-seq initialization of some symbols, which makes StrictNotNil complain.
+{.push warning[StrictNotNil]: off.}  # See https://github.com/nim-lang/Nim/issues/21401
+
+
+## Default seq implementation used by Nim's core.
+type
+  NimSeqPayloadBase = object
+    cap: int
+
+  NimSeqPayload[T] = object
+    cap: int
+    data: UncheckedArray[T]
+
+  NimSeqV2*[T] = object # \
+    # if you change this implementation, also change seqs_v2_reimpl.nim!
+    len: int
+    p: ptr NimSeqPayload[T]
+
+  NimRawSeq = object
+    len: int
+    p: pointer
+
+const nimSeqVersion {.core.} = 2
+
+# XXX make code memory safe for overflows in '*'
+
+proc newSeqPayload(cap, elemSize, elemAlign: int): pointer {.compilerRtl, raises: [].} =
+  # we have to use type erasure here as Nim does not support generic
+  # compilerProcs. Oh well, this will all be inlined anyway.
+  if cap > 0:
+    var p = cast[ptr NimSeqPayloadBase](alignedAlloc0(align(sizeof(NimSeqPayloadBase), elemAlign) + cap * elemSize, elemAlign))
+    p.cap = cap
+    result = p
+  else:
+    result = nil
+
+proc newSeqPayloadUninit(cap, elemSize, elemAlign: int): pointer {.compilerRtl, raises: [].} =
+  # Used in `newSeqOfCap()`.
+  if cap > 0:
+    var p = cast[ptr NimSeqPayloadBase](alignedAlloc(align(sizeof(NimSeqPayloadBase), elemAlign) + cap * elemSize, elemAlign))
+    p.cap = cap
+    result = p
+  else:
+    result = nil
+
+template `+!`(p: pointer, s: int): pointer =
+  cast[pointer](cast[int](p) +% s)
+
+template `-!`(p: pointer, s: int): pointer =
+  cast[pointer](cast[int](p) -% s)
+
+proc prepareSeqAdd(len: int; p: pointer; addlen, elemSize, elemAlign: int): pointer {.
+    noSideEffect, tags: [], raises: [], compilerRtl.} =
+  {.noSideEffect.}:
+    let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
+    if addlen <= 0:
+      result = p
+    elif p == nil:
+      result = newSeqPayload(len+addlen, elemSize, elemAlign)
+    else:
+      # Note: this means we cannot support things that have internal pointers as
+      # they get reallocated here. This needs to be documented clearly.
+      var p = cast[ptr NimSeqPayloadBase](p)
+      let oldCap = p.cap and not strlitFlag
+      let newCap = max(resize(oldCap), len+addlen)
+      var q: ptr NimSeqPayloadBase
+      if (p.cap and strlitFlag) == strlitFlag:
+        q = cast[ptr NimSeqPayloadBase](alignedAlloc(headerSize + elemSize * newCap, elemAlign))
+        copyMem(q +! headerSize, p +! headerSize, len * elemSize)
+      else:
+        let oldSize = headerSize + elemSize * oldCap
+        let newSize = headerSize + elemSize * newCap
+        q = cast[ptr NimSeqPayloadBase](alignedRealloc(p, oldSize, newSize, elemAlign))
+
+      zeroMem(q +! headerSize +! len * elemSize, addlen * elemSize)
+      q.cap = newCap
+      result = q
+
+proc zeroNewElements(len: int; q: pointer; addlen, elemSize, elemAlign: int) {.
+    noSideEffect, tags: [], raises: [], compilerRtl.} =
+  {.noSideEffect.}:
+    let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
+    zeroMem(q +! headerSize +! len * elemSize, addlen * elemSize)
+
+proc prepareSeqAddUninit(len: int; p: pointer; addlen, elemSize, elemAlign: int): pointer {.
+    noSideEffect, tags: [], raises: [], compilerRtl.} =
+  {.noSideEffect.}:
+    let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
+    if addlen <= 0:
+      result = p
+    elif p == nil:
+      result = newSeqPayloadUninit(len+addlen, elemSize, elemAlign)
+    else:
+      # Note: this means we cannot support things that have internal pointers as
+      # they get reallocated here. This needs to be documented clearly.
+      var p = cast[ptr NimSeqPayloadBase](p)
+      let oldCap = p.cap and not strlitFlag
+      let newCap = max(resize(oldCap), len+addlen)
+      if (p.cap and strlitFlag) == strlitFlag:
+        var q = cast[ptr NimSeqPayloadBase](alignedAlloc(headerSize + elemSize * newCap, elemAlign))
+        copyMem(q +! headerSize, p +! headerSize, len * elemSize)
+        q.cap = newCap
+        result = q
+      else:
+        let oldSize = headerSize + elemSize * oldCap
+        let newSize = headerSize + elemSize * newCap
+        var q = cast[ptr NimSeqPayloadBase](alignedRealloc(p, oldSize, newSize, elemAlign))
+        q.cap = newCap
+        result = q
+
+proc shrink*[T](x: var seq[T]; newLen: Natural) {.tags: [], raises: [].} =
+  when nimvm:
+    {.cast(tags: []).}:
+      setLen(x, newLen)
+  else:
+    #sysAssert newLen <= x.len, "invalid newLen parameter for 'shrink'"
+    when not supportsCopyMem(T):
+      for i in countdown(x.len - 1, newLen):
+        reset x[i]
+    # XXX This is wrong for const seqs that were moved into 'x'!
+    {.noSideEffect.}:
+      cast[ptr NimSeqV2[T]](addr x).len = newLen
+
+proc grow*[T](x: var seq[T]; newLen: Natural; value: T) {.nodestroy.} =
+  let oldLen = x.len
+  #sysAssert newLen >= x.len, "invalid newLen parameter for 'grow'"
+  if newLen <= oldLen: return
+  var xu = cast[ptr NimSeqV2[T]](addr x)
+  if xu.p == nil or (xu.p.cap and not strlitFlag) < newLen:
+    xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newLen - oldLen, sizeof(T), alignof(T)))
+  xu.len = newLen
+  for i in oldLen .. newLen-1:
+    wasMoved(xu.p.data[i])
+    `=copy`(xu.p.data[i], value)
+
+proc add*[T](x: var seq[T]; y: sink T) {.magic: "AppendSeqElem", noSideEffect, nodestroy.} =
+  ## Generic proc for adding a data item `y` to a container `x`.
+  ##
+  ## For containers that have an order, `add` means *append*. New generic
+  ## containers should also call their adding proc `add` for consistency.
+  ## Generic code becomes much easier to write if the Nim naming scheme is
+  ## respected.
+  {.cast(noSideEffect).}:
+    let oldLen = x.len
+    var xu = cast[ptr NimSeqV2[T]](addr x)
+    if xu.p == nil or (xu.p.cap and not strlitFlag) < oldLen+1:
+      xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, 1, sizeof(T), alignof(T)))
+    xu.len = oldLen+1
+    # .nodestroy means `xu.p.data[oldLen] = value` is compiled into a
+    # copyMem(). This is fine as we know by construction that
+    # in `xu.p.data[oldLen]` there is nothing to destroy.
+    # We also save the `wasMoved + destroy` pair for the sink parameter.
+    xu.p.data[oldLen] = y
+
+proc setLen[T](s: var seq[T], newlen: Natural) {.nodestroy.} =
+  {.noSideEffect.}:
+    if newlen < s.len:
+      shrink(s, newlen)
+    else:
+      let oldLen = s.len
+      if newlen <= oldLen: return
+      var xu = cast[ptr NimSeqV2[T]](addr s)
+      if xu.p == nil or (xu.p.cap and not strlitFlag) < newlen:
+        xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newlen - oldLen, sizeof(T), alignof(T)))
+      xu.len = newlen
+      for i in oldLen..<newlen:
+        xu.p.data[i] = default(T)
+
+proc newSeq[T](s: var seq[T], len: Natural) =
+  shrink(s, 0)
+  setLen(s, len)
+
+proc sameSeqPayload(x: pointer, y: pointer): bool {.compilerRtl, inl.} =
+  result = cast[ptr NimRawSeq](x)[].p == cast[ptr NimRawSeq](y)[].p
+
+
+func capacity*[T](self: seq[T]): int {.inline.} =
+  ## Returns the current capacity of the seq.
+  # See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var lst = newSeqOfCap[string](cap = 42)
+    lst.add "Nim"
+    assert lst.capacity == 42
+
+  let sek = cast[ptr NimSeqV2[T]](unsafeAddr self)
+  result = if sek.p != nil: sek.p.cap and not strlitFlag else: 0
+
+func setLenUninit*[T](s: var seq[T], newlen: Natural) {.nodestroy.} =
+  ## Sets the length of seq `s` to `newlen`. `T` may be any sequence type.
+  ## New slots will not be initialized.
+  ##
+  ## If the current length is greater than the new length,
+  ## `s` will be truncated.
+  ##   ```nim
+  ##   var x = @[10, 20]
+  ##   x.setLenUninit(5)
+  ##   x[4] = 50
+  ##   assert x[4] == 50
+  ##   x.setLenUninit(1)
+  ##   assert x == @[10]
+  ##   ```
+  {.noSideEffect.}:
+    if newlen < s.len:
+      shrink(s, newlen)
+    else:
+      let oldLen = s.len
+      if newlen <= oldLen: return
+      var xu = cast[ptr NimSeqV2[T]](addr s)
+      if xu.p == nil or (xu.p.cap and not strlitFlag) < newlen:
+        xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newlen - oldLen, sizeof(T), alignof(T)))
+      xu.len = newlen
+
+{.pop.}  # See https://github.com/nim-lang/Nim/issues/21401
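A hedged usage sketch for the exported helpers above (`shrink`, `grow`, `capacity`, `setLenUninit`); it assumes compilation with `--mm:arc` or `--mm:orc`, where this seq implementation is active:

```nim
var s = @[1, 2, 3, 4]
s.shrink(2)
assert s == @[1, 2]

s.grow(5, 9)                   # extend to length 5, filling new slots with 9
assert s == @[1, 2, 9, 9, 9]

var buf = newSeqOfCap[int](8)
assert buf.capacity == 8
buf.setLenUninit(4)            # slots 0..3 now exist but are left uninitialized
buf[0] = 42
```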
diff --git a/lib/system/seqs_v2_reimpl.nim b/lib/system/seqs_v2_reimpl.nim
new file mode 100644
index 000000000..09b7e7ac4
--- /dev/null
+++ b/lib/system/seqs_v2_reimpl.nim
@@ -0,0 +1,24 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2020 Nim contributors
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+type
+  NimSeqPayloadReimpl = object
+    cap: int
+    data: pointer
+
+  NimSeqV2Reimpl = object
+    len: int
+    p: ptr NimSeqPayloadReimpl
+
+template frees(s: NimSeqV2Reimpl) =
+  if s.p != nil and (s.p.cap and strlitFlag) != strlitFlag:
+    when compileOption("threads"):
+      deallocShared(s.p)
+    else:
+      dealloc(s.p)
\ No newline at end of file
diff --git a/lib/system/setops.nim b/lib/system/setops.nim
new file mode 100644
index 000000000..67aa3097a
--- /dev/null
+++ b/lib/system/setops.nim
@@ -0,0 +1,89 @@
+func incl*[T](x: var set[T], y: T) {.magic: "Incl".} =
+  ## Includes element `y` in the set `x`.
+  ##
+  ## This is the same as `x = x + {y}`, but it might be more efficient.
+  runnableExamples:
+    var a = {1, 3, 5}
+    a.incl(2)
+    assert a == {1, 2, 3, 5}
+    a.incl(4)
+    assert a == {1, 2, 3, 4, 5}
+
+when not defined(nimHasCallsitePragma):
+  {.pragma: callsite.}
+
+template incl*[T](x: var set[T], y: set[T]) {.callsite.} =
+  ## Includes the set `y` in the set `x`.
+  runnableExamples:
+    var a = {1, 3, 5, 7}
+    var b = {4, 5, 6}
+    a.incl(b)
+    assert a == {1, 3, 4, 5, 6, 7}
+  x = x + y
+
+func excl*[T](x: var set[T], y: T) {.magic: "Excl".} =
+  ## Excludes element `y` from the set `x`.
+  ##
+  ## This is the same as `x = x - {y}`, but it might be more efficient.
+  runnableExamples:
+    var b = {2, 3, 5, 6, 12, 54}
+    b.excl(5)
+    assert b == {2, 3, 6, 12, 54}
+
+template excl*[T](x: var set[T], y: set[T]) {.callsite.} =
+  ## Excludes the set `y` from the set `x`.
+  runnableExamples:
+    var a = {1, 3, 5, 7}
+    var b = {3, 4, 5}
+    a.excl(b) 
+    assert a == {1, 7}
+  x = x - y
+
+func card*[T](x: set[T]): int {.magic: "Card".} =
+  ## Returns the cardinality of the set `x`, i.e. the number of elements
+  ## in the set.
+  runnableExamples:
+    var a = {1, 3, 5, 7}
+    assert card(a) == 4
+    var b = {1, 3, 5, 7, 5}
+    assert card(b) == 4 # repeated 5 doesn't count
+
+func len*[T](x: set[T]): int {.magic: "Card".}
+  ## An alias for `card(x)`.
+
+
+func `*`*[T](x, y: set[T]): set[T] {.magic: "MulSet".} =
+  ## This operator computes the intersection of two sets.
+  runnableExamples:
+    assert {1, 2, 3} * {2, 3, 4} == {2, 3}
+
+func `+`*[T](x, y: set[T]): set[T] {.magic: "PlusSet".} =
+  ## This operator computes the union of two sets.
+  runnableExamples:
+    assert {1, 2, 3} + {2, 3, 4} == {1, 2, 3, 4}
+
+func `-`*[T](x, y: set[T]): set[T] {.magic: "MinusSet".} =
+  ## This operator computes the difference of two sets.
+  runnableExamples:
+    assert {1, 2, 3} - {2, 3, 4} == {1}
+
+func contains*[T](x: set[T], y: T): bool {.magic: "InSet".} =
+  ## One should overload this proc if one wants to overload the `in` operator.
+  ##
+  ## The parameters are in reverse order! `a in b` is a template for
+  ## `contains(b, a)`.
+  ## This is because the unification algorithm that Nim uses for overload
+  ## resolution works from left to right.
+  ## But for the `in` operator that would be the wrong direction for this
+  ## piece of code:
+  runnableExamples:
+    var s: set[range['a'..'z']] = {'a'..'c'}
+    assert s.contains('c')
+    assert 'b' in s
+    assert 'd' notin s
+    assert set['a'..'z'] is set[range['a'..'z']]
+  ## If `in` had been declared as `[T](elem: T, s: set[T])` then `T` would
+  ## have been bound to `char`. But `s` is not compatible to type
+  ## `set[char]`! The solution is to bind `T` to `range['a'..'z']`. This
+  ## is achieved by reversing the parameters for `contains`; `in` then
+  ## passes its arguments in reverse order.
diff --git a/lib/system/sets.nim b/lib/system/sets.nim
index 53d222468..97431c296 100644
--- a/lib/system/sets.nim
+++ b/lib/system/sets.nim
@@ -9,21 +9,20 @@
 
 # set handling
 
-type
-  NimSet = array[0..4*2048-1, uint8]
-{.deprecated: [TNimSet: NimSet].}
 
-proc countBits32(n: int32): int {.compilerproc.} =
-  var v = n
-  v = v -% ((v shr 1'i32) and 0x55555555'i32)
-  v = (v and 0x33333333'i32) +% ((v shr 2'i32) and 0x33333333'i32)
-  result = ((v +% (v shr 4'i32) and 0xF0F0F0F'i32) *% 0x1010101'i32) shr 24'i32
+proc cardSetImpl(s: ptr UncheckedArray[uint8], len: int): int {.inline.} =
+  var i = 0
+  result = 0
+  var num = 0'u64
+  when defined(x86) or defined(amd64):
+    while i < len - 8:
+      copyMem(addr num, addr s[i], 8)
+      inc(result, countBits64(num))
+      inc(i, 8)
 
-proc countBits64(n: int64): int {.compilerproc.} =
-  result = countBits32(toU32(n and 0xffffffff'i64)) +
-           countBits32(toU32(n shr 32'i64))
+  while i < len:
+    inc(result, countBits32(uint32(s[i])))
+    inc(i, 1)
 
-proc cardSet(s: NimSet, len: int): int {.compilerproc.} =
-  result = 0
-  for i in countup(0, len-1):
-    inc(result, countBits32(int32(s[i])))
+proc cardSet(s: ptr UncheckedArray[uint8], len: int): int {.compilerproc, inline.} =
+  result = cardSetImpl(s, len)
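The rewritten `cardSet` above popcounts the set's backing bytes, 8 bytes at a time on x86/amd64 and byte by byte otherwise. A small sketch of the same idea in plain Nim, assuming the usual bit-vector layout of built-in sets; `popcount8` and `cardByBytes` are hypothetical helpers, not the runtime's `countBits32`/`countBits64`:

proc popcount8(x: uint8): int =
  var v = x
  while v != 0'u8:
    inc result
    v = v and (v - 1'u8)   # clear the lowest set bit

proc cardByBytes(bytes: openArray[uint8]): int =
  for b in bytes:
    inc result, popcount8(b)

# {1, 3, 5} over the range 0..7 occupies a single byte: 0b0010_1010
assert cardByBytes([0b0010_1010'u8]) == 3
assert card({1, 3, 5}) == 3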
diff --git a/lib/system/stacktraces.nim b/lib/system/stacktraces.nim
new file mode 100644
index 000000000..42be9d94f
--- /dev/null
+++ b/lib/system/stacktraces.nim
@@ -0,0 +1,83 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Additional code for customizable stack traces. Unstable API, for internal
+# usage only.
+
+const
+  reraisedFromBegin* = -10
+  reraisedFromEnd* = -100
+  maxStackTraceLines* = 128
+
+when defined(nimStackTraceOverride):
+  ## Procedure types for overriding the default stack trace.
+  type
+    cuintptr_t* {.importc: "uintptr_t", nodecl.} = uint
+      ## This is the same as the type `uintptr_t` in C.
+
+    StackTraceOverrideGetTracebackProc* = proc (): string {.
+      nimcall, gcsafe, raises: [], tags: [], noinline.}
+    StackTraceOverrideGetProgramCountersProc* = proc (maxLength: cint): seq[cuintptr_t] {.
+      nimcall, gcsafe, raises: [], tags: [], noinline.}
+    StackTraceOverrideGetDebuggingInfoProc* =
+      proc (programCounters: seq[cuintptr_t], maxLength: cint): seq[StackTraceEntry] {.
+        nimcall, gcsafe, raises: [], tags: [], noinline.}
+
+  # Default procedures (not normally used, because people opting in on this
+  # override are supposed to register their own versions).
+  var
+    stackTraceOverrideGetTraceback: StackTraceOverrideGetTracebackProc =
+      proc (): string {.nimcall, gcsafe, raises: [], tags: [], noinline.} =
+        discard
+        #result = "Stack trace override procedure not registered.\n"
+    stackTraceOverrideGetProgramCounters: StackTraceOverrideGetProgramCountersProc =
+      proc (maxLength: cint): seq[cuintptr_t] {.nimcall, gcsafe, raises: [], tags: [], noinline.} =
+        discard
+    stackTraceOverrideGetDebuggingInfo: StackTraceOverrideGetDebuggingInfoProc =
+      proc (programCounters: seq[cuintptr_t], maxLength: cint): seq[StackTraceEntry] {.
+        nimcall, gcsafe, raises: [], tags: [], noinline.} =
+          discard
+
+  # Custom procedure registration.
+  proc registerStackTraceOverride*(overrideProc: StackTraceOverrideGetTracebackProc) =
+    ## Override the default stack trace inside rawWriteStackTrace() with your
+    ## own procedure.
+    stackTraceOverrideGetTraceback = overrideProc
+  proc registerStackTraceOverrideGetProgramCounters*(overrideProc: StackTraceOverrideGetProgramCountersProc) =
+    stackTraceOverrideGetProgramCounters = overrideProc
+  proc registerStackTraceOverrideGetDebuggingInfo*(overrideProc: StackTraceOverrideGetDebuggingInfoProc) =
+    stackTraceOverrideGetDebuggingInfo = overrideProc
+
+  # Custom stack trace manipulation.
+  proc auxWriteStackTraceWithOverride*(s: var string) =
+    add(s, stackTraceOverrideGetTraceback())
+
+  proc auxWriteStackTraceWithOverride*(s: var seq[StackTraceEntry]) =
+    let programCounters = stackTraceOverrideGetProgramCounters(maxStackTraceLines)
+    if s.len == 0:
+      s = newSeqOfCap[StackTraceEntry](programCounters.len)
+    for programCounter in programCounters:
+      s.add(StackTraceEntry(programCounter: cast[uint](programCounter)))
+
+  # We may have more stack trace lines in the output, due to inlined procedures.
+  proc addDebuggingInfo*(s: seq[StackTraceEntry]): seq[StackTraceEntry] =
+    var programCounters: seq[cuintptr_t]
+    # We process program counters in groups from complete stack traces, because
+    # we have logic that keeps track of certain functions being inlined or not.
+    for entry in s:
+      if entry.procname.isNil and entry.programCounter != 0:
+        programCounters.add(cast[cuintptr_t](entry.programCounter))
+      elif entry.procname.isNil and (entry.line == reraisedFromBegin or entry.line == reraisedFromEnd):
+        result.add(stackTraceOverrideGetDebuggingInfo(programCounters, maxStackTraceLines))
+        programCounters = @[]
+        result.add(entry)
+      else:
+        result.add(entry)
+    if programCounters.len > 0:
+      result.add(stackTraceOverrideGetDebuggingInfo(programCounters, maxStackTraceLines))
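A minimal registration sketch for the override hooks above, assuming the program is built with `-d:nimStackTraceOverride`; the override body is a placeholder rather than a real unwinder (wrappers such as the libbacktrace package register a real one):

when defined(nimStackTraceOverride):
  proc myTraceback(): string {.nimcall, gcsafe, raises: [], tags: [], noinline.} =
    # a real override would walk the stack here
    result = "custom traceback\n"

  registerStackTraceOverride(myTraceback)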
diff --git a/lib/system/strmantle.nim b/lib/system/strmantle.nim
new file mode 100644
index 000000000..89046253b
--- /dev/null
+++ b/lib/system/strmantle.nim
@@ -0,0 +1,263 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2018 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Compilerprocs for strings that do not depend on the string implementation.
+
+import std/private/digitsutils
+
+
+proc cmpStrings(a, b: string): int {.inline, compilerproc.} =
+  let alen = a.len
+  let blen = b.len
+  let minlen = min(alen, blen)
+  if minlen > 0:
+    result = c_memcmp(unsafeAddr a[0], unsafeAddr b[0], cast[csize_t](minlen)).int
+    if result == 0:
+      result = alen - blen
+  else:
+    result = alen - blen
+
+proc leStrings(a, b: string): bool {.inline, compilerproc.} =
+  # required by upcoming backends (NIR).
+  cmpStrings(a, b) <= 0
+
+proc ltStrings(a, b: string): bool {.inline, compilerproc.} =
+  # required by upcoming backends (NIR).
+  cmpStrings(a, b) < 0
+
+proc eqStrings(a, b: string): bool {.inline, compilerproc.} =
+  let alen = a.len
+  let blen = b.len
+  if alen == blen:
+    if alen == 0: return true
+    return equalMem(unsafeAddr(a[0]), unsafeAddr(b[0]), alen)
+
+proc hashString(s: string): int {.compilerproc.} =
+  # the compiler needs exactly the same hash function!
+  # this used to be used for efficient generation of string case statements
+  var h = 0'u
+  for i in 0..len(s)-1:
+    h = h + uint(s[i])
+    h = h + h shl 10
+    h = h xor (h shr 6)
+  h = h + h shl 3
+  h = h xor (h shr 11)
+  h = h + h shl 15
+  result = cast[int](h)
+
+proc eqCstrings(a, b: cstring): bool {.inline, compilerproc.} =
+  if pointer(a) == pointer(b): result = true
+  elif a.isNil or b.isNil: result = false
+  else: result = c_strcmp(a, b) == 0
+
+proc hashCstring(s: cstring): int {.compilerproc.} =
+  # the compiler needs exactly the same hash function!
+  # this used to be used for efficient generation of cstring case statements
+  if s.isNil: return 0
+  var h: uint = 0
+  var i = 0
+  while true:
+    let c = s[i]
+    if c == '\0': break
+    h = h + uint(c)
+    h = h + h shl 10
+    h = h xor (h shr 6)
+    inc i
+  h = h + h shl 3
+  h = h xor (h shr 11)
+  h = h + h shl 15
+  result = cast[int](h)
+
+proc c_strtod(buf: cstring, endptr: ptr cstring): float64 {.
+  importc: "strtod", header: "<stdlib.h>", noSideEffect.}
+
+const
+  IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
+  powtens =  [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
+              1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+              1e20, 1e21, 1e22]
+
+
+{.push staticBoundChecks: off.}
+
+proc nimParseBiggestFloat(s: openArray[char], number: var BiggestFloat,
+                         ): int {.compilerproc.} =
+  # This routine attempts to parse floats that can be parsed quickly,
+  # i.e. whose integer part fits inside a 53-bit integer and whose
+  # real exponent is <= 22. If the float doesn't follow these
+  # restrictions, it is transformed into the form
+  #  INTEGER * 10 ^ exponent and the work is left to the standard `strtod()`.
+  # This avoids the problems of decimal character portability.
+  # see: http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
+  var
+    i = 0
+    sign = 1.0
+    kdigits, fdigits = 0
+    exponent = 0
+    integer = uint64(0)
+    fracExponent = 0
+    expSign = 1
+    firstDigit = -1
+    hasSign = false
+
+  # Sign?
+  if i < s.len and (s[i] == '+' or s[i] == '-'):
+    hasSign = true
+    if s[i] == '-':
+      sign = -1.0
+    inc(i)
+
+  # NaN?
+  if i+2 < s.len and (s[i] == 'N' or s[i] == 'n'):
+    if s[i+1] == 'A' or s[i+1] == 'a':
+      if s[i+2] == 'N' or s[i+2] == 'n':
+        if i+3 >= s.len or s[i+3] notin IdentChars:
+          number = NaN
+          return i+3
+    return 0
+
+  # Inf?
+  if i+2 < s.len and (s[i] == 'I' or s[i] == 'i'):
+    if s[i+1] == 'N' or s[i+1] == 'n':
+      if s[i+2] == 'F' or s[i+2] == 'f':
+        if i+3 >= s.len or s[i+3] notin IdentChars:
+          number = Inf*sign
+          return i+3
+    return 0
+
+  if i < s.len and s[i] in {'0'..'9'}:
+    firstDigit = (s[i].ord - '0'.ord)
+  # Integer part?
+  while i < s.len and s[i] in {'0'..'9'}:
+    inc(kdigits)
+    integer = integer * 10'u64 + (s[i].ord - '0'.ord).uint64
+    inc(i)
+    while i < s.len and s[i] == '_': inc(i)
+
+  # Fractional part?
+  if i < s.len and s[i] == '.':
+    inc(i)
+    # if there is no integer part, skip leading zeros
+    if kdigits <= 0:
+      while i < s.len and s[i] == '0':
+        inc(fracExponent)
+        inc(i)
+        while i < s.len and s[i] == '_': inc(i)
+
+    if firstDigit == -1 and i < s.len and s[i] in {'0'..'9'}:
+      firstDigit = (s[i].ord - '0'.ord)
+    # get fractional part
+    while i < s.len and s[i] in {'0'..'9'}:
+      inc(fdigits)
+      inc(fracExponent)
+      integer = integer * 10'u64 + (s[i].ord - '0'.ord).uint64
+      inc(i)
+      while i < s.len and s[i] == '_': inc(i)
+
+  # if there are no digits: return an error
+  if kdigits + fdigits <= 0 and
+     (i == 0 or # no char consumed (empty string).
+     (i == 1 and hasSign)): # or only '+' or '-'
+    return 0
+
+  if i+1 < s.len and s[i] in {'e', 'E'}:
+    inc(i)
+    if s[i] == '+' or s[i] == '-':
+      if s[i] == '-':
+        expSign = -1
+
+      inc(i)
+    if s[i] notin {'0'..'9'}:
+      return 0
+    while i < s.len and s[i] in {'0'..'9'}:
+      exponent = exponent * 10 + (ord(s[i]) - ord('0'))
+      inc(i)
+      while i < s.len and s[i] == '_': inc(i) # underscores are allowed and ignored
+
+  var realExponent = expSign*exponent - fracExponent
+  let expNegative = realExponent < 0
+  var absExponent = abs(realExponent)
+
+  # if exponent greater than can be represented: +/- zero or infinity
+  if absExponent > 999:
+    if integer == 0:
+      number = 0.0
+    elif expNegative:
+      number = 0.0*sign
+    else:
+      number = Inf*sign
+    return i
+
+  # if the integer part is representable in 53 bits: fast path
+  # the largest integer accepted here is 8999999999999999 (16 digits), below 1<<53 - 1
+  let digits = kdigits + fdigits
+  if digits <= 15 or (digits <= 16 and firstDigit <= 8):
+    # max float power of ten with set bits above the 53th bit is 10^22
+    if absExponent <= 22:
+      if expNegative:
+        number = sign * integer.float / powtens[absExponent]
+      else:
+        number = sign * integer.float * powtens[absExponent]
+      return i
+
+    # if the exponent is greater, try to fit the extra exponent above 22 by
+    # multiplying the integer part, if there is space left.
+    let slop = 15 - kdigits - fdigits
+    if absExponent <= 22 + slop and not expNegative:
+      number = sign * integer.float * powtens[slop] * powtens[absExponent-slop]
+      return i
+
+  # if failed: slow path with strtod.
+  var t: array[500, char] # flaviu says: 325 is the longest reasonable literal
+  var ti = 0
+  let maxlen = t.high - "e+000".len # reserve enough space for exponent
+
+  let endPos = i
+  result = endPos
+  i = 0
+  # re-parse without error checking; any error should have been handled by the code above.
+  if i < endPos and s[i] == '.': i.inc
+  while i < endPos and s[i] in {'0'..'9','+','-'}:
+    if ti < maxlen:
+      t[ti] = s[i]; inc(ti)
+    inc(i)
+    while i < endPos and s[i] in {'.', '_'}: # skip underscore and decimal point
+      inc(i)
+
+  # insert exponent
+  t[ti] = 'E'
+  inc(ti)
+  t[ti] = if expNegative: '-' else: '+'
+  inc(ti, 4)
+
+  # insert adjusted exponent
+  t[ti-1] = ('0'.ord + absExponent mod 10).char
+  absExponent = absExponent div 10
+  t[ti-2] = ('0'.ord + absExponent mod 10).char
+  absExponent = absExponent div 10
+  t[ti-3] = ('0'.ord + absExponent mod 10).char
+  number = c_strtod(cast[cstring](addr t), nil)
+
+{.pop.} # staticBoundChecks
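A worked example of the fast path above: for "123.456e2" the loops yield kdigits = 3, fdigits = 3, integer = 123456, fracExponent = 3 and exponent = 2, so realExponent = 2 - 3 = -1; with only 6 digits and |realExponent| <= 22 the result is computed as 123456.0 / 10.0 = 12345.6 without ever calling `strtod`. The usual user-facing way to exercise this logic is `parseFloat`:

import std/strutils
assert parseFloat("123.456e2") == 12345.6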
+
+proc nimBoolToStr(x: bool): string {.compilerRtl.} =
+  return if x: "true" else: "false"
+
+proc nimCharToStr(x: char): string {.compilerRtl.} =
+  result = newString(1)
+  result[0] = x
+
+when defined(gcDestructors):
+  proc GC_getStatistics*(): string =
+    result = "[GC] total memory: "
+    result.addInt getTotalMem()
+    result.add "\n[GC] occupied memory: "
+    result.addInt getOccupiedMem()
+    result.add '\n'
+    #"[GC] cycle collections: " & $gch.stat.cycleCollections & "\n" &
diff --git a/lib/system/strs_v2.nim b/lib/system/strs_v2.nim
new file mode 100644
index 000000000..404b4f78d
--- /dev/null
+++ b/lib/system/strs_v2.nim
@@ -0,0 +1,224 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2017 Nim contributors
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Default new string implementation used by Nim's core.
+
+type
+  NimStrPayloadBase = object
+    cap: int
+
+  NimStrPayload {.core.} = object
+    cap: int
+    data: UncheckedArray[char]
+
+  NimStringV2 {.core.} = object
+    len: int
+    p: ptr NimStrPayload ## can be nil if len == 0.
+
+const nimStrVersion {.core.} = 2
+
+template isLiteral(s): bool = (s.p == nil) or (s.p.cap and strlitFlag) == strlitFlag
+
+template contentSize(cap): int = cap + 1 + sizeof(NimStrPayloadBase)
+
+template frees(s) =
+  if not isLiteral(s):
+    when compileOption("threads"):
+      deallocShared(s.p)
+    else:
+      dealloc(s.p)
+
+template allocPayload(newLen: int): ptr NimStrPayload =
+  when compileOption("threads"):
+    cast[ptr NimStrPayload](allocShared(contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](alloc(contentSize(newLen)))
+
+template allocPayload0(newLen: int): ptr NimStrPayload =
+  when compileOption("threads"):
+    cast[ptr NimStrPayload](allocShared0(contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](alloc0(contentSize(newLen)))
+
+template reallocPayload(p: pointer, newLen: int): ptr NimStrPayload =
+  when compileOption("threads"):
+    cast[ptr NimStrPayload](reallocShared(p, contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](realloc(p, contentSize(newLen)))
+
+template reallocPayload0(p: pointer; oldLen, newLen: int): ptr NimStrPayload =
+  when compileOption("threads"):
+    cast[ptr NimStrPayload](reallocShared0(p, contentSize(oldLen), contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](realloc0(p, contentSize(oldLen), contentSize(newLen)))
+
+proc resize(old: int): int {.inline.} =
+  if old <= 0: result = 4
+  elif old <= high(int16): result = old * 2
+  else: result = old * 3 div 2 # for large arrays * 3/2 is better
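A small illustration of the growth policy implemented by `resize` above: capacities double until they exceed high(int16) and then grow by 3/2, keeping repeated appends amortized O(1) while avoiding excessive over-allocation for very large strings. Illustration only; it simply mirrors the proc above:

var cap = 0
for _ in 1 .. 5:
  cap = (if cap <= 0: 4 elif cap <= high(int16): cap * 2 else: cap * 3 div 2)
# cap grows 4, 8, 16, 32, 64
assert cap == 64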
+
+proc prepareAdd(s: var NimStringV2; addLen: int) {.compilerRtl.} =
+  let newLen = s.len + addLen
+  if isLiteral(s):
+    let oldP = s.p
+    # can't mutate a literal, so we need a fresh copy here:
+    s.p = allocPayload(newLen)
+    s.p.cap = newLen
+    if s.len > 0:
+      # we are about to append, so there is no need to copy the \0 terminator:
+      copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], min(s.len, newLen))
+    elif oldP == nil:
+      # In the case of `newString(0) & ""`, since `src.len == 0`, `appendString`
+      # will not set the `\0` terminator, so we set it here.
+      s.p.data[0] = '\0'
+  else:
+    let oldCap = s.p.cap and not strlitFlag
+    if newLen > oldCap:
+      let newCap = max(newLen, resize(oldCap))
+      s.p = reallocPayload(s.p, newCap)
+      s.p.cap = newCap
+      if newLen < newCap:
+        zeroMem(cast[pointer](addr s.p.data[newLen+1]), newCap - newLen)
+
+proc nimAddCharV1(s: var NimStringV2; c: char) {.compilerRtl, inl.} =
+  #if (s.p == nil) or (s.len+1 > s.p.cap and not strlitFlag):
+  prepareAdd(s, 1)
+  s.p.data[s.len] = c
+  inc s.len
+  s.p.data[s.len] = '\0'
+
+proc toNimStr(str: cstring, len: int): NimStringV2 {.compilerproc.} =
+  if len <= 0:
+    result = NimStringV2(len: 0, p: nil)
+  else:
+    var p = allocPayload(len)
+    p.cap = len
+    copyMem(unsafeAddr p.data[0], str, len+1)
+    result = NimStringV2(len: len, p: p)
+
+proc cstrToNimstr(str: cstring): NimStringV2 {.compilerRtl.} =
+  if str == nil: toNimStr(str, 0)
+  else: toNimStr(str, str.len)
+
+proc nimToCStringConv(s: NimStringV2): cstring {.compilerproc, nonReloadable, inline.} =
+  if s.len == 0: result = cstring""
+  else: result = cast[cstring](unsafeAddr s.p.data)
+
+proc appendString(dest: var NimStringV2; src: NimStringV2) {.compilerproc, inline.} =
+  if src.len > 0:
+    # also copy the \0 terminator:
+    copyMem(unsafeAddr dest.p.data[dest.len], unsafeAddr src.p.data[0], src.len+1)
+    inc dest.len, src.len
+
+proc appendChar(dest: var NimStringV2; c: char) {.compilerproc, inline.} =
+  dest.p.data[dest.len] = c
+  inc dest.len
+  dest.p.data[dest.len] = '\0'
+
+proc rawNewString(space: int): NimStringV2 {.compilerproc.} =
+  # this is also 'system.newStringOfCap'.
+  if space <= 0:
+    result = NimStringV2(len: 0, p: nil)
+  else:
+    var p = allocPayload(space)
+    p.cap = space
+    p.data[0] = '\0'
+    result = NimStringV2(len: 0, p: p)
+
+proc mnewString(len: int): NimStringV2 {.compilerproc.} =
+  if len <= 0:
+    result = NimStringV2(len: 0, p: nil)
+  else:
+    var p = allocPayload0(len)
+    p.cap = len
+    result = NimStringV2(len: len, p: p)
+
+proc setLengthStrV2(s: var NimStringV2, newLen: int) {.compilerRtl.} =
+  if newLen == 0:
+    discard "do not free the buffer here, pattern 's.setLen 0' is common for avoiding allocations"
+  else:
+    if isLiteral(s):
+      let oldP = s.p
+      s.p = allocPayload(newLen)
+      s.p.cap = newLen
+      if s.len > 0:
+        copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], min(s.len, newLen))
+        if newLen > s.len:
+          zeroMem(cast[pointer](addr s.p.data[s.len]), newLen - s.len + 1)
+        else:
+          s.p.data[newLen] = '\0'
+      else:
+        zeroMem(cast[pointer](addr s.p.data[0]), newLen + 1)
+    elif newLen > s.len:
+      let oldCap = s.p.cap and not strlitFlag
+      if newLen > oldCap:
+        let newCap = max(newLen, resize(oldCap))
+        s.p = reallocPayload0(s.p, oldCap, newCap)
+        s.p.cap = newCap
+
+    s.p.data[newLen] = '\0'
+  s.len = newLen
+
+proc nimAsgnStrV2(a: var NimStringV2, b: NimStringV2) {.compilerRtl.} =
+  if a.p == b.p and a.len == b.len: return
+  if isLiteral(b):
+    # we can shallow copy literals:
+    frees(a)
+    a.len = b.len
+    a.p = b.p
+  else:
+    if isLiteral(a) or (a.p.cap and not strlitFlag) < b.len:
+      # we have to allocate 'cap' here; consider
+      # 'let y = newStringOfCap(); var x = y'.
+      # On the other hand, these get turned into moves now.
+      frees(a)
+      a.p = allocPayload(b.len)
+      a.p.cap = b.len
+    a.len = b.len
+    copyMem(unsafeAddr a.p.data[0], unsafeAddr b.p.data[0], b.len+1)
+
+proc nimPrepareStrMutationImpl(s: var NimStringV2) =
+  let oldP = s.p
+  # can't mutate a literal, so we need a fresh copy here:
+  s.p = allocPayload(s.len)
+  s.p.cap = s.len
+  copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], s.len+1)
+
+proc nimPrepareStrMutationV2(s: var NimStringV2) {.compilerRtl, inl.} =
+  if s.p != nil and (s.p.cap and strlitFlag) == strlitFlag:
+    nimPrepareStrMutationImpl(s)
+
+proc prepareMutation*(s: var string) {.inline.} =
+  # string literals are "copy on write", so you need to call
+  # `prepareMutation` before modifying the string via `addr`.
+  {.cast(noSideEffect).}:
+    let s = unsafeAddr s
+    nimPrepareStrMutationV2(cast[ptr NimStringV2](s)[])
+
+proc nimAddStrV1(s: var NimStringV2; src: NimStringV2) {.compilerRtl, inl.} =
+  #if (s.p == nil) or (s.len+1 > s.p.cap and not strlitFlag):
+  prepareAdd(s, src.len)
+  appendString s, src
+
+proc nimDestroyStrV1(s: NimStringV2) {.compilerRtl, inl.} =
+  frees(s)
+
+proc nimStrAtLe(s: string; idx: int; ch: char): bool {.compilerRtl, inl.} =
+  result = idx < s.len and s[idx] <= ch
+
+func capacity*(self: string): int {.inline.} =
+  ## Returns the current capacity of the string.
+  # See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var str = newStringOfCap(cap = 42)
+    str.add "Nim"
+    assert str.capacity == 42
+
+  let str = cast[ptr NimStringV2](unsafeAddr self)
+  result = if str.p != nil: str.p.cap and not strlitFlag else: 0
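A minimal usage sketch of the copy-on-write behaviour described above, assuming the program is compiled with --mm:arc or --mm:orc so that this string implementation is active:

var s = "abc"        # still shares the literal's payload (strlitFlag set in cap)
prepareMutation(s)   # forces a private heap-allocated copy
let p = addr s[0]
p[] = 'A'            # safe now: we own the payload
assert s == "Abc"
assert s.capacity >= s.len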
diff --git a/lib/system/sysio.nim b/lib/system/sysio.nim
deleted file mode 100644
index 86b290230..000000000
--- a/lib/system/sysio.nim
+++ /dev/null
@@ -1,434 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2013 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-
-# Nim's standard IO library. It contains high-performance
-# routines for reading and writing data to (buffered) files or
-# TTYs.
-
-{.push debugger:off .} # the user does not want to trace a part
-                       # of the standard library!
-
-when defined(windows):
-  proc c_fdopen(filehandle: cint, mode: cstring): File {.
-    importc: "_fdopen", header: "<stdio.h>".}
-else:
-  proc c_fdopen(filehandle: cint, mode: cstring): File {.
-    importc: "fdopen", header: "<stdio.h>".}
-proc c_fputs(c: cstring, f: File): cint {.
-  importc: "fputs", header: "<stdio.h>", tags: [WriteIOEffect].}
-proc c_fgets(c: cstring, n: cint, f: File): cstring {.
-  importc: "fgets", header: "<stdio.h>", tags: [ReadIOEffect].}
-proc c_fgetc(stream: File): cint {.
-  importc: "fgetc", header: "<stdio.h>", tags: [ReadIOEffect].}
-proc c_ungetc(c: cint, f: File): cint {.
-  importc: "ungetc", header: "<stdio.h>", tags: [].}
-proc c_putc(c: cint, stream: File): cint {.
-  importc: "putc", header: "<stdio.h>", tags: [WriteIOEffect].}
-proc c_fflush(f: File): cint {.
-  importc: "fflush", header: "<stdio.h>".}
-proc c_fclose(f: File): cint {.
-  importc: "fclose", header: "<stdio.h>".}
-proc c_clearerr(f: File) {.
-  importc: "clearerr", header: "<stdio.h>".}
-proc c_feof(f: File): cint {.
-  importc: "feof", header: "<stdio.h>".}
-
-when not declared(c_fwrite):
-  proc c_fwrite(buf: pointer, size, n: csize, f: File): cint {.
-    importc: "fwrite", header: "<stdio.h>".}
-
-# C routine that is used here:
-proc c_fread(buf: pointer, size, n: csize, f: File): csize {.
-  importc: "fread", header: "<stdio.h>", tags: [ReadIOEffect].}
-when defined(windows):
-  when not defined(amd64):
-    proc c_fseek(f: File, offset: int64, whence: cint): cint {.
-      importc: "fseek", header: "<stdio.h>", tags: [].}
-    proc c_ftell(f: File): int64 {.
-      importc: "ftell", header: "<stdio.h>", tags: [].}
-  else:
-    proc c_fseek(f: File, offset: int64, whence: cint): cint {.
-      importc: "_fseeki64", header: "<stdio.h>", tags: [].}
-    proc c_ftell(f: File): int64 {.
-      importc: "_ftelli64", header: "<stdio.h>", tags: [].}
-else:
-  proc c_fseek(f: File, offset: int64, whence: cint): cint {.
-    importc: "fseeko", header: "<stdio.h>", tags: [].}
-  proc c_ftell(f: File): int64 {.
-    importc: "ftello", header: "<stdio.h>", tags: [].}
-proc c_ferror(f: File): cint {.
-  importc: "ferror", header: "<stdio.h>", tags: [].}
-proc c_setvbuf(f: File, buf: pointer, mode: cint, size: csize): cint {.
-  importc: "setvbuf", header: "<stdio.h>", tags: [].}
-
-proc raiseEIO(msg: string) {.noinline, noreturn.} =
-  sysFatal(IOError, msg)
-
-proc raiseEOF() {.noinline, noreturn.} =
-  sysFatal(EOFError, "EOF reached")
-
-proc checkErr(f: File) =
-  if c_ferror(f) != 0:
-    c_clearerr(f)
-    raiseEIO("Unknown IO Error")
-
-{.push stackTrace:off, profiler:off.}
-proc readBuffer(f: File, buffer: pointer, len: Natural): int =
-  result = c_fread(buffer, 1, len, f)
-  if result != len: checkErr(f)
-
-proc readBytes(f: File, a: var openArray[int8|uint8], start, len: Natural): int =
-  result = readBuffer(f, addr(a[start]), len)
-
-proc readChars(f: File, a: var openArray[char], start, len: Natural): int =
-  if (start + len) > len(a):
-    raiseEIO("buffer overflow: (start+len) > length of openarray buffer")
-  result = readBuffer(f, addr(a[start]), len)
-
-proc write(f: File, c: cstring) =
-  discard c_fputs(c, f)
-  checkErr(f)
-
-proc writeBuffer(f: File, buffer: pointer, len: Natural): int =
-  result = c_fwrite(buffer, 1, len, f)
-  checkErr(f)
-
-proc writeBytes(f: File, a: openArray[int8|uint8], start, len: Natural): int =
-  var x = cast[ptr UncheckedArray[int8]](a)
-  result = writeBuffer(f, addr(x[int(start)]), len)
-proc writeChars(f: File, a: openArray[char], start, len: Natural): int =
-  var x = cast[ptr UncheckedArray[int8]](a)
-  result = writeBuffer(f, addr(x[int(start)]), len)
-
-proc write(f: File, s: string) =
-  if writeBuffer(f, cstring(s), s.len) != s.len:
-    raiseEIO("cannot write string to file")
-{.pop.}
-
-when NoFakeVars:
-  when defined(windows):
-    const
-      IOFBF = cint(0)
-      IONBF = cint(4)
-  else:
-    # On all systems I could find, including Linux, Mac OS X, and the BSDs
-    const
-      IOFBF = cint(0)
-      IONBF = cint(2)
-else:
-  var
-    IOFBF {.importc: "_IOFBF", nodecl.}: cint
-    IONBF {.importc: "_IONBF", nodecl.}: cint
-
-const
-  BufSize = 4000
-
-proc close*(f: File) = discard c_fclose(f)
-proc readChar(f: File): char =
-  let x = c_fgetc(f)
-  if x < 0:
-    checkErr(f)
-    raiseEOF()
-  result = char(x)
-
-proc flushFile*(f: File) = discard c_fflush(f)
-proc getFileHandle*(f: File): FileHandle = c_fileno(f)
-
-proc readLine(f: File, line: var TaintedString): bool =
-  var pos = 0
-  var sp: cint = 80
-  # Use the currently reserved space for a first try
-  if line.string.isNil:
-    line = TaintedString(newStringOfCap(80))
-  else:
-    when not defined(nimscript):
-      sp = cint(cast[PGenericSeq](line.string).space)
-    line.string.setLen(sp)
-  while true:
-    # memset to \L so that we can tell how far fgets wrote, even on EOF, where
-    # fgets doesn't append an \L
-    c_memset(addr line.string[pos], '\L'.ord, sp)
-    var fgetsSuccess = c_fgets(addr line.string[pos], sp, f) != nil
-    if not fgetsSuccess: checkErr(f)
-    let m = c_memchr(addr line.string[pos], '\L'.ord, sp)
-    if m != nil:
-      # \l found: Could be our own or the one by fgets, in any case, we're done
-      var last = cast[ByteAddress](m) - cast[ByteAddress](addr line.string[0])
-      if last > 0 and line.string[last-1] == '\c':
-        line.string.setLen(last-1)
-        return fgetsSuccess
-        # We have to distinguish between two possible cases:
-        # \0\l\0 => line ending in a null character.
-        # \0\l\l => last line without newline, null was put there by fgets.
-      elif last > 0 and line.string[last-1] == '\0':
-        if last < pos + sp - 1 and line.string[last+1] != '\0':
-          dec last
-      line.string.setLen(last)
-      return fgetsSuccess
-    else:
-      # fgets will have inserted a null byte at the end of the string.
-      dec sp
-    # No \l found: Increase buffer and read more
-    inc pos, sp
-    sp = 128 # read in 128 bytes at a time
-    line.string.setLen(pos+sp)
-
-proc readLine(f: File): TaintedString =
-  result = TaintedString(newStringOfCap(80))
-  if not readLine(f, result): raiseEOF()
-
-proc write(f: File, i: int) =
-  when sizeof(int) == 8:
-    if c_fprintf(f, "%lld", i) < 0: checkErr(f)
-  else:
-    if c_fprintf(f, "%ld", i) < 0: checkErr(f)
-
-proc write(f: File, i: BiggestInt) =
-  when sizeof(BiggestInt) == 8:
-    if c_fprintf(f, "%lld", i) < 0: checkErr(f)
-  else:
-    if c_fprintf(f, "%ld", i) < 0: checkErr(f)
-
-proc write(f: File, b: bool) =
-  if b: write(f, "true")
-  else: write(f, "false")
-proc write(f: File, r: float32) =
-  if c_fprintf(f, "%.16g", r) < 0: checkErr(f)
-proc write(f: File, r: BiggestFloat) =
-  if c_fprintf(f, "%.16g", r) < 0: checkErr(f)
-
-proc write(f: File, c: char) = discard c_putc(cint(c), f)
-proc write(f: File, a: varargs[string, `$`]) =
-  for x in items(a): write(f, x)
-
-proc readAllBuffer(file: File): string =
-  # This proc is for File we want to read but don't know how many
-  # bytes we need to read before the buffer is empty.
-  result = ""
-  var buffer = newString(BufSize)
-  while true:
-    var bytesRead = readBuffer(file, addr(buffer[0]), BufSize)
-    if bytesRead == BufSize:
-      result.add(buffer)
-    else:
-      buffer.setLen(bytesRead)
-      result.add(buffer)
-      break
-
-proc rawFileSize(file: File): int64 =
-  # this does not raise an error opposed to `getFileSize`
-  var oldPos = c_ftell(file)
-  discard c_fseek(file, 0, 2) # seek the end of the file
-  result = c_ftell(file)
-  discard c_fseek(file, oldPos, 0)
-
-proc endOfFile(f: File): bool =
-  var c = c_fgetc(f)
-  discard c_ungetc(c, f)
-  return c < 0'i32
-  #result = c_feof(f) != 0
-
-proc readAllFile(file: File, len: int64): string =
-  # We acquire the filesize beforehand and hope it doesn't change.
-  # Speeds things up.
-  result = newString(len)
-  let bytes = readBuffer(file, addr(result[0]), len)
-  if endOfFile(file):
-    if bytes < len:
-      result.setLen(bytes)
-  else:
-    # We read all the bytes but did not reach the EOF
-    # Try to read it as a buffer
-    result.add(readAllBuffer(file))
-
-proc readAllFile(file: File): string =
-  var len = rawFileSize(file)
-  result = readAllFile(file, len)
-
-proc readAll(file: File): TaintedString =
-  # Separate handling needed because we need to buffer when we
-  # don't know the overall length of the File.
-  when declared(stdin):
-    let len = if file != stdin: rawFileSize(file) else: -1
-  else:
-    let len = rawFileSize(file)
-  if len > 0:
-    result = readAllFile(file, len).TaintedString
-  else:
-    result = readAllBuffer(file).TaintedString
-
-proc writeLn[Ty](f: File, x: varargs[Ty, `$`]) =
-  for i in items(x):
-    write(f, i)
-  write(f, "\n")
-
-proc writeLine[Ty](f: File, x: varargs[Ty, `$`]) =
-  for i in items(x):
-    write(f, i)
-  write(f, "\n")
-
-when declared(stdout):
-  proc rawEcho(x: string) {.inline, compilerproc.} = write(stdout, x)
-  proc rawEchoNL() {.inline, compilerproc.} = write(stdout, "\n")
-
-# interface to the C procs:
-
-include "system/widestrs"
-
-when defined(windows) and not defined(useWinAnsi):
-  when defined(cpp):
-    proc wfopen(filename, mode: WideCString): pointer {.
-      importcpp: "_wfopen((const wchar_t*)#, (const wchar_t*)#)", nodecl.}
-    proc wfreopen(filename, mode: WideCString, stream: File): File {.
-      importcpp: "_wfreopen((const wchar_t*)#, (const wchar_t*)#, #)", nodecl.}
-  else:
-    proc wfopen(filename, mode: WideCString): pointer {.
-      importc: "_wfopen", nodecl.}
-    proc wfreopen(filename, mode: WideCString, stream: File): File {.
-      importc: "_wfreopen", nodecl.}
-
-  proc fopen(filename, mode: cstring): pointer =
-    var f = newWideCString(filename)
-    var m = newWideCString(mode)
-    result = wfopen(f, m)
-
-  proc freopen(filename, mode: cstring, stream: File): File =
-    var f = newWideCString(filename)
-    var m = newWideCString(mode)
-    result = wfreopen(f, m, stream)
-
-else:
-  proc fopen(filename, mode: cstring): pointer {.importc: "fopen", noDecl.}
-  proc freopen(filename, mode: cstring, stream: File): File {.
-    importc: "freopen", nodecl.}
-
-const
-  FormatOpen: array[FileMode, string] = ["rb", "wb", "w+b", "r+b", "ab"]
-    #"rt", "wt", "w+t", "r+t", "at"
-    # we always use binary here as for Nim the OS line ending
-    # should not be translated.
-
-when defined(posix) and not defined(nimscript):
-  when defined(linux) and defined(amd64):
-    type
-      Mode {.importc: "mode_t", header: "<sys/types.h>".} = cint
-
-      # fillers ensure correct size & offsets
-      Stat {.importc: "struct stat",
-              header: "<sys/stat.h>", final, pure.} = object ## struct stat
-        filler_1: array[24, char]
-        st_mode: Mode        ## Mode of file
-        filler_2: array[144 - 24 - 4, char]
-
-    proc S_ISDIR(m: Mode): bool =
-      ## Test for a directory.
-      (m and 0o170000) == 0o40000
-
-  else:
-    type
-      Mode {.importc: "mode_t", header: "<sys/types.h>".} = cint
-
-      Stat {.importc: "struct stat",
-               header: "<sys/stat.h>", final, pure.} = object ## struct stat
-        st_mode: Mode        ## Mode of file
-
-    proc S_ISDIR(m: Mode): bool {.importc, header: "<sys/stat.h>".}
-      ## Test for a directory.
-
-  proc c_fstat(a1: cint, a2: var Stat): cint {.
-    importc: "fstat", header: "<sys/stat.h>".}
-
-proc open(f: var File, filename: string,
-          mode: FileMode = fmRead,
-          bufSize: int = -1): bool =
-  var p: pointer = fopen(filename, FormatOpen[mode])
-  if p != nil:
-    when defined(posix) and not defined(nimscript):
-      # How `fopen` handles opening a directory is not specified in ISO C and
-      # POSIX. We do not want to handle directories as regular files that can
-      # be opened.
-      var f2 = cast[File](p)
-      var res: Stat
-      if c_fstat(getFileHandle(f2), res) >= 0'i32 and S_ISDIR(res.st_mode):
-        close(f2)
-        return false
-    result = true
-    f = cast[File](p)
-    if bufSize > 0 and bufSize <= high(cint).int:
-      discard c_setvbuf(f, nil, IOFBF, bufSize.cint)
-    elif bufSize == 0:
-      discard c_setvbuf(f, nil, IONBF, 0)
-
-proc reopen(f: File, filename: string, mode: FileMode = fmRead): bool =
-  var p: pointer = freopen(filename, FormatOpen[mode], f)
-  result = p != nil
-
-proc open(f: var File, filehandle: FileHandle, mode: FileMode): bool =
-  f = c_fdopen(filehandle, FormatOpen[mode])
-  result = f != nil
-
-proc setFilePos(f: File, pos: int64, relativeTo: FileSeekPos = fspSet) =
-  if c_fseek(f, pos, cint(relativeTo)) != 0:
-    raiseEIO("cannot set file position")
-
-proc getFilePos(f: File): int64 =
-  result = c_ftell(f)
-  if result < 0: raiseEIO("cannot retrieve file position")
-
-proc getFileSize(f: File): int64 =
-  var oldPos = getFilePos(f)
-  discard c_fseek(f, 0, 2) # seek the end of the file
-  result = getFilePos(f)
-  setFilePos(f, oldPos)
-
-proc readFile(filename: string): TaintedString =
-  var f: File
-  if open(f, filename):
-    try:
-      result = readAll(f).TaintedString
-    finally:
-      close(f)
-  else:
-    sysFatal(IOError, "cannot open: ", filename)
-
-proc writeFile(filename, content: string) =
-  var f: File
-  if open(f, filename, fmWrite):
-    try:
-      f.write(content)
-    finally:
-      close(f)
-  else:
-    sysFatal(IOError, "cannot open: ", filename)
-
-proc setStdIoUnbuffered() =
-  when declared(stdout):
-    discard c_setvbuf(stdout, nil, IONBF, 0)
-  when declared(stderr):
-    discard c_setvbuf(stderr, nil, IONBF, 0)
-  when declared(stdin):
-    discard c_setvbuf(stdin, nil, IONBF, 0)
-
-when declared(stdout):
-  proc echoBinSafe(args: openArray[string]) {.compilerProc.} =
-    # flockfile deadlocks some versions of Android 5.x.x
-    when not defined(windows) and not defined(android):
-      proc flockfile(f: File) {.importc, noDecl.}
-      proc funlockfile(f: File) {.importc, noDecl.}
-      flockfile(stdout)
-    for s in args:
-      discard c_fwrite(s.cstring, s.len, 1, stdout)
-    const linefeed = "\n" # can be 1 or more chars
-    discard c_fwrite(linefeed.cstring, linefeed.len, 1, stdout)
-    discard c_fflush(stdout)
-    when not defined(windows) and not defined(android):
-      funlockfile(stdout)
-
-{.pop.}
diff --git a/lib/system/syslocks.nim b/lib/system/syslocks.nim
deleted file mode 100644
index 6569f4f9f..000000000
--- a/lib/system/syslocks.nim
+++ /dev/null
@@ -1,228 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-# Low level system locks and condition vars.
-
-{.push stackTrace: off.}
-
-when defined(Windows):
-  type
-    Handle = int
-
-    SysLock {.importc: "CRITICAL_SECTION",
-              header: "<windows.h>", final, pure.} = object # CRITICAL_SECTION in WinApi
-      DebugInfo: pointer
-      LockCount: int32
-      RecursionCount: int32
-      OwningThread: int
-      LockSemaphore: int
-      SpinCount: int
-
-    SysCond = Handle
-
-  {.deprecated: [THandle: Handle, TSysLock: SysLock, TSysCond: SysCond].}
-
-  proc initSysLock(L: var SysLock) {.importc: "InitializeCriticalSection",
-                                     header: "<windows.h>".}
-    ## Initializes the lock `L`.
-
-  proc tryAcquireSysAux(L: var SysLock): int32 {.importc: "TryEnterCriticalSection",
-                                                 header: "<windows.h>".}
-    ## Tries to acquire the lock `L`.
-
-  proc tryAcquireSys(L: var SysLock): bool {.inline.} =
-    result = tryAcquireSysAux(L) != 0'i32
-
-  proc acquireSys(L: var SysLock) {.importc: "EnterCriticalSection",
-                                    header: "<windows.h>".}
-    ## Acquires the lock `L`.
-
-  proc releaseSys(L: var SysLock) {.importc: "LeaveCriticalSection",
-                                    header: "<windows.h>".}
-    ## Releases the lock `L`.
-
-  proc deinitSys(L: var SysLock) {.importc: "DeleteCriticalSection",
-                                   header: "<windows.h>".}
-
-  proc createEvent(lpEventAttributes: pointer,
-                   bManualReset, bInitialState: int32,
-                   lpName: cstring): SysCond {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "CreateEventA".}
-
-  proc closeHandle(hObject: Handle) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "CloseHandle".}
-  proc waitForSingleObject(hHandle: Handle, dwMilliseconds: int32): int32 {.
-    stdcall, dynlib: "kernel32", importc: "WaitForSingleObject", noSideEffect.}
-
-  proc signalSysCond(hEvent: SysCond) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "SetEvent".}
-
-  proc initSysCond(cond: var SysCond) {.inline.} =
-    cond = createEvent(nil, 0'i32, 0'i32, nil)
-  proc deinitSysCond(cond: var SysCond) {.inline.} =
-    closeHandle(cond)
-  proc waitSysCond(cond: var SysCond, lock: var SysLock) =
-    releaseSys(lock)
-    discard waitForSingleObject(cond, -1'i32)
-    acquireSys(lock)
-
-  proc waitSysCondWindows(cond: var SysCond) =
-    discard waitForSingleObject(cond, -1'i32)
-
-elif defined(genode):
-  const
-    Header = "genode_cpp/syslocks.h"
-  type
-    SysLock {.importcpp: "Nim::SysLock", pure, final,
-              header: Header.} = object
-    SysCond {.importcpp: "Nim::SysCond", pure, final,
-              header: Header.} = object
-
-  proc initSysLock(L: var SysLock) = discard
-  proc deinitSys(L: var SysLock) = discard
-  proc acquireSys(L: var SysLock) {.noSideEffect, importcpp.}
-  proc tryAcquireSys(L: var SysLock): bool {.noSideEffect, importcpp.}
-  proc releaseSys(L: var SysLock) {.noSideEffect, importcpp.}
-
-  proc initSysCond(L: var SysCond) = discard
-  proc deinitSysCond(L: var SysCond) = discard
-  proc waitSysCond(cond: var SysCond, lock: var SysLock) {.
-    noSideEffect, importcpp.}
-  proc signalSysCond(cond: var SysCond) {.
-    noSideEffect, importcpp.}
-
-else:
-  type
-    SysLockObj {.importc: "pthread_mutex_t", pure, final,
-               header: """#include <sys/types.h>
-                          #include <pthread.h>""".} = object
-      when defined(linux) and defined(amd64):
-        abi: array[40 div sizeof(clong), clong]
-
-    SysLockAttr {.importc: "pthread_mutexattr_t", pure, final
-               header: """#include <sys/types.h>
-                          #include <pthread.h>""".} = object
-      when defined(linux) and defined(amd64):
-        abi: array[4 div sizeof(cint), cint]  # actually a cint
-
-    SysCondObj {.importc: "pthread_cond_t", pure, final,
-               header: """#include <sys/types.h>
-                          #include <pthread.h>""".} = object
-      when defined(linux) and defined(amd64):
-        abi: array[48 div sizeof(clonglong), clonglong]
-
-    SysCondAttr {.importc: "pthread_condattr_t", pure, final
-               header: """#include <sys/types.h>
-                          #include <pthread.h>""".} = object
-      when defined(linux) and defined(amd64):
-        abi: array[4 div sizeof(cint), cint]  # actually a cint
-
-    SysLockType = distinct cint
-
-  proc initSysLockAux(L: var SysLockObj, attr: ptr SysLockAttr) {.
-    importc: "pthread_mutex_init", header: "<pthread.h>", noSideEffect.}
-  proc deinitSysAux(L: var SysLockObj) {.noSideEffect,
-    importc: "pthread_mutex_destroy", header: "<pthread.h>".}
-
-  proc acquireSysAux(L: var SysLockObj) {.noSideEffect,
-    importc: "pthread_mutex_lock", header: "<pthread.h>".}
-  proc tryAcquireSysAux(L: var SysLockObj): cint {.noSideEffect,
-    importc: "pthread_mutex_trylock", header: "<pthread.h>".}
-
-  proc releaseSysAux(L: var SysLockObj) {.noSideEffect,
-    importc: "pthread_mutex_unlock", header: "<pthread.h>".}
-
-  when defined(ios):
-    # iOS will behave badly if sync primitives are moved in memory. In order
-    # to prevent this once and for all, we're doing an extra malloc when
-    # initializing the primitive.
-    type
-      SysLock = ptr SysLockObj
-      SysCond = ptr SysCondObj
-
-    when not declared(c_malloc):
-      proc c_malloc(size: csize): pointer {.
-        importc: "malloc", header: "<stdlib.h>".}
-      proc c_free(p: pointer) {.
-        importc: "free", header: "<stdlib.h>".}
-
-    proc initSysLock(L: var SysLock, attr: ptr SysLockAttr = nil) =
-      L = cast[SysLock](c_malloc(sizeof(SysLockObj)))
-      initSysLockAux(L[], attr)
-
-    proc deinitSys(L: var SysLock) =
-      deinitSysAux(L[])
-      c_free(L)
-
-    template acquireSys(L: var SysLock) =
-      acquireSysAux(L[])
-    template tryAcquireSys(L: var SysLock): bool =
-      tryAcquireSysAux(L[]) == 0'i32
-    template releaseSys(L: var SysLock) =
-      releaseSysAux(L[])
-  else:
-    type
-      SysLock = SysLockObj
-      SysCond = SysCondObj
-
-    template initSysLock(L: var SysLock, attr: ptr SysLockAttr = nil) =
-      initSysLockAux(L, attr)
-    template deinitSys(L: var SysLock) =
-      deinitSysAux(L)
-    template acquireSys(L: var SysLock) =
-      acquireSysAux(L)
-    template tryAcquireSys(L: var SysLock): bool =
-      tryAcquireSysAux(L) == 0'i32
-    template releaseSys(L: var SysLock) =
-      releaseSysAux(L)
-
-  when insideRLocksModule:
-    proc SysLockType_Reentrant: SysLockType =
-      {.emit: "`result` = PTHREAD_MUTEX_RECURSIVE;".}
-    proc initSysLockAttr(a: var SysLockAttr) {.
-      importc: "pthread_mutexattr_init", header: "<pthread.h>", noSideEffect.}
-    proc setSysLockType(a: var SysLockAttr, t: SysLockType) {.
-      importc: "pthread_mutexattr_settype", header: "<pthread.h>", noSideEffect.}
-
-  else:
-    proc initSysCondAux(cond: var SysCondObj, cond_attr: ptr SysCondAttr = nil) {.
-      importc: "pthread_cond_init", header: "<pthread.h>", noSideEffect.}
-    proc deinitSysCondAux(cond: var SysCondObj) {.noSideEffect,
-      importc: "pthread_cond_destroy", header: "<pthread.h>".}
-
-    proc waitSysCondAux(cond: var SysCondObj, lock: var SysLockObj) {.
-      importc: "pthread_cond_wait", header: "<pthread.h>", noSideEffect.}
-    proc signalSysCondAux(cond: var SysCondObj) {.
-      importc: "pthread_cond_signal", header: "<pthread.h>", noSideEffect.}
-
-    when defined(ios):
-      proc initSysCond(cond: var SysCond, cond_attr: ptr SysCondAttr = nil) =
-        cond = cast[SysCond](c_malloc(sizeof(SysCondObj)))
-        initSysCondAux(cond[], cond_attr)
-
-      proc deinitSysCond(cond: var SysCond) =
-        deinitSysCondAux(cond[])
-        c_free(cond)
-
-      template waitSysCond(cond: var SysCond, lock: var SysLock) =
-        waitSysCondAux(cond[], lock[])
-      template signalSysCond(cond: var SysCond) =
-        signalSysCondAux(cond[])
-    else:
-      template initSysCond(cond: var SysCond, cond_attr: ptr SysCondAttr = nil) =
-        initSysCondAux(cond, cond_attr)
-      template deinitSysCond(cond: var SysCond) =
-        deinitSysCondAux(cond)
-
-      template waitSysCond(cond: var SysCond, lock: var SysLock) =
-        waitSysCondAux(cond, lock)
-      template signalSysCond(cond: var SysCond) =
-        signalSysCondAux(cond)
-
-{.pop.}
diff --git a/lib/system/sysspawn.nim b/lib/system/sysspawn.nim
deleted file mode 100644
index dc2d13578..000000000
--- a/lib/system/sysspawn.nim
+++ /dev/null
@@ -1,194 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2015 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## Implements Nim's 'spawn'.
-
-when not declared(NimString):
-  {.error: "You must not import this module explicitly".}
-
-{.push stackTrace:off.}
-
-# We declare our own condition variables here to get rid of the dummy lock
-# on Windows:
-
-type
-  CondVar = object
-    c: SysCond
-    when defined(posix):
-      stupidLock: SysLock
-      counter: int
-
-proc createCondVar(): CondVar =
-  initSysCond(result.c)
-  when defined(posix):
-    initSysLock(result.stupidLock)
-    #acquireSys(result.stupidLock)
-
-proc destroyCondVar(c: var CondVar) {.inline.} =
-  deinitSysCond(c.c)
-
-proc await(cv: var CondVar) =
-  when defined(posix):
-    acquireSys(cv.stupidLock)
-    while cv.counter <= 0:
-      waitSysCond(cv.c, cv.stupidLock)
-    dec cv.counter
-    releaseSys(cv.stupidLock)
-  else:
-    waitSysCondWindows(cv.c)
-
-proc signal(cv: var CondVar) =
-  when defined(posix):
-    acquireSys(cv.stupidLock)
-    inc cv.counter
-    releaseSys(cv.stupidLock)
-  signalSysCond(cv.c)
-
-type
-  FastCondVar = object
-    event, slowPath: bool
-    slow: CondVar
-
-proc createFastCondVar(): FastCondVar =
-  initSysCond(result.slow.c)
-  when defined(posix):
-    initSysLock(result.slow.stupidLock)
-    #acquireSys(result.slow.stupidLock)
-  result.event = false
-  result.slowPath = false
-
-proc await(cv: var FastCondVar) =
-  #for i in 0 .. 50:
-  #  if cas(addr cv.event, true, false):
-  #    # this is a HIT: Triggers > 95% in my tests.
-  #    return
-  #  cpuRelax()
-  #cv.slowPath = true
-  # XXX For some reason this crashes some test programs
-  await(cv.slow)
-  cv.event = false
-
-proc signal(cv: var FastCondVar) =
-  cv.event = true
-  #if cas(addr cv.slowPath, true, false):
-  signal(cv.slow)
-
-type
-  Barrier* {.compilerProc.} = object
-    counter: int
-    cv: CondVar
-
-proc barrierEnter*(b: ptr Barrier) {.compilerProc.} =
-  atomicInc b.counter
-
-proc barrierLeave*(b: ptr Barrier) {.compilerProc.} =
-  atomicDec b.counter
-  if b.counter <= 0: signal(b.cv)
-
-proc openBarrier*(b: ptr Barrier) {.compilerProc.} =
-  b.counter = 0
-  b.cv = createCondVar()
-
-proc closeBarrier*(b: ptr Barrier) {.compilerProc.} =
-  await(b.cv)
-  destroyCondVar(b.cv)
-
-{.pop.}
-
-# ----------------------------------------------------------------------------
-
-type
-  WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.}
-  Worker = object
-    taskArrived: CondVar
-    taskStarted: FastCondVar #\
-    # task data:
-    f: WorkerProc
-    data: pointer
-    ready: bool # put it here for correct alignment!
-
-proc nimArgsPassingDone(p: pointer) {.compilerProc.} =
-  let w = cast[ptr Worker](p)
-  signal(w.taskStarted)
-
-var gSomeReady = createFastCondVar()
-
-proc slave(w: ptr Worker) {.thread.} =
-  while true:
-    w.ready = true # If we instead signal "workerReady" we need the scheduler
-                   # to notice this. The scheduler could then optimize the
-                   # layout of the worker threads (e.g. keep the list sorted)
-                   # so that no search for a "ready" thread is necessary.
-                   # This might be implemented later, but is more tricky than
-                   # it looks because 'spawn' itself can run concurrently.
-    signal(gSomeReady)
-    await(w.taskArrived)
-    assert(not w.ready)
-    # shield against spurious wakeups:
-    if w.data != nil:
-      w.f(w, w.data)
-      w.data = nil
-
-const NumThreads = 4
-
-var
-  workers: array[NumThreads, Thread[ptr Worker]]
-  workersData: array[NumThreads, Worker]
-
-proc setup() =
-  for i in 0 ..< NumThreads:
-    workersData[i].taskArrived = createCondVar()
-    workersData[i].taskStarted = createFastCondVar()
-    createThread(workers[i], slave, addr(workersData[i]))
-
-proc preferSpawn*(): bool =
-  ## Use this proc to determine quickly if a 'spawn' or a direct call is
-  ## preferable. If it returns 'true' a 'spawn' may make sense. In general
-  ## it is not necessary to call this directly; use 'spawnX' instead.
-  result = gSomeReady.event
-
-proc spawn*(call: typed) {.magic: "Spawn".}
-  ## always spawns a new task, so that the 'call' is never executed on
-  ## the calling thread. 'call' has to be proc call 'p(...)' where 'p'
-  ## is gcsafe and has 'void' as the return type.
-
-template spawnX*(call: typed) =
-  ## spawns a new task if a CPU core is ready, otherwise executes the
-  ## call in the calling thread. Usually it is advised to
-  ## use 'spawn' in order to not block the producer for an unknown
-  ## amount of time. 'call' has to be proc call 'p(...)' where 'p'
-  ## is gcsafe and has 'void' as the return type.
-  if preferSpawn(): spawn call
-  else: call
-
-proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} =
-  # implementation of 'spawn' that is used by the code generator.
-  while true:
-    for i in 0.. high(workers):
-      let w = addr(workersData[i])
-      if cas(addr w.ready, true, false):
-        w.data = data
-        w.f = fn
-        signal(w.taskArrived)
-        await(w.taskStarted)
-        return
-    await(gSomeReady)
-
-proc sync*() =
-  ## a simple barrier to wait for all spawn'ed tasks. If you need more elaborate
-  ## waiting, you have to use an explicit barrier.
-  while true:
-    var allReady = true
-    for i in 0 .. high(workers):
-      if not allReady: break
-      allReady = allReady and workersData[i].ready
-    if allReady: break
-    await(gSomeReady)
-
-setup()
diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim
index 7b81f54da..3621c4960 100644
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -15,42 +15,18 @@
 # we don't use refcounts because that's a behaviour
 # the programmer may not want
 
+
+proc dataPointer(a: PGenericSeq, elemAlign: int): pointer =
+  cast[pointer](cast[int](a) +% align(GenericSeqSize, elemAlign))
+
+proc dataPointer(a: PGenericSeq, elemAlign, elemSize, index: int): pointer =
+  cast[pointer](cast[int](a) +% align(GenericSeqSize, elemAlign) +% (index*%elemSize))
+
 proc resize(old: int): int {.inline.} =
   if old <= 0: result = 4
   elif old < 65536: result = old * 2
   else: result = old * 3 div 2 # for large arrays * 3/2 is better
 
-proc cmpStrings(a, b: NimString): int {.inline, compilerProc.} =
-  if a == b: return 0
-  when defined(nimNoNil):
-    let alen = if a == nil: 0 else: a.len
-    let blen = if b == nil: 0 else: b.len
-  else:
-    if a == nil: return -1
-    if b == nil: return 1
-    let alen = a.len
-    let blen = b.len
-  let minlen = min(alen, blen)
-  if minlen > 0:
-    result = c_memcmp(addr a.data, addr b.data, minlen.csize)
-    if result == 0:
-      result = alen - blen
-  else:
-    result = alen - blen
-
-proc eqStrings(a, b: NimString): bool {.inline, compilerProc.} =
-  if a == b: return true
-  when defined(nimNoNil):
-    let alen = if a == nil: 0 else: a.len
-    let blen = if b == nil: 0 else: b.len
-  else:
-    if a == nil or b == nil: return false
-    let alen = a.len
-    let blen = b.len
-  if alen == blen:
-    if alen == 0: return true
-    return equalMem(addr(a.data), addr(b.data), alen)
-
 when declared(allocAtomic):
   template allocStr(size: untyped): untyped =
     cast[NimString](allocAtomic(size))
@@ -71,46 +47,49 @@ else:
   template allocStrNoInit(size: untyped): untyped =
     cast[NimString](newObjNoInit(addr(strDesc), size))
 
-proc rawNewStringNoInit(space: int): NimString {.compilerProc.} =
-  var s = space
-  if s < 7: s = 7
+proc rawNewStringNoInit(space: int): NimString =
+  let s = max(space, 7)
   result = allocStrNoInit(sizeof(TGenericSeq) + s + 1)
   result.reserved = s
-  result.len = 0
   when defined(gogc):
     result.elemSize = 1
 
-proc rawNewString(space: int): NimString {.compilerProc.} =
-  var s = space
-  if s < 7: s = 7
-  result = allocStr(sizeof(TGenericSeq) + s + 1)
-  result.reserved = s
+proc rawNewString(space: int): NimString {.compilerproc.} =
+  result = rawNewStringNoInit(space)
   result.len = 0
-  when defined(gogc):
-    result.elemSize = 1
+  result.data[0] = '\0'
 
-proc mnewString(len: int): NimString {.compilerProc.} =
-  result = rawNewString(len)
+proc mnewString(len: int): NimString {.compilerproc.} =
+  result = rawNewStringNoInit(len)
   result.len = len
+  zeroMem(addr result.data[0], len + 1)
+
+proc copyStrLast(s: NimString, start, last: int): NimString {.compilerproc.} =
+  # This is not used by most recent versions of the compiler anymore, but
+  # required for bootstrapping purposes.
+  let start = max(start, 0)
+  if s == nil: return nil
+  let len = min(last, s.len-1) - start + 1
+  result = rawNewStringNoInit(len)
+  result.len = len
+  copyMem(addr(result.data), addr(s.data[start]), len)
+  result.data[len] = '\0'
 
-proc copyStrLast(s: NimString, start, last: int): NimString {.compilerProc.} =
-  var start = max(start, 0)
-  var len = min(last, s.len-1) - start + 1
-  if len > 0:
-    result = rawNewStringNoInit(len)
-    result.len = len
-    copyMem(addr(result.data), addr(s.data[start]), len)
-    result.data[len] = '\0'
-  else:
-    result = rawNewString(len)
-
-proc copyStr(s: NimString, start: int): NimString {.compilerProc.} =
+proc copyStr(s: NimString, start: int): NimString {.compilerproc.} =
+  # This is not used by most recent versions of the compiler anymore, but
+  # required for bootstrapping purposes.
+  if s == nil: return nil
   result = copyStrLast(s, start, s.len-1)
 
-proc toNimStr(str: cstring, len: int): NimString {.compilerProc.} =
+proc nimToCStringConv(s: NimString): cstring {.compilerproc, nonReloadable, inline.} =
+  if s == nil or s.len == 0: result = cstring""
+  else: result = cast[cstring](addr s.data)
+
+proc toNimStr(str: cstring, len: int): NimString {.compilerproc.} =
   result = rawNewStringNoInit(len)
   result.len = len
-  copyMem(addr(result.data), str, len + 1)
+  copyMem(addr(result.data), str, len)
+  result.data[len] = '\0'
 
 proc cstrToNimstr(str: cstring): NimString {.compilerRtl.} =
   if str == nil: NimString(nil)
@@ -137,20 +116,27 @@ proc newOwnedString(src: NimString; n: int): NimString =
 
 proc copyStringRC1(src: NimString): NimString {.compilerRtl.} =
   if src != nil:
-    when declared(newObjRC1) and not defined(gcRegions):
-      var s = src.len
-      if s < 7: s = 7
-      result = cast[NimString](newObjRC1(addr(strDesc), sizeof(TGenericSeq) +
-                               s+1))
-      result.reserved = s
+    if (src.reserved and seqShallowFlag) != 0:
+      result = src
+      when declared(incRef):
+        incRef(usrToCell(result))
     else:
-      result = rawNewStringNoInit(src.len)
-    result.len = src.len
-    copyMem(addr(result.data), addr(src.data), src.len + 1)
-    sysAssert((seqShallowFlag and result.reserved) == 0, "copyStringRC1")
-    when defined(nimShallowStrings):
-      if (src.reserved and strlitFlag) != 0:
-        result.reserved = (result.reserved and not strlitFlag) or seqShallowFlag
+      when declared(newObjRC1) and not defined(gcRegions):
+        var s = src.len
+        if s < 7: s = 7
+        result = cast[NimString](newObjRC1(addr(strDesc), sizeof(TGenericSeq) +
+                                s+1))
+        result.reserved = s
+        when defined(gogc):
+          result.elemSize = 1
+      else:
+        result = rawNewStringNoInit(src.len)
+      result.len = src.len
+      copyMem(addr(result.data), addr(src.data), src.len + 1)
+      sysAssert((seqShallowFlag and result.reserved) == 0, "copyStringRC1")
+      when defined(nimShallowStrings):
+        if (src.reserved and strlitFlag) != 0:
+          result.reserved = (result.reserved and not strlitFlag) or seqShallowFlag
 
 proc copyDeepString(src: NimString): NimString {.inline.} =
   if src != nil:
@@ -158,19 +144,6 @@ proc copyDeepString(src: NimString): NimString {.inline.} =
     result.len = src.len
     copyMem(addr(result.data), addr(src.data), src.len + 1)
 
-proc hashString(s: string): int {.compilerproc.} =
-  # the compiler needs exactly the same hash function!
-  # this used to be used for efficient generation of string case statements
-  var h = 0
-  for i in 0..len(s)-1:
-    h = h +% ord(s[i])
-    h = h +% h shl 10
-    h = h xor (h shr 6)
-  h = h +% h shl 3
-  h = h xor (h shr 11)
-  h = h +% h shl 15
-  result = h
-
 proc addChar(s: NimString, c: char): NimString =
   # is compilerproc!
   if s == nil:
@@ -180,8 +153,9 @@ proc addChar(s: NimString, c: char): NimString =
     result = s
     if result.len >= result.space:
       let r = resize(result.space)
-      result = cast[NimString](growObj(result,
-        sizeof(TGenericSeq) + r + 1))
+      result = rawNewStringNoInit(r)
+      result.len = s.len
+      copyMem(addr result.data[0], unsafeAddr(s.data[0]), s.len+1)
       result.reserved = r
   result.data[result.len] = c
   result.data[result.len+1] = '\0'
@@ -220,12 +194,14 @@ proc addChar(s: NimString, c: char): NimString =
 
 proc resizeString(dest: NimString, addlen: int): NimString {.compilerRtl.} =
   if dest == nil:
-    result = rawNewStringNoInit(addlen)
+    result = rawNewString(addlen)
   elif dest.len + addlen <= dest.space:
     result = dest
   else: # slow path:
-    var sp = max(resize(dest.space), dest.len + addlen)
-    result = cast[NimString](growObj(dest, sizeof(TGenericSeq) + sp + 1))
+    let sp = max(resize(dest.space), dest.len + addlen)
+    result = rawNewStringNoInit(sp)
+    result.len = dest.len
+    copyMem(addr result.data[0], unsafeAddr(dest.data[0]), dest.len+1)
     result.reserved = sp
     #result = rawNewString(sp)
     #copyMem(result, dest, dest.len + sizeof(TGenericSeq))
@@ -242,19 +218,27 @@ proc appendChar(dest: NimString, c: char) {.compilerproc, inline.} =
   inc(dest.len)
 
 proc setLengthStr(s: NimString, newLen: int): NimString {.compilerRtl.} =
-  var n = max(newLen, 0)
+  let n = max(newLen, 0)
   if s == nil:
-    result = mnewString(newLen)
+    if n == 0:
+      return s
+    else:
+      result = mnewString(n)
   elif n <= s.space:
     result = s
   else:
-    result = resizeString(s, n)
+    let sp = max(resize(s.space), n)
+    result = rawNewStringNoInit(sp)
+    result.len = s.len
+    copyMem(addr result.data[0], unsafeAddr(s.data[0]), s.len)
+    zeroMem(addr result.data[s.len], n - s.len)
+    result.reserved = sp
   result.len = n
   result.data[n] = '\0'
 
 # ----------------- sequences ----------------------------------------------
 
-proc incrSeq(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} =
+proc incrSeq(seq: PGenericSeq, elemSize, elemAlign: int): PGenericSeq {.compilerproc.} =
   # increments the length by one:
   # this is needed for supporting ``add``;
   #
@@ -264,21 +248,19 @@ proc incrSeq(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} =
   result = seq
   if result.len >= result.space:
     let r = resize(result.space)
-    result = cast[PGenericSeq](growObj(result, elemSize * r +
-                               GenericSeqSize))
+    result = cast[PGenericSeq](growObj(result, align(GenericSeqSize, elemAlign) + elemSize * r))
     result.reserved = r
   inc(result.len)
 
-proc incrSeqV2(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} =
+proc incrSeqV2(seq: PGenericSeq, elemSize, elemAlign: int): PGenericSeq {.compilerproc.} =
   # incrSeq version 2
   result = seq
   if result.len >= result.space:
     let r = resize(result.space)
-    result = cast[PGenericSeq](growObj(result, elemSize * r +
-                               GenericSeqSize))
+    result = cast[PGenericSeq](growObj(result, align(GenericSeqSize, elemAlign) + elemSize * r))
     result.reserved = r
 
-proc incrSeqV3(s: PGenericSeq, typ: PNimType): PGenericSeq {.compilerProc.} =
+proc incrSeqV3(s: PGenericSeq, typ: PNimType): PGenericSeq {.compilerproc.} =
   if s == nil:
     result = cast[PGenericSeq](newSeq(typ, 1))
     result.len = 0
@@ -286,293 +268,96 @@ proc incrSeqV3(s: PGenericSeq, typ: PNimType): PGenericSeq {.compilerProc.} =
     result = s
     if result.len >= result.space:
       let r = resize(result.space)
-      result = cast[PGenericSeq](growObj(result, typ.base.size * r +
-                                GenericSeqSize))
-      result.reserved = r
+      result = cast[PGenericSeq](newSeq(typ, r))
+      result.len = s.len
+      copyMem(dataPointer(result, typ.base.align), dataPointer(s, typ.base.align), s.len * typ.base.size)
+      # since we steal the content from 's', it's crucial to set s's len to 0.
+      s.len = 0
 
-proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
+proc setLengthSeq(seq: PGenericSeq, elemSize, elemAlign, newLen: int): PGenericSeq {.
     compilerRtl, inl.} =
   result = seq
   if result.space < newLen:
     let r = max(resize(result.space), newLen)
-    result = cast[PGenericSeq](growObj(result, elemSize * r +
-                               GenericSeqSize))
+    result = cast[PGenericSeq](growObj(result, align(GenericSeqSize, elemAlign) + elemSize * r))
     result.reserved = r
   elif newLen < result.len:
     # we need to decref here, otherwise the GC leaks!
     when not defined(boehmGC) and not defined(nogc) and
          not defined(gcMarkAndSweep) and not defined(gogc) and
          not defined(gcRegions):
-      when false: # compileOption("gc", "v2"):
+      if ntfNoRefs notin extGetCellType(result).base.flags:
         for i in newLen..result.len-1:
-          let len0 = gch.tempStack.len
-          forAllChildrenAux(cast[pointer](cast[ByteAddress](result) +%
-                            GenericSeqSize +% (i*%elemSize)),
-                            extGetCellType(result).base, waPush)
-          let len1 = gch.tempStack.len
-          for i in len0 ..< len1:
-            doDecRef(gch.tempStack.d[i], LocalHeap, MaybeCyclic)
-          gch.tempStack.len = len0
-      else:
-        if ntfNoRefs notin extGetCellType(result).base.flags:
-          for i in newLen..result.len-1:
-            forAllChildrenAux(cast[pointer](cast[ByteAddress](result) +%
-                              GenericSeqSize +% (i*%elemSize)),
-                              extGetCellType(result).base, waZctDecRef)
+          forAllChildrenAux(dataPointer(result, elemAlign, elemSize, i),
+                            extGetCellType(result).base, waZctDecRef)
 
     # XXX: zeroing out the memory can still result in crashes if a wiped-out
     # cell is aliased by another pointer (ie proc parameter or a let variable).
     # This is a tough problem, because even if we don't zeroMem here, in the
     # presence of user defined destructors, the user will expect the cell to be
-    # "destroyed" thus creating the same problem. We can destoy the cell in the
+    # "destroyed" thus creating the same problem. We can destroy the cell in the
     # finalizer of the sequence, but this makes destruction non-deterministic.
-    zeroMem(cast[pointer](cast[ByteAddress](result) +% GenericSeqSize +%
-           (newLen*%elemSize)), (result.len-%newLen) *% elemSize)
+    zeroMem(dataPointer(result, elemAlign, elemSize, newLen), (result.len-%newLen) *% elemSize)
   result.len = newLen
 
 proc setLengthSeqV2(s: PGenericSeq, typ: PNimType, newLen: int): PGenericSeq {.
     compilerRtl.} =
+  sysAssert typ.kind == tySequence, "setLengthSeqV2: type is not a seq"
   if s == nil:
-    result = cast[PGenericSeq](newSeq(typ, newLen))
-  else:
-    result = setLengthSeq(s, typ.base.size, newLen)
-
-# --------------- other string routines ----------------------------------
-proc add*(result: var string; x: int64) =
-  let base = result.len
-  setLen(result, base + sizeof(x)*4)
-  var i = 0
-  var y = x
-  while true:
-    var d = y div 10
-    result[base+i] = chr(abs(int(y - d*10)) + ord('0'))
-    inc(i)
-    y = d
-    if y == 0: break
-  if x < 0:
-    result[base+i] = '-'
-    inc(i)
-  setLen(result, base+i)
-  # mirror the string:
-  for j in 0..i div 2 - 1:
-    swap(result[base+j], result[base+i-j-1])
-
-proc nimIntToStr(x: int): string {.compilerRtl.} =
-  result = newStringOfCap(sizeof(x)*4)
-  result.add x
-
-proc add*(result: var string; x: float) =
-  when nimvm:
-    result.add $x
-  else:
-    var buf: array[0..64, char]
-    when defined(nimNoArrayToCstringConversion):
-      var n: int = c_sprintf(addr buf, "%.16g", x)
-    else:
-      var n: int = c_sprintf(buf, "%.16g", x)
-    var hasDot = false
-    for i in 0..n-1:
-      if buf[i] == ',':
-        buf[i] = '.'
-        hasDot = true
-      elif buf[i] in {'a'..'z', 'A'..'Z', '.'}:
-        hasDot = true
-    if not hasDot:
-      buf[n] = '.'
-      buf[n+1] = '0'
-      buf[n+2] = '\0'
-    # On Windows nice numbers like '1.#INF', '-1.#INF' or '1.#NAN'
-    # of '-1.#IND' are produced.
-    # We want to get rid of these here:
-    if buf[n-1] in {'n', 'N', 'D', 'd'}:
-      result.add "nan"
-    elif buf[n-1] == 'F':
-      if buf[0] == '-':
-        result.add "-inf"
-      else:
-        result.add "inf"
-    else:
-      var i = 0
-      while buf[i] != '\0':
-        result.add buf[i]
-        inc i
-
-proc nimFloatToStr(f: float): string {.compilerproc.} =
-  result = newStringOfCap(8)
-  result.add f
-
-proc c_strtod(buf: cstring, endptr: ptr cstring): float64 {.
-  importc: "strtod", header: "<stdlib.h>", noSideEffect.}
-
-const
-  IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
-  powtens =  [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
-              1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
-              1e20, 1e21, 1e22]
-
-proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
-                          start = 0): int {.compilerProc.} =
-  # This routine attempt to parse float that can parsed quickly.
-  # ie whose integer part can fit inside a 53bits integer.
-  # their real exponent must also be <= 22. If the float doesn't follow
-  # these restrictions, transform the float into this form:
-  #  INTEGER * 10 ^ exponent and leave the work to standard `strtod()`.
-  # This avoid the problems of decimal character portability.
-  # see: http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
-  var
-    i = start
-    sign = 1.0
-    kdigits, fdigits = 0
-    exponent: int
-    integer: uint64
-    frac_exponent = 0
-    exp_sign = 1
-    first_digit = -1
-    has_sign = false
-
-  # Sign?
-  if s[i] == '+' or s[i] == '-':
-    has_sign = true
-    if s[i] == '-':
-      sign = -1.0
-    inc(i)
-
-  # NaN?
-  if s[i] == 'N' or s[i] == 'n':
-    if s[i+1] == 'A' or s[i+1] == 'a':
-      if s[i+2] == 'N' or s[i+2] == 'n':
-        if s[i+3] notin IdentChars:
-          number = NaN
-          return i+3 - start
-    return 0
-
-  # Inf?
-  if s[i] == 'I' or s[i] == 'i':
-    if s[i+1] == 'N' or s[i+1] == 'n':
-      if s[i+2] == 'F' or s[i+2] == 'f':
-        if s[i+3] notin IdentChars:
-          number = Inf*sign
-          return i+3 - start
-    return 0
-
-  if s[i] in {'0'..'9'}:
-    first_digit = (s[i].ord - '0'.ord)
-  # Integer part?
-  while s[i] in {'0'..'9'}:
-    inc(kdigits)
-    integer = integer * 10'u64 + (s[i].ord - '0'.ord).uint64
-    inc(i)
-    while s[i] == '_': inc(i)
-
-  # Fractional part?
-  if s[i] == '.':
-    inc(i)
-    # if no integer part, Skip leading zeros
-    if kdigits <= 0:
-      while s[i] == '0':
-        inc(frac_exponent)
-        inc(i)
-        while s[i] == '_': inc(i)
-
-    if first_digit == -1 and s[i] in {'0'..'9'}:
-      first_digit = (s[i].ord - '0'.ord)
-    # get fractional part
-    while s[i] in {'0'..'9'}:
-      inc(fdigits)
-      inc(frac_exponent)
-      integer = integer * 10'u64 + (s[i].ord - '0'.ord).uint64
-      inc(i)
-      while s[i] == '_': inc(i)
-
-  # if has no digits: return error
-  if kdigits + fdigits <= 0 and
-     (i == start or # no char consumed (empty string).
-     (i == start + 1 and has_sign)): # or only '+' or '-
-    return 0
-
-  if s[i] in {'e', 'E'}:
-    inc(i)
-    if s[i] == '+' or s[i] == '-':
-      if s[i] == '-':
-        exp_sign = -1
-
-      inc(i)
-    if s[i] notin {'0'..'9'}:
-      return 0
-    while s[i] in {'0'..'9'}:
-      exponent = exponent * 10 + (ord(s[i]) - ord('0'))
-      inc(i)
-      while s[i] == '_': inc(i) # underscores are allowed and ignored
-
-  var real_exponent = exp_sign*exponent - frac_exponent
-  let exp_negative = real_exponent < 0
-  var abs_exponent = abs(real_exponent)
-
-  # if exponent greater than can be represented: +/- zero or infinity
-  if abs_exponent > 999:
-    if exp_negative:
-      number = 0.0*sign
+    if newLen == 0:
+      result = s
     else:
-      number = Inf*sign
-    return i - start
-
-  # if integer is representable in 53 bits:  fast path
-  # max fast path integer is  1<<53 - 1 or  8999999999999999 (16 digits)
-  let digits = kdigits + fdigits
-  if digits <= 15 or (digits <= 16 and first_digit <= 8):
-    # max float power of ten with set bits above the 53th bit is 10^22
-    if abs_exponent <= 22:
-      if exp_negative:
-        number = sign * integer.float / powtens[abs_exponent]
-      else:
-        number = sign * integer.float * powtens[abs_exponent]
-      return i - start
-
-    # if exponent is greater try to fit extra exponent above 22 by multiplying
-    # integer part is there is space left.
-    let slop = 15 - kdigits - fdigits
-    if  abs_exponent <= 22 + slop and not exp_negative:
-      number = sign * integer.float * powtens[slop] * powtens[abs_exponent-slop]
-      return i - start
-
-  # if failed: slow path with strtod.
-  var t: array[500, char] # flaviu says: 325 is the longest reasonable literal
-  var ti = 0
-  let maxlen = t.high - "e+000".len # reserve enough space for exponent
-
-  result = i - start
-  i = start
-  # re-parse without error checking, any error should be handled by the code above.
-  if s[i] == '.': i.inc
-  while s[i] in {'0'..'9','+','-'}:
-    if ti < maxlen:
-      t[ti] = s[i]; inc(ti)
-    inc(i)
-    while s[i] in {'.', '_'}: # skip underscore and decimal point
-      inc(i)
-
-  # insert exponent
-  t[ti] = 'E'; inc(ti)
-  t[ti] = (if exp_negative: '-' else: '+'); inc(ti)
-  inc(ti, 3)
-
-  # insert adjusted exponent
-  t[ti-1] = ('0'.ord + abs_exponent mod 10).char; abs_exponent = abs_exponent div 10
-  t[ti-2] = ('0'.ord + abs_exponent mod 10).char; abs_exponent = abs_exponent div 10
-  t[ti-3] = ('0'.ord + abs_exponent mod 10).char
-
-  when defined(nimNoArrayToCstringConversion):
-    number = c_strtod(addr t, nil)
+      result = cast[PGenericSeq](newSeq(typ, newLen))
   else:
-    number = c_strtod(t, nil)
-
-proc nimInt64ToStr(x: int64): string {.compilerRtl.} =
-  result = newStringOfCap(sizeof(x)*4)
-  result.add x
-
-proc nimBoolToStr(x: bool): string {.compilerRtl.} =
-  return if x: "true" else: "false"
+    let elemSize = typ.base.size
+    let elemAlign = typ.base.align
+    if s.space < newLen:
+      let r = max(resize(s.space), newLen)
+      result = cast[PGenericSeq](newSeq(typ, r))
+      copyMem(dataPointer(result, elemAlign), dataPointer(s, elemAlign), s.len * elemSize)
+      # since we steal the content from 's', it's crucial to set s's len to 0.
+      s.len = 0
+    elif newLen < s.len:
+      result = s
+      # we need to decref here, otherwise the GC leaks!
+      when not defined(boehmGC) and not defined(nogc) and
+          not defined(gcMarkAndSweep) and not defined(gogc) and
+          not defined(gcRegions):
+        if ntfNoRefs notin typ.base.flags:
+          for i in newLen..result.len-1:
+            forAllChildrenAux(dataPointer(result, elemAlign, elemSize, i),
+                              extGetCellType(result).base, waZctDecRef)
 
-proc nimCharToStr(x: char): string {.compilerRtl.} =
-  result = newString(1)
-  result[0] = x
+      # XXX: zeroing out the memory can still result in crashes if a wiped-out
+      # cell is aliased by another pointer (ie proc parameter or a let variable).
+      # This is a tough problem, because even if we don't zeroMem here, in the
+      # presence of user defined destructors, the user will expect the cell to be
+      # "destroyed" thus creating the same problem. We can destroy the cell in the
+      # finalizer of the sequence, but this makes destruction non-deterministic.
+      zeroMem(dataPointer(result, elemAlign, elemSize, newLen), (result.len-%newLen) *% elemSize)
+    else:
+      result = s
+      zeroMem(dataPointer(result, elemAlign, elemSize, result.len), (newLen-%result.len) *% elemSize)
+    result.len = newLen
+
+func capacity*(self: string): int {.inline.} =
+  ## Returns the current capacity of the string.
+  # See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var str = newStringOfCap(cap = 42)
+    str.add "Nim"
+    assert str.capacity == 42
+
+  let str = cast[NimString](self)
+  result = if str != nil: str.space else: 0
+
+func capacity*[T](self: seq[T]): int {.inline.} =
+  ## Returns the current capacity of the seq.
+  # See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var lst = newSeqOfCap[string](cap = 42)
+    lst.add "Nim"
+    assert lst.capacity == 42
+
+  let sek = cast[PGenericSeq](self)
+  result = if sek != nil: sek.space else: 0
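# A minimal stand-alone sketch (not part of the patch): the rewritten seq code
# above addresses elements through `dataPointer`, which is defined elsewhere in
# the system sources. It is assumed to compute: the payload starts at
# GenericSeqSize rounded up to the element alignment, and element i sits
# i * elemSize bytes after that.
proc elemOffset(genericSeqSize, elemSize, elemAlign, i: int): int =
  let payloadStart = (genericSeqSize + elemAlign - 1) and not (elemAlign - 1)
  payloadStart + i * elemSize

# e.g. a 16-byte header with 16-byte-aligned, 24-byte elements:
assert elemOffset(16, 24, 16, 0) == 16
assert elemOffset(16, 24, 16, 2) == 64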
diff --git a/lib/system/threadids.nim b/lib/system/threadids.nim
new file mode 100644
index 000000000..3a6eadcbb
--- /dev/null
+++ b/lib/system/threadids.nim
@@ -0,0 +1,103 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2020 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# we need to cache the current threadId so that we don't perform a syscall each time
+var threadId {.threadvar.}: int
+
+when defined(windows):
+  proc getCurrentThreadId(): int32 {.
+    stdcall, dynlib: "kernel32", importc: "GetCurrentThreadId".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(getCurrentThreadId())
+    result = threadId
+
+elif defined(linux):
+  proc syscall(arg: clong): clong {.varargs, importc: "syscall", header: "<unistd.h>".}
+  when defined(amd64):
+    const NR_gettid = clong(186)
+  else:
+    var NR_gettid {.importc: "__NR_gettid", header: "<sys/syscall.h>".}: clong
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(syscall(NR_gettid))
+    result = threadId
+
+elif defined(dragonfly):
+  proc lwp_gettid(): int32 {.importc, header: "unistd.h".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(lwp_gettid())
+    result = threadId
+
+elif defined(openbsd):
+  proc getthrid(): int32 {.importc: "getthrid", header: "<unistd.h>".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(getthrid())
+    result = threadId
+
+elif defined(netbsd):
+  proc lwp_self(): int32 {.importc: "_lwp_self", header: "<lwp.h>".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(lwp_self())
+    result = threadId
+
+elif defined(freebsd):
+  proc syscall(arg: cint, arg0: ptr cint): cint {.varargs, importc: "syscall", header: "<unistd.h>".}
+  var SYS_thr_self {.importc:"SYS_thr_self", header:"<sys/syscall.h>".}: cint
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    var tid = 0.cint
+    if threadId == 0:
+      discard syscall(SYS_thr_self, addr tid)
+      threadId = tid
+    result = threadId
+
+elif defined(macosx):
+  proc syscall(arg: cint): cint {.varargs, importc: "syscall", header: "<unistd.h>".}
+  var SYS_thread_selfid {.importc:"SYS_thread_selfid", header:"<sys/syscall.h>".}: cint
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(syscall(SYS_thread_selfid))
+    result = threadId
+
+elif defined(solaris):
+  type thread_t {.importc: "thread_t", header: "<thread.h>".} = distinct int
+  proc thr_self(): thread_t {.importc, header: "<thread.h>".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(thr_self())
+    result = threadId
+
+elif defined(haiku):
+  type thr_id {.importc: "thread_id", header: "<OS.h>".} = distinct int32
+  proc find_thread(name: cstring): thr_id {.importc, header: "<OS.h>".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(find_thread(nil))
+    result = threadId
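# Usage sketch (not part of the patch; assumes --threads:on): the module above
# memoises the OS thread id in a {.threadvar.}, so only the first call on a
# given thread performs the platform-specific query.
proc worker() {.thread.} =
  echo "worker thread id: ", getThreadId()

var t: Thread[void]
createThread(t, worker)
echo "main thread id: ", getThreadId()
joinThread(t)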
diff --git a/lib/system/threadimpl.nim b/lib/system/threadimpl.nim
new file mode 100644
index 000000000..285b8f5e7
--- /dev/null
+++ b/lib/system/threadimpl.nim
@@ -0,0 +1,111 @@
+var
+  nimThreadDestructionHandlers* {.rtlThreadVar.}: seq[proc () {.closure, gcsafe, raises: [].}]
+when not defined(boehmgc) and not hasSharedHeap and not defined(gogc) and not defined(gcRegions):
+  proc deallocOsPages() {.rtl, raises: [].}
+proc threadTrouble() {.raises: [], gcsafe.}
+# create for the main thread. Note: do not insert this data into the list
+# of all threads; it's not to be stopped etc.
+when not defined(useNimRtl):
+  #when not defined(createNimRtl): initStackBottom()
+  when declared(initGC):
+    initGC()
+    when not emulatedThreadVars:
+      type ThreadType {.pure.} = enum
+        None = 0,
+        NimThread = 1,
+        ForeignThread = 2
+      var
+        threadType {.rtlThreadVar.}: ThreadType
+
+      threadType = ThreadType.NimThread
+
+when defined(gcDestructors):
+  proc deallocThreadStorage(p: pointer) = c_free(p)
+else:
+  template deallocThreadStorage(p: pointer) = deallocShared(p)
+
+template afterThreadRuns() =
+  for i in countdown(nimThreadDestructionHandlers.len-1, 0):
+    nimThreadDestructionHandlers[i]()
+
+proc onThreadDestruction*(handler: proc () {.closure, gcsafe, raises: [].}) =
+  ## Registers a *thread local* handler that is called at the thread's
+  ## destruction.
+  ##
+  ## A thread is destructed when the `.thread` proc returns
+  ## normally or when it raises an exception. Note that unhandled exceptions
+  ## in a thread nevertheless cause the whole process to die.
+  nimThreadDestructionHandlers.add handler
+
+when defined(boehmgc):
+  type GCStackBaseProc = proc(sb: pointer, t: pointer) {.noconv.}
+  proc boehmGC_call_with_stack_base(sbp: GCStackBaseProc, p: pointer)
+    {.importc: "GC_call_with_stack_base", boehmGC.}
+  proc boehmGC_register_my_thread(sb: pointer)
+    {.importc: "GC_register_my_thread", boehmGC.}
+  proc boehmGC_unregister_my_thread()
+    {.importc: "GC_unregister_my_thread", boehmGC.}
+
+  proc threadProcWrapDispatch[TArg](sb: pointer, thrd: pointer) {.noconv, raises: [].} =
+    boehmGC_register_my_thread(sb)
+    try:
+      let thrd = cast[ptr Thread[TArg]](thrd)
+      when TArg is void:
+        thrd.dataFn()
+      else:
+        thrd.dataFn(thrd.data)
+    except:
+      threadTrouble()
+    finally:
+      afterThreadRuns()
+    boehmGC_unregister_my_thread()
+else:
+  proc threadProcWrapDispatch[TArg](thrd: ptr Thread[TArg]) {.raises: [].} =
+    try:
+      when TArg is void:
+        thrd.dataFn()
+      else:
+        when defined(nimV2):
+          thrd.dataFn(thrd.data)
+        else:
+          var x: TArg
+          deepCopy(x, thrd.data)
+          thrd.dataFn(x)
+    except:
+      threadTrouble()
+    finally:
+      afterThreadRuns()
+      when hasAllocStack:
+        deallocThreadStorage(thrd.rawStack)
+
+proc threadProcWrapStackFrame[TArg](thrd: ptr Thread[TArg]) {.raises: [].} =
+  when defined(boehmgc):
+    boehmGC_call_with_stack_base(threadProcWrapDispatch[TArg], thrd)
+  elif not defined(nogc) and not defined(gogc) and not defined(gcRegions) and not usesDestructors:
+    var p {.volatile.}: pointer
+    # init the GC for refc/markandsweep
+    nimGC_setStackBottom(addr(p))
+    when declared(initGC):
+      initGC()
+    when declared(threadType):
+      threadType = ThreadType.NimThread
+    threadProcWrapDispatch[TArg](thrd)
+    when declared(deallocOsPages): deallocOsPages()
+  else:
+    threadProcWrapDispatch(thrd)
+
+template nimThreadProcWrapperBody*(closure: untyped): untyped =
+  var thrd = cast[ptr Thread[TArg]](closure)
+  var core = thrd.core
+  when declared(globalsSlot): threadVarSetValue(globalsSlot, thrd.core)
+  threadProcWrapStackFrame(thrd)
+  # Since an unhandled exception terminates the whole process (!), there is
+  # no need for a ``try finally`` here, nor would it be correct: The current
+  # exception is tried to be re-raised by the code-gen after the ``finally``!
+  # However this is doomed to fail, because we already unmapped every heap
+  # page!
+
+  # mark as not running anymore:
+  thrd.core = nil
+  thrd.dataFn = nil
+  deallocThreadStorage(cast[pointer](core))
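# Usage sketch (not part of the patch; assumes --threads:on): handlers passed
# to onThreadDestruction run when the .thread proc returns or raises, in
# reverse registration order, as afterThreadRuns above shows.
proc worker() {.thread.} =
  onThreadDestruction(proc () {.closure, gcsafe, raises: [].} =
    echo "worker cleanup")
  echo "worker body"

var t: Thread[void]
createThread(t, worker)
joinThread(t)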
diff --git a/lib/system/threadlocalstorage.nim b/lib/system/threadlocalstorage.nim
new file mode 100644
index 000000000..e6ad9dca5
--- /dev/null
+++ b/lib/system/threadlocalstorage.nim
@@ -0,0 +1,125 @@
+import std/private/threadtypes
+
+when defined(windows):
+  type
+    ThreadVarSlot = distinct int32
+
+  proc threadVarAlloc(): ThreadVarSlot {.
+    importc: "TlsAlloc", stdcall, header: "<windows.h>".}
+  proc threadVarSetValue(dwTlsIndex: ThreadVarSlot, lpTlsValue: pointer) {.
+    importc: "TlsSetValue", stdcall, header: "<windows.h>".}
+  proc tlsGetValue(dwTlsIndex: ThreadVarSlot): pointer {.
+    importc: "TlsGetValue", stdcall, header: "<windows.h>".}
+
+  proc getLastError(): uint32 {.
+    importc: "GetLastError", stdcall, header: "<windows.h>".}
+  proc setLastError(x: uint32) {.
+    importc: "SetLastError", stdcall, header: "<windows.h>".}
+
+  proc threadVarGetValue(dwTlsIndex: ThreadVarSlot): pointer =
+    let realLastError = getLastError()
+    result = tlsGetValue(dwTlsIndex)
+    setLastError(realLastError)
+
+elif defined(genode):
+  const
+    GenodeHeader = "genode_cpp/threads.h"
+
+  type
+    ThreadVarSlot = int
+
+  proc threadVarAlloc(): ThreadVarSlot = 0
+
+  proc offMainThread(): bool {.
+    importcpp: "Nim::SysThread::offMainThread",
+    header: GenodeHeader.}
+
+  proc threadVarSetValue(value: pointer) {.
+    importcpp: "Nim::SysThread::threadVarSetValue(@)",
+    header: GenodeHeader.}
+
+  proc threadVarGetValue(): pointer {.
+    importcpp: "Nim::SysThread::threadVarGetValue()",
+    header: GenodeHeader.}
+
+  var mainTls: pointer
+
+  proc threadVarSetValue(s: ThreadVarSlot, value: pointer) {.inline.} =
+    if offMainThread():
+      threadVarSetValue(value);
+    else:
+      mainTls = value
+
+  proc threadVarGetValue(s: ThreadVarSlot): pointer {.inline.} =
+    if offMainThread():
+      threadVarGetValue();
+    else:
+      mainTls
+
+else:
+  when not (defined(macosx) or defined(haiku)):
+    {.passl: "-pthread".}
+
+  when not defined(haiku):
+    {.passc: "-pthread".}
+
+  when (defined(linux) or defined(nintendoswitch)) and defined(amd64):
+    type
+      ThreadVarSlot {.importc: "pthread_key_t",
+                    header: "<sys/types.h>".} = distinct cuint
+  elif defined(openbsd) and defined(amd64):
+    type
+      ThreadVarSlot {.importc: "pthread_key_t",
+                     header: "<pthread.h>".} = cint
+  else:
+    type
+      ThreadVarSlot {.importc: "pthread_key_t",
+                     header: "<sys/types.h>".} = object
+
+  proc pthread_getspecific(a1: ThreadVarSlot): pointer {.
+    importc: "pthread_getspecific", header: pthreadh.}
+  proc pthread_key_create(a1: ptr ThreadVarSlot,
+                          destruct: proc (x: pointer) {.noconv.}): int32 {.
+    importc: "pthread_key_create", header: pthreadh.}
+  proc pthread_key_delete(a1: ThreadVarSlot): int32 {.
+    importc: "pthread_key_delete", header: pthreadh.}
+
+  proc pthread_setspecific(a1: ThreadVarSlot, a2: pointer): int32 {.
+    importc: "pthread_setspecific", header: pthreadh.}
+
+  proc threadVarAlloc(): ThreadVarSlot {.inline.} =
+    discard pthread_key_create(addr(result), nil)
+  proc threadVarSetValue(s: ThreadVarSlot, value: pointer) {.inline.} =
+    discard pthread_setspecific(s, value)
+  proc threadVarGetValue(s: ThreadVarSlot): pointer {.inline.} =
+    result = pthread_getspecific(s)
+
+
+when emulatedThreadVars:
+  # the compiler generates this proc for us, so that we can get the size of
+  # the thread local var block; we use this only for sanity checking though
+  proc nimThreadVarsSize(): int {.noconv, importc: "NimThreadVarsSize".}
+
+
+
+when emulatedThreadVars:
+  var globalsSlot: ThreadVarSlot
+
+  when not defined(useNimRtl):
+    var mainThread: GcThread
+
+  proc GetThreadLocalVars(): pointer {.compilerRtl, inl.} =
+    result = addr(cast[PGcThread](threadVarGetValue(globalsSlot)).tls)
+
+  proc initThreadVarsEmulation() {.compilerproc, inline.} =
+    when not defined(useNimRtl):
+      globalsSlot = threadVarAlloc()
+      when declared(mainThread):
+        threadVarSetValue(globalsSlot, addr(mainThread))
+
+when not defined(useNimRtl):
+  when emulatedThreadVars:
+    if nimThreadVarsSize() > sizeof(ThreadLocalStorage):
+      c_fprintf(cstderr, """too large thread local storage size requested,
+use -d:\"nimTlsSize=X\" to setup even more or stop using unittest.nim""")
+      rawQuit 1
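# Usage sketch (not part of the patch): with --tlsEmulation:on every
# {.threadvar.} is packed into the fixed-size block whose size is checked
# above; if that check fires, recompile with a larger value, e.g.
# -d:nimTlsSize=32000 (any sufficiently large number), instead of the
# default of 16000.
var requestCount {.threadvar.}: int

proc countRequest(): int =
  inc requestCount
  requestCount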
diff --git a/lib/system/threads.nim b/lib/system/threads.nim
deleted file mode 100644
index c8ea03f92..000000000
--- a/lib/system/threads.nim
+++ /dev/null
@@ -1,716 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## Thread support for Nim. **Note**: This is part of the system module.
-## Do not import it directly. To activate thread support you need to compile
-## with the ``--threads:on`` command line switch.
-##
-## Nim's memory model for threads is quite different from other common
-## programming languages (C, Pascal): Each thread has its own
-## (garbage collected) heap and sharing of memory is restricted. This helps
-## to prevent race conditions and improves efficiency. See `the manual for
-## details of this memory model <manual.html#threads>`_.
-##
-## Example:
-##
-## .. code-block:: Nim
-##
-##  import locks
-##
-##  var
-##    thr: array[0..4, Thread[tuple[a,b: int]]]
-##    L: Lock
-##
-##  proc threadFunc(interval: tuple[a,b: int]) {.thread.} =
-##    for i in interval.a..interval.b:
-##      acquire(L) # lock stdout
-##      echo i
-##      release(L)
-##
-##  initLock(L)
-##
-##  for i in 0..high(thr):
-##    createThread(thr[i], threadFunc, (i*10, i*10+5))
-##  joinThreads(thr)
-
-when not declared(NimString):
-  {.error: "You must not import this module explicitly".}
-
-const
-  maxRegisters = 256 # don't think there is an arch with more registers
-  useStackMaskHack = false ## use the stack mask hack for better performance
-  StackGuardSize = 4096
-  ThreadStackMask =
-    when defined(genode):
-      1024*64*sizeof(int)-1
-    else:
-      1024*256*sizeof(int)-1
-  ThreadStackSize = ThreadStackMask+1 - StackGuardSize
-
-when defined(windows):
-  type
-    SysThread* = Handle
-    WinThreadProc = proc (x: pointer): int32 {.stdcall.}
-  {.deprecated: [TSysThread: SysThread].}
-
-  proc createThread(lpThreadAttributes: pointer, dwStackSize: int32,
-                     lpStartAddress: WinThreadProc,
-                     lpParameter: pointer,
-                     dwCreationFlags: int32,
-                     lpThreadId: var int32): SysThread {.
-    stdcall, dynlib: "kernel32", importc: "CreateThread".}
-
-  proc winSuspendThread(hThread: SysThread): int32 {.
-    stdcall, dynlib: "kernel32", importc: "SuspendThread".}
-
-  proc winResumeThread(hThread: SysThread): int32 {.
-    stdcall, dynlib: "kernel32", importc: "ResumeThread".}
-
-  proc waitForMultipleObjects(nCount: int32,
-                              lpHandles: ptr SysThread,
-                              bWaitAll: int32,
-                              dwMilliseconds: int32): int32 {.
-    stdcall, dynlib: "kernel32", importc: "WaitForMultipleObjects".}
-
-  proc terminateThread(hThread: SysThread, dwExitCode: int32): int32 {.
-    stdcall, dynlib: "kernel32", importc: "TerminateThread".}
-
-  proc getCurrentThreadId(): int32 {.
-    stdcall, dynlib: "kernel32", importc: "GetCurrentThreadId".}
-
-  type
-    ThreadVarSlot = distinct int32
-
-  when true:
-    proc threadVarAlloc(): ThreadVarSlot {.
-      importc: "TlsAlloc", stdcall, header: "<windows.h>".}
-    proc threadVarSetValue(dwTlsIndex: ThreadVarSlot, lpTlsValue: pointer) {.
-      importc: "TlsSetValue", stdcall, header: "<windows.h>".}
-    proc tlsGetValue(dwTlsIndex: ThreadVarSlot): pointer {.
-      importc: "TlsGetValue", stdcall, header: "<windows.h>".}
-
-    proc getLastError(): uint32 {.
-      importc: "GetLastError", stdcall, header: "<windows.h>".}
-    proc setLastError(x: uint32) {.
-      importc: "SetLastError", stdcall, header: "<windows.h>".}
-
-    proc threadVarGetValue(dwTlsIndex: ThreadVarSlot): pointer =
-      let realLastError = getLastError()
-      result = tlsGetValue(dwTlsIndex)
-      setLastError(realLastError)
-  else:
-    proc threadVarAlloc(): ThreadVarSlot {.
-      importc: "TlsAlloc", stdcall, dynlib: "kernel32".}
-    proc threadVarSetValue(dwTlsIndex: ThreadVarSlot, lpTlsValue: pointer) {.
-      importc: "TlsSetValue", stdcall, dynlib: "kernel32".}
-    proc threadVarGetValue(dwTlsIndex: ThreadVarSlot): pointer {.
-      importc: "TlsGetValue", stdcall, dynlib: "kernel32".}
-
-  proc setThreadAffinityMask(hThread: SysThread, dwThreadAffinityMask: uint) {.
-    importc: "SetThreadAffinityMask", stdcall, header: "<windows.h>".}
-
-elif defined(genode):
-  import genode/env
-  const
-    GenodeHeader = "genode_cpp/threads.h"
-  type
-    SysThread* {.importcpp: "Nim::SysThread",
-                 header: GenodeHeader, final, pure.} = object
-    GenodeThreadProc = proc (x: pointer) {.noconv.}
-    ThreadVarSlot = int
-
-  proc initThread(s: var SysThread,
-                  env: GenodeEnv,
-                  stackSize: culonglong,
-                  entry: GenodeThreadProc,
-                  arg: pointer,
-                  affinity: cuint) {.
-    importcpp: "#.initThread(@)".}
-
-  proc threadVarAlloc(): ThreadVarSlot = 0
-
-  proc offMainThread(): bool {.
-    importcpp: "Nim::SysThread::offMainThread",
-    header: GenodeHeader.}
-
-  proc threadVarSetValue(value: pointer) {.
-    importcpp: "Nim::SysThread::threadVarSetValue(@)",
-    header: GenodeHeader.}
-
-  proc threadVarGetValue(): pointer {.
-    importcpp: "Nim::SysThread::threadVarGetValue()",
-    header: GenodeHeader.}
-
-  var mainTls: pointer
-
-  proc threadVarSetValue(s: ThreadVarSlot, value: pointer) {.inline.} =
-    if offMainThread():
-      threadVarSetValue(value);
-    else:
-      mainTls = value
-
-  proc threadVarGetValue(s: ThreadVarSlot): pointer {.inline.} =
-    if offMainThread():
-      threadVarGetValue();
-    else:
-      mainTls
-
-else:
-  when not defined(macosx):
-    {.passL: "-pthread".}
-
-  {.passC: "-pthread".}
-  const
-    schedh = "#define _GNU_SOURCE\n#include <sched.h>"
-    pthreadh = "#define _GNU_SOURCE\n#include <pthread.h>"
-
-  when not declared(Time):
-    when defined(linux):
-      type Time = clong
-    else:
-      type Time = int
-
-  when defined(linux) and defined(amd64):
-    type
-      SysThread* {.importc: "pthread_t",
-                  header: "<sys/types.h>" .} = distinct culong
-      Pthread_attr {.importc: "pthread_attr_t",
-                    header: "<sys/types.h>".} = object
-        abi: array[56 div sizeof(clong), clong]
-      ThreadVarSlot {.importc: "pthread_key_t",
-                    header: "<sys/types.h>".} = distinct cuint
-  else:
-    type
-      SysThread* {.importc: "pthread_t", header: "<sys/types.h>".} = object
-      Pthread_attr {.importc: "pthread_attr_t",
-                       header: "<sys/types.h>".} = object
-      ThreadVarSlot {.importc: "pthread_key_t",
-                     header: "<sys/types.h>".} = object
-  type
-    Timespec {.importc: "struct timespec", header: "<time.h>".} = object
-      tv_sec: Time
-      tv_nsec: clong
-  {.deprecated: [TSysThread: SysThread, Tpthread_attr: PThreadAttr,
-                Ttimespec: Timespec, TThreadVarSlot: ThreadVarSlot].}
-
-  proc pthread_attr_init(a1: var PthreadAttr) {.
-    importc, header: pthreadh.}
-  proc pthread_attr_setstacksize(a1: var PthreadAttr, a2: int) {.
-    importc, header: pthreadh.}
-
-  proc pthread_create(a1: var SysThread, a2: var PthreadAttr,
-            a3: proc (x: pointer): pointer {.noconv.},
-            a4: pointer): cint {.importc: "pthread_create",
-            header: pthreadh.}
-  proc pthread_join(a1: SysThread, a2: ptr pointer): cint {.
-    importc, header: pthreadh.}
-
-  proc pthread_cancel(a1: SysThread): cint {.
-    importc: "pthread_cancel", header: pthreadh.}
-
-  proc pthread_getspecific(a1: ThreadVarSlot): pointer {.
-    importc: "pthread_getspecific", header: pthreadh.}
-  proc pthread_key_create(a1: ptr ThreadVarSlot,
-                          destruct: proc (x: pointer) {.noconv.}): int32 {.
-    importc: "pthread_key_create", header: pthreadh.}
-  proc pthread_key_delete(a1: ThreadVarSlot): int32 {.
-    importc: "pthread_key_delete", header: pthreadh.}
-
-  proc pthread_setspecific(a1: ThreadVarSlot, a2: pointer): int32 {.
-    importc: "pthread_setspecific", header: pthreadh.}
-
-  proc threadVarAlloc(): ThreadVarSlot {.inline.} =
-    discard pthread_key_create(addr(result), nil)
-  proc threadVarSetValue(s: ThreadVarSlot, value: pointer) {.inline.} =
-    discard pthread_setspecific(s, value)
-  proc threadVarGetValue(s: ThreadVarSlot): pointer {.inline.} =
-    result = pthread_getspecific(s)
-
-  when useStackMaskHack:
-    proc pthread_attr_setstack(attr: var PthreadAttr, stackaddr: pointer,
-                               size: int): cint {.
-      importc: "pthread_attr_setstack", header: pthreadh.}
-
-  type CpuSet {.importc: "cpu_set_t", header: schedh.} = object
-     when defined(linux) and defined(amd64):
-       abi: array[1024 div (8 * sizeof(culong)), culong]
-
-  proc cpusetZero(s: var CpuSet) {.importc: "CPU_ZERO", header: schedh.}
-  proc cpusetIncl(cpu: cint; s: var CpuSet) {.
-    importc: "CPU_SET", header: schedh.}
-
-  proc setAffinity(thread: SysThread; setsize: csize; s: var CpuSet) {.
-    importc: "pthread_setaffinity_np", header: pthreadh.}
-
-const
-  emulatedThreadVars = compileOption("tlsEmulation")
-
-when emulatedThreadVars:
-  # the compiler generates this proc for us, so that we can get the size of
-  # the thread local var block; we use this only for sanity checking though
-  proc nimThreadVarsSize(): int {.noconv, importc: "NimThreadVarsSize".}
-
-# we preallocate a fixed size for thread local storage, so that no heap
-# allocations are needed. Currently less than 16K are used on a 64bit machine.
-# We use ``float`` for proper alignment:
-const nimTlsSize {.intdefine.} = 16000
-type
-  ThreadLocalStorage = array[0..(nimTlsSize div sizeof(float)), float]
-
-  PGcThread = ptr GcThread
-  GcThread {.pure, inheritable.} = object
-    when emulatedThreadVars and not useStackMaskHack:
-      tls: ThreadLocalStorage
-    else:
-      nil
-    when hasSharedHeap:
-      next, prev: PGcThread
-      stackBottom, stackTop: pointer
-      stackSize: int
-    else:
-      nil
-{.deprecated: [TThreadLocalStorage: ThreadLocalStorage, TGcThread: GcThread].}
-
-when not defined(useNimRtl):
-  when not useStackMaskHack:
-    var mainThread: GcThread
-
-#const globalsSlot = ThreadVarSlot(0)
-#sysAssert checkSlot.int == globalsSlot.int
-
-when emulatedThreadVars:
-  # XXX it'd be more efficient to not use a global variable for the
-  # thread storage slot, but to rely on the implementation to assign slot X
-  # for us... ;-)
-  var globalsSlot: ThreadVarSlot
-
-  proc GetThreadLocalVars(): pointer {.compilerRtl, inl.} =
-    result = addr(cast[PGcThread](threadVarGetValue(globalsSlot)).tls)
-
-  proc initThreadVarsEmulation() {.compilerProc, inline.} =
-    when not defined(useNimRtl):
-      globalsSlot = threadVarAlloc()
-      when declared(mainThread):
-        threadVarSetValue(globalsSlot, addr(mainThread))
-
-when useStackMaskHack:
-  proc maskStackPointer(offset: int): pointer {.compilerRtl, inl.} =
-    var x {.volatile.}: pointer
-    x = addr(x)
-    result = cast[pointer]((cast[int](x) and not ThreadStackMask) +%
-      (0) +% offset)
-
-# create for the main thread. Note: do not insert this data into the list
-# of all threads; it's not to be stopped etc.
-when not defined(useNimRtl):
-  when not useStackMaskHack:
-    #when not defined(createNimRtl): initStackBottom()
-    when declared(initGC):
-      initGC()
-      when not emulatedThreadVars:
-        type ThreadType {.pure.} = enum
-          None = 0,
-          NimThread = 1,
-          ForeignThread = 2
-        var
-          threadType {.rtlThreadVar.}: ThreadType
-
-        threadType = ThreadType.NimThread
-
-
-
-  when emulatedThreadVars:
-    if nimThreadVarsSize() > sizeof(ThreadLocalStorage):
-      echo "too large thread local storage size requested ",
-           "(", nimThreadVarsSize(), "/", sizeof(ThreadLocalStorage), "). ",
-           "Use -d:\"nimTlsSize=", nimThreadVarsSize(),
-           "\" to preallocate sufficient storage."
-
-      quit 1
-
-  when hasSharedHeap and not defined(boehmgc) and not defined(gogc) and not defined(nogc):
-    var
-      threadList: PGcThread
-
-    proc registerThread(t: PGcThread) =
-      # we need to use the GC global lock here!
-      acquireSys(HeapLock)
-      t.prev = nil
-      t.next = threadList
-      if threadList != nil:
-        sysAssert(threadList.prev == nil, "threadList.prev == nil")
-        threadList.prev = t
-      threadList = t
-      releaseSys(HeapLock)
-
-    proc unregisterThread(t: PGcThread) =
-      # we need to use the GC global lock here!
-      acquireSys(HeapLock)
-      if t == threadList: threadList = t.next
-      if t.next != nil: t.next.prev = t.prev
-      if t.prev != nil: t.prev.next = t.next
-      # so that a thread can be unregistered twice which might happen if the
-      # code executes `destroyThread`:
-      t.next = nil
-      t.prev = nil
-      releaseSys(HeapLock)
-
-    # on UNIX, the GC uses ``SIGFREEZE`` to tell every thread to stop so that
-    # the GC can examine the stacks?
-    proc stopTheWord() = discard
-
-# We jump through some hops here to ensure that Nim thread procs can have
-# the Nim calling convention. This is needed because thread procs are
-# ``stdcall`` on Windows and ``noconv`` on UNIX. Alternative would be to just
-# use ``stdcall`` since it is mapped to ``noconv`` on UNIX anyway.
-
-type
-  Thread* {.pure, final.}[TArg] = object
-    core: PGcThread
-    sys: SysThread
-    when TArg is void:
-      dataFn: proc () {.nimcall, gcsafe.}
-    else:
-      dataFn: proc (m: TArg) {.nimcall, gcsafe.}
-      data: TArg
-
-{.deprecated: [TThread: Thread].}
-
-var
-  threadDestructionHandlers {.rtlThreadVar.}: seq[proc () {.closure, gcsafe.}]
-
-proc onThreadDestruction*(handler: proc () {.closure, gcsafe.}) =
-  ## Registers a *thread local* handler that is called at the thread's
-  ## destruction.
-  ## A thread is destructed when the ``.thread`` proc returns
-  ## normally or when it raises an exception. Note that unhandled exceptions
-  ## in a thread nevertheless cause the whole process to die.
-  if threadDestructionHandlers.isNil:
-    threadDestructionHandlers = @[]
-  threadDestructionHandlers.add handler
-
-template afterThreadRuns() =
-  for i in countdown(threadDestructionHandlers.len-1, 0):
-    threadDestructionHandlers[i]()
-
-when not defined(boehmgc) and not hasSharedHeap and not defined(gogc) and not defined(gcRegions):
-  proc deallocOsPages() {.rtl.}
-
-when defined(boehmgc):
-  type GCStackBaseProc = proc(sb: pointer, t: pointer) {.noconv.}
-  proc boehmGC_call_with_stack_base(sbp: GCStackBaseProc, p: pointer)
-    {.importc: "GC_call_with_stack_base", boehmGC.}
-  proc boehmGC_register_my_thread(sb: pointer)
-    {.importc: "GC_register_my_thread", boehmGC.}
-  proc boehmGC_unregister_my_thread()
-    {.importc: "GC_unregister_my_thread", boehmGC.}
-
-  proc threadProcWrapDispatch[TArg](sb: pointer, thrd: pointer) {.noconv.} =
-    boehmGC_register_my_thread(sb)
-    try:
-      let thrd = cast[ptr Thread[TArg]](thrd)
-      when TArg is void:
-        thrd.dataFn()
-      else:
-        thrd.dataFn(thrd.data)
-    finally:
-      afterThreadRuns()
-    boehmGC_unregister_my_thread()
-else:
-  proc threadProcWrapDispatch[TArg](thrd: ptr Thread[TArg]) =
-    try:
-      when TArg is void:
-        thrd.dataFn()
-      else:
-        var x: TArg
-        deepCopy(x, thrd.data)
-        thrd.dataFn(x)
-    finally:
-      afterThreadRuns()
-
-proc threadProcWrapStackFrame[TArg](thrd: ptr Thread[TArg]) =
-  when defined(boehmgc):
-    boehmGC_call_with_stack_base(threadProcWrapDispatch[TArg], thrd)
-  elif not defined(nogc) and not defined(gogc) and not defined(gcRegions):
-    var p {.volatile.}: proc(a: ptr Thread[TArg]) {.nimcall.} =
-      threadProcWrapDispatch[TArg]
-    when not hasSharedHeap:
-      # init the GC for refc/markandsweep
-      nimGC_setStackBottom(addr(p))
-      initGC()
-      when declared(threadType):
-        threadType = ThreadType.NimThread
-    when declared(registerThread):
-      thrd.core.stackBottom = addr(thrd)
-      registerThread(thrd.core)
-    p(thrd)
-    when declared(registerThread): unregisterThread(thrd.core)
-    when declared(deallocOsPages): deallocOsPages()
-  else:
-    threadProcWrapDispatch(thrd)
-
-template threadProcWrapperBody(closure: untyped): untyped =
-  var thrd = cast[ptr Thread[TArg]](closure)
-  var core = thrd.core
-  when declared(globalsSlot): threadVarSetValue(globalsSlot, thrd.core)
-  when declared(initAllocator):
-    initAllocator()
-  threadProcWrapStackFrame(thrd)
-  # Since an unhandled exception terminates the whole process (!), there is
-  # no need for a ``try finally`` here, nor would it be correct: The current
-  # exception is tried to be re-raised by the code-gen after the ``finally``!
-  # However this is doomed to fail, because we already unmapped every heap
-  # page!
-
-  # mark as not running anymore:
-  thrd.core = nil
-  thrd.dataFn = nil
-  deallocShared(cast[pointer](core))
-
-{.push stack_trace:off.}
-when defined(windows):
-  proc threadProcWrapper[TArg](closure: pointer): int32 {.stdcall.} =
-    threadProcWrapperBody(closure)
-    # implicitly return 0
-elif defined(genode):
-   proc threadProcWrapper[TArg](closure: pointer) {.noconv.} =
-    threadProcWrapperBody(closure)
-else:
-  proc threadProcWrapper[TArg](closure: pointer): pointer {.noconv.} =
-    threadProcWrapperBody(closure)
-{.pop.}
-
-proc running*[TArg](t: Thread[TArg]): bool {.inline.} =
-  ## returns true if `t` is running.
-  result = t.dataFn != nil
-
-proc handle*[TArg](t: Thread[TArg]): SysThread {.inline.} =
-  ## returns the thread handle of `t`.
-  result = t.sys
-
-when hostOS == "windows":
-  const MAXIMUM_WAIT_OBJECTS = 64
-
-  proc joinThread*[TArg](t: Thread[TArg]) {.inline.} =
-    ## waits for the thread `t` to finish.
-    discard waitForSingleObject(t.sys, -1'i32)
-
-  proc joinThreads*[TArg](t: varargs[Thread[TArg]]) =
-    ## waits for every thread in `t` to finish.
-    var a: array[MAXIMUM_WAIT_OBJECTS, SysThread]
-    var k = 0
-    while k < len(t):
-      var count = min(len(t) - k, MAXIMUM_WAIT_OBJECTS)
-      for i in 0..(count - 1): a[i] = t[i + k].sys
-      discard waitForMultipleObjects(int32(count),
-                                     cast[ptr SysThread](addr(a)), 1, -1)
-      inc(k, MAXIMUM_WAIT_OBJECTS)
-
-elif defined(genode):
-  proc joinThread*[TArg](t: Thread[TArg]) {.importcpp.}
-    ## waits for the thread `t` to finish.
-
-  proc joinThreads*[TArg](t: varargs[Thread[TArg]]) =
-    ## waits for every thread in `t` to finish.
-    for i in 0..t.high: joinThread(t[i])
-
-else:
-  proc joinThread*[TArg](t: Thread[TArg]) {.inline.} =
-    ## waits for the thread `t` to finish.
-    discard pthread_join(t.sys, nil)
-
-  proc joinThreads*[TArg](t: varargs[Thread[TArg]]) =
-    ## waits for every thread in `t` to finish.
-    for i in 0..t.high: joinThread(t[i])
-
-when false:
-  # XXX a thread should really release its heap here somehow:
-  proc destroyThread*[TArg](t: var Thread[TArg]) =
-    ## forces the thread `t` to terminate. This is potentially dangerous if
-    ## you don't have full control over `t` and its acquired resources.
-    when hostOS == "windows":
-      discard TerminateThread(t.sys, 1'i32)
-    else:
-      discard pthread_cancel(t.sys)
-    when declared(registerThread): unregisterThread(addr(t))
-    t.dataFn = nil
-    ## if thread `t` already exited, `t.core` will be `null`.
-    if not isNil(t.core):
-      deallocShared(t.core)
-      t.core = nil
-
-when hostOS == "windows":
-  proc createThread*[TArg](t: var Thread[TArg],
-                           tp: proc (arg: TArg) {.thread, nimcall.},
-                           param: TArg) =
-    ## creates a new thread `t` and starts its execution. Entry point is the
-    ## proc `tp`. `param` is passed to `tp`. `TArg` can be ``void`` if you
-    ## don't need to pass any data to the thread.
-    t.core = cast[PGcThread](allocShared0(sizeof(GcThread)))
-
-    when TArg isnot void: t.data = param
-    t.dataFn = tp
-    when hasSharedHeap: t.core.stackSize = ThreadStackSize
-    var dummyThreadId: int32
-    t.sys = createThread(nil, ThreadStackSize, threadProcWrapper[TArg],
-                         addr(t), 0'i32, dummyThreadId)
-    if t.sys <= 0:
-      raise newException(ResourceExhaustedError, "cannot create thread")
-
-  proc pinToCpu*[Arg](t: var Thread[Arg]; cpu: Natural) =
-    ## pins a thread to a `CPU`:idx:. In other words sets a
-    ## thread's `affinity`:idx:. If you don't know what this means, you
-    ## shouldn't use this proc.
-    setThreadAffinityMask(t.sys, uint(1 shl cpu))
-
-elif defined(genode):
-  var affinityOffset: cuint = 1
-    ## CPU affinity offset for next thread, safe to roll-over
-
-  proc createThread*[TArg](t: var Thread[TArg],
-                           tp: proc (arg: TArg) {.thread, nimcall.},
-                           param: TArg) =
-    t.core = cast[PGcThread](allocShared0(sizeof(GcThread)))
-
-    when TArg isnot void: t.data = param
-    t.dataFn = tp
-    when hasSharedHeap: t.stackSize = ThreadStackSize
-    t.sys.initThread(
-      runtimeEnv,
-      ThreadStackSize.culonglong,
-      threadProcWrapper[TArg], addr(t), affinityOffset)
-    inc affinityOffset
-
-  proc pinToCpu*[Arg](t: var Thread[Arg]; cpu: Natural) =
-    {.hint: "cannot change Genode thread CPU affinity after initialization".}
-    discard
-
-else:
-  proc createThread*[TArg](t: var Thread[TArg],
-                           tp: proc (arg: TArg) {.thread, nimcall.},
-                           param: TArg) =
-    ## creates a new thread `t` and starts its execution. Entry point is the
-    ## proc `tp`. `param` is passed to `tp`. `TArg` can be ``void`` if you
-    ## don't need to pass any data to the thread.
-    t.core = cast[PGcThread](allocShared0(sizeof(GcThread)))
-
-    when TArg isnot void: t.data = param
-    t.dataFn = tp
-    when hasSharedHeap: t.core.stackSize = ThreadStackSize
-    var a {.noinit.}: PthreadAttr
-    pthread_attr_init(a)
-    pthread_attr_setstacksize(a, ThreadStackSize)
-    if pthread_create(t.sys, a, threadProcWrapper[TArg], addr(t)) != 0:
-      raise newException(ResourceExhaustedError, "cannot create thread")
-
-  proc pinToCpu*[Arg](t: var Thread[Arg]; cpu: Natural) =
-    ## pins a thread to a `CPU`:idx:. In other words sets a
-    ## thread's `affinity`:idx:. If you don't know what this means, you
-    ## shouldn't use this proc.
-    when not defined(macosx):
-      var s {.noinit.}: CpuSet
-      cpusetZero(s)
-      cpusetIncl(cpu.cint, s)
-      setAffinity(t.sys, sizeof(s), s)
-
-proc createThread*(t: var Thread[void], tp: proc () {.thread, nimcall.}) =
-  createThread[void](t, tp)
-
-when false:
-  proc mainThreadId*[TArg](): ThreadId[TArg] =
-    ## returns the thread ID of the main thread.
-    result = cast[ThreadId[TArg]](addr(mainThread))
-
-when useStackMaskHack:
-  proc runMain(tp: proc () {.thread.}) {.compilerproc.} =
-    var mainThread: Thread[pointer]
-    createThread(mainThread, tp)
-    joinThread(mainThread)
-
-## we need to cache current threadId to not perform syscall all the time
-var threadId {.threadvar.}: int
-
-when defined(windows):
-  proc getThreadId*(): int =
-    ## get the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(getCurrentThreadId())
-    result = threadId
-
-elif defined(linux):
-  proc syscall(arg: clong): clong {.varargs, importc: "syscall", header: "<unistd.h>".}
-  when defined(amd64):
-    const NR_gettid = clong(186)
-  else:
-    var NR_gettid {.importc: "__NR_gettid", header: "<sys/syscall.h>".}: clong
-
-  proc getThreadId*(): int =
-    ## get the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(syscall(NR_gettid))
-    result = threadId
-
-elif defined(dragonfly):
-  proc lwp_gettid(): int32 {.importc, header: "unistd.h".}
-
-  proc getThreadId*(): int =
-    ## get the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(lwp_gettid())
-    result = threadId
-
-elif defined(openbsd):
-  proc getthrid(): int32 {.importc: "getthrid", header: "<unistd.h>".}
-
-  proc getThreadId*(): int =
-    ## get the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(getthrid())
-    result = threadId
-
-elif defined(netbsd):
-  proc lwp_self(): int32 {.importc: "_lwp_self", header: "<lwp.h>".}
-
-  proc getThreadId*(): int =
-    ## get the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(lwp_self())
-    result = threadId
-
-elif defined(freebsd):
-  proc syscall(arg: cint, arg0: ptr cint): cint {.varargs, importc: "syscall", header: "<unistd.h>".}
-  var SYS_thr_self {.importc:"SYS_thr_self", header:"<sys/syscall.h>"}: cint
-
-  proc getThreadId*(): int =
-    ## get the ID of the currently running thread.
-    var tid = 0.cint
-    if threadId == 0:
-      discard syscall(SYS_thr_self, addr tid)
-      threadId = tid
-    result = threadId
-
-elif defined(macosx):
-  proc syscall(arg: cint): cint {.varargs, importc: "syscall", header: "<unistd.h>".}
-  var SYS_thread_selfid {.importc:"SYS_thread_selfid", header:"<sys/syscall.h>".}: cint
-
-  proc getThreadId*(): int =
-    ## get the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(syscall(SYS_thread_selfid))
-    result = threadId
-
-elif defined(solaris):
-  type thread_t {.importc: "thread_t", header: "<thread.h>".} = distinct int
-  proc thr_self(): thread_t {.importc, header: "<thread.h>".}
-
-  proc getThreadId*(): int =
-    ## get the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(thr_self())
-    result = threadId
diff --git a/lib/system/timers.nim b/lib/system/timers.nim
index f2ebad2c1..ffb0f7716 100644
--- a/lib/system/timers.nim
+++ b/lib/system/timers.nim
@@ -8,12 +8,11 @@
 #
 
 ## Timer support for the realtime GC. Based on
-## `<https://github.com/jckarter/clay/blob/master/compiler/src/hirestimer.cpp>`_
+## `<https://github.com/jckarter/clay/blob/master/compiler/hirestimer.cpp>`_
 
 type
   Ticks = distinct int64
   Nanos = int64
-{.deprecated: [TTicks: Ticks, TNanos: Nanos].}
 
 when defined(windows):
 
@@ -32,15 +31,14 @@ when defined(windows):
 
     result = Nanos(float64(a.int64 - b.int64) * performanceCounterRate)
 
-elif defined(macosx):
+elif defined(macosx) and not defined(emscripten):
   type
     MachTimebaseInfoData {.pure, final,
         importc: "mach_timebase_info_data_t",
         header: "<mach/mach_time.h>".} = object
-      numer, denom: int32
-  {.deprecated: [TMachTimebaseInfoData: MachTimebaseInfoData].}
+      numer, denom: int32 # note: `uint32` in sources
 
-  proc mach_absolute_time(): int64 {.importc, header: "<mach/mach.h>".}
+  proc mach_absolute_time(): uint64 {.importc, header: "<mach/mach_time.h>".}
   proc mach_timebase_info(info: var MachTimebaseInfoData) {.importc,
     header: "<mach/mach_time.h>".}
 
@@ -51,7 +49,7 @@ elif defined(macosx):
   mach_timebase_info(timeBaseInfo)
 
   proc `-`(a, b: Ticks): Nanos =
-    result = (a.int64 - b.int64)  * timeBaseInfo.numer div timeBaseInfo.denom
+    result = (a.int64 - b.int64) * timeBaseInfo.numer div timeBaseInfo.denom
 
 elif defined(posixRealtime):
   type
@@ -61,7 +59,6 @@ elif defined(posixRealtime):
                final, pure.} = object ## struct timespec
       tv_sec: int  ## Seconds.
       tv_nsec: int ## Nanoseconds.
-  {.deprecated: [TClockid: Clockid, TTimeSpec: TimeSpec].}
 
   var
     CLOCK_REALTIME {.importc: "CLOCK_REALTIME", header: "<time.h>".}: Clockid
@@ -89,7 +86,7 @@ else:
                final, pure.} = object ## struct timeval
       tv_sec: Time  ## Seconds.
       tv_usec: clong ## Microseconds.
-  {.deprecated: [Ttimeval: Timeval].}
+
   proc posix_gettimeofday(tp: var Timeval, unused: pointer = nil) {.
     importc: "gettimeofday", header: "<sys/time.h>".}
 
diff --git a/lib/system/widestrs.nim b/lib/system/widestrs.nim
deleted file mode 100644
index a8b28c279..000000000
--- a/lib/system/widestrs.nim
+++ /dev/null
@@ -1,168 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-# Nim support for C/C++'s `wide strings`:idx:. This is part of the system
-# module! Do not import it directly!
-
-when not declared(NimString):
-  {.error: "You must not import this module explicitly".}
-
-type
-  Utf16Char* = distinct int16
-  WideCString* = ref UncheckedArray[Utf16Char]
-{.deprecated: [TUtf16Char: Utf16Char].}
-
-proc len*(w: WideCString): int =
-  ## returns the length of a widestring. This traverses the whole string to
-  ## find the binary zero end marker!
-  while int16(w[result]) != 0'i16: inc result
-
-const
-  UNI_REPLACEMENT_CHAR = Utf16Char(0xFFFD'i16)
-  UNI_MAX_BMP = 0x0000FFFF
-  UNI_MAX_UTF16 = 0x0010FFFF
-  UNI_MAX_UTF32 = 0x7FFFFFFF
-  UNI_MAX_LEGAL_UTF32 = 0x0010FFFF
-
-  halfShift = 10
-  halfBase = 0x0010000
-  halfMask = 0x3FF
-
-  UNI_SUR_HIGH_START = 0xD800
-  UNI_SUR_HIGH_END = 0xDBFF
-  UNI_SUR_LOW_START = 0xDC00
-  UNI_SUR_LOW_END = 0xDFFF
-  UNI_REPL = 0xFFFD
-
-template ones(n: untyped): untyped = ((1 shl n)-1)
-
-template fastRuneAt(s: cstring, i, L: int, result: untyped, doInc = true) =
-  ## Returns the unicode character ``s[i]`` in `result`. If ``doInc == true``
-  ## `i` is incremented by the number of bytes that have been processed.
-  bind ones
-
-  if ord(s[i]) <=% 127:
-    result = ord(s[i])
-    when doInc: inc(i)
-  elif ord(s[i]) shr 5 == 0b110:
-    #assert(ord(s[i+1]) shr 6 == 0b10)
-    if i <= L - 2:
-      result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6))
-      when doInc: inc(i, 2)
-    else:
-      result = UNI_REPL
-      when doInc: inc(i)
-  elif ord(s[i]) shr 4 == 0b1110:
-    if i <= L - 3:
-      #assert(ord(s[i+1]) shr 6 == 0b10)
-      #assert(ord(s[i+2]) shr 6 == 0b10)
-      result = (ord(s[i]) and ones(4)) shl 12 or
-               (ord(s[i+1]) and ones(6)) shl 6 or
-               (ord(s[i+2]) and ones(6))
-      when doInc: inc(i, 3)
-    else:
-      result = UNI_REPL
-      when doInc: inc(i)
-  elif ord(s[i]) shr 3 == 0b11110:
-    if i <= L - 4:
-      #assert(ord(s[i+1]) shr 6 == 0b10)
-      #assert(ord(s[i+2]) shr 6 == 0b10)
-      #assert(ord(s[i+3]) shr 6 == 0b10)
-      result = (ord(s[i]) and ones(3)) shl 18 or
-               (ord(s[i+1]) and ones(6)) shl 12 or
-               (ord(s[i+2]) and ones(6)) shl 6 or
-               (ord(s[i+3]) and ones(6))
-      when doInc: inc(i, 4)
-    else:
-      result = UNI_REPL
-      when doInc: inc(i)
-  else:
-    result = 0xFFFD
-    when doInc: inc(i)
-
-iterator runes(s: cstring, L: int): int =
-  var
-    i = 0
-    result: int
-  while i < L:
-    fastRuneAt(s, i, L, result, true)
-    yield result
-
-proc newWideCString*(source: cstring, L: int): WideCString =
-  unsafeNew(result, L * 4 + 2)
-  #result = cast[wideCString](alloc(L * 4 + 2))
-  var d = 0
-  for ch in runes(source, L):
-    if ch <=% UNI_MAX_BMP:
-      if ch >=% UNI_SUR_HIGH_START and ch <=% UNI_SUR_LOW_END:
-        result[d] = UNI_REPLACEMENT_CHAR
-      else:
-        result[d] = Utf16Char(toU16(ch))
-    elif ch >% UNI_MAX_UTF16:
-      result[d] = UNI_REPLACEMENT_CHAR
-    else:
-      let ch = ch -% halfBase
-      result[d] = Utf16Char(toU16((ch shr halfShift) +% UNI_SUR_HIGH_START))
-      inc d
-      result[d] = Utf16Char(toU16((ch and halfMask) +% UNI_SUR_LOW_START))
-    inc d
-  result[d] = Utf16Char(0'i16)
-
-proc newWideCString*(s: cstring): WideCString =
-  if s.isNil: return nil
-
-  result = newWideCString(s, s.len)
-
-proc newWideCString*(s: string): WideCString =
-  result = newWideCString(s, s.len)
-
-proc `$`*(w: WideCString, estimate: int, replacement: int = 0xFFFD): string =
-  result = newStringOfCap(estimate + estimate shr 2)
-
-  var i = 0
-  while w[i].int16 != 0'i16:
-    var ch = int(cast[uint16](w[i]))
-    inc i
-    if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_HIGH_END:
-      # If the 16 bits following the high surrogate are in the source buffer...
-      let ch2 = int(cast[uint16](w[i]))
-
-      # If it's a low surrogate, convert to UTF32:
-      if ch2 >= UNI_SUR_LOW_START and ch2 <= UNI_SUR_LOW_END:
-        ch = (((ch and halfMask) shl halfShift) + (ch2 and halfMask)) + halfBase
-        inc i
-      else:
-        #invalid UTF-16
-        ch = replacement
-    elif ch >= UNI_SUR_LOW_START and ch <= UNI_SUR_LOW_END:
-      #invalid UTF-16
-      ch = replacement
-
-    if ch < 0x80:
-      result.add chr(ch)
-    elif ch < 0x800:
-      result.add chr((ch shr 6) or 0xc0)
-      result.add chr((ch and 0x3f) or 0x80)
-    elif ch < 0x10000:
-      result.add chr((ch shr 12) or 0xe0)
-      result.add chr(((ch shr 6) and 0x3f) or 0x80)
-      result.add chr((ch and 0x3f) or 0x80)
-    elif ch <= 0x10FFFF:
-      result.add chr((ch shr 18) or 0xf0)
-      result.add chr(((ch shr 12) and 0x3f) or 0x80)
-      result.add chr(((ch shr 6) and 0x3f) or 0x80)
-      result.add chr((ch and 0x3f) or 0x80)
-    else:
-      # replacement char (in case the user passes a very large replacement value):
-      result.add chr(0xFFFD shr 12 or 0b1110_0000)
-      result.add chr(0xFFFD shr 6 and ones(6) or 0b10_0000_00)
-      result.add chr(0xFFFD and ones(6) or 0b10_0000_00)
-
-proc `$`*(s: WideCString): string =
-  result = s $ 80
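
The surrogate-pair arithmetic used by the deleted newWideCString and `$` procs can be summarised in isolation. The sketch below reuses the constants from the file above; splitSurrogates/joinSurrogates are illustrative names only:

const
  halfShift = 10
  halfBase = 0x0010000
  halfMask = 0x3FF
  UNI_SUR_HIGH_START = 0xD800
  UNI_SUR_LOW_START = 0xDC00

proc splitSurrogates(codepoint: int): (uint16, uint16) =
  ## Splits a code point above 0xFFFF into a UTF-16 high/low surrogate pair,
  ## as newWideCString did when encoding.
  let c = codepoint - halfBase
  result = (uint16((c shr halfShift) + UNI_SUR_HIGH_START),
            uint16((c and halfMask) + UNI_SUR_LOW_START))

proc joinSurrogates(hi, lo: uint16): int =
  ## Recombines a surrogate pair into the original code point,
  ## mirroring the decoding step in `$`.
  result = ((int(hi) and halfMask) shl halfShift) + (int(lo) and halfMask) + halfBase

# Example: U+1F600 splits into (0xD83D, 0xDE00) and joins back to 0x1F600.
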