78 files changed, 4825 insertions, 7002 deletions
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index 95658c49a..3de6d8713 100644
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -11,6 +11,8 @@
 {.push profiler:off.}
 
 include osalloc
+import std/private/syslocks
+import std/sysatomics
 
 template track(op, address, size) =
   when defined(memTracker):
@@ -18,11 +20,42 @@ template track(op, address, size) =
 
 # We manage *chunks* of memory. Each chunk is a multiple of the page size.
 # Each chunk starts at an address that is divisible by the page size.
+# Small chunks may be divided into smaller cells of reusable pointers to reduce the number of page allocations.
+
+# An allocation of a small pointer looks approximately like this
+#[
+
+  alloc -> rawAlloc -> No free chunk available > Request a new page from tslf -> result = chunk.data -------------+
+              |                                                                                                   |
+              v                                                                                                   |
+    Free chunk available                                                                                          |
+              |                                                                                                   |
+              v                                                                                                   v
+      Fetch shared cells -> No free cells available -> Advance acc -> result = chunk.data + chunk.acc -------> return
+    (may not add new cells)                                                                                       ^
+              |                                                                                                   |
+              v                                                                                                   |
+     Free cells available -> result = chunk.freeList -> Advance chunk.freeList -----------------------------------+
+]#
+# so it is split into 3 paths, where the last path is preferred to prevent unnecessary allocations.
+#
+#
+# A deallocation of a small pointer then looks like this
+#[
+  dealloc -> rawDealloc -> chunk.owner == addr(a) --------------> This thread owns the chunk ------> The current chunk is active    -> Chunk is completely unused -----> Chunk references no foreign cells
+                                      |                                       |                   (Add cell into the current chunk)                 |                  Return the current chunk back to tlsf
+                                      |                                       |                                   |                                 |
+                                      v                                       v                                   v                                 v
+                      A different thread owns this chunk.     The current chunk is not active.          chunk.free was < size      Chunk references foreign cells, noop
+                      Add the cell to a.sharedFreeLists      Add the cell into the active chunk          Activate the chunk                       (end)
+                                    (end)                                    (end)                              (end)
+]#
+# So "true" deallocation is delayed for as long as possible in favor of reusing cells.
 
 const
   nimMinHeapPages {.intdefine.} = 128 # 0.5 MB
   SmallChunkSize = PageSize
-  MaxFli = 30
+  MaxFli = when sizeof(int) > 2: 30 else: 14
   MaxLog2Sli = 5 # 32, this cannot be increased without changing 'uint32'
                  # everywhere!
   MaxSli = 1 shl MaxLog2Sli
@@ -30,7 +63,7 @@ const
   RealFli = MaxFli - FliOffset
 
   # size of chunks in last matrix bin
-  MaxBigChunkSize = 1 shl MaxFli - 1 shl (MaxFli-MaxLog2Sli-1)
+  MaxBigChunkSize = int(1'i32 shl MaxFli - 1'i32 shl (MaxFli-MaxLog2Sli-1))
   HugeChunkSize = MaxBigChunkSize + 1
 
 type
@@ -44,8 +77,33 @@ type
   IntSet = object
     data: TrunkBuckets
 
+# ------------- chunk table ---------------------------------------------------
+# We use a PtrSet of chunk starts and a table[Page, chunksize] for chunk
+# endings of big chunks. This is needed by the merging operation. The only
+# remaining operation is best-fit for big chunks. Since there is a size-limit
+# for big chunks (because greater than the limit means they are returned back
+# to the OS), a fixed size array can be used.
+
+type
+  PLLChunk = ptr LLChunk
+  LLChunk = object ## *low-level* chunk
+    size: int                # remaining size
+    acc: int                 # accumulator
+    next: PLLChunk           # next low-level chunk; only needed for dealloc
+
+  PAvlNode = ptr AvlNode
+  AvlNode = object
+    link: array[0..1, PAvlNode] # Left (0) and right (1) links
+    key, upperBound: int
+    level: int
+
+const
+  RegionHasLock = false # hasThreadSupport and defined(gcDestructors)
+
 type
   FreeCell {.final, pure.} = object
+    # A free cell is a pointer that has been freed, meaning it became available for reuse.
+    # It may become foreign if it is lent to a chunk that did not create it, doing so reduces the amount of needed pages.
     next: ptr FreeCell  # next free cell in chunk (overlaid with refcount)
     when not defined(gcDestructors):
       zeroField: int       # 0 means cell is not used (overlaid with typ field)
@@ -61,46 +119,27 @@ type
     prevSize: int        # size of previous chunk; for coalescing
                          # 0th bit == 1 if 'used
     size: int            # if < PageSize it is a small chunk
+    owner: ptr MemRegion
 
   SmallChunk = object of BaseChunk
     next, prev: PSmallChunk  # chunks of the same size
-    freeList: ptr FreeCell
-    free: int            # how many bytes remain
-    acc: int             # accumulator for small object allocation
-    when defined(nimAlignPragma):
-      data {.align: MemAlign.}: UncheckedArray[byte]      # start of usable memory
-    else:
-      data: UncheckedArray[byte]
+    freeList: ptr FreeCell   # Singly linked list of cells. They may be from foreign chunks or from the current chunk.
+                             #  Should be `nil` when the chunk isn't active in `a.freeSmallChunks`.
+    free: int32              # Bytes this chunk is able to provide using both the accumulator and free cells.
+                             # When a cell is considered foreign, its source chunk's free field is NOT adjusted until it
+                             #  reaches dealloc while the source chunk is active.
+                             # Instead, the receiving chunk gains the capacity and thus reserves space in the foreign chunk.
+    acc: uint32              # Offset from data, used when there are no free cells available but the chunk is considered free.
+    foreignCells: int        # When a free cell is given to a chunk that is not its origin,
+                             #  both the cell and the source chunk are considered foreign.
+                             # Receiving a foreign cell can happen both when deallocating from another thread or when
+                             #  the active chunk in `a.freeSmallChunks` is not the current chunk.
+                             # Freeing a chunk while `foreignCells > 0` leaks memory as all references to it become lost.
+    data {.align: MemAlign.}: UncheckedArray[byte]      # start of usable memory
 
   BigChunk = object of BaseChunk # not necessarily > PageSize!
     next, prev: PBigChunk    # chunks of the same (or bigger) size
-    when defined(nimAlignPragma):
-      data {.align: MemAlign.}: UncheckedArray[byte]      # start of usable memory
-    else:
-      data: UncheckedArray[byte]
-
-template smallChunkOverhead(): untyped = sizeof(SmallChunk)
-template bigChunkOverhead(): untyped = sizeof(BigChunk)
-
-# ------------- chunk table ---------------------------------------------------
-# We use a PtrSet of chunk starts and a table[Page, chunksize] for chunk
-# endings of big chunks. This is needed by the merging operation. The only
-# remaining operation is best-fit for big chunks. Since there is a size-limit
-# for big chunks (because greater than the limit means they are returned back
-# to the OS), a fixed size array can be used.
-
-type
-  PLLChunk = ptr LLChunk
-  LLChunk = object ## *low-level* chunk
-    size: int                # remaining size
-    acc: int                 # accumulator
-    next: PLLChunk           # next low-level chunk; only needed for dealloc
-
-  PAvlNode = ptr AvlNode
-  AvlNode = object
-    link: array[0..1, PAvlNode] # Left (0) and right (1) links
-    key, upperBound: int
-    level: int
+    data {.align: MemAlign.}: UncheckedArray[byte]      # start of usable memory
 
   HeapLinks = object
     len: int
@@ -108,23 +147,54 @@ type
     next: ptr HeapLinks
 
   MemRegion = object
-    minLargeObj, maxLargeObj: int
-    freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk]
+    when not defined(gcDestructors):
+      minLargeObj, maxLargeObj: int
+    freeSmallChunks: array[0..max(1, SmallChunkSize div MemAlign-1), PSmallChunk]
+      # List of available chunks per size class. Only one is expected to be active per class.
+    when defined(gcDestructors):
+      sharedFreeLists: array[0..max(1, SmallChunkSize div MemAlign-1), ptr FreeCell]
+        # When a thread frees a pointer it did not create, it must not adjust the counters.
+        # Instead, the cell is placed here and deferred until the next allocation.
     flBitmap: uint32
     slBitmap: array[RealFli, uint32]
     matrix: array[RealFli, array[MaxSli, PBigChunk]]
     llmem: PLLChunk
     currMem, maxMem, freeMem, occ: int # memory sizes (allocated from OS)
     lastSize: int # needed for the case that OS gives us pages linearly
+    when RegionHasLock:
+      lock: SysLock
+    when defined(gcDestructors):
+      sharedFreeListBigChunks: PBigChunk # make no attempt at avoiding false sharing for now for this object field
+
     chunkStarts: IntSet
-    root, deleted, last, freeAvlNodes: PAvlNode
-    locked, blockChunkSizeIncrease: bool # if locked, we cannot free pages.
+    when not defined(gcDestructors):
+      root, deleted, last, freeAvlNodes: PAvlNode
+    lockActive, locked, blockChunkSizeIncrease: bool # if locked, we cannot free pages.
     nextChunkSize: int
-    bottomData: AvlNode
+    when not defined(gcDestructors):
+      bottomData: AvlNode
     heapLinks: HeapLinks
     when defined(nimTypeNames):
       allocCounter, deallocCounter: int
 
+template smallChunkOverhead(): untyped = sizeof(SmallChunk)
+template bigChunkOverhead(): untyped = sizeof(BigChunk)
+
+when hasThreadSupport:
+  template loada(x: untyped): untyped = atomicLoadN(unsafeAddr x, ATOMIC_RELAXED)
+  template storea(x, y: untyped) = atomicStoreN(unsafeAddr x, y, ATOMIC_RELAXED)
+
+  when false:
+    # not yet required
+    template atomicStatDec(x, diff: untyped) = discard atomicSubFetch(unsafeAddr x, diff, ATOMIC_RELAXED)
+    template atomicStatInc(x, diff: untyped) = discard atomicAddFetch(unsafeAddr x, diff, ATOMIC_RELAXED)
+else:
+  template loada(x: untyped): untyped = x
+  template storea(x, y: untyped) = x = y
+
+template atomicStatDec(x, diff: untyped) = dec x, diff
+template atomicStatInc(x, diff: untyped) = inc x, diff
+
 const
   fsLookupTable: array[byte, int8] = [
     -1'i8, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
@@ -165,7 +235,7 @@ proc mappingSearch(r, fl, sl: var int) {.inline.} =
   let t = roundup((1 shl (msbit(uint32 r) - MaxLog2Sli)), PageSize) - 1
   r = r + t
   r = r and not t
-  r = min(r, MaxBigChunkSize)
+  r = min(r, MaxBigChunkSize).int
   fl = msbit(uint32 r)
   sl = (r shr (fl - MaxLog2Sli)) - MaxSli
   dec fl, FliOffset
@@ -231,11 +301,11 @@ proc addChunkToMatrix(a: var MemRegion; b: PBigChunk) =
   setBit(fl, a.flBitmap)
 
 proc incCurrMem(a: var MemRegion, bytes: int) {.inline.} =
-  inc(a.currMem, bytes)
+  atomicStatInc(a.currMem, bytes)
 
 proc decCurrMem(a: var MemRegion, bytes: int) {.inline.} =
   a.maxMem = max(a.maxMem, a.currMem)
-  dec(a.currMem, bytes)
+  atomicStatDec(a.currMem, bytes)
 
 proc getMaxMem(a: var MemRegion): int =
   # Since we update maxPagesCount only when freeing pages,
@@ -243,6 +313,20 @@ proc getMaxMem(a: var MemRegion): int =
   # maximum of these both values here:
   result = max(a.currMem, a.maxMem)
 
+const nimMaxHeap {.intdefine.} = 0
+
+proc allocPages(a: var MemRegion, size: int): pointer =
+  when nimMaxHeap != 0:
+    if a.occ + size > nimMaxHeap * 1024 * 1024:
+      raiseOutOfMem()
+  osAllocPages(size)
+
+proc tryAllocPages(a: var MemRegion, size: int): pointer =
+  when nimMaxHeap != 0:
+    if a.occ + size > nimMaxHeap * 1024 * 1024:
+      raiseOutOfMem()
+  osTryAllocPages(size)
+
 proc llAlloc(a: var MemRegion, size: int): pointer =
   # *low-level* alloc for the memory managers data structures. Deallocation
   # is done at the end of the allocator's life time.
@@ -252,49 +336,50 @@ proc llAlloc(a: var MemRegion, size: int): pointer =
     # is one page:
     sysAssert roundup(size+sizeof(LLChunk), PageSize) == PageSize, "roundup 6"
     var old = a.llmem # can be nil and is correct with nil
-    a.llmem = cast[PLLChunk](osAllocPages(PageSize))
-    when defined(avlcorruption):
+    a.llmem = cast[PLLChunk](allocPages(a, PageSize))
+    when defined(nimAvlcorruption):
       trackLocation(a.llmem, PageSize)
     incCurrMem(a, PageSize)
     a.llmem.size = PageSize - sizeof(LLChunk)
     a.llmem.acc = sizeof(LLChunk)
     a.llmem.next = old
-  result = cast[pointer](cast[ByteAddress](a.llmem) + a.llmem.acc)
+  result = cast[pointer](cast[int](a.llmem) + a.llmem.acc)
   dec(a.llmem.size, size)
   inc(a.llmem.acc, size)
   zeroMem(result, size)
 
-proc getBottom(a: var MemRegion): PAvlNode =
-  result = addr(a.bottomData)
-  if result.link[0] == nil:
-    result.link[0] = result
-    result.link[1] = result
-
-proc allocAvlNode(a: var MemRegion, key, upperBound: int): PAvlNode =
-  if a.freeAvlNodes != nil:
-    result = a.freeAvlNodes
-    a.freeAvlNodes = a.freeAvlNodes.link[0]
-  else:
-    result = cast[PAvlNode](llAlloc(a, sizeof(AvlNode)))
-    when defined(avlcorruption):
-      cprintf("tracking location: %p\n", result)
-  result.key = key
-  result.upperBound = upperBound
-  let bottom = getBottom(a)
-  result.link[0] = bottom
-  result.link[1] = bottom
-  result.level = 1
-  #when defined(avlcorruption):
-  #  track("allocAvlNode", result, sizeof(AvlNode))
-  sysAssert(bottom == addr(a.bottomData), "bottom data")
-  sysAssert(bottom.link[0] == bottom, "bottom link[0]")
-  sysAssert(bottom.link[1] == bottom, "bottom link[1]")
-
-proc deallocAvlNode(a: var MemRegion, n: PAvlNode) {.inline.} =
-  n.link[0] = a.freeAvlNodes
-  a.freeAvlNodes = n
-
-proc addHeapLink(a: var MemRegion; p: PBigChunk, size: int) =
+when not defined(gcDestructors):
+  proc getBottom(a: var MemRegion): PAvlNode =
+    result = addr(a.bottomData)
+    if result.link[0] == nil:
+      result.link[0] = result
+      result.link[1] = result
+
+  proc allocAvlNode(a: var MemRegion, key, upperBound: int): PAvlNode =
+    if a.freeAvlNodes != nil:
+      result = a.freeAvlNodes
+      a.freeAvlNodes = a.freeAvlNodes.link[0]
+    else:
+      result = cast[PAvlNode](llAlloc(a, sizeof(AvlNode)))
+      when defined(nimAvlcorruption):
+        cprintf("tracking location: %p\n", result)
+    result.key = key
+    result.upperBound = upperBound
+    let bottom = getBottom(a)
+    result.link[0] = bottom
+    result.link[1] = bottom
+    result.level = 1
+    #when defined(nimAvlcorruption):
+    #  track("allocAvlNode", result, sizeof(AvlNode))
+    sysAssert(bottom == addr(a.bottomData), "bottom data")
+    sysAssert(bottom.link[0] == bottom, "bottom link[0]")
+    sysAssert(bottom.link[1] == bottom, "bottom link[1]")
+
+  proc deallocAvlNode(a: var MemRegion, n: PAvlNode) {.inline.} =
+    n.link[0] = a.freeAvlNodes
+    a.freeAvlNodes = n
+
+proc addHeapLink(a: var MemRegion; p: PBigChunk, size: int): ptr HeapLinks =
   var it = addr(a.heapLinks)
   while it != nil and it.len >= it.chunks.len: it = it.next
   if it == nil:
@@ -303,12 +388,15 @@ proc addHeapLink(a: var MemRegion; p: PBigChunk, size: int) =
     a.heapLinks.next = n
     n.chunks[0] = (p, size)
     n.len = 1
+    result = n
   else:
     let L = it.len
     it.chunks[L] = (p, size)
     inc it.len
+    result = it
 
-include "system/avltree"
+when not defined(gcDestructors):
+  include "system/avltree"
 
 proc llDeallocAll(a: var MemRegion) =
   var it = a.llmem
@@ -373,7 +461,7 @@ iterator elements(t: IntSet): int {.inline.} =
       r = r.next
 
 proc isSmallChunk(c: PChunk): bool {.inline.} =
-  return c.size <= SmallChunkSize-smallChunkOverhead()
+  result = c.size <= SmallChunkSize-smallChunkOverhead()
 
 proc chunkUnused(c: PChunk): bool {.inline.} =
   result = (c.prevSize and 1) == 0
@@ -389,8 +477,8 @@ iterator allObjects(m: var MemRegion): pointer {.inline.} =
           var c = cast[PSmallChunk](c)
 
           let size = c.size
-          var a = cast[ByteAddress](addr(c.data))
-          let limit = a + c.acc
+          var a = cast[int](addr(c.data))
+          let limit = a + c.acc.int
           while a <% limit:
             yield cast[pointer](a)
             a = a +% size
@@ -408,13 +496,13 @@ when not defined(gcDestructors):
 
 # ------------- chunk management ----------------------------------------------
 proc pageIndex(c: PChunk): int {.inline.} =
-  result = cast[ByteAddress](c) shr PageShift
+  result = cast[int](c) shr PageShift
 
 proc pageIndex(p: pointer): int {.inline.} =
-  result = cast[ByteAddress](p) shr PageShift
+  result = cast[int](p) shr PageShift
 
 proc pageAddr(p: pointer): PChunk {.inline.} =
-  result = cast[PChunk](cast[ByteAddress](p) and not PageMask)
+  result = cast[PChunk](cast[int](p) and not PageMask)
   #sysAssert(Contains(allocator.chunkStarts, pageIndex(result)))
 
 when false:
@@ -426,49 +514,44 @@ when false:
                 it, it.next, it.prev, it.size)
       it = it.next
 
-const nimMaxHeap {.intdefine.} = 0
-
 proc requestOsChunks(a: var MemRegion, size: int): PBigChunk =
   when not defined(emscripten):
     if not a.blockChunkSizeIncrease:
       let usedMem = a.occ #a.currMem # - a.freeMem
-      when nimMaxHeap != 0:
-        if usedMem > nimMaxHeap * 1024 * 1024:
-          raiseOutOfMem()
       if usedMem < 64 * 1024:
         a.nextChunkSize = PageSize*4
       else:
         a.nextChunkSize = min(roundup(usedMem shr 2, PageSize), a.nextChunkSize * 2)
-        a.nextChunkSize = min(a.nextChunkSize, MaxBigChunkSize)
+        a.nextChunkSize = min(a.nextChunkSize, MaxBigChunkSize).int
 
   var size = size
   if size > a.nextChunkSize:
-    result = cast[PBigChunk](osAllocPages(size))
+    result = cast[PBigChunk](allocPages(a, size))
   else:
-    result = cast[PBigChunk](osTryAllocPages(a.nextChunkSize))
+    result = cast[PBigChunk](tryAllocPages(a, a.nextChunkSize))
     if result == nil:
-      result = cast[PBigChunk](osAllocPages(size))
+      result = cast[PBigChunk](allocPages(a, size))
       a.blockChunkSizeIncrease = true
     else:
       size = a.nextChunkSize
 
   incCurrMem(a, size)
   inc(a.freeMem, size)
-  a.addHeapLink(result, size)
+  let heapLink = a.addHeapLink(result, size)
   when defined(debugHeapLinks):
     cprintf("owner: %p; result: %p; next pointer %p; size: %ld\n", addr(a),
-      result, result.heapLink, result.size)
+      result, heapLink, size)
 
   when defined(memtracker):
     trackLocation(addr result.size, sizeof(int))
 
-  sysAssert((cast[ByteAddress](result) and PageMask) == 0, "requestOsChunks 1")
+  sysAssert((cast[int](result) and PageMask) == 0, "requestOsChunks 1")
   #zeroMem(result, size)
   result.next = nil
   result.prev = nil
   result.size = size
   # update next.prevSize:
-  var nxt = cast[ByteAddress](result) +% size
+  var nxt = cast[int](result) +% size
   sysAssert((nxt and PageMask) == 0, "requestOsChunks 2")
   var next = cast[PChunk](nxt)
   if pageIndex(next) in a.chunkStarts:
@@ -476,7 +559,7 @@ proc requestOsChunks(a: var MemRegion, size: int): PBigChunk =
     next.prevSize = size or (next.prevSize and 1)
   # set result.prevSize:
   var lastSize = if a.lastSize != 0: a.lastSize else: PageSize
-  var prv = cast[ByteAddress](result) -% lastSize
+  var prv = cast[int](result) -% lastSize
   sysAssert((nxt and PageMask) == 0, "requestOsChunks 3")
   var prev = cast[PChunk](prv)
   if pageIndex(prev) in a.chunkStarts and prev.size == lastSize:
@@ -522,21 +605,22 @@ proc listRemove[T](head: var T, c: T) {.inline.} =
 
 proc updatePrevSize(a: var MemRegion, c: PBigChunk,
                     prevSize: int) {.inline.} =
-  var ri = cast[PChunk](cast[ByteAddress](c) +% c.size)
-  sysAssert((cast[ByteAddress](ri) and PageMask) == 0, "updatePrevSize")
+  var ri = cast[PChunk](cast[int](c) +% c.size)
+  sysAssert((cast[int](ri) and PageMask) == 0, "updatePrevSize")
   if isAccessible(a, ri):
     ri.prevSize = prevSize or (ri.prevSize and 1)
 
 proc splitChunk2(a: var MemRegion, c: PBigChunk, size: int): PBigChunk =
-  result = cast[PBigChunk](cast[ByteAddress](c) +% size)
+  result = cast[PBigChunk](cast[int](c) +% size)
   result.size = c.size - size
   track("result.size", addr result.size, sizeof(int))
-  # XXX check if these two nil assignments are dead code given
-  # addChunkToMatrix's implementation:
-  result.next = nil
-  result.prev = nil
+  when not defined(nimOptimizedSplitChunk):
+    # still active because of weird codegen issue on some of our CIs:
+    result.next = nil
+    result.prev = nil
   # size and not used:
   result.prevSize = size
+  result.owner = addr a
   sysAssert((size and 1) == 0, "splitChunk 2")
   sysAssert((size and PageMask) == 0,
       "splitChunk: size is not a multiple of the PageSize")
@@ -556,8 +640,8 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
   when coalescLeft:
     let prevSize = c.prevSize
     if prevSize != 0:
-      var le = cast[PChunk](cast[ByteAddress](c) -% prevSize)
-      sysAssert((cast[ByteAddress](le) and PageMask) == 0, "freeBigChunk 4")
+      var le = cast[PChunk](cast[int](c) -% prevSize)
+      sysAssert((cast[int](le) and PageMask) == 0, "freeBigChunk 4")
       if isAccessible(a, le) and chunkUnused(le):
         sysAssert(not isSmallChunk(le), "freeBigChunk 5")
         if not isSmallChunk(le) and le.size < MaxBigChunkSize:
@@ -567,11 +651,14 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
           c = cast[PBigChunk](le)
           if c.size > MaxBigChunkSize:
             let rest = splitChunk2(a, c, MaxBigChunkSize)
+            when defined(nimOptimizedSplitChunk):
+              rest.next = nil
+              rest.prev = nil
             addChunkToMatrix(a, c)
             c = rest
   when coalescRight:
-    var ri = cast[PChunk](cast[ByteAddress](c) +% c.size)
-    sysAssert((cast[ByteAddress](ri) and PageMask) == 0, "freeBigChunk 2")
+    var ri = cast[PChunk](cast[int](c) +% c.size)
+    sysAssert((cast[int](ri) and PageMask) == 0, "freeBigChunk 2")
     if isAccessible(a, ri) and chunkUnused(ri):
       sysAssert(not isSmallChunk(ri), "freeBigChunk 3")
       if not isSmallChunk(ri) and c.size < MaxBigChunkSize:
@@ -591,6 +678,13 @@ proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
   mappingSearch(size, fl, sl)
   sysAssert((size and PageMask) == 0, "getBigChunk: unaligned chunk")
   result = findSuitableBlock(a, fl, sl)
+
+  when RegionHasLock:
+    if not a.lockActive:
+      a.lockActive = true
+      initSysLock(a.lock)
+    acquireSys a.lock
+
   if result == nil:
     if size < nimMinHeapPages * PageSize:
       result = requestOsChunks(a, nimMinHeapPages * PageSize)
@@ -600,6 +694,7 @@ proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
       # if we over allocated split the chunk:
       if result.size > size:
         splitChunk(a, result, size)
+    result.owner = addr a
   else:
     removeChunkFromMatrix2(a, result, fl, sl)
     if result.size >= size + PageSize:
@@ -607,22 +702,33 @@ proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
   # set 'used' to to true:
   result.prevSize = 1
   track("setUsedToFalse", addr result.size, sizeof(int))
+  sysAssert result.owner == addr a, "getBigChunk: No owner set!"
 
   incl(a, a.chunkStarts, pageIndex(result))
   dec(a.freeMem, size)
+  when RegionHasLock:
+    releaseSys a.lock
 
 proc getHugeChunk(a: var MemRegion; size: int): PBigChunk =
-  result = cast[PBigChunk](osAllocPages(size))
+  result = cast[PBigChunk](allocPages(a, size))
+  when RegionHasLock:
+    if not a.lockActive:
+      a.lockActive = true
+      initSysLock(a.lock)
+    acquireSys a.lock
   incCurrMem(a, size)
   # XXX add this to the heap links. But also remove it from it later.
   when false: a.addHeapLink(result, size)
-  sysAssert((cast[ByteAddress](result) and PageMask) == 0, "getHugeChunk")
+  sysAssert((cast[int](result) and PageMask) == 0, "getHugeChunk")
   result.next = nil
   result.prev = nil
   result.size = size
   # set 'used' to to true:
   result.prevSize = 1
+  result.owner = addr a
   incl(a, a.chunkStarts, pageIndex(result))
+  when RegionHasLock:
+    releaseSys a.lock
 
 proc freeHugeChunk(a: var MemRegion; c: PBigChunk) =
   let size = c.size
@@ -686,119 +792,180 @@ else:
   template trackSize(x) = discard
   template untrackSize(x) = discard
 
-when false:
-  # not yet used by the GCs
-  proc rawTryAlloc(a: var MemRegion; requestedSize: int): pointer =
-    sysAssert(allocInv(a), "rawAlloc: begin")
-    sysAssert(roundup(65, 8) == 72, "rawAlloc: roundup broken")
-    sysAssert(requestedSize >= sizeof(FreeCell), "rawAlloc: requested size too small")
-    var size = roundup(requestedSize, MemAlign)
-    inc a.occ, size
-    trackSize(size)
-    sysAssert(size >= requestedSize, "insufficient allocated size!")
-    #c_fprintf(stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
-    if size <= SmallChunkSize-smallChunkOverhead():
-      # allocate a small block: for small chunks, we use only its next pointer
-      var s = size div MemAlign
-      var c = a.freeSmallChunks[s]
-      if c == nil:
-        result = nil
-      else:
-        sysAssert c.size == size, "rawAlloc 6"
-        if c.freeList == nil:
-          sysAssert(c.acc + smallChunkOverhead() + size <= SmallChunkSize,
-                    "rawAlloc 7")
-          result = cast[pointer](cast[ByteAddress](addr(c.data)) +% c.acc)
-          inc(c.acc, size)
-        else:
-          result = c.freeList
-          sysAssert(c.freeList.zeroField == 0, "rawAlloc 8")
-          c.freeList = c.freeList.next
-        dec(c.free, size)
-        sysAssert((cast[ByteAddress](result) and (MemAlign-1)) == 0, "rawAlloc 9")
-        if c.free < size:
-          listRemove(a.freeSmallChunks[s], c)
-          sysAssert(allocInv(a), "rawAlloc: end listRemove test")
-        sysAssert(((cast[ByteAddress](result) and PageMask) - smallChunkOverhead()) %%
-                  size == 0, "rawAlloc 21")
-        sysAssert(allocInv(a), "rawAlloc: end small size")
+proc deallocBigChunk(a: var MemRegion, c: PBigChunk) =
+  when RegionHasLock:
+    acquireSys a.lock
+  dec a.occ, c.size
+  untrackSize(c.size)
+  sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case B)"
+  when not defined(gcDestructors):
+    a.deleted = getBottom(a)
+    del(a, a.root, cast[int](addr(c.data)))
+  if c.size >= HugeChunkSize: freeHugeChunk(a, c)
+  else: freeBigChunk(a, c)
+  when RegionHasLock:
+    releaseSys a.lock
+
+when defined(gcDestructors):
+  template atomicPrepend(head, elem: untyped) =
+    # see also https://en.cppreference.com/w/cpp/atomic/atomic_compare_exchange
+    when hasThreadSupport:
+      while true:
+        elem.next.storea head.loada
+        if atomicCompareExchangeN(addr head, addr elem.next, elem, weak = true, ATOMIC_RELEASE, ATOMIC_RELAXED):
+          break
     else:
-      inc size, bigChunkOverhead()
-      var fl, sl: int
-      mappingSearch(size, fl, sl)
-      sysAssert((size and PageMask) == 0, "getBigChunk: unaligned chunk")
-      let c = findSuitableBlock(a, fl, sl)
-      if c != nil:
-        removeChunkFromMatrix2(a, c, fl, sl)
-        if c.size >= size + PageSize:
-          splitChunk(a, c, size)
-        # set 'used' to to true:
-        c.prevSize = 1
-        incl(a, a.chunkStarts, pageIndex(c))
-        dec(a.freeMem, size)
-        result = addr(c.data)
-        sysAssert((cast[ByteAddress](c) and (MemAlign-1)) == 0, "rawAlloc 13")
-        sysAssert((cast[ByteAddress](c) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary")
-        if a.root == nil: a.root = getBottom(a)
-        add(a, a.root, cast[ByteAddress](result), cast[ByteAddress](result)+%size)
-      else:
-        result = nil
+      elem.next.storea head.loada
+      head.storea elem
+
+  proc addToSharedFreeListBigChunks(a: var MemRegion; c: PBigChunk) {.inline.} =
+    sysAssert c.next == nil, "c.next pointer must be nil"
+    atomicPrepend a.sharedFreeListBigChunks, c
+
+  proc addToSharedFreeList(c: PSmallChunk; f: ptr FreeCell; size: int) {.inline.} =
+    atomicPrepend c.owner.sharedFreeLists[size], f
+
+  const MaxSteps = 20
+
+  proc compensateCounters(a: var MemRegion; c: PSmallChunk; size: int) =
+    # rawDealloc did NOT do the usual:
+    # `inc(c.free, size); dec(a.occ, size)` because it wasn't the owner of these
+    # memory locations. We have to compensate here for these for the entire list.
+    var it = c.freeList
+    var total = 0
+    while it != nil:
+      inc total, size
+      let chunk = cast[PSmallChunk](pageAddr(it))
+      if c != chunk:
+        # The cell is foreign, potentially even from a foreign thread.
+        # It must block the current chunk from being freed, as doing so would leak memory.
+        inc c.foreignCells
+      it = it.next
+    # By not adjusting the foreign chunk we reserve space in it to prevent deallocation
+    inc(c.free, total)
+    dec(a.occ, total)
+
+  proc freeDeferredObjects(a: var MemRegion; root: PBigChunk) =
+    var it = root
+    var maxIters = MaxSteps # make it time-bounded
+    while true:
+      let rest = it.next.loada
+      it.next.storea nil
+      deallocBigChunk(a, cast[PBigChunk](it))
+      if maxIters == 0:
+        if rest != nil:
+          addToSharedFreeListBigChunks(a, rest)
+          sysAssert a.sharedFreeListBigChunks != nil, "re-enqueing failed"
+        break
+      it = rest
+      dec maxIters
+      if it == nil: break
 
 proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
   when defined(nimTypeNames):
     inc(a.allocCounter)
   sysAssert(allocInv(a), "rawAlloc: begin")
   sysAssert(roundup(65, 8) == 72, "rawAlloc: roundup broken")
-  sysAssert(requestedSize >= sizeof(FreeCell), "rawAlloc: requested size too small")
   var size = roundup(requestedSize, MemAlign)
+  sysAssert(size >= sizeof(FreeCell), "rawAlloc: requested size too small")
   sysAssert(size >= requestedSize, "insufficient allocated size!")
   #c_fprintf(stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
+
   if size <= SmallChunkSize-smallChunkOverhead():
+    template fetchSharedCells(tc: PSmallChunk) =
+      # Consumes cells from (potentially) foreign threads from `a.sharedFreeLists[s]`
+      when defined(gcDestructors):
+        if tc.freeList == nil:
+          when hasThreadSupport:
+            # Steal the entire list from `sharedFreeList`:
+            tc.freeList = atomicExchangeN(addr a.sharedFreeLists[s], nil, ATOMIC_RELAXED)
+          else:
+            tc.freeList = a.sharedFreeLists[s]
+            a.sharedFreeLists[s] = nil
+          # if `tc.freeList` isn't nil, `tc` will gain capacity.
+          # We must calculate how much it gained and how many foreign cells are included.
+          compensateCounters(a, tc, size)
+
     # allocate a small block: for small chunks, we use only its next pointer
-    var s = size div MemAlign
+    let s = size div MemAlign
     var c = a.freeSmallChunks[s]
     if c == nil:
+      # There is no free chunk of the requested size available, we need a new one.
       c = getSmallChunk(a)
+      # init all fields in case memory didn't get zeroed
       c.freeList = nil
+      c.foreignCells = 0
       sysAssert c.size == PageSize, "rawAlloc 3"
       c.size = size
-      c.acc = size
-      c.free = SmallChunkSize - smallChunkOverhead() - size
+      c.acc = size.uint32
+      c.free = SmallChunkSize - smallChunkOverhead() - size.int32
+      sysAssert c.owner == addr(a), "rawAlloc: No owner set!"
       c.next = nil
       c.prev = nil
-      listAdd(a.freeSmallChunks[s], c)
+      # Shared cells are fetched here in case `c.size * 2 >= SmallChunkSize - smallChunkOverhead()`.
+      # For those single cell chunks, we would otherwise have to allocate a new one almost every time.
+      fetchSharedCells(c)
+      if c.free >= size:
+        # Because removals from `a.freeSmallChunks[s]` only happen in the other alloc branch and during dealloc,
+        #  we must not add it to the list if it cannot be used the next time a pointer of `size` bytes is needed.
+        listAdd(a.freeSmallChunks[s], c)
       result = addr(c.data)
-      sysAssert((cast[ByteAddress](result) and (MemAlign-1)) == 0, "rawAlloc 4")
+      sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 4")
     else:
+      # There is a free chunk of the requested size available, use it.
       sysAssert(allocInv(a), "rawAlloc: begin c != nil")
       sysAssert c.next != c, "rawAlloc 5"
       #if c.size != size:
       #  c_fprintf(stdout, "csize: %lld; size %lld\n", c.size, size)
       sysAssert c.size == size, "rawAlloc 6"
       if c.freeList == nil:
-        sysAssert(c.acc + smallChunkOverhead() + size <= SmallChunkSize,
+        sysAssert(c.acc.int + smallChunkOverhead() + size <= SmallChunkSize,
                   "rawAlloc 7")
-        result = cast[pointer](cast[ByteAddress](addr(c.data)) +% c.acc)
+        result = cast[pointer](cast[int](addr(c.data)) +% c.acc.int)
         inc(c.acc, size)
       else:
+        # There are free cells available, prefer them over the accumulator
         result = c.freeList
         when not defined(gcDestructors):
           sysAssert(c.freeList.zeroField == 0, "rawAlloc 8")
         c.freeList = c.freeList.next
+        if cast[PSmallChunk](pageAddr(result)) != c:
+          # This cell isn't a blocker for the current chunk's deallocation anymore
+          dec(c.foreignCells)
+        else:
+          sysAssert(c == cast[PSmallChunk](pageAddr(result)), "rawAlloc: Bad cell")
+      # Even if the cell we return is foreign, the local chunk's capacity decreases.
+      # The capacity was previously reserved in the source chunk (when it first got allocated),
+      #  then added into the current chunk during dealloc,
+      #  so the source chunk will not be freed or leak memory because of this.
       dec(c.free, size)
-      sysAssert((cast[ByteAddress](result) and (MemAlign-1)) == 0, "rawAlloc 9")
+      sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 9")
       sysAssert(allocInv(a), "rawAlloc: end c != nil")
-    sysAssert(allocInv(a), "rawAlloc: before c.free < size")
-    if c.free < size:
-      sysAssert(allocInv(a), "rawAlloc: before listRemove test")
-      listRemove(a.freeSmallChunks[s], c)
-      sysAssert(allocInv(a), "rawAlloc: end listRemove test")
-    sysAssert(((cast[ByteAddress](result) and PageMask) - smallChunkOverhead()) %%
+      # We fetch deferred cells *after* advancing `c.freeList`/`acc` to adjust `c.free`.
+      # If after the adjustment it turns out there's free cells available,
+      #  the chunk stays in `a.freeSmallChunks[s]` and the need for a new chunk is delayed.
+      fetchSharedCells(c)
+      sysAssert(allocInv(a), "rawAlloc: before c.free < size")
+      if c.free < size:
+        # Even after fetching shared cells the chunk has no usable memory left. It is no longer the active chunk
+        sysAssert(allocInv(a), "rawAlloc: before listRemove test")
+        listRemove(a.freeSmallChunks[s], c)
+        sysAssert(allocInv(a), "rawAlloc: end listRemove test")
+    sysAssert(((cast[int](result) and PageMask) - smallChunkOverhead()) %%
                size == 0, "rawAlloc 21")
     sysAssert(allocInv(a), "rawAlloc: end small size")
     inc a.occ, size
     trackSize(c.size)
   else:
+    when defined(gcDestructors):
+      when hasThreadSupport:
+        let deferredFrees = atomicExchangeN(addr a.sharedFreeListBigChunks, nil, ATOMIC_RELAXED)
+      else:
+        let deferredFrees = a.sharedFreeListBigChunks
+        a.sharedFreeListBigChunks = nil
+      if deferredFrees != nil:
+        freeDeferredObjects(a, deferredFrees)
+
     size = requestedSize + bigChunkOverhead() #  roundup(requestedSize+bigChunkOverhead(), PageSize)
     # allocate a large block
     var c = if size >= HugeChunkSize: getHugeChunk(a, size)
@@ -806,15 +973,16 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
     sysAssert c.prev == nil, "rawAlloc 10"
     sysAssert c.next == nil, "rawAlloc 11"
     result = addr(c.data)
-    sysAssert((cast[ByteAddress](c) and (MemAlign-1)) == 0, "rawAlloc 13")
-    sysAssert((cast[ByteAddress](c) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary")
-    if a.root == nil: a.root = getBottom(a)
-    add(a, a.root, cast[ByteAddress](result), cast[ByteAddress](result)+%size)
+    sysAssert((cast[int](c) and (MemAlign-1)) == 0, "rawAlloc 13")
+    sysAssert((cast[int](c) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary")
+    when not defined(gcDestructors):
+      if a.root == nil: a.root = getBottom(a)
+      add(a, a.root, cast[int](result), cast[int](result)+%size)
     inc a.occ, c.size
     trackSize(c.size)
   sysAssert(isAccessible(a, result), "rawAlloc 14")
   sysAssert(allocInv(a), "rawAlloc: end")
-  when logAlloc: cprintf("var pointer_%p = alloc(%ld)\n", result, requestedSize)
+  when logAlloc: cprintf("var pointer_%p = alloc(%ld) # %p\n", result, requestedSize, addr a)
 
 proc rawAlloc0(a: var MemRegion, requestedSize: int): pointer =
   result = rawAlloc(a, requestedSize)
@@ -826,53 +994,77 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
   #sysAssert(isAllocatedPtr(a, p), "rawDealloc: no allocated pointer")
   sysAssert(allocInv(a), "rawDealloc: begin")
   var c = pageAddr(p)
+  sysAssert(c != nil, "rawDealloc: begin")
   if isSmallChunk(c):
     # `p` is within a small chunk:
     var c = cast[PSmallChunk](c)
-    var s = c.size
-    dec a.occ, s
-    untrackSize(s)
-    sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case A)"
-    sysAssert(((cast[ByteAddress](p) and PageMask) - smallChunkOverhead()) %%
-               s == 0, "rawDealloc 3")
+    let s = c.size
+    #       ^ We might access thread foreign storage here.
+    # The other thread cannot possibly free this block as it's still alive.
     var f = cast[ptr FreeCell](p)
-    when not defined(gcDestructors):
-      #echo("setting to nil: ", $cast[ByteAddress](addr(f.zeroField)))
-      sysAssert(f.zeroField != 0, "rawDealloc 1")
-      f.zeroField = 0
-    f.next = c.freeList
-    c.freeList = f
-    when overwriteFree:
-      # set to 0xff to check for usage after free bugs:
-      nimSetMem(cast[pointer](cast[int](p) +% sizeof(FreeCell)), -1'i32,
-               s -% sizeof(FreeCell))
-    # check if it is not in the freeSmallChunks[s] list:
-    if c.free < s:
-      # add it to the freeSmallChunks[s] array:
-      listAdd(a.freeSmallChunks[s div MemAlign], c)
-      inc(c.free, s)
+    if c.owner == addr(a):
+      # We own the block, there is no foreign thread involved.
+      dec a.occ, s
+      untrackSize(s)
+      sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case A)"
+      sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
+                s == 0, "rawDealloc 3")
+      when not defined(gcDestructors):
+        #echo("setting to nil: ", $cast[int](addr(f.zeroField)))
+        sysAssert(f.zeroField != 0, "rawDealloc 1")
+        f.zeroField = 0
+      when overwriteFree:
+        # set to 0xff to check for usage after free bugs:
+        nimSetMem(cast[pointer](cast[int](p) +% sizeof(FreeCell)), -1'i32,
+                s -% sizeof(FreeCell))
+      let activeChunk = a.freeSmallChunks[s div MemAlign]
+      if activeChunk != nil and c != activeChunk:
+        # This pointer is not part of the active chunk, lend it out
+        #  and do not adjust the current chunk (same logic as compensateCounters.)
+        # Put the cell into the active chunk,
+        #  may prevent a queue of available chunks from forming in a.freeSmallChunks[s div MemAlign].
+        #  This queue would otherwise waste memory in the form of free cells until we return to those chunks.
+        f.next = activeChunk.freeList
+        activeChunk.freeList = f # lend the cell
+        inc(activeChunk.free, s) # By not adjusting the current chunk's capacity it is prevented from being freed
+        inc(activeChunk.foreignCells) # The cell is now considered foreign from the perspective of the active chunk
+      else:
+        f.next = c.freeList
+        c.freeList = f
+        if c.free < s:
+          # The chunk could not have been active as it didn't have enough space to give
+          listAdd(a.freeSmallChunks[s div MemAlign], c)
+          inc(c.free, s)
+        else:
+          inc(c.free, s)
+          # Free only if the entire chunk is unused and there are no borrowed cells.
+          # If the chunk were to be freed while it references foreign cells,
+          #  the foreign chunks will leak memory and can never be freed.
+          if c.free == SmallChunkSize-smallChunkOverhead() and c.foreignCells == 0:
+            listRemove(a.freeSmallChunks[s div MemAlign], c)
+            c.size = SmallChunkSize
+            freeBigChunk(a, cast[PBigChunk](c))
     else:
-      inc(c.free, s)
-      if c.free == SmallChunkSize-smallChunkOverhead():
-        listRemove(a.freeSmallChunks[s div MemAlign], c)
-        c.size = SmallChunkSize
-        freeBigChunk(a, cast[PBigChunk](c))
-    sysAssert(((cast[ByteAddress](p) and PageMask) - smallChunkOverhead()) %%
+      when logAlloc: cprintf("dealloc(pointer_%p) # SMALL FROM %p CALLER %p\n", p, c.owner, addr(a))
+
+      when defined(gcDestructors):
+        addToSharedFreeList(c, f, s div MemAlign)
+    sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
                s == 0, "rawDealloc 2")
   else:
     # set to 0xff to check for usage after free bugs:
     when overwriteFree: nimSetMem(p, -1'i32, c.size -% bigChunkOverhead())
-    # free big chunk
-    var c = cast[PBigChunk](c)
-    dec a.occ, c.size
-    untrackSize(c.size)
-    sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case B)"
-    a.deleted = getBottom(a)
-    del(a, a.root, cast[int](addr(c.data)))
-    if c.size >= HugeChunkSize: freeHugeChunk(a, c)
-    else: freeBigChunk(a, c)
+    when logAlloc: cprintf("dealloc(pointer_%p) # BIG %p\n", p, c.owner)
+    when defined(gcDestructors):
+      if c.owner == addr(a):
+        deallocBigChunk(a, cast[PBigChunk](c))
+      else:
+        addToSharedFreeListBigChunks(c.owner[], cast[PBigChunk](c))
+    else:
+      deallocBigChunk(a, cast[PBigChunk](c))
+
   sysAssert(allocInv(a), "rawDealloc: end")
-  when logAlloc: cprintf("dealloc(pointer_%p)\n", p)
+  #when logAlloc: cprintf("dealloc(pointer_%p)\n", p)
 
 when not defined(gcDestructors):
   proc isAllocatedPtr(a: MemRegion, p: pointer): bool =
@@ -881,9 +1073,9 @@ when not defined(gcDestructors):
       if not chunkUnused(c):
         if isSmallChunk(c):
           var c = cast[PSmallChunk](c)
-          var offset = (cast[ByteAddress](p) and (PageSize-1)) -%
+          var offset = (cast[int](p) and (PageSize-1)) -%
                       smallChunkOverhead()
-          result = (c.acc >% offset) and (offset %% c.size == 0) and
+          result = (c.acc.int >% offset) and (offset %% c.size == 0) and
             (cast[ptr FreeCell](p).zeroField >% 1)
         else:
           var c = cast[PBigChunk](c)
@@ -899,12 +1091,12 @@ when not defined(gcDestructors):
       if not chunkUnused(c):
         if isSmallChunk(c):
           var c = cast[PSmallChunk](c)
-          var offset = (cast[ByteAddress](p) and (PageSize-1)) -%
+          var offset = (cast[int](p) and (PageSize-1)) -%
                       smallChunkOverhead()
-          if c.acc >% offset:
-            sysAssert(cast[ByteAddress](addr(c.data)) +% offset ==
-                      cast[ByteAddress](p), "offset is not what you think it is")
-            var d = cast[ptr FreeCell](cast[ByteAddress](addr(c.data)) +%
+          if c.acc.int >% offset:
+            sysAssert(cast[int](addr(c.data)) +% offset ==
+                      cast[int](p), "offset is not what you think it is")
+            var d = cast[ptr FreeCell](cast[int](addr(c.data)) +%
                       offset -% (offset %% c.size))
             if d.zeroField >% 1:
               result = d
@@ -931,7 +1123,7 @@ when not defined(gcDestructors):
 
 proc ptrSize(p: pointer): int =
   when not defined(gcDestructors):
-    var x = cast[pointer](cast[ByteAddress](p) -% sizeof(FreeCell))
+    var x = cast[pointer](cast[int](p) -% sizeof(FreeCell))
     var c = pageAddr(p)
     sysAssert(not chunkUnused(c), "ptrSize")
     result = c.size -% sizeof(FreeCell)
@@ -949,7 +1141,7 @@ proc alloc(allocator: var MemRegion, size: Natural): pointer {.gcsafe.} =
     result = rawAlloc(allocator, size+sizeof(FreeCell))
     cast[ptr FreeCell](result).zeroField = 1 # mark it as used
     sysAssert(not isAllocatedPtr(allocator, result), "alloc")
-    result = cast[pointer](cast[ByteAddress](result) +% sizeof(FreeCell))
+    result = cast[pointer](cast[int](result) +% sizeof(FreeCell))
     track("alloc", result, size)
   else:
     result = rawAlloc(allocator, size)
@@ -961,7 +1153,7 @@ proc alloc0(allocator: var MemRegion, size: Natural): pointer =
 proc dealloc(allocator: var MemRegion, p: pointer) =
   when not defined(gcDestructors):
     sysAssert(p != nil, "dealloc: p is nil")
-    var x = cast[pointer](cast[ByteAddress](p) -% sizeof(FreeCell))
+    var x = cast[pointer](cast[int](p) -% sizeof(FreeCell))
     sysAssert(x != nil, "dealloc: x is nil")
     sysAssert(isAccessible(allocator, x), "is not accessible")
     sysAssert(cast[ptr FreeCell](x).zeroField == 1, "dealloc: object header corrupted")
@@ -994,7 +1186,7 @@ proc deallocOsPages(a: var MemRegion) =
       let (p, size) = it.chunks[i]
       when defined(debugHeapLinks):
         cprintf("owner %p; dealloc A: %p size: %ld; next: %p\n", addr(a),
-          it, it.size, next)
+          it, size, next)
       sysAssert size >= PageSize, "origSize too small"
       osDeallocPages(p, size)
     it = next
@@ -1017,12 +1209,12 @@ when defined(nimTypeNames):
 template instantiateForRegion(allocator: untyped) {.dirty.} =
   {.push stackTrace: off.}
 
-  when defined(fulldebug):
+  when defined(nimFulldebug):
     proc interiorAllocatedPtr*(p: pointer): pointer =
       result = interiorAllocatedPtr(allocator, p)
 
     proc isAllocatedPtr*(p: pointer): bool =
-      let p = cast[pointer](cast[ByteAddress](p)-%ByteAddress(sizeof(Cell)))
+      let p = cast[pointer](cast[int](p)-%ByteAddress(sizeof(Cell)))
       result = isAllocatedPtr(allocator, p)
 
   proc deallocOsPages = deallocOsPages(allocator)
@@ -1042,7 +1234,7 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
   proc realloc0Impl(p: pointer, oldSize, newSize: Natural): pointer =
     result = realloc(allocator, p, newSize)
     if newSize > oldSize:
-      zeroMem(cast[pointer](cast[int](result) + oldSize), newSize - oldSize)
+      zeroMem(cast[pointer](cast[uint](result) + uint(oldSize)), newSize - oldSize)
 
   when false:
     proc countFreeMem(): int =
@@ -1052,25 +1244,34 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
         inc(result, it.size)
         it = it.next
 
+  when hasThreadSupport and not defined(gcDestructors):
+    proc addSysExitProc(quitProc: proc() {.noconv.}) {.importc: "atexit", header: "<stdlib.h>".}
+
+    var sharedHeap: MemRegion
+    var heapLock: SysLock
+    initSysLock(heapLock)
+    addSysExitProc(proc() {.noconv.} = deinitSys(heapLock))
+
   proc getFreeMem(): int =
-    result = allocator.freeMem
     #sysAssert(result == countFreeMem())
+    result = allocator.freeMem
 
-  proc getTotalMem(): int = return allocator.currMem
-  proc getOccupiedMem(): int = return allocator.occ #getTotalMem() - getFreeMem()
-  proc getMaxMem*(): int = return getMaxMem(allocator)
+  proc getTotalMem(): int =
+    result = allocator.currMem
+
+  proc getOccupiedMem(): int =
+    result = allocator.occ #getTotalMem() - getFreeMem()
+
+  proc getMaxMem*(): int =
+    result = getMaxMem(allocator)
 
   when defined(nimTypeNames):
     proc getMemCounters*(): (int, int) = getMemCounters(allocator)
 
   # -------------------- shared heap region ----------------------------------
-  when hasThreadSupport:
-    var sharedHeap: MemRegion
-    var heapLock: SysLock
-    initSysLock(heapLock)
 
   proc allocSharedImpl(size: Natural): pointer =
-    when hasThreadSupport:
+    when hasThreadSupport and not defined(gcDestructors):
       acquireSys(heapLock)
       result = alloc(sharedHeap, size)
       releaseSys(heapLock)
@@ -1082,7 +1283,7 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
     zeroMem(result, size)
 
   proc deallocSharedImpl(p: pointer) =
-    when hasThreadSupport:
+    when hasThreadSupport and not defined(gcDestructors):
       acquireSys(heapLock)
       dealloc(sharedHeap, p)
       releaseSys(heapLock)
@@ -1090,7 +1291,7 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
       deallocImpl(p)
 
   proc reallocSharedImpl(p: pointer, newSize: Natural): pointer =
-    when hasThreadSupport:
+    when hasThreadSupport and not defined(gcDestructors):
       acquireSys(heapLock)
       result = realloc(sharedHeap, p, newSize)
       releaseSys(heapLock)
@@ -1098,7 +1299,7 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
       result = reallocImpl(p, newSize)
 
   proc reallocShared0Impl(p: pointer, oldSize, newSize: Natural): pointer =
-    when hasThreadSupport:
+    when hasThreadSupport and not defined(gcDestructors):
       acquireSys(heapLock)
       result = realloc0(sharedHeap, p, oldSize, newSize)
       releaseSys(heapLock)
@@ -1106,20 +1307,31 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
       result = realloc0Impl(p, oldSize, newSize)
 
   when hasThreadSupport:
-    template sharedMemStatsShared(v: int) =
-      acquireSys(heapLock)
-      result = v
-      releaseSys(heapLock)
+    when defined(gcDestructors):
+      proc getFreeSharedMem(): int =
+        allocator.freeMem
+
+      proc getTotalSharedMem(): int =
+        allocator.currMem
+
+      proc getOccupiedSharedMem(): int =
+        allocator.occ
+
+    else:
+      template sharedMemStatsShared(v: int) =
+        acquireSys(heapLock)
+        result = v
+        releaseSys(heapLock)
 
-    proc getFreeSharedMem(): int =
-      sharedMemStatsShared(sharedHeap.freeMem)
+      proc getFreeSharedMem(): int =
+        sharedMemStatsShared(sharedHeap.freeMem)
 
-    proc getTotalSharedMem(): int =
-      sharedMemStatsShared(sharedHeap.currMem)
+      proc getTotalSharedMem(): int =
+        sharedMemStatsShared(sharedHeap.currMem)
 
-    proc getOccupiedSharedMem(): int =
-      sharedMemStatsShared(sharedHeap.occ)
-      #sharedMemStatsShared(sharedHeap.currMem - sharedHeap.freeMem)
+      proc getOccupiedSharedMem(): int =
+        sharedMemStatsShared(sharedHeap.occ)
+        #sharedMemStatsShared(sharedHeap.currMem - sharedHeap.freeMem)
   {.pop.}
 
 {.pop.}
diff --git a/lib/system/ansi_c.nim b/lib/system/ansi_c.nim
index 0b4b25992..3098e17d6 100644
--- a/lib/system/ansi_c.nim
+++ b/lib/system/ansi_c.nim
@@ -12,8 +12,6 @@
 # All symbols are prefixed with 'c_' to avoid ambiguities
 
 {.push hints:off, stack_trace: off, profiler: off.}
-when not defined(nimHasHotCodeReloading):
-  {.pragma: nonReloadable.}
 
 proc c_memchr*(s: pointer, c: cint, n: csize_t): pointer {.
   importc: "memchr", header: "<string.h>".}
@@ -33,7 +31,10 @@ proc c_abort*() {.
   importc: "abort", header: "<stdlib.h>", noSideEffect, noreturn.}
 
 
-when defined(linux) and defined(amd64):
+when defined(nimBuiltinSetjmp):
+  type
+    C_JmpBuf* = array[5, pointer]
+elif defined(linux) and defined(amd64):
   type
     C_JmpBuf* {.importc: "jmp_buf", header: "<setjmp.h>", bycopy.} = object
         abi: array[200 div sizeof(clong), clong]
@@ -42,6 +43,7 @@ else:
     C_JmpBuf* {.importc: "jmp_buf", header: "<setjmp.h>".} = object
 
 
+type CSighandlerT = proc (a: cint) {.noconv.}
 when defined(windows):
   const
     SIGABRT* = cint(22)
@@ -50,6 +52,7 @@ when defined(windows):
     SIGINT* = cint(2)
     SIGSEGV* = cint(11)
     SIGTERM = cint(15)
+    SIG_DFL* = cast[CSighandlerT](0)
 elif defined(macosx) or defined(linux) or defined(freebsd) or
      defined(openbsd) or defined(netbsd) or defined(solaris) or
      defined(dragonfly) or defined(nintendoswitch) or defined(genode) or
@@ -62,6 +65,7 @@ elif defined(macosx) or defined(linux) or defined(freebsd) or
     SIGSEGV* = cint(11)
     SIGTERM* = cint(15)
     SIGPIPE* = cint(13)
+    SIG_DFL* = CSighandlerT(nil)
 elif defined(haiku):
   const
     SIGABRT* = cint(6)
@@ -71,8 +75,9 @@ elif defined(haiku):
     SIGSEGV* = cint(11)
     SIGTERM* = cint(15)
     SIGPIPE* = cint(7)
+    SIG_DFL* = CSighandlerT(nil)
 else:
-  when NoFakeVars:
+  when defined(nimscript):
     {.error: "SIGABRT not ported to your platform".}
   else:
     var
@@ -81,6 +86,7 @@ else:
       SIGABRT* {.importc: "SIGABRT", nodecl.}: cint
       SIGFPE* {.importc: "SIGFPE", nodecl.}: cint
       SIGILL* {.importc: "SIGILL", nodecl.}: cint
+      SIG_DFL* {.importc: "SIG_DFL", nodecl.}: CSighandlerT
     when defined(macosx) or defined(linux):
       var SIGPIPE* {.importc: "SIGPIPE", nodecl.}: cint
 
@@ -89,27 +95,67 @@ when defined(macosx):
 elif defined(haiku):
   const SIGBUS* = cint(30)
 
-when defined(nimSigSetjmp) and not defined(nimStdSetjmp):
+# "nimRawSetjmp" is defined by default for certain platforms, so we need the
+# "nimStdSetjmp" escape hatch with it.
+when defined(nimSigSetjmp):
   proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) {.
     header: "<setjmp.h>", importc: "siglongjmp".}
-  template c_setjmp*(jmpb: C_JmpBuf): cint =
+  proc c_setjmp*(jmpb: C_JmpBuf): cint =
     proc c_sigsetjmp(jmpb: C_JmpBuf, savemask: cint): cint {.
       header: "<setjmp.h>", importc: "sigsetjmp".}
     c_sigsetjmp(jmpb, 0)
+elif defined(nimBuiltinSetjmp):
+  proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) =
+    # Apple's Clang++ has trouble converting array names to pointers, so we need
+    # to be very explicit here.
+    proc c_builtin_longjmp(jmpb: ptr pointer, retval: cint) {.
+      importc: "__builtin_longjmp", nodecl.}
+    # The second parameter needs to be 1 and sometimes the C/C++ compiler checks it.
+    c_builtin_longjmp(unsafeAddr jmpb[0], 1)
+  proc c_setjmp*(jmpb: C_JmpBuf): cint =
+    proc c_builtin_setjmp(jmpb: ptr pointer): cint {.
+      importc: "__builtin_setjmp", nodecl.}
+    c_builtin_setjmp(unsafeAddr jmpb[0])
+
 elif defined(nimRawSetjmp) and not defined(nimStdSetjmp):
-  proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) {.
-    header: "<setjmp.h>", importc: "_longjmp".}
-  proc c_setjmp*(jmpb: C_JmpBuf): cint {.
-    header: "<setjmp.h>", importc: "_setjmp".}
+  when defined(windows):
+    # No `_longjmp()` on Windows.
+    proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) {.
+      header: "<setjmp.h>", importc: "longjmp".}
+    when defined(vcc) or defined(clangcl):
+      proc c_setjmp*(jmpb: C_JmpBuf): cint {.
+        header: "<setjmp.h>", importc: "setjmp".}
+    else:
+      # The Windows `_setjmp()` takes two arguments, with the second being an
+      # undocumented buffer used by the SEH mechanism for stack unwinding.
+      # Mingw-w64 has been trying to get it right for years, but it's still
+      # prone to stack corruption during unwinding, so we disable that by setting
+      # it to NULL.
+      # More details: https://github.com/status-im/nimbus-eth2/issues/3121
+      when defined(nimHasStyleChecks):
+        {.push styleChecks: off.}
+
+      proc c_setjmp*(jmpb: C_JmpBuf): cint =
+        proc c_setjmp_win(jmpb: C_JmpBuf, ctx: pointer): cint {.
+          header: "<setjmp.h>", importc: "_setjmp".}
+        c_setjmp_win(jmpb, nil)
+
+      when defined(nimHasStyleChecks):
+        {.pop.}
+  else:
+    proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) {.
+      header: "<setjmp.h>", importc: "_longjmp".}
+    proc c_setjmp*(jmpb: C_JmpBuf): cint {.
+      header: "<setjmp.h>", importc: "_setjmp".}
 else:
   proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) {.
     header: "<setjmp.h>", importc: "longjmp".}
   proc c_setjmp*(jmpb: C_JmpBuf): cint {.
     header: "<setjmp.h>", importc: "setjmp".}
 
-type CSighandlerT = proc (a: cint) {.noconv.}
-proc c_signal*(sign: cint, handler: proc (a: cint) {.noconv.}): CSighandlerT {.
+proc c_signal*(sign: cint, handler: CSighandlerT): CSighandlerT {.
   importc: "signal", header: "<signal.h>", discardable.}
+proc c_raise*(sign: cint): cint {.importc: "raise", header: "<signal.h>".}
 
 type
   CFile {.importc: "FILE", header: "<stdio.h>",
@@ -134,29 +180,54 @@ proc c_printf*(frmt: cstring): cint {.
 
 proc c_fputs*(c: cstring, f: CFilePtr): cint {.
   importc: "fputs", header: "<stdio.h>", discardable.}
+proc c_fputc*(c: char, f: CFilePtr): cint {.
+  importc: "fputc", header: "<stdio.h>", discardable.}
 
 proc c_sprintf*(buf, frmt: cstring): cint {.
   importc: "sprintf", header: "<stdio.h>", varargs, noSideEffect.}
   # we use it only in a way that cannot lead to security issues
 
-proc c_malloc*(size: csize_t): pointer {.
-  importc: "malloc", header: "<stdlib.h>".}
-proc c_calloc*(nmemb, size: csize_t): pointer {.
-  importc: "calloc", header: "<stdlib.h>".}
-proc c_free*(p: pointer) {.
-  importc: "free", header: "<stdlib.h>".}
-proc c_realloc*(p: pointer, newsize: csize_t): pointer {.
-  importc: "realloc", header: "<stdlib.h>".}
-
-proc c_fwrite*(buf: pointer, size, n: csize_t, f: CFilePtr): cint {.
+proc c_snprintf*(buf: cstring, n: csize_t, frmt: cstring): cint {.
+  importc: "snprintf", header: "<stdio.h>", varargs, noSideEffect.}
+
+when defined(zephyr) and not defined(zephyrUseLibcMalloc):
+  proc c_malloc*(size: csize_t): pointer {.
+    importc: "k_malloc", header: "<kernel.h>".}
+  proc c_calloc*(nmemb, size: csize_t): pointer {.
+    importc: "k_calloc", header: "<kernel.h>".}
+  proc c_free*(p: pointer) {.
+    importc: "k_free", header: "<kernel.h>".}
+  proc c_realloc*(p: pointer, newsize: csize_t): pointer =
+    # Zephyr's kernel malloc doesn't support realloc
+    result = c_malloc(newSize)
+    # match the ansi c behavior
+    if not result.isNil():
+      copyMem(result, p, newSize)
+      c_free(p)
+else:
+  proc c_malloc*(size: csize_t): pointer {.
+    importc: "malloc", header: "<stdlib.h>".}
+  proc c_calloc*(nmemb, size: csize_t): pointer {.
+    importc: "calloc", header: "<stdlib.h>".}
+  proc c_free*(p: pointer) {.
+    importc: "free", header: "<stdlib.h>".}
+  proc c_realloc*(p: pointer, newsize: csize_t): pointer {.
+    importc: "realloc", header: "<stdlib.h>".}
+
+proc c_fwrite*(buf: pointer, size, n: csize_t, f: CFilePtr): csize_t {.
   importc: "fwrite", header: "<stdio.h>".}
 
-proc c_fflush(f: CFilePtr): cint {.
+proc c_fflush*(f: CFilePtr): cint {.
   importc: "fflush", header: "<stdio.h>".}
 
+proc rawWriteString*(f: CFilePtr, s: cstring, length: int) {.compilerproc, nonReloadable, inline.} =
+  # we cannot throw an exception here!
+  discard c_fwrite(s, 1, cast[csize_t](length), f)
+  discard c_fflush(f)
+
 proc rawWrite*(f: CFilePtr, s: cstring) {.compilerproc, nonReloadable, inline.} =
   # we cannot throw an exception here!
-  discard c_fwrite(s, 1, cast[csize_t](s.len), f)
+  discard c_fwrite(s, 1, c_strlen(s), f)
   discard c_fflush(f)
 
 {.pop.}
diff --git a/lib/system/arc.nim b/lib/system/arc.nim
new file mode 100644
index 000000000..d001fcaa5
--- /dev/null
+++ b/lib/system/arc.nim
@@ -0,0 +1,267 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2019 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+#[
+In this new runtime we simplify the object layouts a bit: The runtime type
+information is only accessed for the objects that have it and it's always
+at offset 0 then. The ``ref`` object header is independent from the
+runtime type and only contains a reference count.
+]#
+
+when defined(gcOrc):
+  const
+    rcIncrement = 0b10000 # so that lowest 4 bits are not touched
+    rcMask = 0b1111
+    rcShift = 4      # shift by rcShift to get the reference counter
+
+else:
+  const
+    rcIncrement = 0b1000 # so that lowest 3 bits are not touched
+    rcMask = 0b111
+    rcShift = 3      # shift by rcShift to get the reference counter
+
+const
+  orcLeakDetector = defined(nimOrcLeakDetector)
+
+type
+  RefHeader = object
+    rc: int # the object header is now a single RC field.
+            # we could remove it in non-debug builds for the 'owned ref'
+            # design but this seems unwise.
+    when defined(gcOrc):
+      rootIdx: int # thanks to this we can delete potential cycle roots
+                   # in O(1) without doubly linked lists
+    when defined(nimArcDebug) or defined(nimArcIds):
+      refId: int
+    when defined(gcOrc) and orcLeakDetector:
+      filename: cstring
+      line: int
+
+  Cell = ptr RefHeader
+
+template setFrameInfo(c: Cell) =
+  when orcLeakDetector:
+    if framePtr != nil and framePtr.prev != nil:
+      c.filename = framePtr.prev.filename
+      c.line = framePtr.prev.line
+    else:
+      c.filename = nil
+      c.line = 0
+
+template head(p: pointer): Cell =
+  cast[Cell](cast[int](p) -% sizeof(RefHeader))
+
+const
+  traceCollector = defined(traceArc)
+
+when defined(nimArcDebug):
+  include cellsets
+
+  const traceId = 20 # 1037
+
+  var gRefId: int
+  var freedCells: CellSet
+elif defined(nimArcIds):
+  var gRefId: int
+
+  const traceId = -1
+
+when defined(gcAtomicArc) and hasThreadSupport:
+  template decrement(cell: Cell): untyped =
+    discard atomicDec(cell.rc, rcIncrement)
+  template increment(cell: Cell): untyped =
+    discard atomicInc(cell.rc, rcIncrement)
+  template count(x: Cell): untyped =
+    atomicLoadN(x.rc.addr, ATOMIC_ACQUIRE) shr rcShift
+else:
+  template decrement(cell: Cell): untyped =
+    dec(cell.rc, rcIncrement)
+  template increment(cell: Cell): untyped =
+    inc(cell.rc, rcIncrement)
+  template count(x: Cell): untyped =
+    x.rc shr rcShift
+
+proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} =
+  let hdrSize = align(sizeof(RefHeader), alignment)
+  let s = size + hdrSize
+  when defined(nimscript):
+    discard
+  else:
+    result = alignedAlloc0(s, alignment) +! hdrSize
+  when defined(nimArcDebug) or defined(nimArcIds):
+    head(result).refId = gRefId
+    atomicInc gRefId
+    if head(result).refId == traceId:
+      writeStackTrace()
+      cfprintf(cstderr, "[nimNewObj] %p %ld\n", result, head(result).count)
+  when traceCollector:
+    cprintf("[Allocated] %p result: %p\n", result -! sizeof(RefHeader), result)
+  setFrameInfo head(result)
+
+proc nimNewObjUninit(size, alignment: int): pointer {.compilerRtl.} =
+  # Same as 'newNewObj' but do not initialize the memory to zero.
+  # The codegen proved for us that this is not necessary.
+  let hdrSize = align(sizeof(RefHeader), alignment)
+  let s = size + hdrSize
+  when defined(nimscript):
+    discard
+  else:
+    result = cast[ptr RefHeader](alignedAlloc(s, alignment) +! hdrSize)
+  head(result).rc = 0
+  when defined(gcOrc):
+    head(result).rootIdx = 0
+  when defined(nimArcDebug):
+    head(result).refId = gRefId
+    atomicInc gRefId
+    if head(result).refId == traceId:
+      writeStackTrace()
+      cfprintf(cstderr, "[nimNewObjUninit] %p %ld\n", result, head(result).count)
+
+  when traceCollector:
+    cprintf("[Allocated] %p result: %p\n", result -! sizeof(RefHeader), result)
+  setFrameInfo head(result)
+
+proc nimDecWeakRef(p: pointer) {.compilerRtl, inl.} =
+  decrement head(p)
+
+proc isUniqueRef*[T](x: ref T): bool {.inline.} =
+  ## Returns true if the object `x` points to is uniquely referenced. Such
+  ## an object can potentially be passed over to a different thread safely,
+  ## if great care is taken. This queries the internal reference count of
+  ## the object which is subject to lots of optimizations! In other words
+  ## the value of `isUniqueRef` can depend on the used compiler version and
+  ## optimizer setting.
+  ## Nevertheless it can be used as a very valuable debugging tool and can
+  ## be used to specify the constraints of a threading related API
+  ## via `assert isUniqueRef(x)`.
+  head(cast[pointer](x)).rc == 0
+
+proc nimIncRef(p: pointer) {.compilerRtl, inl.} =
+  when defined(nimArcDebug):
+    if head(p).refId == traceId:
+      writeStackTrace()
+      cfprintf(cstderr, "[IncRef] %p %ld\n", p, head(p).count)
+
+  increment head(p)
+  when traceCollector:
+    cprintf("[INCREF] %p\n", head(p))
+
+when not defined(gcOrc) or defined(nimThinout):
+  proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
+    # This is only used by the old RTTI mechanism and we know
+    # that 'dest[]' is nil and needs no destruction. Which is really handy
+    # as we cannot destroy the object reliably if it's an object of unknown
+    # compile-time type.
+    dest[] = src
+    if src != nil: nimIncRef src
+
+when not defined(nimscript) and defined(nimArcDebug):
+  proc deallocatedRefId*(p: pointer): int =
+    ## Returns the ref's ID if the ref was already deallocated. This
+    ## is a memory corruption check. Returns 0 if there is no error.
+    let c = head(p)
+    if freedCells.data != nil and freedCells.contains(c):
+      result = c.refId
+    else:
+      result = 0
+
+proc nimRawDispose(p: pointer, alignment: int) {.compilerRtl.} =
+  when not defined(nimscript):
+    when traceCollector:
+      cprintf("[Freed] %p\n", p -! sizeof(RefHeader))
+    when defined(nimOwnedEnabled):
+      if head(p).rc >= rcIncrement:
+        cstderr.rawWrite "[FATAL] dangling references exist\n"
+        rawQuit 1
+    when defined(nimArcDebug):
+      # we do NOT really free the memory here in order to reliably detect use-after-frees
+      if freedCells.data == nil: init(freedCells)
+      freedCells.incl head(p)
+    else:
+      let hdrSize = align(sizeof(RefHeader), alignment)
+      alignedDealloc(p -! hdrSize, alignment)
+
+template `=dispose`*[T](x: owned(ref T)) = nimRawDispose(cast[pointer](x), T.alignOf)
+#proc dispose*(x: pointer) = nimRawDispose(x)
+
+proc nimDestroyAndDispose(p: pointer) {.compilerRtl, raises: [].} =
+  let rti = cast[ptr PNimTypeV2](p)
+  if rti.destructor != nil:
+    cast[DestructorProc](rti.destructor)(p)
+  when false:
+    cstderr.rawWrite cast[ptr PNimTypeV2](p)[].name
+    cstderr.rawWrite "\n"
+    if d == nil:
+      cstderr.rawWrite "bah, nil\n"
+    else:
+      cstderr.rawWrite "has destructor!\n"
+  nimRawDispose(p, rti.align)
+
+when defined(gcOrc):
+  when defined(nimThinout):
+    include cyclebreaker
+  else:
+    include orc
+    #include cyclecollector
+
+proc nimDecRefIsLast(p: pointer): bool {.compilerRtl, inl.} =
+  if p != nil:
+    var cell = head(p)
+
+    when defined(nimArcDebug):
+      if cell.refId == traceId:
+        writeStackTrace()
+        cfprintf(cstderr, "[DecRef] %p %ld\n", p, cell.count)
+
+    when defined(gcAtomicArc) and hasThreadSupport:
+      # `atomicDec` returns the new value
+      if atomicDec(cell.rc, rcIncrement) == -rcIncrement:
+        result = true
+        when traceCollector:
+          cprintf("[ABOUT TO DESTROY] %p\n", cell)
+    else:
+      if cell.count == 0:
+        result = true
+        when traceCollector:
+          cprintf("[ABOUT TO DESTROY] %p\n", cell)
+      else:
+        decrement cell
+        # According to Lins it's correct to do nothing else here.
+        when traceCollector:
+          cprintf("[DECREF] %p\n", cell)
+
+proc GC_unref*[T](x: ref T) =
+  ## New runtime only supports this operation for 'ref T'.
+  var y {.cursor.} = x
+  `=destroy`(y)
+
+proc GC_ref*[T](x: ref T) =
+  ## New runtime only supports this operation for 'ref T'.
+  if x != nil: nimIncRef(cast[pointer](x))
+
+when not defined(gcOrc):
+  template GC_fullCollect* =
+    ## Forces a full garbage collection pass. With `--mm:arc` a nop.
+    discard
+
+template setupForeignThreadGc* =
+  ## With `--mm:arc` a nop.
+  discard
+
+template tearDownForeignThreadGc* =
+  ## With `--mm:arc` a nop.
+  discard
+
+proc isObjDisplayCheck(source: PNimTypeV2, targetDepth: int16, token: uint32): bool {.compilerRtl, inl.} =
+  result = targetDepth <= source.depth and source.display[targetDepth] == token
+
+when defined(gcDestructors):
+  proc nimGetVTable(p: pointer, index: int): pointer
+        {.compilerRtl, inline, raises: [].} =
+    result = cast[ptr PNimTypeV2](p).vTable[index]
diff --git a/lib/system/arithm.nim b/lib/system/arithm.nim
deleted file mode 100644
index 64caddce8..000000000
--- a/lib/system/arithm.nim
+++ /dev/null
@@ -1,425 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-
-# simple integer arithmetic with overflow checking
-
-proc raiseOverflow {.compilerproc, noinline.} =
-  # a single proc to reduce code size to a minimum
-  sysFatal(OverflowDefect, "over- or underflow")
-
-proc raiseDivByZero {.compilerproc, noinline.} =
-  sysFatal(DivByZeroDefect, "division by zero")
-
-when defined(builtinOverflow):
-  # Builtin compiler functions for improved performance
-  when sizeof(clong) == 8:
-    proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_saddl_overflow", nodecl, nosideeffect.}
-
-    proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_ssubl_overflow", nodecl, nosideeffect.}
-
-    proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_smull_overflow", nodecl, nosideeffect.}
-
-  elif sizeof(clonglong) == 8:
-    proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_saddll_overflow", nodecl, nosideeffect.}
-
-    proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_ssubll_overflow", nodecl, nosideeffect.}
-
-    proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_smulll_overflow", nodecl, nosideeffect.}
-
-  when sizeof(int) == 8:
-    proc addIntOverflow(a, b: int, c: var int): bool {.inline.} =
-      addInt64Overflow(a, b, c)
-
-    proc subIntOverflow(a, b: int, c: var int): bool {.inline.} =
-      subInt64Overflow(a, b, c)
-
-    proc mulIntOverflow(a, b: int, c: var int): bool {.inline.} =
-      mulInt64Overflow(a, b, c)
-
-  elif sizeof(int) == 4 and sizeof(cint) == 4:
-    proc addIntOverflow(a, b: int, c: var int): bool {.
-      importc: "__builtin_sadd_overflow", nodecl, nosideeffect.}
-
-    proc subIntOverflow(a, b: int, c: var int): bool {.
-      importc: "__builtin_ssub_overflow", nodecl, nosideeffect.}
-
-    proc mulIntOverflow(a, b: int, c: var int): bool {.
-      importc: "__builtin_smul_overflow", nodecl, nosideeffect.}
-
-  proc addInt64(a, b: int64): int64 {.compilerproc, inline.} =
-    if addInt64Overflow(a, b, result):
-      raiseOverflow()
-
-  proc subInt64(a, b: int64): int64 {.compilerproc, inline.} =
-    if subInt64Overflow(a, b, result):
-      raiseOverflow()
-
-  proc mulInt64(a, b: int64): int64 {.compilerproc, inline.} =
-    if mulInt64Overflow(a, b, result):
-      raiseOverflow()
-else:
-  proc addInt64(a, b: int64): int64 {.compilerproc, inline.} =
-    result = a +% b
-    if (result xor a) >= int64(0) or (result xor b) >= int64(0):
-      return result
-    raiseOverflow()
-
-  proc subInt64(a, b: int64): int64 {.compilerproc, inline.} =
-    result = a -% b
-    if (result xor a) >= int64(0) or (result xor not b) >= int64(0):
-      return result
-    raiseOverflow()
-
-  #
-  # This code has been inspired by Python's source code.
-  # The native int product x*y is either exactly right or *way* off, being
-  # just the last n bits of the true product, where n is the number of bits
-  # in an int (the delivered product is the true product plus i*2**n for
-  # some integer i).
-  #
-  # The native float64 product x*y is subject to three
-  # rounding errors: on a sizeof(int)==8 box, each cast to double can lose
-  # info, and even on a sizeof(int)==4 box, the multiplication can lose info.
-  # But, unlike the native int product, it's not in *range* trouble:  even
-  # if sizeof(int)==32 (256-bit ints), the product easily fits in the
-  # dynamic range of a float64. So the leading 50 (or so) bits of the float64
-  # product are correct.
-  #
-  # We check these two ways against each other, and declare victory if they're
-  # approximately the same. Else, because the native int product is the only
-  # one that can lose catastrophic amounts of information, it's the native int
-  # product that must have overflowed.
-  #
-  proc mulInt64(a, b: int64): int64 {.compilerproc.} =
-    var
-      resAsFloat, floatProd: float64
-    result = a *% b
-    floatProd = toBiggestFloat(a) # conversion
-    floatProd = floatProd * toBiggestFloat(b)
-    resAsFloat = toBiggestFloat(result)
-
-    # Fast path for normal case: small multiplicands, and no info
-    # is lost in either method.
-    if resAsFloat == floatProd: return result
-
-    # Somebody somewhere lost info. Close enough, or way off? Note
-    # that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
-    # The difference either is or isn't significant compared to the
-    # true value (of which floatProd is a good approximation).
-
-    # abs(diff)/abs(prod) <= 1/32 iff
-    #   32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
-    if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
-      return result
-    raiseOverflow()
-
-proc negInt64(a: int64): int64 {.compilerproc, inline.} =
-  if a != low(int64): return -a
-  raiseOverflow()
-
-proc absInt64(a: int64): int64 {.compilerproc, inline.} =
-  if a != low(int64):
-    if a >= 0: return a
-    else: return -a
-  raiseOverflow()
-
-proc divInt64(a, b: int64): int64 {.compilerproc, inline.} =
-  if b == int64(0):
-    raiseDivByZero()
-  if a == low(int64) and b == int64(-1):
-    raiseOverflow()
-  return a div b
-
-proc modInt64(a, b: int64): int64 {.compilerproc, inline.} =
-  if b == int64(0):
-    raiseDivByZero()
-  return a mod b
-
-proc absInt(a: int): int {.compilerproc, inline.} =
-  if a != low(int):
-    if a >= 0: return a
-    else: return -a
-  raiseOverflow()
-
-const
-  asmVersion = defined(I386) and (defined(vcc) or defined(wcc) or
-               defined(dmc) or defined(gcc) or defined(llvm_gcc))
-    # my Version of Borland C++Builder does not have
-    # tasm32, which is needed for assembler blocks
-    # this is why Borland is not included in the 'when'
-
-when asmVersion and not defined(gcc) and not defined(llvm_gcc):
-  # assembler optimized versions for compilers that
-  # have an intel syntax assembler:
-  proc addInt(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-    # a in eax, and b in edx
-    asm """
-        mov eax, ecx
-        add eax, edx
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-        ret
-    """
-
-  proc subInt(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-    asm """
-        mov eax, ecx
-        sub eax, edx
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-        ret
-    """
-
-  proc negInt(a: int): int {.compilerproc, asmNoStackFrame.} =
-    asm """
-        mov eax, ecx
-        neg eax
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-        ret
-    """
-
-  proc divInt(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-    asm """
-        test  edx, edx
-        jne   L_NOT_ZERO
-        call  `raiseDivByZero`
-      L_NOT_ZERO:
-        cmp   ecx, 0x80000000
-        jne   L_DO_DIV
-        cmp   edx, -1
-        jne   L_DO_DIV
-        call  `raiseOverflow`
-      L_DO_DIV:
-        mov   eax, ecx
-        mov   ecx, edx
-        cdq
-        idiv  ecx
-        ret
-    """
-
-  proc modInt(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-    asm """
-        test  edx, edx
-        jne   L_NOT_ZERO
-        call  `raiseDivByZero`
-      L_NOT_ZERO:
-        cmp   ecx, 0x80000000
-        jne   L_DO_DIV
-        cmp   edx, -1
-        jne   L_DO_DIV
-        call  `raiseOverflow`
-      L_DO_DIV:
-        mov   eax, ecx
-        mov   ecx, edx
-        cdq
-        idiv  ecx
-        mov   eax, edx
-        ret
-    """
-
-  proc mulInt(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-    asm """
-        mov eax, ecx
-        mov ecx, edx
-        xor edx, edx
-        imul ecx
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-        ret
-    """
-
-elif false: # asmVersion and (defined(gcc) or defined(llvm_gcc)):
-  proc addInt(a, b: int): int {.compilerproc, inline.} =
-    # don't use a pure proc here!
-    asm """
-      "addl %%ecx, %%eax\n"
-      "jno 1\n"
-      "call _raiseOverflow\n"
-      "1: \n"
-      :"=a"(`result`)
-      :"a"(`a`), "c"(`b`)
-    """
-    #".intel_syntax noprefix"
-    #/* Intel syntax here */
-    #".att_syntax"
-
-  proc subInt(a, b: int): int {.compilerproc, inline.} =
-    asm """ "subl %%ecx,%%eax\n"
-            "jno 1\n"
-            "call _raiseOverflow\n"
-            "1: \n"
-           :"=a"(`result`)
-           :"a"(`a`), "c"(`b`)
-    """
-
-  proc mulInt(a, b: int): int {.compilerproc, inline.} =
-    asm """  "xorl %%edx, %%edx\n"
-             "imull %%ecx\n"
-             "jno 1\n"
-             "call _raiseOverflow\n"
-             "1: \n"
-            :"=a"(`result`)
-            :"a"(`a`), "c"(`b`)
-            :"%edx"
-    """
-
-  proc negInt(a: int): int {.compilerproc, inline.} =
-    asm """ "negl %%eax\n"
-            "jno 1\n"
-            "call _raiseOverflow\n"
-            "1: \n"
-           :"=a"(`result`)
-           :"a"(`a`)
-    """
-
-  proc divInt(a, b: int): int {.compilerproc, inline.} =
-    asm """  "xorl %%edx, %%edx\n"
-             "idivl %%ecx\n"
-             "jno 1\n"
-             "call _raiseOverflow\n"
-             "1: \n"
-            :"=a"(`result`)
-            :"a"(`a`), "c"(`b`)
-            :"%edx"
-    """
-
-  proc modInt(a, b: int): int {.compilerproc, inline.} =
-    asm """  "xorl %%edx, %%edx\n"
-             "idivl %%ecx\n"
-             "jno 1\n"
-             "call _raiseOverflow\n"
-             "1: \n"
-             "movl %%edx, %%eax"
-            :"=a"(`result`)
-            :"a"(`a`), "c"(`b`)
-            :"%edx"
-    """
-
-when not declared(addInt) and defined(builtinOverflow):
-  proc addInt(a, b: int): int {.compilerproc, inline.} =
-    if addIntOverflow(a, b, result):
-      raiseOverflow()
-
-when not declared(subInt) and defined(builtinOverflow):
-  proc subInt(a, b: int): int {.compilerproc, inline.} =
-    if subIntOverflow(a, b, result):
-      raiseOverflow()
-
-when not declared(mulInt) and defined(builtinOverflow):
-  proc mulInt(a, b: int): int {.compilerproc, inline.} =
-    if mulIntOverflow(a, b, result):
-      raiseOverflow()
-
-# Platform independent versions of the above (slower!)
-when not declared(addInt):
-  proc addInt(a, b: int): int {.compilerproc, inline.} =
-    result = a +% b
-    if (result xor a) >= 0 or (result xor b) >= 0:
-      return result
-    raiseOverflow()
-
-when not declared(subInt):
-  proc subInt(a, b: int): int {.compilerproc, inline.} =
-    result = a -% b
-    if (result xor a) >= 0 or (result xor not b) >= 0:
-      return result
-    raiseOverflow()
-
-when not declared(negInt):
-  proc negInt(a: int): int {.compilerproc, inline.} =
-    if a != low(int): return -a
-    raiseOverflow()
-
-when not declared(divInt):
-  proc divInt(a, b: int): int {.compilerproc, inline.} =
-    if b == 0:
-      raiseDivByZero()
-    if a == low(int) and b == -1:
-      raiseOverflow()
-    return a div b
-
-when not declared(modInt):
-  proc modInt(a, b: int): int {.compilerproc, inline.} =
-    if b == 0:
-      raiseDivByZero()
-    return a mod b
-
-when not declared(mulInt):
-  #
-  # This code has been inspired by Python's source code.
-  # The native int product x*y is either exactly right or *way* off, being
-  # just the last n bits of the true product, where n is the number of bits
-  # in an int (the delivered product is the true product plus i*2**n for
-  # some integer i).
-  #
-  # The native float64 product x*y is subject to three
-  # rounding errors: on a sizeof(int)==8 box, each cast to double can lose
-  # info, and even on a sizeof(int)==4 box, the multiplication can lose info.
-  # But, unlike the native int product, it's not in *range* trouble:  even
-  # if sizeof(int)==32 (256-bit ints), the product easily fits in the
-  # dynamic range of a float64. So the leading 50 (or so) bits of the float64
-  # product are correct.
-  #
-  # We check these two ways against each other, and declare victory if
-  # they're approximately the same. Else, because the native int product is
-  # the only one that can lose catastrophic amounts of information, it's the
-  # native int product that must have overflowed.
-  #
-  proc mulInt(a, b: int): int {.compilerproc.} =
-    var
-      resAsFloat, floatProd: float
-
-    result = a *% b
-    floatProd = toFloat(a) * toFloat(b)
-    resAsFloat = toFloat(result)
-
-    # Fast path for normal case: small multiplicands, and no info
-    # is lost in either method.
-    if resAsFloat == floatProd: return result
-
-    # Somebody somewhere lost info. Close enough, or way off? Note
-    # that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
-    # The difference either is or isn't significant compared to the
-    # true value (of which floatProd is a good approximation).
-
-    # abs(diff)/abs(prod) <= 1/32 iff
-    #   32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
-    if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
-      return result
-    raiseOverflow()
-
-# We avoid setting the FPU control word here for compatibility with libraries
-# written in other languages.
-
-proc raiseFloatInvalidOp {.compilerproc, noinline.} =
-  sysFatal(FloatInvalidOpDefect, "FPU operation caused a NaN result")
-
-proc nanCheck(x: float64) {.compilerproc, inline.} =
-  if x != x: raiseFloatInvalidOp()
-
-proc raiseFloatOverflow(x: float64) {.compilerproc, noinline.} =
-  if x > 0.0:
-    sysFatal(FloatOverflowDefect, "FPU operation caused an overflow")
-  else:
-    sysFatal(FloatUnderflowDefect, "FPU operations caused an underflow")
-
-proc infCheck(x: float64) {.compilerproc, inline.} =
-  if x != 0.0 and x*0.5 == x: raiseFloatOverflow(x)
diff --git a/lib/system/arithmetics.nim b/lib/system/arithmetics.nim
index f6c1b69ff..e229a0f4b 100644
--- a/lib/system/arithmetics.nim
+++ b/lib/system/arithmetics.nim
@@ -1,157 +1,50 @@
-proc succ*[T: Ordinal](x: T, y = 1): T {.magic: "Succ", noSideEffect.}
-  ## Returns the ``y``-th successor (default: 1) of the value ``x``.
-  ## ``T`` has to be an `ordinal type <#Ordinal>`_.
+proc succ*[T, V: Ordinal](x: T, y: V = 1): T {.magic: "Succ", noSideEffect.} =
+  ## Returns the `y`-th successor (default: 1) of the value `x`.
   ##
-  ## If such a value does not exist, ``OverflowDefect`` is raised
+  ## If such a value does not exist, `OverflowDefect` is raised
   ## or a compile time error occurs.
-  ##
-  ## .. code-block:: Nim
-  ##   let x = 5
-  ##   echo succ(5)    # => 6
-  ##   echo succ(5, 3) # => 8
-
-proc pred*[T: Ordinal](x: T, y = 1): T {.magic: "Pred", noSideEffect.}
-  ## Returns the ``y``-th predecessor (default: 1) of the value ``x``.
-  ## ``T`` has to be an `ordinal type <#Ordinal>`_.
-  ##
-  ## If such a value does not exist, ``OverflowDefect`` is raised
-  ## or a compile time error occurs.
-  ##
-  ## .. code-block:: Nim
-  ##   let x = 5
-  ##   echo pred(5)    # => 4
-  ##   echo pred(5, 3) # => 2
+  runnableExamples:
+    assert succ(5) == 6
+    assert succ(5, 3) == 8
 
-proc inc*[T: Ordinal](x: var T, y = 1) {.magic: "Inc", noSideEffect.}
-  ## Increments the ordinal ``x`` by ``y``.
+proc pred*[T, V: Ordinal](x: T, y: V = 1): T {.magic: "Pred", noSideEffect.} =
+  ## Returns the `y`-th predecessor (default: 1) of the value `x`.
   ##
-  ## If such a value does not exist, ``OverflowDefect`` is raised or a compile
-  ## time error occurs. This is a short notation for: ``x = succ(x, y)``.
-  ##
-  ## .. code-block:: Nim
-  ##  var i = 2
-  ##  inc(i)    # i <- 3
-  ##  inc(i, 3) # i <- 6
+  ## If such a value does not exist, `OverflowDefect` is raised
+  ## or a compile time error occurs.
+  runnableExamples:
+    assert pred(5) == 4
+    assert pred(5, 3) == 2
 
-proc dec*[T: Ordinal](x: var T, y = 1) {.magic: "Dec", noSideEffect.}
-  ## Decrements the ordinal ``x`` by ``y``.
+proc inc*[T, V: Ordinal](x: var T, y: V = 1) {.magic: "Inc", noSideEffect.} =
+  ## Increments the ordinal `x` by `y`.
   ##
-  ## If such a value does not exist, ``OverflowDefect`` is raised or a compile
-  ## time error occurs. This is a short notation for: ``x = pred(x, y)``.
+  ## If such a value does not exist, `OverflowDefect` is raised or a compile
+  ## time error occurs. This is a short notation for: `x = succ(x, y)`.
+  runnableExamples:
+    var i = 2
+    inc(i)
+    assert i == 3
+    inc(i, 3)
+    assert i == 6
+
+proc dec*[T, V: Ordinal](x: var T, y: V = 1) {.magic: "Dec", noSideEffect.} =
+  ## Decrements the ordinal `x` by `y`.
   ##
-  ## .. code-block:: Nim
-  ##  var i = 2
-  ##  dec(i)    # i <- 1
-  ##  dec(i, 3) # i <- -2
+  ## If such a value does not exist, `OverflowDefect` is raised or a compile
+  ## time error occurs. This is a short notation for: `x = pred(x, y)`.
+  runnableExamples:
+    var i = 2
+    dec(i)
+    assert i == 1
+    dec(i, 3)
+    assert i == -2
 
 
 
 # --------------------------------------------------------------------------
 # built-in operators
 
-when defined(nimNoZeroExtendMagic):
-  proc ze*(x: int8): int {.deprecated.} =
-    ## zero extends a smaller integer type to ``int``. This treats `x` as
-    ## unsigned.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-    cast[int](uint(cast[uint8](x)))
-
-  proc ze*(x: int16): int {.deprecated.} =
-    ## zero extends a smaller integer type to ``int``. This treats `x` as
-    ## unsigned.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-    cast[int](uint(cast[uint16](x)))
-
-  proc ze64*(x: int8): int64 {.deprecated.} =
-    ## zero extends a smaller integer type to ``int64``. This treats `x` as
-    ## unsigned.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-    cast[int64](uint64(cast[uint8](x)))
-
-  proc ze64*(x: int16): int64 {.deprecated.} =
-    ## zero extends a smaller integer type to ``int64``. This treats `x` as
-    ## unsigned.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-    cast[int64](uint64(cast[uint16](x)))
-
-  proc ze64*(x: int32): int64 {.deprecated.} =
-    ## zero extends a smaller integer type to ``int64``. This treats `x` as
-    ## unsigned.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-    cast[int64](uint64(cast[uint32](x)))
-
-  proc ze64*(x: int): int64 {.deprecated.} =
-    ## zero extends a smaller integer type to ``int64``. This treats `x` as
-    ## unsigned. Does nothing if the size of an ``int`` is the same as ``int64``.
-    ## (This is the case on 64 bit processors.)
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-    cast[int64](uint64(cast[uint](x)))
-
-  proc toU8*(x: int): int8 {.deprecated.} =
-    ## treats `x` as unsigned and converts it to a byte by taking the last 8 bits
-    ## from `x`.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-    cast[int8](x)
-
-  proc toU16*(x: int): int16 {.deprecated.} =
-    ## treats `x` as unsigned and converts it to an ``int16`` by taking the last
-    ## 16 bits from `x`.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-    cast[int16](x)
-
-  proc toU32*(x: int64): int32 {.deprecated.} =
-    ## treats `x` as unsigned and converts it to an ``int32`` by taking the
-    ## last 32 bits from `x`.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-    cast[int32](x)
-
-elif not defined(js):
-  proc ze*(x: int8): int {.magic: "Ze8ToI", noSideEffect, deprecated.}
-    ## zero extends a smaller integer type to ``int``. This treats `x` as
-    ## unsigned.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-  proc ze*(x: int16): int {.magic: "Ze16ToI", noSideEffect, deprecated.}
-    ## zero extends a smaller integer type to ``int``. This treats `x` as
-    ## unsigned.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-  proc ze64*(x: int8): int64 {.magic: "Ze8ToI64", noSideEffect, deprecated.}
-    ## zero extends a smaller integer type to ``int64``. This treats `x` as
-    ## unsigned.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-  proc ze64*(x: int16): int64 {.magic: "Ze16ToI64", noSideEffect, deprecated.}
-    ## zero extends a smaller integer type to ``int64``. This treats `x` as
-    ## unsigned.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-  proc ze64*(x: int32): int64 {.magic: "Ze32ToI64", noSideEffect, deprecated.}
-    ## zero extends a smaller integer type to ``int64``. This treats `x` as
-    ## unsigned.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-  proc ze64*(x: int): int64 {.magic: "ZeIToI64", noSideEffect, deprecated.}
-    ## zero extends a smaller integer type to ``int64``. This treats `x` as
-    ## unsigned. Does nothing if the size of an ``int`` is the same as ``int64``.
-    ## (This is the case on 64 bit processors.)
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-  proc toU8*(x: int): int8 {.magic: "ToU8", noSideEffect, deprecated.}
-    ## treats `x` as unsigned and converts it to a byte by taking the last 8 bits
-    ## from `x`.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-  proc toU16*(x: int): int16 {.magic: "ToU16", noSideEffect, deprecated.}
-    ## treats `x` as unsigned and converts it to an ``int16`` by taking the last
-    ## 16 bits from `x`.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-  proc toU32*(x: int64): int32 {.magic: "ToU32", noSideEffect, deprecated.}
-    ## treats `x` as unsigned and converts it to an ``int32`` by taking the
-    ## last 32 bits from `x`.
-    ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
 # integer calculations:
 proc `+`*(x: int): int {.magic: "UnaryPlusI", noSideEffect.}
   ## Unary `+` operator for an integer. Has no effect.
@@ -167,20 +60,13 @@ proc `-`*(x: int16): int16 {.magic: "UnaryMinusI", noSideEffect.}
 proc `-`*(x: int32): int32 {.magic: "UnaryMinusI", noSideEffect.}
 proc `-`*(x: int64): int64 {.magic: "UnaryMinusI64", noSideEffect.}
 
-proc `not`*(x: int): int {.magic: "BitnotI", noSideEffect.}
+proc `not`*(x: int): int {.magic: "BitnotI", noSideEffect.} =
   ## Computes the `bitwise complement` of the integer `x`.
-  ##
-  ## .. code-block:: Nim
-  ##   var
-  ##     a = 0'u8
-  ##     b = 0'i8
-  ##     c = 1000'u16
-  ##     d = 1000'i16
-  ##
-  ##   echo not a # => 255
-  ##   echo not b # => -1
-  ##   echo not c # => 64535
-  ##   echo not d # => -1001
+  runnableExamples:
+    assert not 0'u8 == 255
+    assert not 0'i8 == -1
+    assert not 1000'u16 == 64535
+    assert not 1000'i16 == -1001
 proc `not`*(x: int8): int8 {.magic: "BitnotI", noSideEffect.}
 proc `not`*(x: int16): int16 {.magic: "BitnotI", noSideEffect.}
 proc `not`*(x: int32): int32 {.magic: "BitnotI", noSideEffect.}
@@ -207,40 +93,38 @@ proc `*`*(x, y: int16): int16 {.magic: "MulI", noSideEffect.}
 proc `*`*(x, y: int32): int32 {.magic: "MulI", noSideEffect.}
 proc `*`*(x, y: int64): int64 {.magic: "MulI", noSideEffect.}
 
-proc `div`*(x, y: int): int {.magic: "DivI", noSideEffect.}
+proc `div`*(x, y: int): int {.magic: "DivI", noSideEffect.} =
   ## Computes the integer division.
   ##
-  ## This is roughly the same as ``trunc(x/y)``.
-  ##
-  ## .. code-block:: Nim
-  ##   ( 1 div  2) ==  0
-  ##   ( 2 div  2) ==  1
-  ##   ( 3 div  2) ==  1
-  ##   ( 7 div  3) ==  2
-  ##   (-7 div  3) == -2
-  ##   ( 7 div -3) == -2
-  ##   (-7 div -3) ==  2
+  ## This is roughly the same as `math.trunc(x/y).int`.
+  runnableExamples:
+    assert (1 div 2) == 0
+    assert (2 div 2) == 1
+    assert (3 div 2) == 1
+    assert (7 div 3) == 2
+    assert (-7 div 3) == -2
+    assert (7 div -3) == -2
+    assert (-7 div -3) == 2
 proc `div`*(x, y: int8): int8 {.magic: "DivI", noSideEffect.}
 proc `div`*(x, y: int16): int16 {.magic: "DivI", noSideEffect.}
 proc `div`*(x, y: int32): int32 {.magic: "DivI", noSideEffect.}
 proc `div`*(x, y: int64): int64 {.magic: "DivI", noSideEffect.}
 
-proc `mod`*(x, y: int): int {.magic: "ModI", noSideEffect.}
+proc `mod`*(x, y: int): int {.magic: "ModI", noSideEffect.} =
   ## Computes the integer modulo operation (remainder).
   ##
-  ## This is the same as ``x - (x div y) * y``.
-  ##
-  ## .. code-block:: Nim
-  ##   ( 7 mod  5) ==  2
-  ##   (-7 mod  5) == -2
-  ##   ( 7 mod -5) ==  2
-  ##   (-7 mod -5) == -2
+  ## This is the same as `x - (x div y) * y`.
+  runnableExamples:
+    assert (7 mod 5) == 2
+    assert (-7 mod 5) == -2
+    assert (7 mod -5) == 2
+    assert (-7 mod -5) == -2
 proc `mod`*(x, y: int8): int8 {.magic: "ModI", noSideEffect.}
 proc `mod`*(x, y: int16): int16 {.magic: "ModI", noSideEffect.}
 proc `mod`*(x, y: int32): int32 {.magic: "ModI", noSideEffect.}
 proc `mod`*(x, y: int64): int64 {.magic: "ModI", noSideEffect.}
 
-when defined(nimOldShiftRight) or not defined(nimAshr):
+when defined(nimOldShiftRight):
   const shrDepMessage = "`shr` will become sign preserving."
   proc `shr`*(x: int, y: SomeInteger): int {.magic: "ShrI", noSideEffect, deprecated: shrDepMessage.}
   proc `shr`*(x: int8, y: SomeInteger): int8 {.magic: "ShrI", noSideEffect, deprecated: shrDepMessage.}
@@ -248,7 +132,7 @@ when defined(nimOldShiftRight) or not defined(nimAshr):
   proc `shr`*(x: int32, y: SomeInteger): int32 {.magic: "ShrI", noSideEffect, deprecated: shrDepMessage.}
   proc `shr`*(x: int64, y: SomeInteger): int64 {.magic: "ShrI", noSideEffect, deprecated: shrDepMessage.}
 else:
-  proc `shr`*(x: int, y: SomeInteger): int {.magic: "AshrI", noSideEffect.}
+  proc `shr`*(x: int, y: SomeInteger): int {.magic: "AshrI", noSideEffect.} =
     ## Computes the `shift right` operation of `x` and `y`, filling
     ## vacant bit positions with the sign bit.
     ##
@@ -256,87 +140,77 @@ else:
     ## is different than in *C*.
     ##
     ## See also:
-    ## * `ashr proc <#ashr,int,SomeInteger>`_ for arithmetic shift right
-    ##
-    ## .. code-block:: Nim
-    ##   0b0001_0000'i8 shr 2 == 0b0000_0100'i8
-    ##   0b0000_0001'i8 shr 1 == 0b0000_0000'i8
-    ##   0b1000_0000'i8 shr 4 == 0b1111_1000'i8
-    ##   -1 shr 5 == -1
-    ##   1 shr 5 == 0
-    ##   16 shr 2 == 4
-    ##   -16 shr 2 == -4
+    ## * `ashr func<#ashr,int,SomeInteger>`_ for arithmetic shift right
+    runnableExamples:
+      assert 0b0001_0000'i8 shr 2 == 0b0000_0100'i8
+      assert 0b0000_0001'i8 shr 1 == 0b0000_0000'i8
+      assert 0b1000_0000'i8 shr 4 == 0b1111_1000'i8
+      assert -1 shr 5 == -1
+      assert 1 shr 5 == 0
+      assert 16 shr 2 == 4
+      assert -16 shr 2 == -4
   proc `shr`*(x: int8, y: SomeInteger): int8 {.magic: "AshrI", noSideEffect.}
   proc `shr`*(x: int16, y: SomeInteger): int16 {.magic: "AshrI", noSideEffect.}
   proc `shr`*(x: int32, y: SomeInteger): int32 {.magic: "AshrI", noSideEffect.}
   proc `shr`*(x: int64, y: SomeInteger): int64 {.magic: "AshrI", noSideEffect.}
 
 
-proc `shl`*(x: int, y: SomeInteger): int {.magic: "ShlI", noSideEffect.}
+proc `shl`*(x: int, y: SomeInteger): int {.magic: "ShlI", noSideEffect.} =
   ## Computes the `shift left` operation of `x` and `y`.
   ##
   ## **Note**: `Operator precedence <manual.html#syntax-precedence>`_
   ## is different than in *C*.
-  ##
-  ## .. code-block:: Nim
-  ##  1'i32 shl 4 == 0x0000_0010
-  ##  1'i64 shl 4 == 0x0000_0000_0000_0010
+  runnableExamples:
+    assert 1'i32 shl 4 == 0x0000_0010
+    assert 1'i64 shl 4 == 0x0000_0000_0000_0010
 proc `shl`*(x: int8, y: SomeInteger): int8 {.magic: "ShlI", noSideEffect.}
 proc `shl`*(x: int16, y: SomeInteger): int16 {.magic: "ShlI", noSideEffect.}
 proc `shl`*(x: int32, y: SomeInteger): int32 {.magic: "ShlI", noSideEffect.}
 proc `shl`*(x: int64, y: SomeInteger): int64 {.magic: "ShlI", noSideEffect.}
 
-when defined(nimAshr):
-  proc ashr*(x: int, y: SomeInteger): int {.magic: "AshrI", noSideEffect.}
-    ## Shifts right by pushing copies of the leftmost bit in from the left,
-    ## and let the rightmost bits fall off.
-    ##
-    ## Note that `ashr` is not an operator so use the normal function
-    ## call syntax for it.
-    ##
-    ## See also:
-    ## * `shr proc <#shr,int,SomeInteger>`_
-    ##
-    ## .. code-block:: Nim
-    ##   ashr(0b0001_0000'i8, 2) == 0b0000_0100'i8
-    ##   ashr(0b1000_0000'i8, 8) == 0b1111_1111'i8
-    ##   ashr(0b1000_0000'i8, 1) == 0b1100_0000'i8
-  proc ashr*(x: int8, y: SomeInteger): int8 {.magic: "AshrI", noSideEffect.}
-  proc ashr*(x: int16, y: SomeInteger): int16 {.magic: "AshrI", noSideEffect.}
-  proc ashr*(x: int32, y: SomeInteger): int32 {.magic: "AshrI", noSideEffect.}
-  proc ashr*(x: int64, y: SomeInteger): int64 {.magic: "AshrI", noSideEffect.}
-else:
-  # used for bootstrapping the compiler
-  proc ashr*[T](x: T, y: SomeInteger): T = discard
-
-proc `and`*(x, y: int): int {.magic: "BitandI", noSideEffect.}
-  ## Computes the `bitwise and` of numbers `x` and `y`.
+proc ashr*(x: int, y: SomeInteger): int {.magic: "AshrI", noSideEffect.} =
+  ## Shifts right by pushing copies of the leftmost bit in from the left,
+  ## and let the rightmost bits fall off.
+  ##
+  ## Note that `ashr` is not an operator so use the normal function
+  ## call syntax for it.
   ##
-  ## .. code-block:: Nim
-  ##   (0b0011 and 0b0101) == 0b0001
-  ##   (0b0111 and 0b1100) == 0b0100
+  ## See also:
+  ## * `shr func<#shr,int,SomeInteger>`_
+  runnableExamples:
+    assert ashr(0b0001_0000'i8, 2) == 0b0000_0100'i8
+    assert ashr(0b1000_0000'i8, 8) == 0b1111_1111'i8
+    assert ashr(0b1000_0000'i8, 1) == 0b1100_0000'i8
+proc ashr*(x: int8, y: SomeInteger): int8 {.magic: "AshrI", noSideEffect.}
+proc ashr*(x: int16, y: SomeInteger): int16 {.magic: "AshrI", noSideEffect.}
+proc ashr*(x: int32, y: SomeInteger): int32 {.magic: "AshrI", noSideEffect.}
+proc ashr*(x: int64, y: SomeInteger): int64 {.magic: "AshrI", noSideEffect.}
+
+proc `and`*(x, y: int): int {.magic: "BitandI", noSideEffect.} =
+  ## Computes the `bitwise and` of numbers `x` and `y`.
+  runnableExamples:
+    assert (0b0011 and 0b0101) == 0b0001
+    assert (0b0111 and 0b1100) == 0b0100
 proc `and`*(x, y: int8): int8 {.magic: "BitandI", noSideEffect.}
 proc `and`*(x, y: int16): int16 {.magic: "BitandI", noSideEffect.}
 proc `and`*(x, y: int32): int32 {.magic: "BitandI", noSideEffect.}
 proc `and`*(x, y: int64): int64 {.magic: "BitandI", noSideEffect.}
 
-proc `or`*(x, y: int): int {.magic: "BitorI", noSideEffect.}
+proc `or`*(x, y: int): int {.magic: "BitorI", noSideEffect.} =
   ## Computes the `bitwise or` of numbers `x` and `y`.
-  ##
-  ## .. code-block:: Nim
-  ##   (0b0011 or 0b0101) == 0b0111
-  ##   (0b0111 or 0b1100) == 0b1111
+  runnableExamples:
+    assert (0b0011 or 0b0101) == 0b0111
+    assert (0b0111 or 0b1100) == 0b1111
 proc `or`*(x, y: int8): int8 {.magic: "BitorI", noSideEffect.}
 proc `or`*(x, y: int16): int16 {.magic: "BitorI", noSideEffect.}
 proc `or`*(x, y: int32): int32 {.magic: "BitorI", noSideEffect.}
 proc `or`*(x, y: int64): int64 {.magic: "BitorI", noSideEffect.}
 
-proc `xor`*(x, y: int): int {.magic: "BitxorI", noSideEffect.}
+proc `xor`*(x, y: int): int {.magic: "BitxorI", noSideEffect.} =
   ## Computes the `bitwise xor` of numbers `x` and `y`.
-  ##
-  ## .. code-block:: Nim
-  ##   (0b0011 xor 0b0101) == 0b0110
-  ##   (0b0111 xor 0b1100) == 0b1011
+  runnableExamples:
+    assert (0b0011 xor 0b0101) == 0b0110
+    assert (0b0111 xor 0b1100) == 0b1011
 proc `xor`*(x, y: int8): int8 {.magic: "BitxorI", noSideEffect.}
 proc `xor`*(x, y: int16): int16 {.magic: "BitxorI", noSideEffect.}
 proc `xor`*(x, y: int32): int32 {.magic: "BitxorI", noSideEffect.}
@@ -408,7 +282,7 @@ proc `*`*(x, y: uint64): uint64 {.magic: "MulU", noSideEffect.}
 
 proc `div`*(x, y: uint): uint {.magic: "DivU", noSideEffect.}
   ## Computes the integer division for unsigned integers.
-  ## This is roughly the same as ``trunc(x/y)``.
+  ## This is roughly the same as `trunc(x/y)`.
 proc `div`*(x, y: uint8): uint8 {.magic: "DivU", noSideEffect.}
 proc `div`*(x, y: uint16): uint16 {.magic: "DivU", noSideEffect.}
 proc `div`*(x, y: uint32): uint32 {.magic: "DivU", noSideEffect.}
@@ -416,12 +290,65 @@ proc `div`*(x, y: uint64): uint64 {.magic: "DivU", noSideEffect.}
 
 proc `mod`*(x, y: uint): uint {.magic: "ModU", noSideEffect.}
   ## Computes the integer modulo operation (remainder) for unsigned integers.
-  ## This is the same as ``x - (x div y) * y``.
+  ## This is the same as `x - (x div y) * y`.
 proc `mod`*(x, y: uint8): uint8 {.magic: "ModU", noSideEffect.}
 proc `mod`*(x, y: uint16): uint16 {.magic: "ModU", noSideEffect.}
 proc `mod`*(x, y: uint32): uint32 {.magic: "ModU", noSideEffect.}
 proc `mod`*(x, y: uint64): uint64 {.magic: "ModU", noSideEffect.}
 
+proc `+=`*[T: SomeInteger](x: var T, y: T) {.
+  magic: "Inc", noSideEffect.}
+  ## Increments an integer.
+
+proc `-=`*[T: SomeInteger](x: var T, y: T) {.
+  magic: "Dec", noSideEffect.}
+  ## Decrements an integer.
+
+proc `*=`*[T: SomeInteger](x: var T, y: T) {.
+  inline, noSideEffect.} =
+  ## Binary `*=` operator for integers.
+  x = x * y
+
+# floating point operations:
+proc `+`*(x: float32): float32 {.magic: "UnaryPlusF64", noSideEffect.}
+proc `-`*(x: float32): float32 {.magic: "UnaryMinusF64", noSideEffect.}
+proc `+`*(x, y: float32): float32 {.magic: "AddF64", noSideEffect.}
+proc `-`*(x, y: float32): float32 {.magic: "SubF64", noSideEffect.}
+proc `*`*(x, y: float32): float32 {.magic: "MulF64", noSideEffect.}
+proc `/`*(x, y: float32): float32 {.magic: "DivF64", noSideEffect.}
+
+proc `+`*(x: float): float {.magic: "UnaryPlusF64", noSideEffect.}
+proc `-`*(x: float): float {.magic: "UnaryMinusF64", noSideEffect.}
+proc `+`*(x, y: float): float {.magic: "AddF64", noSideEffect.}
+proc `-`*(x, y: float): float {.magic: "SubF64", noSideEffect.}
+proc `*`*(x, y: float): float {.magic: "MulF64", noSideEffect.}
+proc `/`*(x, y: float): float {.magic: "DivF64", noSideEffect.}
+
+proc `+=`*[T: float|float32|float64] (x: var T, y: T) {.
+  inline, noSideEffect.} =
+  ## Increments in place a floating point number.
+  x = x + y
+
+proc `-=`*[T: float|float32|float64] (x: var T, y: T) {.
+  inline, noSideEffect.} =
+  ## Decrements in place a floating point number.
+  x = x - y
+
+proc `*=`*[T: float|float32|float64] (x: var T, y: T) {.
+  inline, noSideEffect.} =
+  ## Multiplies in place a floating point number.
+  x = x * y
+
+proc `/=`*(x: var float64, y: float64) {.inline, noSideEffect.} =
+  ## Divides in place a floating point number.
+  x = x / y
+
+proc `/=`*[T: float|float32](x: var T, y: T) {.inline, noSideEffect.} =
+  ## Divides in place a floating point number.
+  x = x / y
+
+# the following have to be included in system, not imported for some reason:
+
 proc `+%`*(x, y: int): int {.inline.} =
   ## Treats `x` and `y` as unsigned and adds them.
   ##
@@ -476,16 +403,3 @@ proc `%%`*(x, y: int8): int8 {.inline.}   = cast[int8](cast[uint8](x) mod cast[u
 proc `%%`*(x, y: int16): int16 {.inline.} = cast[int16](cast[uint16](x) mod cast[uint16](y))
 proc `%%`*(x, y: int32): int32 {.inline.} = cast[int32](cast[uint32](x) mod cast[uint32](y))
 proc `%%`*(x, y: int64): int64 {.inline.} = cast[int64](cast[uint64](x) mod cast[uint64](y))
-
-proc `+=`*[T: SomeInteger](x: var T, y: T) {.
-  magic: "Inc", noSideEffect.}
-  ## Increments an integer.
-
-proc `-=`*[T: SomeInteger](x: var T, y: T) {.
-  magic: "Dec", noSideEffect.}
-  ## Decrements an integer.
-
-proc `*=`*[T: SomeInteger](x: var T, y: T) {.
-  inline, noSideEffect.} =
-  ## Binary `*=` operator for integers.
-  x = x * y
diff --git a/lib/system/assertions.nim b/lib/system/assertions.nim
deleted file mode 100644
index c6283c89c..000000000
--- a/lib/system/assertions.nim
+++ /dev/null
@@ -1,110 +0,0 @@
-when not declared(sysFatal):
-  include "system/fatal"
-
-import std/private/miscdollars
-# ---------------------------------------------------------------------------
-# helpers
-
-type InstantiationInfo = tuple[filename: string, line: int, column: int]
-
-proc `$`(x: int): string {.magic: "IntToStr", noSideEffect.}
-proc `$`(info: InstantiationInfo): string =
-  # The +1 is needed here
-  # instead of overriding `$` (and changing its meaning), consider explicit name.
-  result = ""
-  result.toLocation(info.filename, info.line, info.column+1)
-
-# ---------------------------------------------------------------------------
-
-when not defined(nimHasSinkInference):
-  {.pragma: nosinks.}
-
-proc raiseAssert*(msg: string) {.noinline, noreturn, nosinks.} =
-  sysFatal(AssertionDefect, msg)
-
-proc failedAssertImpl*(msg: string) {.raises: [], tags: [].} =
-  # trick the compiler to not list ``AssertionDefect`` when called
-  # by ``assert``.
-  type Hide = proc (msg: string) {.noinline, raises: [], noSideEffect,
-                                    tags: [].}
-  cast[Hide](raiseAssert)(msg)
-
-template assertImpl(cond: bool, msg: string, expr: string, enabled: static[bool]) =
-  when enabled:
-    const
-      loc = instantiationInfo(fullPaths = compileOption("excessiveStackTrace"))
-      ploc = $loc
-    bind instantiationInfo
-    mixin failedAssertImpl
-    {.line: loc.}:
-      if not cond:
-        failedAssertImpl(ploc & " `" & expr & "` " & msg)
-
-template assert*(cond: untyped, msg = "") =
-  ## Raises ``AssertionDefect`` with `msg` if `cond` is false. Note
-  ## that ``AssertionDefect`` is hidden from the effect system, so it doesn't
-  ## produce ``{.raises: [AssertionDefect].}``. This exception is only supposed
-  ## to be caught by unit testing frameworks.
-  ##
-  ## The compiler may not generate any code at all for ``assert`` if it is
-  ## advised to do so through the ``-d:danger`` or ``--assertions:off``
-  ## `command line switches <nimc.html#compiler-usage-command-line-switches>`_.
-  ##
-  ## .. code-block:: nim
-  ##   static: assert 1 == 9, "This assertion generates code when not built with -d:danger or --assertions:off"
-  const expr = astToStr(cond)
-  assertImpl(cond, msg, expr, compileOption("assertions"))
-
-template doAssert*(cond: untyped, msg = "") =
-  ## Similar to ``assert`` but is always turned on regardless of ``--assertions``.
-  ##
-  ## .. code-block:: nim
-  ##   static: doAssert 1 == 9, "This assertion generates code when built with/without -d:danger or --assertions:off"
-  const expr = astToStr(cond)
-  assertImpl(cond, msg, expr, true)
-
-template onFailedAssert*(msg, code: untyped): untyped {.dirty.} =
-  ## Sets an assertion failure handler that will intercept any assert
-  ## statements following `onFailedAssert` in the current module scope.
-  ##
-  ## .. code-block:: nim
-  ##  # module-wide policy to change the failed assert
-  ##  # exception type in order to include a lineinfo
-  ##  onFailedAssert(msg):
-  ##    var e = new(TMyError)
-  ##    e.msg = msg
-  ##    e.lineinfo = instantiationInfo(-2)
-  ##    raise e
-  ##
-  template failedAssertImpl(msgIMPL: string): untyped {.dirty.} =
-    let msg = msgIMPL
-    code
-
-template doAssertRaises*(exception: typedesc, code: untyped) =
-  ## Raises ``AssertionDefect`` if specified ``code`` does not raise the
-  ## specified exception. Example:
-  ##
-  ## .. code-block:: nim
-  ##  doAssertRaises(ValueError):
-  ##    raise newException(ValueError, "Hello World")
-  var wrong = false
-  when Exception is exception:
-    try:
-      if true:
-        code
-      wrong = true
-    except Exception:
-      discard
-  else:
-    try:
-      if true:
-        code
-      wrong = true
-    except exception:
-      discard
-    except Exception:
-      raiseAssert(astToStr(exception) &
-                  " wasn't raised, another error was raised instead by:\n"&
-                  astToStr(code))
-  if wrong:
-    raiseAssert(astToStr(exception) & " wasn't raised by:\n" & astToStr(code))
diff --git a/lib/system/assign.nim b/lib/system/assign.nim
index ff4ac021e..9f4cbc0fe 100644
--- a/lib/system/assign.nim
+++ b/lib/system/assign.nim
@@ -7,14 +7,16 @@
 #    distribution, for details about the copyright.
 #
 
+include seqs_v2_reimpl
+
 proc genericResetAux(dest: pointer, n: ptr TNimNode) {.benign.}
 
 proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) {.benign.}
 proc genericAssignAux(dest, src: pointer, n: ptr TNimNode,
                       shallow: bool) {.benign.} =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   case n.kind
   of nkSlot:
     genericAssignAux(cast[pointer](d +% n.offset),
@@ -38,45 +40,67 @@ proc genericAssignAux(dest, src: pointer, n: ptr TNimNode,
   #  echo "ugh memory corruption! ", n.kind
   #  quit 1
 
+template deepSeqAssignImpl(operation, additionalArg) {.dirty.} =
+  var d = cast[ptr NimSeqV2Reimpl](dest)
+  var s = cast[ptr NimSeqV2Reimpl](src)
+  d.len = s.len
+  let elem = mt.base
+  d.p = cast[ptr NimSeqPayloadReimpl](newSeqPayload(s.len, elem.size, elem.align))
+
+  let bs = elem.size
+  let ba = elem.align
+  let headerSize = align(sizeof(NimSeqPayloadBase), ba)
+
+  for i in 0..d.len-1:
+    operation(d.p +! (headerSize+i*bs), s.p +! (headerSize+i*bs), mt.base, additionalArg)
+
 proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   sysAssert(mt != nil, "genericAssignAux 2")
   case mt.kind
   of tyString:
-    var x = cast[PPointer](dest)
-    var s2 = cast[PPointer](s)[]
-    if s2 == nil or shallow or (
-        cast[PGenericSeq](s2).reserved and seqShallowFlag) != 0:
-      unsureAsgnRef(x, s2)
+    when defined(nimSeqsV2):
+      var x = cast[ptr NimStringV2](dest)
+      var s2 = cast[ptr NimStringV2](s)[]
+      nimAsgnStrV2(x[], s2)
     else:
-      unsureAsgnRef(x, copyString(cast[NimString](s2)))
+      var x = cast[PPointer](dest)
+      var s2 = cast[PPointer](s)[]
+      if s2 == nil or shallow or (
+          cast[PGenericSeq](s2).reserved and seqShallowFlag) != 0:
+        unsureAsgnRef(x, s2)
+      else:
+        unsureAsgnRef(x, copyString(cast[NimString](s2)))
   of tySequence:
-    var s2 = cast[PPointer](src)[]
-    var seq = cast[PGenericSeq](s2)
-    var x = cast[PPointer](dest)
-    if s2 == nil or shallow or (seq.reserved and seqShallowFlag) != 0:
-      # this can happen! nil sequences are allowed
-      unsureAsgnRef(x, s2)
-      return
-    sysAssert(dest != nil, "genericAssignAux 3")
-    if ntfNoRefs in mt.base.flags:
-      var ss = nimNewSeqOfCap(mt, seq.len)
-      cast[PGenericSeq](ss).len = seq.len
-      unsureAsgnRef(x, ss)
-      var dst = cast[ByteAddress](cast[PPointer](dest)[])
-      copyMem(cast[pointer](dst +% align(GenericSeqSize, mt.base.align)),
-              cast[pointer](cast[ByteAddress](s2) +% align(GenericSeqSize, mt.base.align)),
-              seq.len *% mt.base.size)
+    when defined(nimSeqsV2):
+      deepSeqAssignImpl(genericAssignAux, shallow)
     else:
-      unsureAsgnRef(x, newSeq(mt, seq.len))
-      var dst = cast[ByteAddress](cast[PPointer](dest)[])
-      for i in 0..seq.len-1:
-        genericAssignAux(
-          cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size ),
-          cast[pointer](cast[ByteAddress](s2) +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size ),
-          mt.base, shallow)
+      var s2 = cast[PPointer](src)[]
+      var seq = cast[PGenericSeq](s2)
+      var x = cast[PPointer](dest)
+      if s2 == nil or shallow or (seq.reserved and seqShallowFlag) != 0:
+        # this can happen! nil sequences are allowed
+        unsureAsgnRef(x, s2)
+        return
+      sysAssert(dest != nil, "genericAssignAux 3")
+      if ntfNoRefs in mt.base.flags:
+        var ss = nimNewSeqOfCap(mt, seq.len)
+        cast[PGenericSeq](ss).len = seq.len
+        unsureAsgnRef(x, ss)
+        var dst = cast[int](cast[PPointer](dest)[])
+        copyMem(cast[pointer](dst +% align(GenericSeqSize, mt.base.align)),
+                cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align)),
+                seq.len *% mt.base.size)
+      else:
+        unsureAsgnRef(x, newSeq(mt, seq.len))
+        var dst = cast[int](cast[PPointer](dest)[])
+        for i in 0..seq.len-1:
+          genericAssignAux(
+            cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size ),
+            cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size ),
+            mt.base, shallow)
   of tyObject:
     var it = mt.base
     # don't use recursion here on the PNimType because the subtype
@@ -87,14 +111,23 @@ proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) =
     genericAssignAux(dest, src, mt.node, shallow)
     # we need to copy m_type field for tyObject, as it could be empty for
     # sequence reallocations:
-    var pint = cast[ptr PNimType](dest)
-    # We need to copy the *static* type not the dynamic type:
-    #   if p of TB:
-    #     var tbObj = TB(p)
-    #     tbObj of TC # needs to be false!
-    #c_fprintf(stdout, "%s %s\n", pint[].name, mt.name)
-    chckObjAsgn(cast[ptr PNimType](src)[], mt)
-    pint[] = mt # cast[ptr PNimType](src)[]
+    when defined(nimSeqsV2):
+      var pint = cast[ptr PNimTypeV2](dest)
+      #chckObjAsgn(cast[ptr PNimTypeV2](src)[].typeInfoV2, mt)
+      pint[] = cast[PNimTypeV2](mt.typeInfoV2)
+    else:
+      var pint = cast[ptr PNimType](dest)
+      # We need to copy the *static* type not the dynamic type:
+      #   if p of TB:
+      #     var tbObj = TB(p)
+      #     tbObj of TC # needs to be false!
+      #c_fprintf(stdout, "%s %s\n", pint[].name, mt.name)
+      let srcType = cast[ptr PNimType](src)[]
+      if srcType != nil:
+        # `!= nil` needed because of cases where object is not initialized properly (see bug #16706)
+        # note that you can have `srcType == nil` yet `src != nil`
+        chckObjAsgn(srcType, mt)
+      pint[] = mt # cast[ptr PNimType](src)[]
   of tyTuple:
     genericAssignAux(dest, src, mt.node, shallow)
   of tyArray, tyArrayConstr:
@@ -134,7 +167,7 @@ when false:
     of tyPointer: k = "range"
     of tyOpenArray: k = "openarray"
     of tyString: k = "string"
-    of tyCString: k = "cstring"
+    of tyCstring: k = "cstring"
     of tyInt: k = "int"
     of tyInt32: k = "int32"
     else: k = "other"
@@ -148,15 +181,15 @@ proc genericSeqAssign(dest, src: pointer, mt: PNimType) {.compilerproc.} =
 proc genericAssignOpenArray(dest, src: pointer, len: int,
                             mt: PNimType) {.compilerproc.} =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   for i in 0..len-1:
     genericAssign(cast[pointer](d +% i *% mt.base.size),
                   cast[pointer](s +% i *% mt.base.size), mt.base)
 
 proc objectInit(dest: pointer, typ: PNimType) {.compilerproc, benign.}
 proc objectInitAux(dest: pointer, n: ptr TNimNode) {.benign.} =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case n.kind
   of nkNone: sysAssert(false, "objectInitAux")
   of nkSlot: objectInit(cast[pointer](d +% n.offset), n.typ)
@@ -170,13 +203,17 @@ proc objectInitAux(dest: pointer, n: ptr TNimNode) {.benign.} =
 proc objectInit(dest: pointer, typ: PNimType) =
   # the generic init proc that takes care of initialization of complex
   # objects on the stack or heap
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case typ.kind
   of tyObject:
     # iterate over any structural type
     # here we have to init the type field:
-    var pint = cast[ptr PNimType](dest)
-    pint[] = typ
+    when defined(nimSeqsV2):
+      var pint = cast[ptr PNimTypeV2](dest)
+      pint[] = cast[PNimTypeV2](typ.typeInfoV2)
+    else:
+      var pint = cast[ptr PNimType](dest)
+      pint[] = typ
     objectInitAux(dest, typ.node)
   of tyTuple:
     objectInitAux(dest, typ.node)
@@ -189,7 +226,7 @@ proc objectInit(dest: pointer, typ: PNimType) =
 
 proc genericReset(dest: pointer, mt: PNimType) {.compilerproc, benign.}
 proc genericResetAux(dest: pointer, n: ptr TNimNode) =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case n.kind
   of nkNone: sysAssert(false, "genericResetAux")
   of nkSlot: genericReset(cast[pointer](d +% n.offset), n.typ)
@@ -201,18 +238,35 @@ proc genericResetAux(dest: pointer, n: ptr TNimNode) =
     zeroMem(cast[pointer](d +% n.offset), n.typ.size)
 
 proc genericReset(dest: pointer, mt: PNimType) =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   sysAssert(mt != nil, "genericReset 2")
   case mt.kind
-  of tyString, tyRef, tySequence:
+  of tyRef:
     unsureAsgnRef(cast[PPointer](dest), nil)
+  of tyString:
+    when defined(nimSeqsV2):
+      var s = cast[ptr NimStringV2](dest)
+      frees(s[])
+      zeroMem(dest, mt.size)
+    else:
+      unsureAsgnRef(cast[PPointer](dest), nil)
+  of tySequence:
+    when defined(nimSeqsV2):
+      frees(cast[ptr NimSeqV2Reimpl](dest)[])
+      zeroMem(dest, mt.size)
+    else:
+      unsureAsgnRef(cast[PPointer](dest), nil)
   of tyTuple:
     genericResetAux(dest, mt.node)
   of tyObject:
     genericResetAux(dest, mt.node)
     # also reset the type field for tyObject, for correct branch switching!
-    var pint = cast[ptr PNimType](dest)
-    pint[] = nil
+    when defined(nimSeqsV2):
+      var pint = cast[ptr PNimTypeV2](dest)
+      pint[] = nil
+    else:
+      var pint = cast[ptr PNimType](dest)
+      pint[] = nil
   of tyArray, tyArrayConstr:
     for i in 0..(mt.size div mt.base.size)-1:
       genericReset(cast[pointer](d +% i *% mt.base.size), mt.base)
@@ -221,10 +275,12 @@ proc genericReset(dest: pointer, mt: PNimType) =
 
 proc selectBranch(discVal, L: int,
                   a: ptr array[0x7fff, ptr TNimNode]): ptr TNimNode =
-  result = a[L] # a[L] contains the ``else`` part (but may be nil)
   if discVal <% L:
-    let x = a[discVal]
-    if x != nil: result = x
+    result = a[discVal]
+    if result == nil:
+      result = a[L]
+  else:
+    result = a[L] # a[L] contains the ``else`` part (but may be nil)
 
 proc FieldDiscriminantCheck(oldDiscVal, newDiscVal: int,
                             a: ptr array[0x7fff, ptr TNimNode],
diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim
deleted file mode 100644
index 15e8a56a7..000000000
--- a/lib/system/atomics.nim
+++ /dev/null
@@ -1,340 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2015 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-# Atomic operations for Nim.
-{.push stackTrace:off, profiler:off.}
-
-const someGcc = defined(gcc) or defined(llvm_gcc) or defined(clang)
-const someVcc = defined(vcc) or defined(clang_cl)
-
-type
-  AtomType* = SomeNumber|pointer|ptr|char|bool
-    ## Type Class representing valid types for use with atomic procs
-
-when someGcc and hasThreadSupport:
-  type AtomMemModel* = distinct cint
-
-  var ATOMIC_RELAXED* {.importc: "__ATOMIC_RELAXED", nodecl.}: AtomMemModel
-    ## No barriers or synchronization.
-  var ATOMIC_CONSUME* {.importc: "__ATOMIC_CONSUME", nodecl.}: AtomMemModel
-    ## Data dependency only for both barrier and
-    ## synchronization with another thread.
-  var ATOMIC_ACQUIRE* {.importc: "__ATOMIC_ACQUIRE", nodecl.}: AtomMemModel
-    ## Barrier to hoisting of code and synchronizes with
-    ## release (or stronger)
-    ## semantic stores from another thread.
-  var ATOMIC_RELEASE* {.importc: "__ATOMIC_RELEASE", nodecl.}: AtomMemModel
-    ## Barrier to sinking of code and synchronizes with
-    ## acquire (or stronger)
-    ## semantic loads from another thread.
-  var ATOMIC_ACQ_REL* {.importc: "__ATOMIC_ACQ_REL", nodecl.}: AtomMemModel
-    ## Full barrier in both directions and synchronizes
-    ## with acquire loads
-    ## and release stores in another thread.
-  var ATOMIC_SEQ_CST* {.importc: "__ATOMIC_SEQ_CST", nodecl.}: AtomMemModel
-    ## Full barrier in both directions and synchronizes
-    ## with acquire loads
-    ## and release stores in all threads.
-
-  proc atomicLoadN*[T: AtomType](p: ptr T, mem: AtomMemModel): T {.
-    importc: "__atomic_load_n", nodecl.}
-    ## This proc implements an atomic load operation. It returns the contents at p.
-    ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_ACQUIRE, ATOMIC_CONSUME.
-
-  proc atomicLoad*[T: AtomType](p, ret: ptr T, mem: AtomMemModel) {.
-    importc: "__atomic_load", nodecl.}
-    ## This is the generic version of an atomic load. It returns the contents at p in ret.
-
-  proc atomicStoreN*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel) {.
-    importc: "__atomic_store_n", nodecl.}
-    ## This proc implements an atomic store operation. It writes val at p.
-    ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, and ATOMIC_RELEASE.
-
-  proc atomicStore*[T: AtomType](p, val: ptr T, mem: AtomMemModel) {.
-    importc: "__atomic_store", nodecl.}
-    ## This is the generic version of an atomic store. It stores the value of val at p
-
-  proc atomicExchangeN*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_exchange_n", nodecl.}
-    ## This proc implements an atomic exchange operation. It writes val at p,
-    ## and returns the previous contents at p.
-    ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_ACQUIRE, ATOMIC_RELEASE, ATOMIC_ACQ_REL
-
-  proc atomicExchange*[T: AtomType](p, val, ret: ptr T, mem: AtomMemModel) {.
-    importc: "__atomic_exchange", nodecl.}
-    ## This is the generic version of an atomic exchange. It stores the contents at val at p.
-    ## The original value at p is copied into ret.
-
-  proc atomicCompareExchangeN*[T: AtomType](p, expected: ptr T, desired: T,
-    weak: bool, success_memmodel: AtomMemModel, failure_memmodel: AtomMemModel): bool {.
-    importc: "__atomic_compare_exchange_n ", nodecl.}
-    ## This proc implements an atomic compare and exchange operation. This compares the
-    ## contents at p with the contents at expected and if equal, writes desired at p.
-    ## If they are not equal, the current contents at p is written into expected.
-    ## Weak is true for weak compare_exchange, and false for the strong variation.
-    ## Many targets only offer the strong variation and ignore the parameter.
-    ## When in doubt, use the strong variation.
-    ## True is returned if desired is written at p and the execution is considered
-    ## to conform to the memory model specified by success_memmodel. There are no
-    ## restrictions on what memory model can be used here. False is returned otherwise,
-    ## and the execution is considered to conform to failure_memmodel. This memory model
-    ## cannot be __ATOMIC_RELEASE nor __ATOMIC_ACQ_REL. It also cannot be a stronger model
-    ## than that specified by success_memmodel.
-
-  proc atomicCompareExchange*[T: AtomType](p, expected, desired: ptr T,
-    weak: bool, success_memmodel: AtomMemModel, failure_memmodel: AtomMemModel): bool {.
-    importc: "__atomic_compare_exchange", nodecl.}
-    ## This proc implements the generic version of atomic_compare_exchange.
-    ## The proc is virtually identical to atomic_compare_exchange_n, except the desired
-    ## value is also a pointer.
-
-  ## Perform the operation return the new value, all memory models are valid
-  proc atomicAddFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_add_fetch", nodecl.}
-  proc atomicSubFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_sub_fetch", nodecl.}
-  proc atomicOrFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_or_fetch ", nodecl.}
-  proc atomicAndFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_and_fetch", nodecl.}
-  proc atomicXorFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_xor_fetch", nodecl.}
-  proc atomicNandFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_nand_fetch ", nodecl.}
-
-  ## Perform the operation return the old value, all memory models are valid
-  proc atomicFetchAdd*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_fetch_add", nodecl.}
-  proc atomicFetchSub*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_fetch_sub", nodecl.}
-  proc atomicFetchOr*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_fetch_or", nodecl.}
-  proc atomicFetchAnd*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_fetch_and", nodecl.}
-  proc atomicFetchXor*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_fetch_xor", nodecl.}
-  proc atomicFetchNand*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
-    importc: "__atomic_fetch_nand", nodecl.}
-
-  proc atomicTestAndSet*(p: pointer, mem: AtomMemModel): bool {.
-    importc: "__atomic_test_and_set", nodecl.}
-    ## This built-in function performs an atomic test-and-set operation on the byte at p.
-    ## The byte is set to some implementation defined nonzero “set” value and the return
-    ## value is true if and only if the previous contents were “set”.
-    ## All memory models are valid.
-
-  proc atomicClear*(p: pointer, mem: AtomMemModel) {.
-    importc: "__atomic_clear", nodecl.}
-    ## This built-in function performs an atomic clear operation at p.
-    ## After the operation, at p contains 0.
-    ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_RELEASE
-
-  proc atomicThreadFence*(mem: AtomMemModel) {.
-    importc: "__atomic_thread_fence", nodecl.}
-    ## This built-in function acts as a synchronization fence between threads based
-    ## on the specified memory model. All memory orders are valid.
-
-  proc atomicSignalFence*(mem: AtomMemModel) {.
-    importc: "__atomic_signal_fence", nodecl.}
-    ## This built-in function acts as a synchronization fence between a thread and
-    ## signal handlers based in the same thread. All memory orders are valid.
-
-  proc atomicAlwaysLockFree*(size: int, p: pointer): bool {.
-    importc: "__atomic_always_lock_free", nodecl.}
-    ## This built-in function returns true if objects of size bytes always generate
-    ## lock free atomic instructions for the target architecture. size must resolve
-    ## to a compile-time constant and the result also resolves to a compile-time constant.
-    ## ptr is an optional pointer to the object that may be used to determine alignment.
-    ## A value of 0 indicates typical alignment should be used. The compiler may also
-    ## ignore this parameter.
-
-  proc atomicIsLockFree*(size: int, p: pointer): bool {.
-    importc: "__atomic_is_lock_free", nodecl.}
-    ## This built-in function returns true if objects of size bytes always generate
-    ## lock free atomic instructions for the target architecture. If it is not known
-    ## to be lock free a call is made to a runtime routine named __atomic_is_lock_free.
-    ## ptr is an optional pointer to the object that may be used to determine alignment.
-    ## A value of 0 indicates typical alignment should be used. The compiler may also
-    ## ignore this parameter.
-
-  template fence*() = atomicThreadFence(ATOMIC_SEQ_CST)
-elif someVcc and hasThreadSupport:
-  type AtomMemModel* = distinct cint
-
-  const
-    ATOMIC_RELAXED = 0.AtomMemModel
-    ATOMIC_CONSUME = 1.AtomMemModel
-    ATOMIC_ACQUIRE = 2.AtomMemModel
-    ATOMIC_RELEASE = 3.AtomMemModel
-    ATOMIC_ACQ_REL = 4.AtomMemModel
-    ATOMIC_SEQ_CST = 5.AtomMemModel
-
-  proc `==`(x1, x2: AtomMemModel): bool {.borrow.}
-
-  proc readBarrier() {.importc: "_ReadBarrier",  header: "<intrin.h>".}
-  proc writeBarrier() {.importc: "_WriteBarrier",  header: "<intrin.h>".}
-  proc fence*() {.importc: "_ReadWriteBarrier", header: "<intrin.h>".}
-
-  template barrier(mem: AtomMemModel) =
-    when mem == ATOMIC_RELAXED: discard
-    elif mem == ATOMIC_CONSUME: readBarrier()
-    elif mem == ATOMIC_ACQUIRE: writeBarrier()
-    elif mem == ATOMIC_RELEASE: fence()
-    elif mem == ATOMIC_ACQ_REL: fence()
-    elif mem == ATOMIC_SEQ_CST: fence()
-
-  proc atomicLoadN*[T: AtomType](p: ptr T, mem: static[AtomMemModel]): T =
-    result = p[]
-    barrier(mem)
-
-  when defined(cpp):
-    when sizeof(int) == 8:
-      proc addAndFetch*(p: ptr int, val: int): int {.
-        importcpp: "_InterlockedExchangeAdd64(static_cast<NI volatile *>(#), #)",
-        header: "<intrin.h>".}
-    else:
-      proc addAndFetch*(p: ptr int, val: int): int {.
-        importcpp: "_InterlockedExchangeAdd(reinterpret_cast<long volatile *>(#), static_cast<long>(#))",
-        header: "<intrin.h>".}
-  else:
-    when sizeof(int) == 8:
-      proc addAndFetch*(p: ptr int, val: int): int {.
-        importc: "_InterlockedExchangeAdd64", header: "<intrin.h>".}
-    else:
-      proc addAndFetch*(p: ptr int, val: int): int {.
-        importc: "_InterlockedExchangeAdd", header: "<intrin.h>".}
-
-else:
-  proc addAndFetch*(p: ptr int, val: int): int {.inline.} =
-    inc(p[], val)
-    result = p[]
-
-proc atomicInc*(memLoc: var int, x: int = 1): int =
-  when someGcc and hasThreadSupport:
-    result = atomicAddFetch(memLoc.addr, x, ATOMIC_RELAXED)
-  elif someVcc and hasThreadSupport:
-    result = addAndFetch(memLoc.addr, x)
-    inc(result, x)
-  else:
-    inc(memLoc, x)
-    result = memLoc
-
-proc atomicDec*(memLoc: var int, x: int = 1): int =
-  when someGcc and hasThreadSupport:
-    when declared(atomicSubFetch):
-      result = atomicSubFetch(memLoc.addr, x, ATOMIC_RELAXED)
-    else:
-      result = atomicAddFetch(memLoc.addr, -x, ATOMIC_RELAXED)
-  elif someVcc and hasThreadSupport:
-    result = addAndFetch(memLoc.addr, -x)
-    dec(result, x)
-  else:
-    dec(memLoc, x)
-    result = memLoc
-
-when someVcc:
-  when defined(cpp):
-    proc interlockedCompareExchange64(p: pointer; exchange, comparand: int64): int64
-      {.importcpp: "_InterlockedCompareExchange64(static_cast<NI64 volatile *>(#), #, #)", header: "<intrin.h>".}
-    proc interlockedCompareExchange32(p: pointer; exchange, comparand: int32): int32
-      {.importcpp: "_InterlockedCompareExchange(static_cast<NI volatile *>(#), #, #)", header: "<intrin.h>".}
-    proc interlockedCompareExchange8(p: pointer; exchange, comparand: byte): byte
-      {.importcpp: "_InterlockedCompareExchange8(static_cast<char volatile *>(#), #, #)", header: "<intrin.h>".}
-  else:
-    proc interlockedCompareExchange64(p: pointer; exchange, comparand: int64): int64
-      {.importc: "_InterlockedCompareExchange64", header: "<intrin.h>".}
-    proc interlockedCompareExchange32(p: pointer; exchange, comparand: int32): int32
-      {.importc: "_InterlockedCompareExchange", header: "<intrin.h>".}
-    proc interlockedCompareExchange8(p: pointer; exchange, comparand: byte): byte
-      {.importc: "_InterlockedCompareExchange8", header: "<intrin.h>".}
-
-  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
-    when sizeof(T) == 8:
-      interlockedCompareExchange64(p, cast[int64](newValue), cast[int64](oldValue)) ==
-        cast[int64](oldValue)
-    elif sizeof(T) == 4:
-      interlockedCompareExchange32(p, cast[int32](newValue), cast[int32](oldValue)) ==
-        cast[int32](oldValue)
-    elif sizeof(T) == 1:
-      interlockedCompareExchange8(p, cast[byte](newValue), cast[byte](oldValue)) ==
-        cast[byte](oldValue)
-    else:
-      {.error: "invalid CAS instruction".}
-
-elif defined(tcc):
-  when defined(amd64):
-    {.emit:"""
-static int __tcc_cas(int *ptr, int oldVal, int newVal)
-{
-    unsigned char ret;
-    __asm__ __volatile__ (
-            "  lock\n"
-            "  cmpxchgq %2,%1\n"
-            "  sete %0\n"
-            : "=q" (ret), "=m" (*ptr)
-            : "r" (newVal), "m" (*ptr), "a" (oldVal)
-            : "memory");
-
-    return ret;
-}
-""".}
-  else:
-    #assert sizeof(int) == 4
-    {.emit:"""
-static int __tcc_cas(int *ptr, int oldVal, int newVal)
-{
-    unsigned char ret;
-    __asm__ __volatile__ (
-            "  lock\n"
-            "  cmpxchgl %2,%1\n"
-            "  sete %0\n"
-            : "=q" (ret), "=m" (*ptr)
-            : "r" (newVal), "m" (*ptr), "a" (oldVal)
-            : "memory");
-
-    return ret;
-}
-""".}
-
-  proc tcc_cas(p: ptr int; oldValue, newValue: int): bool
-    {.importc: "__tcc_cas", nodecl.}
-  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
-    tcc_cas(cast[ptr int](p), cast[int](oldValue), cast[int](newValue))
-elif declared(atomicCompareExchangeN):
-  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
-    atomicCompareExchangeN(p, oldValue.unsafeAddr, newValue, false, ATOMIC_SEQ_CST, ATOMIC_SEQ_CST)
-else:
-  # this is valid for GCC and Intel C++
-  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool
-    {.importc: "__sync_bool_compare_and_swap", nodecl.}
-  # XXX is this valid for 'int'?
-
-
-when (defined(x86) or defined(amd64)) and someVcc:
-  proc cpuRelax* {.importc: "YieldProcessor", header: "<windows.h>".}
-elif (defined(x86) or defined(amd64)) and (someGcc or defined(bcc)):
-  proc cpuRelax* {.inline.} =
-    {.emit: """asm volatile("pause" ::: "memory");""".}
-elif someGcc or defined(tcc):
-  proc cpuRelax* {.inline.} =
-    {.emit: """asm volatile("" ::: "memory");""".}
-elif defined(icl):
-  proc cpuRelax* {.importc: "_mm_pause", header: "xmmintrin.h".}
-elif false:
-  from os import sleep
-
-  proc cpuRelax* {.inline.} = os.sleep(1)
-
-when not declared(fence) and hasThreadSupport:
-  # XXX fixme
-  proc fence*() {.inline.} =
-    var dummy: bool
-    discard cas(addr dummy, false, true)
-
-{.pop.}
diff --git a/lib/system/basic_types.nim b/lib/system/basic_types.nim
index 9db81d1c4..bf81b9b6a 100644
--- a/lib/system/basic_types.nim
+++ b/lib/system/basic_types.nim
@@ -1,23 +1,53 @@
 type
-  int* {.magic: "Int".}         ## Default integer type; bitwidth depends on
+  int* {.magic: Int.}         ## Default integer type; bitwidth depends on
                               ## architecture, but is always the same as a pointer.
-  int8* {.magic: "Int8".}       ## Signed 8 bit integer type.
-  int16* {.magic: "Int16".}     ## Signed 16 bit integer type.
-  int32* {.magic: "Int32".}     ## Signed 32 bit integer type.
-  int64* {.magic: "Int64".}     ## Signed 64 bit integer type.
-  uint* {.magic: "UInt".}       ## Unsigned default integer type.
-  uint8* {.magic: "UInt8".}     ## Unsigned 8 bit integer type.
-  uint16* {.magic: "UInt16".}   ## Unsigned 16 bit integer type.
-  uint32* {.magic: "UInt32".}   ## Unsigned 32 bit integer type.
-  uint64* {.magic: "UInt64".}   ## Unsigned 64 bit integer type.
+  int8* {.magic: Int8.}       ## Signed 8 bit integer type.
+  int16* {.magic: Int16.}     ## Signed 16 bit integer type.
+  int32* {.magic: Int32.}     ## Signed 32 bit integer type.
+  int64* {.magic: Int64.}     ## Signed 64 bit integer type.
+  uint* {.magic: UInt.}       ## Unsigned default integer type.
+  uint8* {.magic: UInt8.}     ## Unsigned 8 bit integer type.
+  uint16* {.magic: UInt16.}   ## Unsigned 16 bit integer type.
+  uint32* {.magic: UInt32.}   ## Unsigned 32 bit integer type.
+  uint64* {.magic: UInt64.}   ## Unsigned 64 bit integer type.
+
+type
+  float* {.magic: Float.}     ## Default floating point type.
+  float32* {.magic: Float32.} ## 32 bit floating point type.
+  float64* {.magic: Float.}   ## 64 bit floating point type.
+
+# 'float64' is now an alias to 'float'; this solves many problems
+
+type
+  char* {.magic: Char.}         ## Built-in 8 bit character type (unsigned).
+  string* {.magic: String.}     ## Built-in string type.
+  cstring* {.magic: Cstring.}   ## Built-in cstring (*compatible string*) type.
+  pointer* {.magic: Pointer.}   ## Built-in pointer type, use the `addr`
+                                ## operator to get a pointer to a variable.
+
+  typedesc* {.magic: TypeDesc.} ## Meta type to denote a type description.
+
+type
+  `ptr`*[T] {.magic: Pointer.}   ## Built-in generic untraced pointer type.
+  `ref`*[T] {.magic: Pointer.}   ## Built-in generic traced pointer type.
+
+  `nil` {.magic: "Nil".}
+
+  void* {.magic: "VoidType".}    ## Meta type to denote the absence of any type.
+  auto* {.magic: Expr.}          ## Meta type for automatic type determination.
+  any* {.deprecated: "Deprecated since v1.5; Use auto instead.".} = distinct auto  ## Deprecated; Use `auto` instead. See https://github.com/nim-lang/RFCs/issues/281
+  untyped* {.magic: Expr.}       ## Meta type to denote an expression that
+                                 ## is not resolved (for templates).
+  typed* {.magic: Stmt.}         ## Meta type to denote an expression that
+                                 ## is resolved (for templates).
 
 type # we need to start a new type section here, so that ``0`` can have a type
   bool* {.magic: "Bool".} = enum ## Built-in boolean type.
     false = 0, true = 1
 
 const
-  on* = true    ## Alias for ``true``.
-  off* = false  ## Alias for ``false``.
+  on* = true    ## Alias for `true`.
+  off* = false  ## Alias for `false`.
 
 type
   SomeSignedInt* = int|int8|int16|int32|int64
@@ -29,34 +59,35 @@ type
   SomeInteger* = SomeSignedInt|SomeUnsignedInt
     ## Type class matching all integer types.
 
+  SomeFloat* = float|float32|float64
+    ## Type class matching all floating point number types.
+
+  SomeNumber* = SomeInteger|SomeFloat
+    ## Type class matching all number types.
+
   SomeOrdinal* = int|int8|int16|int32|int64|bool|enum|uint|uint8|uint16|uint32|uint64
     ## Type class matching all ordinal types; however this includes enums with
     ## holes. See also `Ordinal`
 
-  BiggestInt* = int64
-    ## is an alias for the biggest signed integer type the Nim compiler
-    ## supports. Currently this is ``int64``, but it is platform-dependent
-    ## in general.
-
 
 {.push warning[GcMem]: off, warning[Uninit]: off.}
 {.push hints: off.}
 
 proc `not`*(x: bool): bool {.magic: "Not", noSideEffect.}
-  ## Boolean not; returns true if ``x == false``.
+  ## Boolean not; returns true if `x == false`.
 
 proc `and`*(x, y: bool): bool {.magic: "And", noSideEffect.}
-  ## Boolean ``and``; returns true if ``x == y == true`` (if both arguments
+  ## Boolean `and`; returns true if `x == y == true` (if both arguments
   ## are true).
   ##
-  ## Evaluation is lazy: if ``x`` is false, ``y`` will not even be evaluated.
+  ## Evaluation is lazy: if `x` is false, `y` will not even be evaluated.
 proc `or`*(x, y: bool): bool {.magic: "Or", noSideEffect.}
-  ## Boolean ``or``; returns true if ``not (not x and not y)`` (if any of
+  ## Boolean `or`; returns true if `not (not x and not y)` (if any of
   ## the arguments is true).
   ##
-  ## Evaluation is lazy: if ``x`` is true, ``y`` will not even be evaluated.
+  ## Evaluation is lazy: if `x` is true, `y` will not even be evaluated.
 proc `xor`*(x, y: bool): bool {.magic: "Xor", noSideEffect.}
-  ## Boolean `exclusive or`; returns true if ``x != y`` (if either argument
+  ## Boolean `exclusive or`; returns true if `x != y` (if either argument
   ## is true while the other is false).
 
 {.pop.}
diff --git a/lib/system/bitmasks.nim b/lib/system/bitmasks.nim
index 922ad5fb7..0663247c2 100644
--- a/lib/system/bitmasks.nim
+++ b/lib/system/bitmasks.nim
@@ -10,12 +10,21 @@
 # Page size of the system; in most cases 4096 bytes. For exotic OS or
 # CPU this needs to be changed:
 const
-  PageShift = when defined(cpu16): 8 else: 12 # \
-    # my tests showed no improvements for using larger page sizes.
+  PageShift = when defined(nimPage256) or defined(cpu16): 3
+              elif defined(nimPage512): 9
+              elif defined(nimPage1k): 10
+              else: 12 # \ # my tests showed no improvements for using larger page sizes.
+
   PageSize = 1 shl PageShift
   PageMask = PageSize-1
 
-  MemAlign = 16 # also minimal allocatable memory block
+
+  MemAlign = # also minimal allocatable memory block
+    when defined(nimMemAlignTiny): 4
+    elif defined(useMalloc):
+      when defined(amd64): 16 
+      else: 8
+    else: 16
 
   BitsPerPage = PageSize div MemAlign
   UnitsPerPage = BitsPerPage div (sizeof(int)*8)
diff --git a/lib/system/cellseqs_v1.nim b/lib/system/cellseqs_v1.nim
index 1952491b3..1a305aa42 100644
--- a/lib/system/cellseqs_v1.nim
+++ b/lib/system/cellseqs_v1.nim
@@ -16,18 +16,21 @@ type
     d: PCellArray
 
 proc contains(s: CellSeq, c: PCell): bool {.inline.} =
-  for i in 0 .. s.len-1:
-    if s.d[i] == c: return true
+  for i in 0 ..< s.len:
+    if s.d[i] == c:
+      return true
   return false
 
+proc resize(s: var CellSeq) =
+  s.cap = s.cap * 3 div 2
+  let d = cast[PCellArray](alloc(s.cap * sizeof(PCell)))
+  copyMem(d, s.d, s.len * sizeof(PCell))
+  dealloc(s.d)
+  s.d = d
+
 proc add(s: var CellSeq, c: PCell) {.inline.} =
   if s.len >= s.cap:
-    s.cap = s.cap * 3 div 2
-    var d = cast[PCellArray](alloc(s.cap * sizeof(PCell)))
-    copyMem(d, s.d, s.len * sizeof(PCell))
-    dealloc(s.d)
-    s.d = d
-    # XXX: realloc?
+    resize(s)
   s.d[s.len] = c
   inc(s.len)
 
diff --git a/lib/system/cellseqs_v2.nim b/lib/system/cellseqs_v2.nim
index b2ae41d73..c6c7b1a8e 100644
--- a/lib/system/cellseqs_v2.nim
+++ b/lib/system/cellseqs_v2.nim
@@ -10,47 +10,44 @@
 # Cell seqs for cyclebreaker and cyclicrefs_v2.
 
 type
-  CellTuple = (PT, PNimType)
-  CellArray = ptr UncheckedArray[CellTuple]
-  CellSeq = object
+  CellTuple[T] = (T, PNimTypeV2)
+  CellArray[T] = ptr UncheckedArray[CellTuple[T]]
+  CellSeq[T] = object
     len, cap: int
-    d: CellArray
+    d: CellArray[T]
 
-proc add(s: var CellSeq, c: PT; t: PNimType) {.inline.} =
+proc resize[T](s: var CellSeq[T]) =
+  s.cap = s.cap * 3 div 2
+  var newSize = s.cap * sizeof(CellTuple[T])
+  when compileOption("threads"):
+    s.d = cast[CellArray[T]](reallocShared(s.d, newSize))
+  else:
+    s.d = cast[CellArray[T]](realloc(s.d, newSize))
+
+proc add[T](s: var CellSeq[T], c: T, t: PNimTypeV2) {.inline.} =
   if s.len >= s.cap:
-    s.cap = s.cap * 3 div 2
-    when defined(useMalloc):
-      var d = cast[CellArray](c_malloc(uint(s.cap * sizeof(CellTuple))))
-    else:
-      var d = cast[CellArray](alloc(s.cap * sizeof(CellTuple)))
-    copyMem(d, s.d, s.len * sizeof(CellTuple))
-    when defined(useMalloc):
-      c_free(s.d)
-    else:
-      dealloc(s.d)
-    s.d = d
-    # XXX: realloc?
+    s.resize()
   s.d[s.len] = (c, t)
   inc(s.len)
 
-proc init(s: var CellSeq, cap: int = 1024) =
+proc init[T](s: var CellSeq[T], cap: int = 1024) =
   s.len = 0
   s.cap = cap
-  when defined(useMalloc):
-    s.d = cast[CellArray](c_malloc(uint(s.cap * sizeof(CellTuple))))
+  when compileOption("threads"):
+    s.d = cast[CellArray[T]](allocShared(uint(s.cap * sizeof(CellTuple[T]))))
   else:
-    s.d = cast[CellArray](alloc(s.cap * sizeof(CellTuple)))
+    s.d = cast[CellArray[T]](alloc(s.cap * sizeof(CellTuple[T])))
 
-proc deinit(s: var CellSeq) =
+proc deinit[T](s: var CellSeq[T]) =
   if s.d != nil:
-    when defined(useMalloc):
-      c_free(s.d)
+    when compileOption("threads"):
+      deallocShared(s.d)
     else:
       dealloc(s.d)
     s.d = nil
   s.len = 0
   s.cap = 0
 
-proc pop(s: var CellSeq): (PT, PNimType) =
+proc pop[T](s: var CellSeq[T]): (T, PNimTypeV2) =
   result = s.d[s.len-1]
   dec s.len
diff --git a/lib/system/cellsets.nim b/lib/system/cellsets.nim
index ea00176b5..92036c226 100644
--- a/lib/system/cellsets.nim
+++ b/lib/system/cellsets.nim
@@ -7,13 +7,47 @@
 #    distribution, for details about the copyright.
 #
 
-# Efficient set of pointers for the GC (and repr)
 
-when defined(gcOrc) or defined(gcArc):
+#[
+
+Efficient set of pointers for the GC (and repr)
+-----------------------------------------------
+
+The GC depends on an extremely efficient datastructure for storing a
+set of pointers - this is called a `CellSet` in the source code.
+Inserting, deleting and searching are done in constant time. However,
+modifying a `CellSet` during traversal leads to undefined behaviour.
+
+All operations on a CellSet have to perform efficiently. Because a Cellset can
+become huge a hash table alone is not suitable for this.
+
+We use a mixture of bitset and hash table for this. The hash table maps *pages*
+to a page descriptor. The page descriptor contains a bit for any possible cell
+address within this page. So including a cell is done as follows:
+
+- Find the page descriptor for the page the cell belongs to.
+- Set the appropriate bit in the page descriptor indicating that the
+  cell points to the start of a memory block.
+
+Removing a cell is analogous - the bit has to be set to zero.
+Single page descriptors are never deleted from the hash table. This is not
+needed as the data structures needs to be rebuilt periodically anyway.
+
+Complete traversal is done in this way::
+
+  for each page descriptor d:
+    for each bit in d:
+      if bit == 1:
+        traverse the pointer belonging to this bit
+
+]#
+
+when defined(gcOrc) or defined(gcArc) or defined(gcAtomicArc):
   type
     PCell = Cell
 
-  include bitmasks
+  when not declaredInScope(PageShift):
+    include bitmasks
 
 else:
   type
@@ -44,7 +78,7 @@ type
     head: PPageDesc
     data: PPageDescArray
 
-when defined(gcOrc) or defined(gcArc):
+when defined(gcOrc) or defined(gcArc) or defined(gcAtomicArc):
   discard
 else:
   include cellseqs_v1
diff --git a/lib/system/cgprocs.nim b/lib/system/cgprocs.nim
index 9d0d248c3..9a7645f9b 100644
--- a/lib/system/cgprocs.nim
+++ b/lib/system/cgprocs.nim
@@ -8,13 +8,3 @@
 #
 
 # Headers for procs that the code generator depends on ("compilerprocs")
-
-type
-  LibHandle = pointer       # private type
-  ProcAddr = pointer        # library loading and loading of procs:
-
-proc nimLoadLibrary(path: string): LibHandle {.compilerproc, hcrInline, nonReloadable.}
-proc nimUnloadLibrary(lib: LibHandle) {.compilerproc, hcrInline, nonReloadable.}
-proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr {.compilerproc, hcrInline, nonReloadable.}
-
-proc nimLoadLibraryError(path: string) {.compilerproc, hcrInline, nonReloadable.}
diff --git a/lib/system/channels.nim b/lib/system/channels_builtin.nim
index e4b172c16..02b4d8cbf 100644
--- a/lib/system/channels.nim
+++ b/lib/system/channels_builtin.nim
@@ -10,10 +10,10 @@
 ## Channel support for threads.
 ##
 ## **Note**: This is part of the system module. Do not import it directly.
-## To activate thread support compile with the ``--threads:on`` command line switch.
+## To activate thread support compile with the `--threads:on` command line switch.
 ##
-## **Note:** Channels are designed for the ``Thread`` type. They are unstable when
-## used with ``spawn``
+## **Note:** Channels are designed for the `Thread` type. They are unstable when
+## used with `spawn`
 ##
 ## **Note:** The current implementation of message passing does
 ## not work with cyclic data structures.
@@ -26,11 +26,11 @@
 ## The following is a simple example of two different ways to use channels:
 ## blocking and non-blocking.
 ##
-## .. code-block :: Nim
+##   ```Nim
 ##   # Be sure to compile with --threads:on.
 ##   # The channels and threads modules are part of system and should not be
 ##   # imported.
-##   import os
+##   import std/os
 ##
 ##   # Channels can either be:
 ##   #  - declared at the module level, or
@@ -87,21 +87,23 @@
 ##
 ##   # Clean up the channel.
 ##   chan.close()
+##   ```
 ##
 ## Sample output
 ## -------------
 ## The program should output something similar to this, but keep in mind that
-## exact results may vary in the real world::
-##   Hello World!
-##   Pretend I'm doing useful work...
-##   Pretend I'm doing useful work...
-##   Pretend I'm doing useful work...
-##   Pretend I'm doing useful work...
-##   Pretend I'm doing useful work...
-##   Another message
+## exact results may vary in the real world:
+##
+##     Hello World!
+##     Pretend I'm doing useful work...
+##     Pretend I'm doing useful work...
+##     Pretend I'm doing useful work...
+##     Pretend I'm doing useful work...
+##     Pretend I'm doing useful work...
+##     Another message
 ##
 ## Passing Channels Safely
-## ----------------------
+## -----------------------
 ## Note that when passing objects to procedures on another thread by pointer
 ## (for example through a thread's argument), objects created using the default
 ## allocator will use thread-local, GC-managed memory. Thus it is generally
@@ -109,10 +111,10 @@
 ## in which case they will use a process-wide (thread-safe) shared heap.
 ##
 ## However, it is possible to manually allocate shared memory for channels
-## using e.g. ``system.allocShared0`` and pass these pointers through thread
+## using e.g. `system.allocShared0` and pass these pointers through thread
 ## arguments:
 ##
-## .. code-block :: Nim
+##   ```Nim
 ##   proc worker(channel: ptr Channel[string]) =
 ##     let greeting = channel[].recv()
 ##     echo greeting
@@ -134,10 +136,13 @@
 ##     deallocShared(channel)
 ##
 ##   localChannelExample() # "Hello from the main thread!"
+##   ```
 
 when not declared(ThisIsSystem):
   {.error: "You must not import this module explicitly".}
 
+import std/private/syslocks
+
 type
   pbytes = ptr UncheckedArray[byte]
   RawChannel {.pure, final.} = object ## msg queue for a thread
@@ -151,7 +156,7 @@ type
       region: MemRegion
   PRawChannel = ptr RawChannel
   LoadStoreMode = enum mStore, mLoad
-  Channel* {.gcsafe.}[TMsg] = RawChannel ## a channel for thread communication
+  Channel*[TMsg] {.gcsafe.} = RawChannel ## a channel for thread communication
 
 const ChannelDeadMask = -2
 
@@ -182,8 +187,8 @@ when not usesDestructors:
   proc storeAux(dest, src: pointer, n: ptr TNimNode, t: PRawChannel,
                 mode: LoadStoreMode) {.benign.} =
     var
-      d = cast[ByteAddress](dest)
-      s = cast[ByteAddress](src)
+      d = cast[int](dest)
+      s = cast[int](src)
     case n.kind
     of nkSlot: storeAux(cast[pointer](d +% n.offset),
                         cast[pointer](s +% n.offset), n.typ, t, mode)
@@ -202,8 +207,8 @@ when not usesDestructors:
       cast[pointer](cast[int](p) +% x)
 
     var
-      d = cast[ByteAddress](dest)
-      s = cast[ByteAddress](src)
+      d = cast[int](dest)
+      s = cast[int](src)
     sysAssert(mt != nil, "mt == nil")
     case mt.kind
     of tyString:
@@ -242,14 +247,14 @@ when not usesDestructors:
           x[] = alloc0(t.region, align(GenericSeqSize, mt.base.align) +% seq.len *% mt.base.size)
         else:
           unsureAsgnRef(x, newSeq(mt, seq.len))
-        var dst = cast[ByteAddress](cast[PPointer](dest)[])
+        var dst = cast[int](cast[PPointer](dest)[])
         var dstseq = cast[PGenericSeq](dst)
         dstseq.len = seq.len
         dstseq.reserved = seq.len
         for i in 0..seq.len-1:
           storeAux(
-            cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i*% mt.base.size),
-            cast[pointer](cast[ByteAddress](s2) +% align(GenericSeqSize, mt.base.align) +%
+            cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
+            cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align) +%
                           i *% mt.base.size),
             mt.base, t, mode)
         if mode != mStore: dealloc(t.region, s2)
@@ -265,8 +270,8 @@ when not usesDestructors:
       storeAux(dest, src, mt.node, t, mode)
     of tyArray, tyArrayConstr:
       for i in 0..(mt.size div mt.base.size)-1:
-        storeAux(cast[pointer](d +% i*% mt.base.size),
-                cast[pointer](s +% i*% mt.base.size), mt.base, t, mode)
+        storeAux(cast[pointer](d +% i *% mt.base.size),
+                cast[pointer](s +% i *% mt.base.size), mt.base, t, mode)
     of tyRef:
       var s = cast[PPointer](src)[]
       var x = cast[PPointer](dest)
@@ -360,8 +365,8 @@ proc sendImpl(q: PRawChannel, typ: PNimType, msg: pointer, noBlock: bool): bool
 
   rawSend(q, msg, typ)
   q.elemType = typ
-  releaseSys(q.lock)
   signalSysCond(q.cond)
+  releaseSys(q.lock)
   result = true
 
 proc send*[TMsg](c: var Channel[TMsg], msg: sink TMsg) {.inline.} =
@@ -389,7 +394,7 @@ proc llRecv(q: PRawChannel, res: pointer, typ: PNimType) =
   q.ready = false
   if typ != q.elemType:
     releaseSys(q.lock)
-    sysFatal(ValueError, "cannot receive message of wrong type")
+    raise newException(ValueError, "cannot receive message of wrong type")
   rawRecv(q, res, typ)
   if q.maxItems > 0 and q.count == q.maxItems - 1:
     # Parent thread is awaiting in send. Wake it up.
@@ -410,8 +415,8 @@ proc tryRecv*[TMsg](c: var Channel[TMsg]): tuple[dataAvailable: bool,
   ## Tries to receive a message from the channel `c`, but this can fail
   ## for all sort of reasons, including contention.
   ##
-  ## If it fails, it returns ``(false, default(msg))`` otherwise it
-  ## returns ``(true, msg)``.
+  ## If it fails, it returns `(false, default(msg))` otherwise it
+  ## returns `(true, msg)`.
   var q = cast[PRawChannel](addr(c))
   if q.mask != ChannelDeadMask:
     if tryAcquireSys(q.lock):
diff --git a/lib/system/chcks.nim b/lib/system/chcks.nim
index 0b6a75b1f..b48855964 100644
--- a/lib/system/chcks.nim
+++ b/lib/system/chcks.nim
@@ -9,6 +9,8 @@
 
 # Implementation of some runtime checks.
 include system/indexerrors
+when defined(nimPreviewSlimSystem):
+  import std/formatfloat
 
 proc raiseRangeError(val: BiggestInt) {.compilerproc, noinline.} =
   when hostOS == "standalone":
@@ -16,6 +18,9 @@ proc raiseRangeError(val: BiggestInt) {.compilerproc, noinline.} =
   else:
     sysFatal(RangeDefect, "value out of range: ", $val)
 
+proc raiseIndexError4(l1, h1, h2: int) {.compilerproc, noinline.} =
+  sysFatal(IndexDefect, "index out of bounds: " & $l1 & ".." & $h1 & " notin 0.." & $(h2 - 1))
+
 proc raiseIndexError3(i, a, b: int) {.compilerproc, noinline.} =
   sysFatal(IndexDefect, formatErrorIndexBound(i, a, b))
 
@@ -26,18 +31,41 @@ proc raiseIndexError() {.compilerproc, noinline.} =
   sysFatal(IndexDefect, "index out of bounds")
 
 proc raiseFieldError(f: string) {.compilerproc, noinline.} =
+  ## remove after bootstrap > 1.5.1
   sysFatal(FieldDefect, f)
 
+when defined(nimV2):
+  proc raiseFieldError2(f: string, discVal: int) {.compilerproc, noinline.} =
+    ## raised when field is inaccessible given runtime value of discriminant
+    sysFatal(FieldDefect, f & $discVal & "'")
+
+  proc raiseFieldErrorStr(f: string, discVal: string) {.compilerproc, noinline.} =
+    ## raised when field is inaccessible given runtime value of discriminant
+    sysFatal(FieldDefect, formatFieldDefect(f, discVal))
+else:
+  proc raiseFieldError2(f: string, discVal: string) {.compilerproc, noinline.} =
+    ## raised when field is inaccessible given runtime value of discriminant
+    sysFatal(FieldDefect, formatFieldDefect(f, discVal))
+
 proc raiseRangeErrorI(i, a, b: BiggestInt) {.compilerproc, noinline.} =
-  sysFatal(RangeDefect, "value out of range: " & $i & " notin " & $a & " .. " & $b)
+  when defined(standalone):
+    sysFatal(RangeDefect, "value out of range")
+  else:
+    sysFatal(RangeDefect, "value out of range: " & $i & " notin " & $a & " .. " & $b)
 
 proc raiseRangeErrorF(i, a, b: float) {.compilerproc, noinline.} =
-  sysFatal(RangeDefect, "value out of range: " & $i & " notin " & $a & " .. " & $b)
+  when defined(standalone):
+    sysFatal(RangeDefect, "value out of range")
+  else:
+    sysFatal(RangeDefect, "value out of range: " & $i & " notin " & $a & " .. " & $b)
 
 proc raiseRangeErrorU(i, a, b: uint64) {.compilerproc, noinline.} =
   # todo: better error reporting
   sysFatal(RangeDefect, "value out of range")
 
+proc raiseRangeErrorNoArgs() {.compilerproc, noinline.} =
+  sysFatal(RangeDefect, "value out of range")
+
 proc raiseObjectConversionError() {.compilerproc, noinline.} =
   sysFatal(ObjectConversionDefect, "invalid object conversion")
 
diff --git a/lib/system/comparisons.nim b/lib/system/comparisons.nim
index 60ac554a6..a8d78bb93 100644
--- a/lib/system/comparisons.nim
+++ b/lib/system/comparisons.nim
@@ -1,24 +1,24 @@
 # comparison operators:
-proc `==`*[Enum: enum](x, y: Enum): bool {.magic: "EqEnum", noSideEffect.}
+proc `==`*[Enum: enum](x, y: Enum): bool {.magic: "EqEnum", noSideEffect.} =
   ## Checks whether values within the *same enum* have the same underlying value.
-  ##
-  ## .. code-block:: Nim
-  ##  type
-  ##    Enum1 = enum
-  ##      Field1 = 3, Field2
-  ##    Enum2 = enum
-  ##      Place1, Place2 = 3
-  ##  var
-  ##    e1 = Field1
-  ##    e2 = Enum1(Place2)
-  ##  echo (e1 == e2) # true
-  ##  echo (e1 == Place2) # raises error
-proc `==`*(x, y: pointer): bool {.magic: "EqRef", noSideEffect.}
-  ## .. code-block:: Nim
-  ##  var # this is a wildly dangerous example
-  ##    a = cast[pointer](0)
-  ##    b = cast[pointer](nil)
-  ##  echo (a == b) # true due to the special meaning of `nil`/0 as a pointer
+  runnableExamples:
+    type
+      Enum1 = enum
+        field1 = 3, field2
+      Enum2 = enum
+        place1, place2 = 3
+    var
+      e1 = field1
+      e2 = place2.ord.Enum1
+    assert e1 == e2
+    assert not compiles(e1 == place2) # raises error
+proc `==`*(x, y: pointer): bool {.magic: "EqRef", noSideEffect.} =
+  ## Checks for equality between two `pointer` variables.
+  runnableExamples:
+    var # this is a wildly dangerous example
+      a = cast[pointer](0)
+      b = cast[pointer](nil)
+    assert a == b # true due to the special meaning of `nil`/0 as a pointer
 proc `==`*(x, y: string): bool {.magic: "EqStr", noSideEffect.}
   ## Checks for equality between two `string` variables.
 
@@ -26,117 +26,118 @@ proc `==`*(x, y: char): bool {.magic: "EqCh", noSideEffect.}
   ## Checks for equality between two `char` variables.
 proc `==`*(x, y: bool): bool {.magic: "EqB", noSideEffect.}
   ## Checks for equality between two `bool` variables.
-proc `==`*[T](x, y: set[T]): bool {.magic: "EqSet", noSideEffect.}
+proc `==`*[T](x, y: set[T]): bool {.magic: "EqSet", noSideEffect.} =
   ## Checks for equality between two variables of type `set`.
-  ##
-  ## .. code-block:: Nim
-  ##  var a = {1, 2, 2, 3} # duplication in sets is ignored
-  ##  var b = {1, 2, 3}
-  ##  echo (a == b) # true
+  runnableExamples:
+    assert {1, 2, 2, 3} == {1, 2, 3} # duplication in sets is ignored
+
 proc `==`*[T](x, y: ref T): bool {.magic: "EqRef", noSideEffect.}
   ## Checks that two `ref` variables refer to the same item.
 proc `==`*[T](x, y: ptr T): bool {.magic: "EqRef", noSideEffect.}
   ## Checks that two `ptr` variables refer to the same item.
-proc `==`*[T: proc](x, y: T): bool {.magic: "EqProc", noSideEffect.}
+proc `==`*[T: proc | iterator](x, y: T): bool {.magic: "EqProc", noSideEffect.}
   ## Checks that two `proc` variables refer to the same procedure.
 
 proc `<=`*[Enum: enum](x, y: Enum): bool {.magic: "LeEnum", noSideEffect.}
-proc `<=`*(x, y: string): bool {.magic: "LeStr", noSideEffect.}
+proc `<=`*(x, y: string): bool {.magic: "LeStr", noSideEffect.} =
   ## Compares two strings and returns true if `x` is lexicographically
   ## before `y` (uppercase letters come before lowercase letters).
-  ##
-  ## .. code-block:: Nim
-  ##     let
-  ##       a = "abc"
-  ##       b = "abd"
-  ##       c = "ZZZ"
-  ##     assert a <= b
-  ##     assert a <= a
-  ##     assert (a <= c) == false
-proc `<=`*(x, y: char): bool {.magic: "LeCh", noSideEffect.}
+  runnableExamples:
+    let
+      a = "abc"
+      b = "abd"
+      c = "ZZZ"
+    assert a <= b
+    assert a <= a
+    assert not (a <= c)
+
+proc `<=`*(x, y: char): bool {.magic: "LeCh", noSideEffect.} =
   ## Compares two chars and returns true if `x` is lexicographically
   ## before `y` (uppercase letters come before lowercase letters).
-  ##
-  ## .. code-block:: Nim
-  ##     let
-  ##       a = 'a'
-  ##       b = 'b'
-  ##       c = 'Z'
-  ##     assert a <= b
-  ##     assert a <= a
-  ##     assert (a <= c) == false
-proc `<=`*[T](x, y: set[T]): bool {.magic: "LeSet", noSideEffect.}
+  runnableExamples:
+    let
+      a = 'a'
+      b = 'b'
+      c = 'Z'
+    assert a <= b
+    assert a <= a
+    assert not (a <= c)
+
+proc `<=`*[T](x, y: set[T]): bool {.magic: "LeSet", noSideEffect.} =
   ## Returns true if `x` is a subset of `y`.
   ##
   ## A subset `x` has all of its members in `y` and `y` doesn't necessarily
   ## have more members than `x`. That is, `x` can be equal to `y`.
-  ##
-  ## .. code-block:: Nim
-  ##   let
-  ##     a = {3, 5}
-  ##     b = {1, 3, 5, 7}
-  ##     c = {2}
-  ##   assert a <= b
-  ##   assert a <= a
-  ##   assert (a <= c) == false
+  runnableExamples:
+    let
+      a = {3, 5}
+      b = {1, 3, 5, 7}
+      c = {2}
+    assert a <= b
+    assert a <= a
+    assert not (a <= c)
+
 proc `<=`*(x, y: bool): bool {.magic: "LeB", noSideEffect.}
 proc `<=`*[T](x, y: ref T): bool {.magic: "LePtr", noSideEffect.}
 proc `<=`*(x, y: pointer): bool {.magic: "LePtr", noSideEffect.}
 
 proc `<`*[Enum: enum](x, y: Enum): bool {.magic: "LtEnum", noSideEffect.}
-proc `<`*(x, y: string): bool {.magic: "LtStr", noSideEffect.}
+proc `<`*(x, y: string): bool {.magic: "LtStr", noSideEffect.} =
   ## Compares two strings and returns true if `x` is lexicographically
   ## before `y` (uppercase letters come before lowercase letters).
-  ##
-  ## .. code-block:: Nim
-  ##     let
-  ##       a = "abc"
-  ##       b = "abd"
-  ##       c = "ZZZ"
-  ##     assert a < b
-  ##     assert (a < a) == false
-  ##     assert (a < c) == false
-proc `<`*(x, y: char): bool {.magic: "LtCh", noSideEffect.}
+  runnableExamples:
+    let
+      a = "abc"
+      b = "abd"
+      c = "ZZZ"
+    assert a < b
+    assert not (a < a)
+    assert not (a < c)
+
+proc `<`*(x, y: char): bool {.magic: "LtCh", noSideEffect.} =
   ## Compares two chars and returns true if `x` is lexicographically
   ## before `y` (uppercase letters come before lowercase letters).
-  ##
-  ## .. code-block:: Nim
-  ##     let
-  ##       a = 'a'
-  ##       b = 'b'
-  ##       c = 'Z'
-  ##     assert a < b
-  ##     assert (a < a) == false
-  ##     assert (a < c) == false
-proc `<`*[T](x, y: set[T]): bool {.magic: "LtSet", noSideEffect.}
+  runnableExamples:
+    let
+      a = 'a'
+      b = 'b'
+      c = 'Z'
+    assert a < b
+    assert not (a < a)
+    assert not (a < c)
+
+proc `<`*[T](x, y: set[T]): bool {.magic: "LtSet", noSideEffect.} =
   ## Returns true if `x` is a strict or proper subset of `y`.
   ##
   ## A strict or proper subset `x` has all of its members in `y` but `y` has
   ## more elements than `y`.
-  ##
-  ## .. code-block:: Nim
-  ##   let
-  ##     a = {3, 5}
-  ##     b = {1, 3, 5, 7}
-  ##     c = {2}
-  ##   assert a < b
-  ##   assert (a < a) == false
-  ##   assert (a < c) == false
+  runnableExamples:
+    let
+      a = {3, 5}
+      b = {1, 3, 5, 7}
+      c = {2}
+    assert a < b
+    assert not (a < a)
+    assert not (a < c)
+
 proc `<`*(x, y: bool): bool {.magic: "LtB", noSideEffect.}
 proc `<`*[T](x, y: ref T): bool {.magic: "LtPtr", noSideEffect.}
 proc `<`*[T](x, y: ptr T): bool {.magic: "LtPtr", noSideEffect.}
 proc `<`*(x, y: pointer): bool {.magic: "LtPtr", noSideEffect.}
 
-template `!=`*(x, y: untyped): untyped =
-  ## Unequals operator. This is a shorthand for ``not (x == y)``.
+when not defined(nimHasCallsitePragma):
+  {.pragma: callsite.}
+
+template `!=`*(x, y: untyped): untyped {.callsite.} =
+  ## Unequals operator. This is a shorthand for `not (x == y)`.
   not (x == y)
 
-template `>=`*(x, y: untyped): untyped =
-  ## "is greater or equals" operator. This is the same as ``y <= x``.
+template `>=`*(x, y: untyped): untyped {.callsite.} =
+  ## "is greater or equals" operator. This is the same as `y <= x`.
   y <= x
 
-template `>`*(x, y: untyped): untyped =
-  ## "is greater" operator. This is the same as ``y < x``.
+template `>`*(x, y: untyped): untyped {.callsite.} =
+  ## "is greater" operator. This is the same as `y < x`.
   y < x
 
 
@@ -162,14 +163,14 @@ proc `<`*(x, y: int32): bool {.magic: "LtI", noSideEffect.}
 proc `<`*(x, y: int64): bool {.magic: "LtI", noSideEffect.}
 
 proc `<=`*(x, y: uint): bool {.magic: "LeU", noSideEffect.}
-  ## Returns true if ``x <= y``.
+  ## Returns true if `x <= y`.
 proc `<=`*(x, y: uint8): bool {.magic: "LeU", noSideEffect.}
 proc `<=`*(x, y: uint16): bool {.magic: "LeU", noSideEffect.}
 proc `<=`*(x, y: uint32): bool {.magic: "LeU", noSideEffect.}
 proc `<=`*(x, y: uint64): bool {.magic: "LeU", noSideEffect.}
 
 proc `<`*(x, y: uint): bool {.magic: "LtU", noSideEffect.}
-  ## Returns true if ``x < y``.
+  ## Returns true if `x < y`.
 proc `<`*(x, y: uint8): bool {.magic: "LtU", noSideEffect.}
 proc `<`*(x, y: uint16): bool {.magic: "LtU", noSideEffect.}
 proc `<`*(x, y: uint32): bool {.magic: "LtU", noSideEffect.}
@@ -177,7 +178,7 @@ proc `<`*(x, y: uint64): bool {.magic: "LtU", noSideEffect.}
 
 proc `<=%`*(x, y: int): bool {.inline.} =
   ## Treats `x` and `y` as unsigned and compares them.
-  ## Returns true if ``unsigned(x) <= unsigned(y)``.
+  ## Returns true if `unsigned(x) <= unsigned(y)`.
   cast[uint](x) <= cast[uint](y)
 proc `<=%`*(x, y: int8): bool {.inline.} = cast[uint8](x) <= cast[uint8](y)
 proc `<=%`*(x, y: int16): bool {.inline.} = cast[uint16](x) <= cast[uint16](y)
@@ -186,7 +187,7 @@ proc `<=%`*(x, y: int64): bool {.inline.} = cast[uint64](x) <= cast[uint64](y)
 
 proc `<%`*(x, y: int): bool {.inline.} =
   ## Treats `x` and `y` as unsigned and compares them.
-  ## Returns true if ``unsigned(x) < unsigned(y)``.
+  ## Returns true if `unsigned(x) < unsigned(y)`.
   cast[uint](x) < cast[uint](y)
 proc `<%`*(x, y: int8): bool {.inline.} = cast[uint8](x) < cast[uint8](y)
 proc `<%`*(x, y: int16): bool {.inline.} = cast[uint16](x) < cast[uint16](y)
@@ -195,11 +196,11 @@ proc `<%`*(x, y: int64): bool {.inline.} = cast[uint64](x) < cast[uint64](y)
 
 template `>=%`*(x, y: untyped): untyped = y <=% x
   ## Treats `x` and `y` as unsigned and compares them.
-  ## Returns true if ``unsigned(x) >= unsigned(y)``.
+  ## Returns true if `unsigned(x) >= unsigned(y)`.
 
 template `>%`*(x, y: untyped): untyped = y <% x
   ## Treats `x` and `y` as unsigned and compares them.
-  ## Returns true if ``unsigned(x) > unsigned(y)``.
+  ## Returns true if `unsigned(x) > unsigned(y)`.
 
 proc `==`*(x, y: uint): bool {.magic: "EqI", noSideEffect.}
   ## Compares two unsigned integers for equality.
@@ -208,6 +209,14 @@ proc `==`*(x, y: uint16): bool {.magic: "EqI", noSideEffect.}
 proc `==`*(x, y: uint32): bool {.magic: "EqI", noSideEffect.}
 proc `==`*(x, y: uint64): bool {.magic: "EqI", noSideEffect.}
 
+proc `<=`*(x, y: float32): bool {.magic: "LeF64", noSideEffect.}
+proc `<=`*(x, y: float): bool {.magic: "LeF64", noSideEffect.}
+
+proc `<`*(x, y: float32): bool {.magic: "LtF64", noSideEffect.}
+proc `<`*(x, y: float): bool {.magic: "LtF64", noSideEffect.}
+
+proc `==`*(x, y: float32): bool {.magic: "EqF64", noSideEffect.}
+proc `==`*(x, y: float): bool {.magic: "EqF64", noSideEffect.}
 
 {.push stackTrace: off.}
 
@@ -222,6 +231,13 @@ proc min*(x, y: int32): int32 {.magic: "MinI", noSideEffect.} =
 proc min*(x, y: int64): int64 {.magic: "MinI", noSideEffect.} =
   ## The minimum value of two integers.
   if x <= y: x else: y
+proc min*(x, y: float32): float32 {.noSideEffect, inline.} =
+  if x <= y or y != y: x else: y
+proc min*(x, y: float64): float64 {.noSideEffect, inline.} =
+  if x <= y or y != y: x else: y
+proc min*[T: not SomeFloat](x, y: T): T {.inline.} =
+  ## Generic minimum operator of 2 values based on `<=`.
+  if x <= y: x else: y
 
 proc max*(x, y: int): int {.magic: "MaxI", noSideEffect.} =
   if y <= x: x else: y
@@ -234,16 +250,23 @@ proc max*(x, y: int32): int32 {.magic: "MaxI", noSideEffect.} =
 proc max*(x, y: int64): int64 {.magic: "MaxI", noSideEffect.} =
   ## The maximum value of two integers.
   if y <= x: x else: y
+proc max*(x, y: float32): float32 {.noSideEffect, inline.} =
+  if y <= x or y != y: x else: y
+proc max*(x, y: float64): float64 {.noSideEffect, inline.} =
+  if y <= x or y != y: x else: y
+proc max*[T: not SomeFloat](x, y: T): T {.inline.} =
+  ## Generic maximum operator of 2 values based on `<=`.
+  if y <= x: x else: y
 
 
 proc min*[T](x: openArray[T]): T =
-  ## The minimum value of `x`. ``T`` needs to have a ``<`` operator.
+  ## The minimum value of `x`. `T` needs to have a `<` operator.
   result = x[0]
   for i in 1..high(x):
     if x[i] < result: result = x[i]
 
 proc max*[T](x: openArray[T]): T =
-  ## The maximum value of `x`. ``T`` needs to have a ``<`` operator.
+  ## The maximum value of `x`. `T` needs to have a `<` operator.
   result = x[0]
   for i in 1..high(x):
     if result < x[i]: result = x[i]
@@ -252,11 +275,17 @@ proc max*[T](x: openArray[T]): T =
 
 
 proc clamp*[T](x, a, b: T): T =
-  ## Limits the value ``x`` within the interval [a, b].
+  ## Limits the value `x` within the interval \[a, b].
+  ## This proc is equivalent to but faster than `max(a, min(b, x))`.
+  ## 
+  ## .. warning:: `a <= b` is assumed and will not be checked (currently).
   ##
-  ## .. code-block:: Nim
-  ##   assert((1.4).clamp(0.0, 1.0) == 1.0)
-  ##   assert((0.5).clamp(0.0, 1.0) == 0.5)
+  ## **See also:**
+  ## `math.clamp` for a version that takes a `Slice[T]` instead.
+  runnableExamples:
+    assert (1.4).clamp(0.0, 1.0) == 1.0
+    assert (0.5).clamp(0.0, 1.0) == 0.5
+    assert 4.clamp(1, 3) == max(1, min(3, 4))
   if x < a: return a
   if x > b: return b
   return x
@@ -281,12 +310,8 @@ proc `==`*[T](x, y: seq[T]): bool {.noSideEffect.} =
   ## Generic equals operator for sequences: relies on a equals operator for
   ## the element type `T`.
   when nimvm:
-    when not defined(nimNoNil):
-      if x.isNil and y.isNil:
-        return true
-    else:
-      if x.len == 0 and y.len == 0:
-        return true
+    if x.len == 0 and y.len == 0:
+      return true
   else:
     when not defined(js):
       proc seqToPtr[T](x: seq[T]): pointer {.inline, noSideEffect.} =
@@ -299,13 +324,9 @@ proc `==`*[T](x, y: seq[T]): bool {.noSideEffect.} =
         return true
     else:
       var sameObject = false
-      asm """`sameObject` = `x` === `y`"""
+      {.emit: """`sameObject` = `x` === `y`;""".}
       if sameObject: return true
 
-  when not defined(nimNoNil):
-    if x.isNil or y.isNil:
-      return false
-
   if x.len != y.len:
     return false
 
diff --git a/lib/system/compilation.nim b/lib/system/compilation.nim
new file mode 100644
index 000000000..cdb976ed5
--- /dev/null
+++ b/lib/system/compilation.nim
@@ -0,0 +1,209 @@
+const
+  NimMajor* {.intdefine.}: int = 2
+    ## is the major number of Nim's version. Example:
+    ##   ```nim
+    ##   when (NimMajor, NimMinor, NimPatch) >= (1, 3, 1): discard
+    ##   ```
+    # see also std/private/since
+
+  NimMinor* {.intdefine.}: int = 2
+    ## is the minor number of Nim's version.
+    ## Odd for devel, even for releases.
+
+  NimPatch* {.intdefine.}: int = 1
+    ## is the patch number of Nim's version.
+    ## Odd for devel, even for releases.
+
+{.push profiler: off.}
+let nimvm* {.magic: "Nimvm", compileTime.}: bool = false
+  ## May be used only in `when` expression.
+  ## It is true in Nim VM context and false otherwise.
+{.pop.}
+
+const
+  isMainModule* {.magic: "IsMainModule".}: bool = false
+    ## True only when accessed in the main module. This works thanks to
+    ## compiler magic. It is useful to embed testing code in a module.
+
+  CompileDate* {.magic: "CompileDate".}: string = "0000-00-00"
+    ## The date (in UTC) of compilation as a string of the form
+    ## `YYYY-MM-DD`. This works thanks to compiler magic.
+
+  CompileTime* {.magic: "CompileTime".}: string = "00:00:00"
+    ## The time (in UTC) of compilation as a string of the form
+    ## `HH:MM:SS`. This works thanks to compiler magic.
+
+proc defined*(x: untyped): bool {.magic: "Defined", noSideEffect, compileTime.}
+  ## Special compile-time procedure that checks whether `x` is
+  ## defined.
+  ##
+  ## `x` is an external symbol introduced through the compiler's
+  ## `-d:x switch <nimc.html#compiler-usage-compileminustime-symbols>`_ to enable
+  ## build time conditionals:
+  ##   ```nim
+  ##   when not defined(release):
+  ##     # Do here programmer friendly expensive sanity checks.
+  ##   # Put here the normal code
+  ##   ```
+  ##
+  ## See also:
+  ## * `compileOption <#compileOption,string>`_ for `on|off` options
+  ## * `compileOption <#compileOption,string,string>`_ for enum options
+  ## * `define pragmas <manual.html#implementation-specific-pragmas-compileminustime-define-pragmas>`_
+
+proc declared*(x: untyped): bool {.magic: "Declared", noSideEffect, compileTime.}
+  ## Special compile-time procedure that checks whether `x` is
+  ## declared. `x` has to be an identifier or a qualified identifier.
+  ##
+  ## This can be used to check whether a library provides a certain
+  ## feature or not:
+  ##   ```nim
+  ##   when not declared(strutils.toUpper):
+  ##     # provide our own toUpper proc here, because strutils is
+  ##     # missing it.
+  ##   ```
+  ##
+  ## See also:
+  ## * `declaredInScope <#declaredInScope,untyped>`_
+
+proc declaredInScope*(x: untyped): bool {.magic: "DeclaredInScope", noSideEffect, compileTime.}
+  ## Special compile-time procedure that checks whether `x` is
+  ## declared in the current scope. `x` has to be an identifier.
+
+proc compiles*(x: untyped): bool {.magic: "Compiles", noSideEffect, compileTime.} =
+  ## Special compile-time procedure that checks whether `x` can be compiled
+  ## without any semantic error.
+  ## This can be used to check whether a type supports some operation:
+  ##   ```nim
+  ##   when compiles(3 + 4):
+  ##     echo "'+' for integers is available"
+  ##   ```
+  discard
+
+proc astToStr*[T](x: T): string {.magic: "AstToStr", noSideEffect.}
+  ## Converts the AST of `x` into a string representation. This is very useful
+  ## for debugging.
+
+proc runnableExamples*(rdoccmd = "", body: untyped) {.magic: "RunnableExamples".} =
+  ## A section you should use to mark `runnable example`:idx: code with.
+  ##
+  ## - In normal debug and release builds code within
+  ##   a `runnableExamples` section is ignored.
+  ## - The documentation generator is aware of these examples and considers them
+  ##   part of the `##` doc comment. As the last step of documentation
+  ##   generation each runnableExample is put in its own file `$file_examples$i.nim`,
+  ##   compiled and tested. The collected examples are
+  ##   put into their own module to ensure the examples do not refer to
+  ##   non-exported symbols.
+  runnableExamples:
+    proc timesTwo*(x: int): int =
+      ## This proc doubles a number.
+      runnableExamples:
+        # at module scope
+        const exported* = 123
+        assert timesTwo(5) == 10
+        block: # at block scope
+          defer: echo "done"
+      runnableExamples "-d:foo -b:cpp":
+        import std/compilesettings
+        assert querySetting(backend) == "cpp"
+        assert defined(foo)
+      runnableExamples "-r:off": ## this one is only compiled
+         import std/browsers
+         openDefaultBrowser "https://forum.nim-lang.org/"
+      2 * x
+
+proc compileOption*(option: string): bool {.
+  magic: "CompileOption", noSideEffect.} =
+  ## Can be used to determine an `on|off` compile-time option.
+  ##
+  ## See also:
+  ## * `compileOption <#compileOption,string,string>`_ for enum options
+  ## * `defined <#defined,untyped>`_
+  ## * `std/compilesettings module <compilesettings.html>`_
+  runnableExamples("--floatChecks:off"):
+    static: doAssert not compileOption("floatchecks")
+    {.push floatChecks: on.}
+    static: doAssert compileOption("floatchecks")
+    # floating point NaN and Inf checks enabled in this scope
+    {.pop.}
+
+proc compileOption*(option, arg: string): bool {.
+  magic: "CompileOptionArg", noSideEffect.} =
+  ## Can be used to determine an enum compile-time option.
+  ##
+  ## See also:
+  ## * `compileOption <#compileOption,string>`_ for `on|off` options
+  ## * `defined <#defined,untyped>`_
+  ## * `std/compilesettings module <compilesettings.html>`_
+  runnableExamples:
+    when compileOption("opt", "size") and compileOption("gc", "boehm"):
+      discard "compiled with optimization for size and uses Boehm's GC"
+
+template currentSourcePath*: string = instantiationInfo(-1, true).filename
+  ## Returns the full file-system path of the current source.
+  ##
+  ## To get the directory containing the current source, use it with
+  ## `ospaths2.parentDir() <ospaths2.html#parentDir%2Cstring>`_ as
+  ## `currentSourcePath.parentDir()`.
+  ##
+  ## The path returned by this template is set at compile time.
+  ##
+  ## See the docstring of `macros.getProjectPath() <macros.html#getProjectPath>`_
+  ## for an example to see the distinction between the `currentSourcePath()`
+  ## and `getProjectPath()`.
+  ##
+  ## See also:
+  ## * `ospaths2.getCurrentDir() proc <ospaths2.html#getCurrentDir>`_
+
+proc slurp*(filename: string): string {.magic: "Slurp".}
+  ## This is an alias for `staticRead <#staticRead,string>`_.
+
+proc staticRead*(filename: string): string {.magic: "Slurp".}
+  ## Compile-time `readFile <syncio.html#readFile,string>`_ proc for easy
+  ## `resource`:idx: embedding:
+  ##
+  ## The maximum file size limit that `staticRead` and `slurp` can read is
+  ## near or equal to the *free* memory of the device you are using to compile.
+  ##   ```nim
+  ##   const myResource = staticRead"mydatafile.bin"
+  ##   ```
+  ##
+  ## `slurp <#slurp,string>`_ is an alias for `staticRead`.
+
+proc gorge*(command: string, input = "", cache = ""): string {.
+  magic: "StaticExec".} = discard
+  ## This is an alias for `staticExec <#staticExec,string,string,string>`_.
+
+proc staticExec*(command: string, input = "", cache = ""): string {.
+  magic: "StaticExec".} = discard
+  ## Executes an external process at compile-time and returns its text output
+  ## (stdout + stderr).
+  ##
+  ## If `input` is not an empty string, it will be passed as a standard input
+  ## to the executed program.
+  ##   ```nim
+  ##   const buildInfo = "Revision " & staticExec("git rev-parse HEAD") &
+  ##                     "\nCompiled on " & staticExec("uname -v")
+  ##   ```
+  ##
+  ## `gorge <#gorge,string,string,string>`_ is an alias for `staticExec`.
+  ##
+  ## Note that you can use this proc inside a pragma like
+  ## `passc <manual.html#implementation-specific-pragmas-passc-pragma>`_ or
+  ## `passl <manual.html#implementation-specific-pragmas-passl-pragma>`_.
+  ##
+  ## If `cache` is not empty, the results of `staticExec` are cached within
+  ## the `nimcache` directory. Use `--forceBuild` to get rid of this caching
+  ## behaviour then. `command & input & cache` (the concatenated string) is
+  ## used to determine whether the entry in the cache is still valid. You can
+  ## use versioning information for `cache`:
+  ##   ```nim
+  ##   const stateMachine = staticExec("dfaoptimizer", "input", "0.8.0")
+  ##   ```
+
+proc gorgeEx*(command: string, input = "", cache = ""): tuple[output: string,
+                                                              exitCode: int] =
+  ## Similar to `gorge <#gorge,string,string,string>`_ but also returns the
+  ## precious exit code.
+  discard
diff --git a/lib/system/coro_detection.nim b/lib/system/coro_detection.nim
new file mode 100644
index 000000000..f6c1b5c15
--- /dev/null
+++ b/lib/system/coro_detection.nim
@@ -0,0 +1,20 @@
+## Coroutine detection logic
+
+template coroutinesSupportedPlatform(): bool =
+  when defined(sparc) or defined(ELATE) or defined(boehmgc) or defined(gogc) or
+    defined(nogc) or defined(gcRegions) or defined(gcMarkAndSweep):
+    false
+  else:
+    true
+
+when defined(nimCoroutines):
+  # Explicit opt-in.
+  when not coroutinesSupportedPlatform():
+    {.error: "Coroutines are not supported on this architecture and/or garbage collector.".}
+  const nimCoroutines* = true
+elif defined(noNimCoroutines):
+  # Explicit opt-out.
+  const nimCoroutines* = false
+else:
+  # Autodetect coroutine support.
+  const nimCoroutines* = false
diff --git a/lib/system/countbits_impl.nim b/lib/system/countbits_impl.nim
new file mode 100644
index 000000000..34969cb32
--- /dev/null
+++ b/lib/system/countbits_impl.nim
@@ -0,0 +1,93 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2012 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Contains the used algorithms for counting bits.
+
+from std/private/bitops_utils import forwardImpl, castToUnsigned
+
+const useBuiltins* = not defined(noIntrinsicsBitOpts)
+const noUndefined* = defined(noUndefinedBitOpts)
+const useGCC_builtins* = (defined(gcc) or defined(llvm_gcc) or
+                         defined(clang)) and useBuiltins
+const useICC_builtins* = defined(icc) and useBuiltins
+const useVCC_builtins* = defined(vcc) and useBuiltins
+const arch64* = sizeof(int) == 8
+
+template countBitsImpl(n: uint32): int =
+  # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  var v = uint32(n)
+  v = v - ((v shr 1'u32) and 0x55555555'u32)
+  v = (v and 0x33333333'u32) + ((v shr 2'u32) and 0x33333333'u32)
+  (((v + (v shr 4'u32) and 0xF0F0F0F'u32) * 0x1010101'u32) shr 24'u32).int
+
+template countBitsImpl(n: uint64): int =
+  # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  var v = uint64(n)
+  v = v - ((v shr 1'u64) and 0x5555555555555555'u64)
+  v = (v and 0x3333333333333333'u64) + ((v shr 2'u64) and 0x3333333333333333'u64)
+  v = (v + (v shr 4'u64) and 0x0F0F0F0F0F0F0F0F'u64)
+  ((v * 0x0101010101010101'u64) shr 56'u64).int
+
+
+when useGCC_builtins:
+  # Returns the number of set 1-bits in value.
+  proc builtin_popcount(x: cuint): cint {.importc: "__builtin_popcount", cdecl.}
+  proc builtin_popcountll(x: culonglong): cint {.
+      importc: "__builtin_popcountll", cdecl.}
+
+elif useVCC_builtins:
+  # Counts the number of one bits (population count) in a 16-, 32-, or 64-byte unsigned integer.
+  func builtin_popcnt16(a2: uint16): uint16 {.
+      importc: "__popcnt16", header: "<intrin.h>".}
+  func builtin_popcnt32(a2: uint32): uint32 {.
+      importc: "__popcnt", header: "<intrin.h>".}
+  func builtin_popcnt64(a2: uint64): uint64 {.
+      importc: "__popcnt64", header: "<intrin.h>".}
+
+elif useICC_builtins:
+  # Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm
+  # see also: https://software.intel.com/en-us/node/523362
+  # Count the number of bits set to 1 in an integer a, and return that count in dst.
+  func builtin_popcnt32(a: cint): cint {.
+      importc: "_popcnt", header: "<immintrin.h>".}
+  func builtin_popcnt64(a: uint64): cint {.
+      importc: "_popcnt64", header: "<immintrin.h>".}
+
+
+func countSetBitsImpl*(x: SomeInteger): int {.inline.} =
+  ## Counts the set bits in an integer (also called `Hamming weight`:idx:).
+  # TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT.
+  # like GCC and MSVC
+  let x = x.castToUnsigned
+  when nimvm:
+    result = forwardImpl(countBitsImpl, x)
+  else:
+    when useGCC_builtins:
+      when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int
+      else: result = builtin_popcountll(x.culonglong).int
+    elif useVCC_builtins:
+      when sizeof(x) <= 2: result = builtin_popcnt16(x.uint16).int
+      elif sizeof(x) <= 4: result = builtin_popcnt32(x.uint32).int
+      elif arch64: result = builtin_popcnt64(x.uint64).int
+      else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32).int +
+                     builtin_popcnt32((x.uint64 shr 32'u64).uint32).int
+    elif useICC_builtins:
+      when sizeof(x) <= 4: result = builtin_popcnt32(x.cint).int
+      elif arch64: result = builtin_popcnt64(x.uint64).int
+      else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).cint).int +
+                     builtin_popcnt32((x.uint64 shr 32'u64).cint).int
+    else:
+      when sizeof(x) <= 4: result = countBitsImpl(x.uint32)
+      else: result = countBitsImpl(x.uint64)
+
+proc countBits32*(n: uint32): int {.compilerproc, inline.} =
+  result = countSetBitsImpl(n)
+
+proc countBits64*(n: uint64): int {.compilerproc, inline.} =
+  result = countSetBitsImpl(n)
diff --git a/lib/system/ctypes.nim b/lib/system/ctypes.nim
new file mode 100644
index 000000000..b788274bd
--- /dev/null
+++ b/lib/system/ctypes.nim
@@ -0,0 +1,84 @@
+## Some type definitions for compatibility between different
+## backends and platforms.
+
+type
+  BiggestInt* = int64
+    ## is an alias for the biggest signed integer type the Nim compiler
+    ## supports. Currently this is `int64`, but it is platform-dependent
+    ## in general.
+
+  BiggestFloat* = float64
+    ## is an alias for the biggest floating point type the Nim
+    ## compiler supports. Currently this is `float64`, but it is
+    ## platform-dependent in general.
+
+  BiggestUInt* = uint64
+    ## is an alias for the biggest unsigned integer type the Nim compiler
+    ## supports. Currently this is `uint64`, but it is platform-dependent
+    ## in general.
+
+when defined(windows):
+  type
+    clong* {.importc: "long", nodecl.} = int32
+      ## This is the same as the type `long` in *C*.
+    culong* {.importc: "unsigned long", nodecl.} = uint32
+      ## This is the same as the type `unsigned long` in *C*.
+else:
+  type
+    clong* {.importc: "long", nodecl.} = int
+      ## This is the same as the type `long` in *C*.
+    culong* {.importc: "unsigned long", nodecl.} = uint
+      ## This is the same as the type `unsigned long` in *C*.
+
+type # these work for most platforms:
+  cchar* {.importc: "char", nodecl.} = char
+    ## This is the same as the type `char` in *C*.
+  cschar* {.importc: "signed char", nodecl.} = int8
+    ## This is the same as the type `signed char` in *C*.
+  cshort* {.importc: "short", nodecl.} = int16
+    ## This is the same as the type `short` in *C*.
+  cint* {.importc: "int", nodecl.} = int32
+    ## This is the same as the type `int` in *C*.
+  csize_t* {.importc: "size_t", nodecl.} = uint
+    ## This is the same as the type `size_t` in *C*.
+  clonglong* {.importc: "long long", nodecl.} = int64
+    ## This is the same as the type `long long` in *C*.
+  cfloat* {.importc: "float", nodecl.} = float32
+    ## This is the same as the type `float` in *C*.
+  cdouble* {.importc: "double", nodecl.} = float64
+    ## This is the same as the type `double` in *C*.
+  clongdouble* {.importc: "long double", nodecl.} = BiggestFloat
+    ## This is the same as the type `long double` in *C*.
+    ## This C type is not supported by Nim's code generator.
+
+  cuchar* {.importc: "unsigned char", nodecl, deprecated: "use `char` or `uint8` instead".} = char
+    ## Deprecated: Use `uint8` instead.
+  cushort* {.importc: "unsigned short", nodecl.} = uint16
+    ## This is the same as the type `unsigned short` in *C*.
+  cuint* {.importc: "unsigned int", nodecl.} = uint32
+    ## This is the same as the type `unsigned int` in *C*.
+  culonglong* {.importc: "unsigned long long", nodecl.} = uint64
+    ## This is the same as the type `unsigned long long` in *C*.
+
+type
+  ByteAddress* {.deprecated: "use `uint`".} = int
+    ## is the signed integer type that should be used for converting
+    ## pointers to integer addresses for readability.
+
+  cstringArray* {.importc: "char**", nodecl.} = ptr UncheckedArray[cstring]
+    ## This is binary compatible to the type `char**` in *C*. The array's
+    ## high value is large enough to disable bounds checking in practice.
+    ## Use `cstringArrayToSeq proc <#cstringArrayToSeq,cstringArray,Natural>`_
+    ## to convert it into a `seq[string]`.
+
+when not defined(nimPreviewSlimSystem):
+  # pollutes namespace
+  type
+    PFloat32* {.deprecated: "use `ptr float32`".} = ptr float32
+      ## An alias for `ptr float32`.
+    PFloat64* {.deprecated: "use `ptr float64`".} = ptr float64
+      ## An alias for `ptr float64`.
+    PInt64* {.deprecated: "use `ptr int64`".} = ptr int64
+      ## An alias for `ptr int64`.
+    PInt32* {.deprecated: "use `ptr int32`".} = ptr int32
+      ## An alias for `ptr int32`.
diff --git a/lib/system/cyclebreaker.nim b/lib/system/cyclebreaker.nim
index 3d01eeb9d..45b0a5a65 100644
--- a/lib/system/cyclebreaker.nim
+++ b/lib/system/cyclebreaker.nim
@@ -53,7 +53,6 @@ depth-first traversal suffices.
 
 ]#
 
-type PT = ptr pointer
 include cellseqs_v2
 
 const
@@ -70,22 +69,24 @@ template color(c): untyped = c.rc and colorMask
 template setColor(c, col) =
   c.rc = c.rc and not colorMask or col
 
-proc nimIncRefCyclic(p: pointer) {.compilerRtl, inl.} =
+proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
   let h = head(p)
   inc h.rc, rcIncrement
 
+proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} = discard
+
 type
   GcEnv = object
-    traceStack: CellSeq
+    traceStack: CellSeq[ptr pointer]
 
-proc trace(p: pointer; desc: PNimType; j: var GcEnv) {.inline.} =
+proc trace(p: pointer; desc: PNimTypeV2; j: var GcEnv) {.inline.} =
   when false:
     cprintf("[Trace] desc: %p %p\n", desc, p)
     cprintf("[Trace] trace: %p\n", desc.traceImpl)
   if desc.traceImpl != nil:
     cast[TraceProc](desc.traceImpl)(p, addr(j))
 
-proc nimTraceRef(q: pointer; desc: PNimType; env: pointer) {.compilerRtl.} =
+proc nimTraceRef(q: pointer; desc: PNimTypeV2; env: pointer) {.compilerRtl.} =
   let p = cast[ptr pointer](q)
   when traceCollector:
     cprintf("[Trace] raw: %p\n", p)
@@ -101,11 +102,11 @@ proc nimTraceRefDyn(q: pointer; env: pointer) {.compilerRtl.} =
     cprintf("[TraceDyn] deref: %p\n", p[])
   if p[] != nil:
     var j = cast[ptr GcEnv](env)
-    j.traceStack.add(p, cast[ptr PNimType](p[])[])
+    j.traceStack.add(p, cast[ptr PNimTypeV2](p[])[])
 
 var markerGeneration: int
 
-proc breakCycles(s: Cell; desc: PNimType) =
+proc breakCycles(s: Cell; desc: PNimTypeV2) =
   let markerColor = if (markerGeneration and 1) == 0: colRed
                     else: colYellow
   atomicInc markerGeneration
@@ -137,7 +138,8 @@ proc breakCycles(s: Cell; desc: PNimType) =
       else:
         # anyhow as a link that the produced destructor does not have to follow:
         u[] = nil
-        cprintf("[Bug] %p %s RC %ld\n", t, desc.name, t.rc shr rcShift)
+        when traceCollector:
+          cprintf("[Bug] %p %s RC %ld\n", t, desc.name, t.rc shr rcShift)
   deinit j.traceStack
 
 proc thinout*[T](x: ref T) {.inline.} =
@@ -147,7 +149,7 @@ proc thinout*[T](x: ref T) {.inline.} =
   ## and thus would keep the graph from being freed are `nil`'ed.
   ## This is a form of cycle collection that works well with Nim's ARC
   ## and its associated cost model.
-  proc getDynamicTypeInfo[T](x: T): PNimType {.magic: "GetTypeInfo", noSideEffect, locks: 0.}
+  proc getDynamicTypeInfo[T](x: T): PNimTypeV2 {.magic: "GetTypeInfoV2", noSideEffect.}
 
   breakCycles(head(cast[pointer](x)), getDynamicTypeInfo(x[]))
 
@@ -158,7 +160,7 @@ proc thinout*[T: proc](x: T) {.inline.} =
     """.}
 
   let p = rawEnv(x)
-  breakCycles(head(p), cast[ptr PNimType](p)[])
+  breakCycles(head(p), cast[ptr PNimTypeV2](p)[])
 
 proc nimDecRefIsLastCyclicDyn(p: pointer): bool {.compilerRtl, inl.} =
   if p != nil:
@@ -171,7 +173,7 @@ proc nimDecRefIsLastCyclicDyn(p: pointer): bool {.compilerRtl, inl.} =
       # According to Lins it's correct to do nothing else here.
       #cprintf("[DeCREF] %p\n", p)
 
-proc nimDecRefIsLastCyclicStatic(p: pointer; desc: PNimType): bool {.compilerRtl, inl.} =
+proc nimDecRefIsLastCyclicStatic(p: pointer; desc: PNimTypeV2): bool {.compilerRtl, inl.} =
   if p != nil:
     var cell = head(p)
     if (cell.rc and not rcMask) == 0:
diff --git a/lib/system/cyclicrefs_bacon.nim b/lib/system/cyclicrefs_bacon.nim
deleted file mode 100644
index b9c820e4a..000000000
--- a/lib/system/cyclicrefs_bacon.nim
+++ /dev/null
@@ -1,363 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2020 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-# Cycle collector based on
-# https://researcher.watson.ibm.com/researcher/files/us-bacon/Bacon01Concurrent.pdf
-# And ideas from Lins' in 2008 by the notion of "critical links", see
-# "Cyclic reference counting" by Rafael Dueire Lins
-# R.D. Lins / Information Processing Letters 109 (2008) 71–78
-#
-
-type PT = Cell
-include cellseqs_v2
-
-const
-  colBlack = 0b000
-  colGray = 0b001
-  colWhite = 0b010
-  colPurple = 0b011
-  isCycleCandidate = 0b100 # cell is marked as a cycle candidate
-  jumpStackFlag = 0b1000
-  colorMask = 0b011
-
-type
-  TraceProc = proc (p, env: pointer) {.nimcall, benign.}
-  DisposeProc = proc (p: pointer) {.nimcall, benign.}
-
-template color(c): untyped = c.rc and colorMask
-template setColor(c, col) =
-  when col == colBlack:
-    c.rc = c.rc and not colorMask
-  else:
-    c.rc = c.rc and not colorMask or col
-
-proc nimIncRefCyclic(p: pointer) {.compilerRtl, inl.} =
-  let h = head(p)
-  inc h.rc, rcIncrement
-  h.setColor colPurple # mark as potential cycle!
-
-const
-  useJumpStack = false # for thavlak the jump stack doesn't improve the performance at all
-
-type
-  GcEnv = object
-    traceStack: CellSeq
-    when useJumpStack:
-      jumpStack: CellSeq   # Lins' jump stack in order to speed up traversals
-    freed, touched: int
-
-proc trace(s: Cell; desc: PNimType; j: var GcEnv) {.inline.} =
-  if desc.traceImpl != nil:
-    var p = s +! sizeof(RefHeader)
-    cast[TraceProc](desc.traceImpl)(p, addr(j))
-
-when true:
-  template debug(str: cstring; s: Cell) = discard
-else:
-  proc debug(str: cstring; s: Cell) =
-    let p = s +! sizeof(RefHeader)
-    cprintf("[%s] name %s RC %ld\n", str, p, s.rc shr rcShift)
-
-proc free(s: Cell; desc: PNimType) {.inline.} =
-  when traceCollector:
-    cprintf("[From ] %p rc %ld color %ld\n", s, s.rc shr rcShift, s.color)
-  let p = s +! sizeof(RefHeader)
-
-  debug("free", s)
-
-  if desc.disposeImpl != nil:
-    cast[DisposeProc](desc.disposeImpl)(p)
-  nimRawDispose(p)
-
-proc nimTraceRef(q: pointer; desc: PNimType; env: pointer) {.compilerRtl.} =
-  let p = cast[ptr pointer](q)
-  if p[] != nil:
-    var j = cast[ptr GcEnv](env)
-    j.traceStack.add(head p[], desc)
-
-proc nimTraceRefDyn(q: pointer; env: pointer) {.compilerRtl.} =
-  let p = cast[ptr pointer](q)
-  if p[] != nil:
-    var j = cast[ptr GcEnv](env)
-    j.traceStack.add(head p[], cast[ptr PNimType](p[])[])
-
-var
-  roots {.threadvar.}: CellSeq
-
-proc unregisterCycle(s: Cell) =
-  # swap with the last element. O(1)
-  let idx = s.rootIdx
-  when false:
-    if idx >= roots.len or idx < 0:
-      cprintf("[Bug!] %ld\n", idx)
-      quit 1
-  roots.d[idx] = roots.d[roots.len-1]
-  roots.d[idx][0].rootIdx = idx
-  dec roots.len
-
-proc scanBlack(s: Cell; desc: PNimType; j: var GcEnv) =
-  #[
-  proc scanBlack(s: Cell) =
-    setColor(s, colBlack)
-    for t in sons(s):
-      t.rc = t.rc + rcIncrement
-      if t.color != colBlack:
-        scanBlack(t)
-  ]#
-  debug "scanBlack", s
-  s.setColor colBlack
-  trace(s, desc, j)
-  while j.traceStack.len > 0:
-    let (t, desc) = j.traceStack.pop()
-    inc t.rc, rcIncrement
-    debug "incRef", t
-    if t.color != colBlack:
-      t.setColor colBlack
-      trace(t, desc, j)
-
-proc markGray(s: Cell; desc: PNimType; j: var GcEnv) =
-  #[
-  proc markGray(s: Cell) =
-    if s.color != colGray:
-      setColor(s, colGray)
-      for t in sons(s):
-        t.rc = t.rc - rcIncrement
-        if t.color != colGray:
-          markGray(t)
-  ]#
-  if s.color != colGray:
-    s.setColor colGray
-    inc j.touched
-    trace(s, desc, j)
-    while j.traceStack.len > 0:
-      let (t, desc) = j.traceStack.pop()
-      dec t.rc, rcIncrement
-      when useJumpStack:
-        if (t.rc shr rcShift) >= 0 and (t.rc and jumpStackFlag) == 0:
-          t.rc = t.rc or jumpStackFlag
-          when traceCollector:
-            cprintf("[Now in jumpstack] %p %ld color %ld in jumpstack %ld\n", t, t.rc shr rcShift, t.color, t.rc and jumpStackFlag)
-          j.jumpStack.add(t, desc)
-      if t.color != colGray:
-        t.setColor colGray
-        inc j.touched
-        trace(t, desc, j)
-
-proc scan(s: Cell; desc: PNimType; j: var GcEnv) =
-  #[
-  proc scan(s: Cell) =
-    if s.color == colGray:
-      if s.rc > 0:
-        scanBlack(s)
-      else:
-        s.setColor(colWhite)
-        for t in sons(s): scan(t)
-  ]#
-  if s.color == colGray:
-    if (s.rc shr rcShift) >= 0:
-      scanBlack(s, desc, j)
-      # XXX this should be done according to Lins' paper but currently breaks
-      #when useJumpStack:
-      #  s.setColor colPurple
-    else:
-      when useJumpStack:
-        # first we have to repair all the nodes we have seen
-        # that are still alive; we also need to mark what they
-        # refer to as alive:
-        while j.jumpStack.len > 0:
-          let (t, desc) = j.jumpStack.pop
-          # not in jump stack anymore!
-          t.rc = t.rc and not jumpStackFlag
-          if t.color == colGray and (t.rc shr rcShift) >= 0:
-            scanBlack(t, desc, j)
-            # XXX this should be done according to Lins' paper but currently breaks
-            #t.setColor colPurple
-            when traceCollector:
-              cprintf("[jump stack] %p %ld\n", t, t.rc shr rcShift)
-
-      s.setColor(colWhite)
-      trace(s, desc, j)
-      while j.traceStack.len > 0:
-        let (t, desc) = j.traceStack.pop()
-        if t.color == colGray:
-          if (t.rc shr rcShift) >= 0:
-            scanBlack(t, desc, j)
-          else:
-            when useJumpStack:
-              # first we have to repair all the nodes we have seen
-              # that are still alive; we also need to mark what they
-              # refer to as alive:
-              while j.jumpStack.len > 0:
-                let (t, desc) = j.jumpStack.pop
-                # not in jump stack anymore!
-                t.rc = t.rc and not jumpStackFlag
-                if t.color == colGray and (t.rc shr rcShift) >= 0:
-                  scanBlack(t, desc, j)
-                  # XXX this should be done according to Lins' paper but currently breaks
-                  #t.setColor colPurple
-                  when traceCollector:
-                    cprintf("[jump stack] %p %ld\n", t, t.rc shr rcShift)
-
-            t.setColor(colWhite)
-            trace(t, desc, j)
-
-proc collectWhite(s: Cell; desc: PNimType; j: var GcEnv) =
-  #[
-  proc collectWhite(s: Cell) =
-    if s.color == colWhite and not buffered(s):
-      s.setColor(colBlack)
-      for t in sons(s):
-        collectWhite(t)
-      free(s) # watch out, a bug here!
-  ]#
-  if s.color == colWhite and (s.rc and isCycleCandidate) == 0:
-    s.setColor(colBlack)
-    when false:
-      # optimized version (does not work)
-      j.traceStack.add(s, desc)
-      # this way of writing the loop means we can free all the nodes
-      # afterwards avoiding the "use after free" bug in the paper.
-      var i = 0
-      while i < j.traceStack.len:
-        let (t, desc) = j.traceStack.d[j.traceStack.len-1]
-        inc i
-        if t.color == colWhite and (t.rc and isCycleCandidate) == 0:
-          t.setColor(colBlack)
-          trace(t, desc, j)
-
-      for i in 0 ..< j.traceStack.len:
-        free(j.traceStack.d[i][0], j.traceStack.d[i][1])
-      j.traceStack.len = 0
-    else:
-      var subgraph: CellSeq
-      init subgraph
-      subgraph.add(s, desc)
-      trace(s, desc, j)
-      while j.traceStack.len > 0:
-        let (t, desc) = j.traceStack.pop()
-        if t.color == colWhite and (t.rc and isCycleCandidate) == 0:
-          subgraph.add(t, desc)
-          t.setColor(colBlack)
-          trace(t, desc, j)
-      for i in 0 ..< subgraph.len:
-        free(subgraph.d[i][0], subgraph.d[i][1])
-      inc j.freed, subgraph.len
-      deinit subgraph
-
-proc collectCyclesBacon(j: var GcEnv) =
-  # pretty direct translation from
-  # https://researcher.watson.ibm.com/researcher/files/us-bacon/Bacon01Concurrent.pdf
-  # Fig. 2. Synchronous Cycle Collection
-  #[
-    for s in roots:
-      markGray(s)
-    for s in roots:
-      scan(s)
-    for s in roots:
-      remove s from roots
-      s.buffered = false
-      collectWhite(s)
-  ]#
-  for i in 0 ..< roots.len:
-    markGray(roots.d[i][0], roots.d[i][1], j)
-  for i in 0 ..< roots.len:
-    scan(roots.d[i][0], roots.d[i][1], j)
-
-  for i in 0 ..< roots.len:
-    let s = roots.d[i][0]
-    s.rc = s.rc and not isCycleCandidate
-    collectWhite(s, roots.d[i][1], j)
-  #roots.len = 0
-
-const
-  defaultThreshold = 10_000
-
-var
-  rootsThreshold = defaultThreshold
-
-proc collectCycles() =
-  ## Collect cycles.
-  var j: GcEnv
-  init j.traceStack
-  when useJumpStack:
-    init j.jumpStack
-    collectCyclesBacon(j)
-    while j.jumpStack.len > 0:
-      let (t, desc) = j.jumpStack.pop
-      # not in jump stack anymore!
-      t.rc = t.rc and not jumpStackFlag
-    deinit j.jumpStack
-  else:
-    collectCyclesBacon(j)
-
-  deinit j.traceStack
-  deinit roots
-  # compute the threshold based on the previous history
-  # of the cycle collector's effectiveness:
-  # we're effective when we collected 50% or more of the nodes
-  # we touched. If we're effective, we can reset the threshold:
-  if j.freed * 2 >= j.touched:
-    rootsThreshold = defaultThreshold
-  else:
-    rootsThreshold = rootsThreshold * 3 div 2
-  when false:
-    cprintf("[collectCycles] freed %ld new threshold %ld\n", j.freed, rootsThreshold)
-
-proc registerCycle(s: Cell; desc: PNimType) =
-  if roots.d == nil: init(roots)
-  s.rootIdx = roots.len
-  add(roots, s, desc)
-  if roots.len >= rootsThreshold:
-    collectCycles()
-
-proc GC_fullCollect* =
-  ## Forces a full garbage collection pass. With ``--gc:orc`` triggers the cycle
-  ## collector.
-  collectCycles()
-
-proc GC_enableMarkAndSweep() =
-  rootsThreshold = defaultThreshold
-
-proc GC_disableMarkAndSweep() =
-  rootsThreshold = high(int)
-
-proc rememberCycle(isDestroyAction: bool; s: Cell; desc: PNimType) {.noinline.} =
-  if isDestroyAction:
-    if (s.rc and isCycleCandidate) != 0:
-      s.rc = s.rc and not isCycleCandidate
-      unregisterCycle(s)
-  else:
-    # do not call 'rememberCycle' again unless this cell
-    # got an 'incRef' event:
-    #s.setColor colGreen  # XXX This is wrong!
-    if (s.rc and isCycleCandidate) == 0:
-      s.rc = s.rc or isCycleCandidate
-      registerCycle(s, desc)
-
-proc nimDecRefIsLastCyclicDyn(p: pointer): bool {.compilerRtl, inl.} =
-  if p != nil:
-    var cell = head(p)
-    if (cell.rc and not rcMask) == 0:
-      result = true
-      #cprintf("[DESTROY] %p\n", p)
-    else:
-      dec cell.rc, rcIncrement
-    if cell.color == colPurple:
-      rememberCycle(result, cell, cast[ptr PNimType](p)[])
-
-proc nimDecRefIsLastCyclicStatic(p: pointer; desc: PNimType): bool {.compilerRtl, inl.} =
-  if p != nil:
-    var cell = head(p)
-    if (cell.rc and not rcMask) == 0:
-      result = true
-      #cprintf("[DESTROY] %p %s\n", p, desc.name)
-    else:
-      dec cell.rc, rcIncrement
-    if cell.color == colPurple:
-      rememberCycle(result, cell, desc)
diff --git a/lib/system/cyclicrefs_v2.nim b/lib/system/cyclicrefs_v2.nim
deleted file mode 100644
index 564562c99..000000000
--- a/lib/system/cyclicrefs_v2.nim
+++ /dev/null
@@ -1,207 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2019 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-# Cycle collector based on Lins' Jump Stack and other ideas,
-# see for example:
-# https://pdfs.semanticscholar.org/f2b2/0d168acf38ff86305809a55ef2c5d6ebc787.pdf
-# Further refinement in 2008 by the notion of "critical links", see
-# "Cyclic reference counting" by Rafael Dueire Lins
-# R.D. Lins / Information Processing Letters 109 (2008) 71–78
-
-type PT = ptr pointer
-include cellseqs_v2
-
-const
-  colGreen = 0b000
-  colYellow = 0b001
-  colRed = 0b010
-  jumpStackFlag = 0b100  # stored in jumpstack
-  rcShift = 3      # shift by rcShift to get the reference counter
-  colorMask = 0b011
-
-type
-  TraceProc = proc (p, env: pointer) {.nimcall, benign.}
-  DisposeProc = proc (p: pointer) {.nimcall, benign.}
-
-template color(c): untyped = c.rc and colorMask
-template setColor(c, col) =
-  when col == colGreen:
-    c.rc = c.rc and not colorMask
-  else:
-    c.rc = c.rc and not colorMask or col
-
-proc nimIncRefCyclic(p: pointer) {.compilerRtl, inl.} =
-  let h = head(p)
-  inc h.rc, rcIncrement
-  h.setColor colYellow # mark as potential cycle!
-
-proc markCyclic*[T](x: ref T) {.inline.} =
-  ## Mark the underlying object as a candidate for cycle collections.
-  ## Experimental API. Do not use!
-  let h = head(cast[pointer](x))
-  h.setColor colYellow
-
-type
-  GcEnv = object
-    traceStack: CellSeq
-    jumpStack: CellSeq
-
-proc trace(s: Cell; desc: PNimType; j: var GcEnv) {.inline.} =
-  if desc.traceImpl != nil:
-    var p = s +! sizeof(RefHeader)
-    cast[TraceProc](desc.traceImpl)(p, addr(j))
-
-proc free(s: Cell; desc: PNimType) {.inline.} =
-  when traceCollector:
-    cprintf("[From ] %p rc %ld color %ld in jumpstack %ld\n", s, s.rc shr rcShift,
-            s.color, s.rc and jumpStackFlag)
-  let p = s +! sizeof(RefHeader)
-  if desc.disposeImpl != nil:
-    cast[DisposeProc](desc.disposeImpl)(p)
-  nimRawDispose(p)
-
-proc collect(s: Cell; desc: PNimType; j: var GcEnv) =
-  #[ Algorithm from the paper:
-
-    Collect(S) =
-      If (Color(S) == red) then
-        RC(S) = 1;
-        Color(S) = green;
-        for T in Sons(S) do
-          Remove(<S, T>);
-          if (Color(T) == red) then Collect(T);
-      free_list = S;
-
-  (Whatever that really means...)
-  ]#
-  if s.color == colRed:
-    s.setColor colGreen
-    trace(s, desc, j)
-    while j.traceStack.len > 0:
-      let (p, desc) = j.traceStack.pop()
-      let t = head(p[])
-      #Remove(<S, T>):
-      p[] = nil
-      if t.color == colRed:
-        t.setColor colGreen
-        trace(t, desc, j)
-        free(t, desc)
-    free(s, desc)
-    #cprintf("[Cycle free] %p %ld\n", s, s.rc shr rcShift)
-
-proc markRed(s: Cell; desc: PNimType; j: var GcEnv) =
-  if s.color != colRed:
-    s.setColor colRed
-    trace(s, desc, j)
-    while j.traceStack.len > 0:
-      let (p, desc) = j.traceStack.pop()
-      let t = head(p[])
-      when traceCollector:
-        cprintf("[Cycle dec] %p %ld color %ld in jumpstack %ld\n", t, t.rc shr rcShift, t.color, t.rc and jumpStackFlag)
-      dec t.rc, rcIncrement
-      if (t.rc and not rcMask) >= 0 and (t.rc and jumpStackFlag) == 0:
-        t.rc = t.rc or jumpStackFlag
-        when traceCollector:
-          cprintf("[Now in jumpstack] %p %ld color %ld in jumpstack %ld\n", t, t.rc shr rcShift, t.color, t.rc and jumpStackFlag)
-        j.jumpStack.add(p, desc)
-      if t.color != colRed:
-        t.setColor colRed
-        trace(t, desc, j)
-
-proc scanGreen(s: Cell; desc: PNimType; j: var GcEnv) =
-  s.setColor colGreen
-  trace(s, desc, j)
-  while j.traceStack.len > 0:
-    let (p, desc) = j.traceStack.pop()
-    let t = head(p[])
-    if t.color != colGreen:
-      t.setColor colGreen
-      trace(t, desc, j)
-    inc t.rc, rcIncrement
-    when traceCollector:
-      cprintf("[Cycle inc] %p %ld color %ld\n", t, t.rc shr rcShift, t.color)
-
-proc nimTraceRef(q: pointer; desc: PNimType; env: pointer) {.compilerRtl.} =
-  let p = cast[ptr pointer](q)
-  if p[] != nil:
-    var j = cast[ptr GcEnv](env)
-    j.traceStack.add(p, desc)
-
-proc nimTraceRefDyn(q: pointer; env: pointer) {.compilerRtl.} =
-  let p = cast[ptr pointer](q)
-  if p[] != nil:
-    var j = cast[ptr GcEnv](env)
-    j.traceStack.add(p, cast[ptr PNimType](p[])[])
-
-proc scan(s: Cell; desc: PNimType; j: var GcEnv) =
-  when traceCollector:
-    cprintf("[doScanGreen] %p %ld\n", s, s.rc shr rcShift)
-  # even after trial deletion, `s` is still alive, so undo
-  # the decrefs by calling `scanGreen`:
-  if (s.rc and not rcMask) >= 0:
-    scanGreen(s, desc, j)
-    s.setColor colYellow
-  else:
-    # first we have to repair all the nodes we have seen
-    # that are still alive; we also need to mark what they
-    # refer to as alive:
-    while j.jumpStack.len > 0:
-      let (p, desc) = j.jumpStack.pop
-      let t = head(p[])
-      # not in jump stack anymore!
-      t.rc = t.rc and not jumpStackFlag
-      if t.color == colRed and (t.rc and not rcMask) >= 0:
-        scanGreen(t, desc, j)
-        t.setColor colYellow
-        when traceCollector:
-          cprintf("[jump stack] %p %ld\n", t, t.rc shr rcShift)
-    # we have proven that `s` and its subgraph are dead, so we can
-    # collect these nodes:
-    collect(s, desc, j)
-
-proc traceCycle(s: Cell; desc: PNimType) {.noinline.} =
-  when traceCollector:
-    cprintf("[traceCycle] %p %ld\n", s, s.rc shr rcShift)
-  var j: GcEnv
-  init j.jumpStack
-  init j.traceStack
-  markRed(s, desc, j)
-  scan(s, desc, j)
-  while j.jumpStack.len > 0:
-    let (p, desc) = j.jumpStack.pop
-    let t = head(p[])
-    # not in jump stack anymore!
-    t.rc = t.rc and not jumpStackFlag
-  deinit j.jumpStack
-  deinit j.traceStack
-
-proc nimDecRefIsLastCyclicDyn(p: pointer): bool {.compilerRtl, inl.} =
-  if p != nil:
-    var cell = head(p)
-    if (cell.rc and not rcMask) == 0:
-      result = true
-      #cprintf("[DESTROY] %p\n", p)
-    else:
-      dec cell.rc, rcIncrement
-      if cell.color == colYellow:
-        let desc = cast[ptr PNimType](p)[]
-        traceCycle(cell, desc)
-      # According to Lins it's correct to do nothing else here.
-      #cprintf("[DeCREF] %p\n", p)
-
-proc nimDecRefIsLastCyclicStatic(p: pointer; desc: PNimType): bool {.compilerRtl, inl.} =
-  if p != nil:
-    var cell = head(p)
-    if (cell.rc and not rcMask) == 0:
-      result = true
-      #cprintf("[DESTROY] %p %s\n", p, desc.name)
-    else:
-      dec cell.rc, rcIncrement
-      if cell.color == colYellow: traceCycle(cell, desc)
-      #cprintf("[DeCREF] %p %s %ld\n", p, desc.name, cell.rc)
diff --git a/lib/system/deepcopy.nim b/lib/system/deepcopy.nim
index 801821d26..72d35f518 100644
--- a/lib/system/deepcopy.nim
+++ b/lib/system/deepcopy.nim
@@ -61,8 +61,8 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType;
 proc genericDeepCopyAux(dest, src: pointer, n: ptr TNimNode;
                         tab: var PtrTable) {.benign.} =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   case n.kind
   of nkSlot:
     genericDeepCopyAux(cast[pointer](d +% n.offset),
@@ -85,32 +85,40 @@ proc genericDeepCopyAux(dest, src: pointer, n: ptr TNimNode;
 
 proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   sysAssert(mt != nil, "genericDeepCopyAux 2")
   case mt.kind
   of tyString:
-    var x = cast[PPointer](dest)
-    var s2 = cast[PPointer](s)[]
-    if s2 == nil:
-      unsureAsgnRef(x, s2)
+    when defined(nimSeqsV2):
+      var x = cast[ptr NimStringV2](dest)
+      var s2 = cast[ptr NimStringV2](s)[]
+      nimAsgnStrV2(x[], s2)
     else:
-      unsureAsgnRef(x, copyDeepString(cast[NimString](s2)))
+      var x = cast[PPointer](dest)
+      var s2 = cast[PPointer](s)[]
+      if s2 == nil:
+        unsureAsgnRef(x, s2)
+      else:
+        unsureAsgnRef(x, copyDeepString(cast[NimString](s2)))
   of tySequence:
-    var s2 = cast[PPointer](src)[]
-    var seq = cast[PGenericSeq](s2)
-    var x = cast[PPointer](dest)
-    if s2 == nil:
-      unsureAsgnRef(x, s2)
-      return
-    sysAssert(dest != nil, "genericDeepCopyAux 3")
-    unsureAsgnRef(x, newSeq(mt, seq.len))
-    var dst = cast[ByteAddress](cast[PPointer](dest)[])
-    for i in 0..seq.len-1:
-      genericDeepCopyAux(
-        cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
-        cast[pointer](cast[ByteAddress](s2) +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
-        mt.base, tab)
+    when defined(nimSeqsV2):
+      deepSeqAssignImpl(genericDeepCopyAux, tab)
+    else:
+      var s2 = cast[PPointer](src)[]
+      var seq = cast[PGenericSeq](s2)
+      var x = cast[PPointer](dest)
+      if s2 == nil:
+        unsureAsgnRef(x, s2)
+        return
+      sysAssert(dest != nil, "genericDeepCopyAux 3")
+      unsureAsgnRef(x, newSeq(mt, seq.len))
+      var dst = cast[int](cast[PPointer](dest)[])
+      for i in 0..seq.len-1:
+        genericDeepCopyAux(
+          cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
+          cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
+          mt.base, tab)
   of tyObject:
     # we need to copy m_type field for tyObject, as it could be empty for
     # sequence reallocations:
@@ -132,7 +140,10 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) =
       unsureAsgnRef(cast[PPointer](dest), s2)
     elif mt.base.deepcopy != nil:
       let z = mt.base.deepcopy(s2)
-      unsureAsgnRef(cast[PPointer](dest), z)
+      when defined(nimSeqsV2):
+        cast[PPointer](dest)[] = z
+      else:
+        unsureAsgnRef(cast[PPointer](dest), z)
     else:
       let z = tab.get(s2)
       if z == nil:
@@ -149,10 +160,16 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) =
             let x = usrToCell(s2)
             let realType = x.typ
             sysAssert realType == mt, " types do differ"
-          # this version should work for any possible GC:
-          let typ = if mt.base.kind == tyObject: cast[ptr PNimType](s2)[] else: mt.base
-          let z = newObj(mt, typ.size)
-          unsureAsgnRef(cast[PPointer](dest), z)
+          when defined(nimSeqsV2):
+            let typ = if mt.base.kind == tyObject: cast[PNimType](cast[ptr PNimTypeV2](s2)[].typeInfoV1)
+                      else: mt.base
+            let z = nimNewObj(typ.size, typ.align)
+            cast[PPointer](dest)[] = z
+          else:
+            # this version should work for any other GC:
+            let typ = if mt.base.kind == tyObject: cast[ptr PNimType](s2)[] else: mt.base
+            let z = newObj(mt, typ.size)
+            unsureAsgnRef(cast[PPointer](dest), z)
           tab.put(s2, z)
           genericDeepCopyAux(z, s2, typ, tab)
       else:
@@ -168,11 +185,11 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) =
     copyMem(dest, src, mt.size)
 
 proc genericDeepCopy(dest, src: pointer, mt: PNimType) {.compilerproc.} =
-  GC_disable()
+  when not defined(nimSeqsV2): GC_disable()
   var tab = initPtrTable()
   genericDeepCopyAux(dest, src, mt, tab)
   deinit tab
-  GC_enable()
+  when not defined(nimSeqsV2): GC_enable()
 
 proc genericSeqDeepCopy(dest, src: pointer, mt: PNimType) {.compilerproc.} =
   # also invoked for 'string'
@@ -182,8 +199,8 @@ proc genericSeqDeepCopy(dest, src: pointer, mt: PNimType) {.compilerproc.} =
 proc genericDeepCopyOpenArray(dest, src: pointer, len: int,
                             mt: PNimType) {.compilerproc.} =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   for i in 0..len-1:
     genericDeepCopy(cast[pointer](d +% i *% mt.base.size),
                     cast[pointer](s +% i *% mt.base.size), mt.base)
diff --git a/lib/system/dollars.nim b/lib/system/dollars.nim
index b2f0186b3..89a739d5a 100644
--- a/lib/system/dollars.nim
+++ b/lib/system/dollars.nim
@@ -1,32 +1,38 @@
-import std/private/since
-
-proc `$`*(x: int): string {.magic: "IntToStr", noSideEffect.}
-  ## The stringify operator for an integer argument. Returns `x`
-  ## converted to a decimal string. ``$`` is Nim's general way of
-  ## spelling `toString`:idx:.
-
-when defined(js):
-  since (1, 3):
-    proc `$`*(x: uint): string =
-      ## Caveat: currently implemented as $(cast[int](x)), tied to current
-      ## semantics of js' Number type.
-      # for c, see strmantle.`$`
-      $(cast[int](x))
-
-    proc `$`*(x: uint64): string =
-      ## Compatibility note:
-      ## the results may change in future releases if/when js target implements
-      ## 64bit ints.
-      # pending https://github.com/nim-lang/RFCs/issues/187
-      $(cast[int](x))
-
-proc `$`*(x: int64): string {.magic: "Int64ToStr", noSideEffect.}
-  ## The stringify operator for an integer argument. Returns `x`
-  ## converted to a decimal string.
-
-proc `$`*(x: float): string {.magic: "FloatToStr", noSideEffect.}
-  ## The stringify operator for a float argument. Returns `x`
-  ## converted to a decimal string.
+## `$` is Nim's general way of spelling `toString`:idx:.
+runnableExamples:
+  assert $0.1 == "0.1"
+  assert $(-2*3) == "-6"
+
+import std/private/[digitsutils, miscdollars]
+
+when not defined(nimPreviewSlimSystem):
+  import std/formatfloat
+  export addFloat
+
+  func `$`*(x: float | float32): string =
+    ## Outplace version of `addFloat`.
+    result.addFloat(x)
+
+proc `$`*(x: int): string {.raises: [].} =
+  ## Outplace version of `addInt`.
+  result.addInt(x)
+
+proc `$`*(x: int64): string {.raises: [].} =
+  ## Outplace version of `addInt`.
+  result.addInt(x)
+
+proc `$`*(x: uint64): string {.raises: [].} =
+  ## Outplace version of `addInt`.
+  addInt(result, x)
+
+# same as old `ctfeWhitelist` behavior, whether or not this is a good idea.
+template gen(T) =
+  # xxx simplify this by supporting this in compiler: int{lit} | uint64{lit} | int64{lit}
+  func `$`*(x: T{lit}): string {.compileTime.} = result.addInt(x)
+gen(int)
+gen(uint64)
+gen(int64)
+
 
 proc `$`*(x: bool): string {.magic: "BoolToStr", noSideEffect.}
   ## The stringify operator for a boolean argument. Returns `x`
@@ -35,9 +41,9 @@ proc `$`*(x: bool): string {.magic: "BoolToStr", noSideEffect.}
 proc `$`*(x: char): string {.magic: "CharToStr", noSideEffect.}
   ## The stringify operator for a character argument. Returns `x`
   ## converted to a string.
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   assert $'c' == "c"
+  ##   ```
 
 proc `$`*(x: cstring): string {.magic: "CStrToStr", noSideEffect.}
   ## The stringify operator for a CString argument. Returns `x`
@@ -46,73 +52,40 @@ proc `$`*(x: cstring): string {.magic: "CStrToStr", noSideEffect.}
 proc `$`*(x: string): string {.magic: "StrToStr", noSideEffect.}
   ## The stringify operator for a string argument. Returns `x`
   ## as it is. This operator is useful for generic code, so
-  ## that ``$expr`` also works if ``expr`` is already a string.
+  ## that `$expr` also works if `expr` is already a string.
 
 proc `$`*[Enum: enum](x: Enum): string {.magic: "EnumToStr", noSideEffect.}
   ## The stringify operator for an enumeration argument. This works for
   ## any enumeration type thanks to compiler magic.
   ##
-  ## If a ``$`` operator for a concrete enumeration is provided, this is
+  ## If a `$` operator for a concrete enumeration is provided, this is
   ## used instead. (In other words: *Overwriting* is possible.)
 
 proc `$`*(t: typedesc): string {.magic: "TypeTrait".}
   ## Returns the name of the given type.
   ##
-  ## For more procedures dealing with ``typedesc``, see
+  ## For more procedures dealing with `typedesc`, see
   ## `typetraits module <typetraits.html>`_.
   ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   doAssert $(typeof(42)) == "int"
   ##   doAssert $(typeof("Foo")) == "string"
   ##   static: doAssert $(typeof(@['A', 'B'])) == "seq[char]"
+  ##   ```
 
-when defined(nimHasIsNamedTuple):
-  proc isNamedTuple(T: typedesc): bool {.magic: "TypeTrait".}
-else:
-  # for bootstrap; remove after release 1.2
-  proc isNamedTuple(T: typedesc): bool =
-    # Taken from typetraits.
-    when T isnot tuple: result = false
-    else:
-      var t: T
-      for name, _ in t.fieldPairs:
-        when name == "Field0":
-          return compiles(t.Field0)
-        else:
-          return true
-      return false
-
-
-proc `$`*[T: tuple|object](x: T): string =
-  ## Generic ``$`` operator for tuples that is lifted from the components
+proc `$`*[T: tuple](x: T): string =
+  ## Generic `$` operator for tuples that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   $(23, 45) == "(23, 45)"
   ##   $(a: 23, b: 45) == "(a: 23, b: 45)"
   ##   $() == "()"
-  result = "("
-  const isNamed = T is object or isNamedTuple(T)
-  var count = 0
-  for name, value in fieldPairs(x):
-    if count > 0: result.add(", ")
-    when isNamed:
-      result.add(name)
-      result.add(": ")
-    count.inc
-    when compiles($value):
-      when value isnot string and value isnot seq and compiles(value.isNil):
-        if value.isNil: result.add "nil"
-        else: result.addQuoted(value)
-      else:
-        result.addQuoted(value)
-    else:
-      result.add("...")
-  when not isNamed:
-    if count == 1:
-      result.add(",") # $(1,) should print as the semantically legal (1,)
-  result.add(")")
+  ##   ```
+  tupleObjectDollar(result, x)
 
+when not defined(nimPreviewSlimSystem):
+  import std/objectdollar
+  export objectdollar
 
 proc collectionToString[T](x: T, prefix, separator, suffix: string): string =
   result = prefix
@@ -134,27 +107,27 @@ proc collectionToString[T](x: T, prefix, separator, suffix: string): string =
   result.add(suffix)
 
 proc `$`*[T](x: set[T]): string =
-  ## Generic ``$`` operator for sets that is lifted from the components
+  ## Generic `$` operator for sets that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   ${23, 45} == "{23, 45}"
+  ##   ```
   collectionToString(x, "{", ", ", "}")
 
 proc `$`*[T](x: seq[T]): string =
-  ## Generic ``$`` operator for seqs that is lifted from the components
+  ## Generic `$` operator for seqs that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   $(@[23, 45]) == "@[23, 45]"
+  ##   ```
   collectionToString(x, "@[", ", ", "]")
 
 proc `$`*[T, U](x: HSlice[T, U]): string =
-  ## Generic ``$`` operator for slices that is lifted from the components
+  ## Generic `$` operator for slices that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##  $(1 .. 5) == "1 .. 5"
+  ##  ```
   result = $x.a
   result.add(" .. ")
   result.add($x.b)
@@ -162,13 +135,13 @@ proc `$`*[T, U](x: HSlice[T, U]): string =
 
 when not defined(nimNoArrayToString):
   proc `$`*[T, IDX](x: array[IDX, T]): string =
-    ## Generic ``$`` operator for arrays that is lifted from the components.
+    ## Generic `$` operator for arrays that is lifted from the components.
     collectionToString(x, "[", ", ", "]")
 
 proc `$`*[T](x: openArray[T]): string =
-  ## Generic ``$`` operator for openarrays that is lifted from the components
+  ## Generic `$` operator for openarrays that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   $(@[23, 45].toOpenArray(0, 1)) == "[23, 45]"
+  ##   ```
   collectionToString(x, "[", ", ", "]")
diff --git a/lib/system/dyncalls.nim b/lib/system/dyncalls.nim
index fe4797c5f..2162b234f 100644
--- a/lib/system/dyncalls.nim
+++ b/lib/system/dyncalls.nim
@@ -47,14 +47,14 @@ proc nimLoadLibraryError(path: string) =
         copyMem(msg[msgIdx].addr, badExe.cstring, badExe.len)
       discard MessageBoxA(nil, msg[0].addr, nil, 0)
   cstderr.rawWrite("\n")
-  quit(1)
+  rawQuit(1)
 
 proc procAddrError(name: cstring) {.compilerproc, nonReloadable, hcrInline.} =
   # carefully written to avoid memory allocation:
   cstderr.rawWrite("could not import: ")
   cstderr.rawWrite(name)
   cstderr.rawWrite("\n")
-  quit(1)
+  rawQuit(1)
 
 # this code was inspired from Lua's source code:
 # Lua - An Extensible Extension Language
@@ -91,7 +91,10 @@ when defined(posix):
     dlclose(lib)
 
   proc nimLoadLibrary(path: string): LibHandle =
-    result = dlopen(path, RTLD_NOW)
+    let flags =
+      when defined(globalSymbols): RTLD_NOW or RTLD_GLOBAL
+      else: RTLD_NOW
+    result = dlopen(path, flags)
     when defined(nimDebugDlOpen):
       let error = dlerror()
       if error != nil:
@@ -158,41 +161,38 @@ elif defined(windows) or defined(dos):
         dec(m)
         k = k div 10
         if k == 0: break
-      when defined(nimNoArrayToCstringConversion):
-        result = getProcAddress(cast[THINSTANCE](lib), addr decorated)
-      else:
-        result = getProcAddress(cast[THINSTANCE](lib), decorated)
+      result = getProcAddress(cast[THINSTANCE](lib), cast[cstring](addr decorated))
       if result != nil: return
     procAddrError(name)
 
 elif defined(genode):
 
-  proc nimUnloadLibrary(lib: LibHandle) {.
-    error: "nimUnloadLibrary not implemented".}
+  proc nimUnloadLibrary(lib: LibHandle) =
+    raiseAssert("nimUnloadLibrary not implemented")
 
-  proc nimLoadLibrary(path: string): LibHandle {.
-    error: "nimLoadLibrary not implemented".}
+  proc nimLoadLibrary(path: string): LibHandle =
+    raiseAssert("nimLoadLibrary not implemented")
 
-  proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr {.
-    error: "nimGetProcAddr not implemented".}
+  proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
+    raiseAssert("nimGetProcAddr not implemented")
 
-elif defined(nintendoswitch):
+elif defined(nintendoswitch) or defined(freertos) or defined(zephyr) or defined(nuttx):
   proc nimUnloadLibrary(lib: LibHandle) =
     cstderr.rawWrite("nimUnLoadLibrary not implemented")
     cstderr.rawWrite("\n")
-    quit(1)
+    rawQuit(1)
 
   proc nimLoadLibrary(path: string): LibHandle =
     cstderr.rawWrite("nimLoadLibrary not implemented")
     cstderr.rawWrite("\n")
-    quit(1)
+    rawQuit(1)
 
 
   proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
     cstderr.rawWrite("nimGetProAddr not implemented")
     cstderr.rawWrite(name)
     cstderr.rawWrite("\n")
-    quit(1)
+    rawQuit(1)
 
 else:
   {.error: "no implementation for dyncalls".}
diff --git a/lib/system/embedded.nim b/lib/system/embedded.nim
index c4f15a336..ea6776f58 100644
--- a/lib/system/embedded.nim
+++ b/lib/system/embedded.nim
@@ -19,8 +19,9 @@ proc nimFrame(s: PFrame) {.compilerRtl, inl, exportc: "nimFrame".} = discard
 proc popFrame {.compilerRtl, inl.} = discard
 
 proc setFrame(s: PFrame) {.compilerRtl, inl.} = discard
-proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} = discard
-proc popSafePoint {.compilerRtl, inl.} = discard
+when not gotoBasedExceptions:
+  proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} = discard
+  proc popSafePoint {.compilerRtl, inl.} = discard
 proc pushCurrentException(e: ref Exception) {.compilerRtl, inl.} = discard
 proc popCurrentException {.compilerRtl, inl.} = discard
 
@@ -34,6 +35,10 @@ proc quitOrDebug() {.noreturn, importc: "abort", header: "<stdlib.h>", nodecl.}
 proc raiseException(e: ref Exception, ename: cstring) {.compilerRtl.} =
   sysFatal(ReraiseDefect, "exception handling is not available")
 
+proc raiseExceptionEx(e: sink(ref Exception), ename, procname, filename: cstring,
+                      line: int) {.compilerRtl.} =
+  sysFatal(ReraiseDefect, "exception handling is not available")
+
 proc reraiseException() {.compilerRtl.} =
   sysFatal(ReraiseDefect, "no exception to reraise")
 
@@ -44,3 +49,13 @@ proc setControlCHook(hook: proc () {.noconv.}) = discard
 
 proc closureIterSetupExc(e: ref Exception) {.compilerproc, inline.} =
   sysFatal(ReraiseDefect, "exception handling is not available")
+
+when gotoBasedExceptions:
+  var nimInErrorMode {.threadvar.}: bool
+
+  proc nimErrorFlag(): ptr bool {.compilerRtl, inl.} =
+    result = addr(nimInErrorMode)
+
+  proc nimTestErrorFlag() {.compilerRtl.} =
+    if nimInErrorMode:
+      sysFatal(ReraiseDefect, "exception handling is not available")
diff --git a/lib/system/exceptions.nim b/lib/system/exceptions.nim
index 3006cff19..63588f858 100644
--- a/lib/system/exceptions.nim
+++ b/lib/system/exceptions.nim
@@ -1,56 +1,13 @@
-const NimStackTraceMsgs =
-  when defined(nimHasStacktraceMsgs): compileOption("stacktraceMsgs")
-  else: false
+## Exception and effect types used in Nim code.
 
 type
-  RootEffect* {.compilerproc.} = object of RootObj ## \
-    ## Base effect class.
-    ##
-    ## Each effect should inherit from `RootEffect` unless you know what
-    ## you're doing.
   TimeEffect* = object of RootEffect   ## Time effect.
   IOEffect* = object of RootEffect     ## IO effect.
   ReadIOEffect* = object of IOEffect   ## Effect describing a read IO operation.
   WriteIOEffect* = object of IOEffect  ## Effect describing a write IO operation.
   ExecIOEffect* = object of IOEffect   ## Effect describing an executing IO operation.
 
-  StackTraceEntry* = object ## In debug mode exceptions store the stack trace that led
-                            ## to them. A `StackTraceEntry` is a single entry of the
-                            ## stack trace.
-    procname*: cstring      ## Name of the proc that is currently executing.
-    line*: int              ## Line number of the proc that is currently executing.
-    filename*: cstring      ## Filename of the proc that is currently executing.
-    when NimStackTraceMsgs:
-      frameMsg*: string     ## When a stacktrace is generated in a given frame and
-                            ## rendered at a later time, we should ensure the stacktrace
-                            ## data isn't invalidated; any pointer into PFrame is
-                            ## subject to being invalidated so shouldn't be stored.
-
-  Exception* {.compilerproc, magic: "Exception".} = object of RootObj ## \
-    ## Base exception class.
-    ##
-    ## Each exception has to inherit from `Exception`. See the full `exception
-    ## hierarchy <manual.html#exception-handling-exception-hierarchy>`_.
-    parent*: ref Exception ## Parent exception (can be used as a stack).
-    name*: cstring         ## The exception's name is its Nim identifier.
-                           ## This field is filled automatically in the
-                           ## ``raise`` statement.
-    msg* {.exportc: "message".}: string ## The exception's message. Not
-                                        ## providing an exception message
-                                        ## is bad style.
-    when defined(js):
-      trace: string
-    else:
-      trace: seq[StackTraceEntry]
-    up: ref Exception # used for stacking exceptions. Not exported!
-
-  Defect* = object of Exception ## \
-    ## Abstract base class for all exceptions that Nim's runtime raises
-    ## but that are strictly uncatchable as they can also be mapped to
-    ## a ``quit`` / ``trap`` / ``exit`` operation.
-
-  CatchableError* = object of Exception ## \
-    ## Abstract class for all exceptions that are catchable.
+type
   IOError* = object of CatchableError ## \
     ## Raised if an IO error occurred.
   EOFError* = object of IOError ## \
@@ -105,13 +62,13 @@ type
     ## Raised if an object gets assigned to its parent's object.
   ObjectConversionDefect* = object of Defect ## \
     ## Raised if an object is converted to an incompatible object type.
-    ## You can use ``of`` operator to check if conversion will succeed.
+    ## You can use `of` operator to check if conversion will succeed.
   FloatingPointDefect* = object of Defect ## \
     ## Base class for floating point exceptions.
   FloatInvalidOpDefect* = object of FloatingPointDefect ## \
     ## Raised by invalid operations according to IEEE.
     ##
-    ## Raised by ``0.0/0.0``, for example.
+    ## Raised by `0.0/0.0`, for example.
   FloatDivByZeroDefect* = object of FloatingPointDefect ## \
     ## Raised by division by zero.
     ##
@@ -129,35 +86,37 @@ type
     ## Raised for inexact results.
     ##
     ## The operation produced a result that cannot be represented with infinite
-    ## precision -- for example: ``2.0 / 3.0, log(1.1)``
+    ## precision -- for example: `2.0 / 3.0, log(1.1)`
     ##
     ## **Note**: Nim currently does not detect these!
   DeadThreadDefect* = object of Defect ## \
     ## Raised if it is attempted to send a message to a dead thread.
   NilAccessDefect* = object of Defect ## \
-    ## Raised on dereferences of ``nil`` pointers.
+    ## Raised on dereferences of `nil` pointers.
     ##
     ## This is only raised if the `segfaults module <segfaults.html>`_ was imported!
 
-  ArithmeticError* {.deprecated: "See corresponding Defect".} = ArithmeticDefect
-  DivByZeroError* {.deprecated: "See corresponding Defect".} = DivByZeroDefect
-  OverflowError* {.deprecated: "See corresponding Defect".} = OverflowDefect
-  AccessViolationError* {.deprecated: "See corresponding Defect".} = AccessViolationDefect
-  AssertionError* {.deprecated: "See corresponding Defect".} = AssertionDefect
-  OutOfMemError* {.deprecated: "See corresponding Defect".} = OutOfMemDefect
-  IndexError* {.deprecated: "See corresponding Defect".} = IndexDefect
+when not defined(nimPreviewSlimSystem):
+  type
+    ArithmeticError* {.deprecated: "See corresponding Defect".} = ArithmeticDefect
+    DivByZeroError* {.deprecated: "See corresponding Defect".} = DivByZeroDefect
+    OverflowError* {.deprecated: "See corresponding Defect".} = OverflowDefect
+    AccessViolationError* {.deprecated: "See corresponding Defect".} = AccessViolationDefect
+    AssertionError* {.deprecated: "See corresponding Defect".} = AssertionDefect
+    OutOfMemError* {.deprecated: "See corresponding Defect".} = OutOfMemDefect
+    IndexError* {.deprecated: "See corresponding Defect".} = IndexDefect
 
-  FieldError* {.deprecated: "See corresponding Defect".} = FieldDefect
-  RangeError* {.deprecated: "See corresponding Defect".} = RangeDefect
-  StackOverflowError* {.deprecated: "See corresponding Defect".} = StackOverflowDefect
-  ReraiseError* {.deprecated: "See corresponding Defect".} = ReraiseDefect
-  ObjectAssignmentError* {.deprecated: "See corresponding Defect".} = ObjectAssignmentDefect
-  ObjectConversionError* {.deprecated: "See corresponding Defect".} = ObjectConversionDefect
-  FloatingPointError* {.deprecated: "See corresponding Defect".} = FloatingPointDefect
-  FloatInvalidOpError* {.deprecated: "See corresponding Defect".} = FloatInvalidOpDefect
-  FloatDivByZeroError* {.deprecated: "See corresponding Defect".} = FloatDivByZeroDefect
-  FloatOverflowError* {.deprecated: "See corresponding Defect".} = FloatOverflowDefect
-  FloatUnderflowError* {.deprecated: "See corresponding Defect".} = FloatUnderflowDefect
-  FloatInexactError* {.deprecated: "See corresponding Defect".} = FloatInexactDefect
-  DeadThreadError* {.deprecated: "See corresponding Defect".} = DeadThreadDefect
-  NilAccessError* {.deprecated: "See corresponding Defect".} = NilAccessDefect
+    FieldError* {.deprecated: "See corresponding Defect".} = FieldDefect
+    RangeError* {.deprecated: "See corresponding Defect".} = RangeDefect
+    StackOverflowError* {.deprecated: "See corresponding Defect".} = StackOverflowDefect
+    ReraiseError* {.deprecated: "See corresponding Defect".} = ReraiseDefect
+    ObjectAssignmentError* {.deprecated: "See corresponding Defect".} = ObjectAssignmentDefect
+    ObjectConversionError* {.deprecated: "See corresponding Defect".} = ObjectConversionDefect
+    FloatingPointError* {.deprecated: "See corresponding Defect".} = FloatingPointDefect
+    FloatInvalidOpError* {.deprecated: "See corresponding Defect".} = FloatInvalidOpDefect
+    FloatDivByZeroError* {.deprecated: "See corresponding Defect".} = FloatDivByZeroDefect
+    FloatOverflowError* {.deprecated: "See corresponding Defect".} = FloatOverflowDefect
+    FloatUnderflowError* {.deprecated: "See corresponding Defect".} = FloatUnderflowDefect
+    FloatInexactError* {.deprecated: "See corresponding Defect".} = FloatInexactDefect
+    DeadThreadError* {.deprecated: "See corresponding Defect".} = DeadThreadDefect
+    NilAccessError* {.deprecated: "See corresponding Defect".} = NilAccessDefect
diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim
index 089048163..dae5c4a4a 100644
--- a/lib/system/excpt.nim
+++ b/lib/system/excpt.nim
@@ -11,6 +11,9 @@
 # use the heap (and nor exceptions) do not include the GC or memory allocator.
 
 import std/private/miscdollars
+import stacktraces
+
+const noStacktraceAvailable = "No stack traceback available\n"
 
 var
   errorMessageWriter*: (proc(msg: string) {.tags: [WriteIOEffect], benign,
@@ -25,13 +28,21 @@ when defined(windows):
 
 when not defined(windows) or not defined(guiapp):
   proc writeToStdErr(msg: cstring) = rawWrite(cstderr, msg)
+  proc writeToStdErr(msg: cstring, length: int) =
+    rawWriteString(cstderr, msg, length)
 else:
   proc MessageBoxA(hWnd: pointer, lpText, lpCaption: cstring, uType: int): int32 {.
     header: "<windows.h>", nodecl.}
   proc writeToStdErr(msg: cstring) =
     discard MessageBoxA(nil, msg, nil, 0)
+  proc writeToStdErr(msg: cstring, length: int) =
+    discard MessageBoxA(nil, msg, nil, 0)
+
+proc writeToStdErr(msg: string) {.inline.} =
+  # fix bug #13115: handles correctly '\0' unlike default implicit conversion to cstring
+  writeToStdErr(msg.cstring, msg.len)
 
-proc showErrorMessage(data: cstring) {.gcsafe, raises: [].} =
+proc showErrorMessage(data: cstring, length: int) {.gcsafe, raises: [].} =
   var toWrite = true
   if errorMessageWriter != nil:
     try:
@@ -44,7 +55,10 @@ proc showErrorMessage(data: cstring) {.gcsafe, raises: [].} =
       # stderr not available by default, use the LOG session
       echo data
     else:
-      writeToStdErr(data)
+      writeToStdErr(data, length)
+
+proc showErrorMessage2(data: string) {.inline.} =
+  showErrorMessage(data.cstring, data.len)
 
 proc chckIndx(i, a, b: int): int {.inline, compilerproc, benign.}
 proc chckRange(i, a, b: int): int {.inline, compilerproc, benign.}
@@ -59,26 +73,45 @@ type
 
 when NimStackTraceMsgs:
   var frameMsgBuf* {.threadvar.}: string
+
+when not defined(nimV2):
+  var
+    framePtr {.threadvar.}: PFrame
+
 var
-  framePtr {.threadvar.}: PFrame
-  excHandler {.threadvar.}: PSafePoint
-    # list of exception handlers
-    # a global variable for the root of all try blocks
   currException {.threadvar.}: ref Exception
-  gcFramePtr {.threadvar.}: GcFrame
 
-type
-  FrameState = tuple[gcFramePtr: GcFrame, framePtr: PFrame,
-                     excHandler: PSafePoint, currException: ref Exception]
+when not gotoBasedExceptions:
+  var
+    excHandler {.threadvar.}: PSafePoint
+      # list of exception handlers
+      # a global variable for the root of all try blocks
+    gcFramePtr {.threadvar.}: GcFrame
+
+when gotoBasedExceptions:
+  type
+    FrameState = tuple[framePtr: PFrame,
+                      currException: ref Exception]
+else:
+  type
+    FrameState = tuple[gcFramePtr: GcFrame, framePtr: PFrame,
+                      excHandler: PSafePoint, currException: ref Exception]
 
 proc getFrameState*(): FrameState {.compilerRtl, inl.} =
-  return (gcFramePtr, framePtr, excHandler, currException)
+  when gotoBasedExceptions:
+    return (framePtr, currException)
+  else:
+    return (gcFramePtr, framePtr, excHandler, currException)
 
 proc setFrameState*(state: FrameState) {.compilerRtl, inl.} =
-  gcFramePtr = state.gcFramePtr
-  framePtr = state.framePtr
-  excHandler = state.excHandler
-  currException = state.currException
+  when gotoBasedExceptions:
+    framePtr = state.framePtr
+    currException = state.currException
+  else:
+    gcFramePtr = state.gcFramePtr
+    framePtr = state.framePtr
+    excHandler = state.excHandler
+    currException = state.currException
 
 proc getFrame*(): PFrame {.compilerRtl, inl.} = framePtr
 
@@ -100,29 +133,30 @@ when false:
 proc setFrame*(s: PFrame) {.compilerRtl, inl.} =
   framePtr = s
 
-proc getGcFrame*(): GcFrame {.compilerRtl, inl.} = gcFramePtr
-proc popGcFrame*() {.compilerRtl, inl.} = gcFramePtr = gcFramePtr.prev
-proc setGcFrame*(s: GcFrame) {.compilerRtl, inl.} = gcFramePtr = s
-proc pushGcFrame*(s: GcFrame) {.compilerRtl, inl.} =
-  s.prev = gcFramePtr
-  zeroMem(cast[pointer](cast[int](s)+%sizeof(GcFrameHeader)), s.len*sizeof(pointer))
-  gcFramePtr = s
+when not gotoBasedExceptions:
+  proc getGcFrame*(): GcFrame {.compilerRtl, inl.} = gcFramePtr
+  proc popGcFrame*() {.compilerRtl, inl.} = gcFramePtr = gcFramePtr.prev
+  proc setGcFrame*(s: GcFrame) {.compilerRtl, inl.} = gcFramePtr = s
+  proc pushGcFrame*(s: GcFrame) {.compilerRtl, inl.} =
+    s.prev = gcFramePtr
+    zeroMem(cast[pointer](cast[int](s)+%sizeof(GcFrameHeader)), s.len*sizeof(pointer))
+    gcFramePtr = s
 
-proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =
-  s.prev = excHandler
-  excHandler = s
+  proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =
+    s.prev = excHandler
+    excHandler = s
 
-proc popSafePoint {.compilerRtl, inl.} =
-  excHandler = excHandler.prev
+  proc popSafePoint {.compilerRtl, inl.} =
+    excHandler = excHandler.prev
 
 proc pushCurrentException(e: sink(ref Exception)) {.compilerRtl, inl.} =
   e.up = currException
   currException = e
-  #showErrorMessage "A"
+  #showErrorMessage2 "A"
 
 proc popCurrentException {.compilerRtl, inl.} =
   currException = currException.up
-  #showErrorMessage "B"
+  #showErrorMessage2 "B"
 
 proc popCurrentExceptionEx(id: uint) {.compilerRtl.} =
   discard "only for bootstrapping compatbility"
@@ -132,25 +166,11 @@ proc closureIterSetupExc(e: ref Exception) {.compilerproc, inline.} =
 
 # some platforms have native support for stack traces:
 const
-  nativeStackTraceSupported* = (defined(macosx) or defined(linux)) and
+  nativeStackTraceSupported = (defined(macosx) or defined(linux)) and
                               not NimStackTrace
   hasSomeStackTrace = NimStackTrace or defined(nimStackTraceOverride) or
     (defined(nativeStackTrace) and nativeStackTraceSupported)
 
-when defined(nimStackTraceOverride):
-  type StackTraceOverrideProc* = proc (): string {.nimcall, noinline, benign, raises: [], tags: [].}
-    ## Procedure type for overriding the default stack trace.
-
-  var stackTraceOverrideGetTraceback: StackTraceOverrideProc = proc(): string {.noinline.} =
-    result = "Stack trace override procedure not registered.\n"
-
-  proc registerStackTraceOverride*(overrideProc: StackTraceOverrideProc) =
-    ## Override the default stack trace inside rawWriteStackTrace() with your
-    ## own procedure.
-    stackTraceOverrideGetTraceback = overrideProc
-
-  proc auxWriteStackTraceWithOverride(s: var string) =
-    add(s, stackTraceOverrideGetTraceback())
 
 when defined(nativeStacktrace) and nativeStackTraceSupported:
   type
@@ -168,13 +188,13 @@ when defined(nativeStacktrace) and nativeStackTraceSupported:
 
   when not hasThreadSupport:
     var
-      tempAddresses: array[0..127, pointer] # should not be alloc'd on stack
+      tempAddresses: array[maxStackTraceLines, pointer] # should not be alloc'd on stack
       tempDlInfo: TDl_info
 
   proc auxWriteStackTraceWithBacktrace(s: var string) =
     when hasThreadSupport:
       var
-        tempAddresses: array[0..127, pointer] # but better than a threadvar
+        tempAddresses: array[maxStackTraceLines, pointer] # but better than a threadvar
         tempDlInfo: TDl_info
     # This is allowed to be expensive since it only happens during crashes
     # (but this way you don't need manual stack tracing)
@@ -202,11 +222,7 @@ when defined(nativeStacktrace) and nativeStackTraceSupported:
 
 when hasSomeStackTrace and not hasThreadSupport:
   var
-    tempFrames: array[0..127, PFrame] # should not be alloc'd on stack
-
-const
-  reraisedFromBegin = -10
-  reraisedFromEnd = -100
+    tempFrames: array[maxStackTraceLines, PFrame] # should not be alloc'd on stack
 
 template reraisedFrom(z): untyped =
   StackTraceEntry(procname: nil, line: z, filename: nil)
@@ -246,14 +262,19 @@ template addFrameEntry(s: var string, f: StackTraceEntry|PFrame) =
   for k in 1..max(1, 25-(s.len-oldLen)): add(s, ' ')
   add(s, f.procname)
   when NimStackTraceMsgs:
-    when type(f) is StackTraceEntry:
+    when typeof(f) is StackTraceEntry:
       add(s, f.frameMsg)
     else:
       var first = if f.prev == nil: 0 else: f.prev.frameMsgLen
       for i in first..<f.frameMsgLen: add(s, frameMsgBuf[i])
   add(s, "\n")
 
-proc `$`(s: seq[StackTraceEntry]): string =
+proc `$`(stackTraceEntries: seq[StackTraceEntry]): string =
+  when defined(nimStackTraceOverride):
+    let s = addDebuggingInfo(stackTraceEntries)
+  else:
+    let s = stackTraceEntries
+
   result = newStringOfCap(2000)
   for i in 0 .. s.len-1:
     if s[i].line == reraisedFromBegin: result.add "[[reraised from:\n"
@@ -265,7 +286,7 @@ when hasSomeStackTrace:
   proc auxWriteStackTrace(f: PFrame, s: var string) =
     when hasThreadSupport:
       var
-        tempFrames: array[0..127, PFrame] # but better than a threadvar
+        tempFrames: array[maxStackTraceLines, PFrame] # but better than a threadvar
     const
       firstCalls = 32
     var
@@ -313,7 +334,7 @@ when hasSomeStackTrace:
       auxWriteStackTraceWithOverride(s)
     elif NimStackTrace:
       if framePtr == nil:
-        add(s, "No stack traceback available\n")
+        add(s, noStacktraceAvailable)
       else:
         add(s, "Traceback (most recent call last)\n")
         auxWriteStackTrace(framePtr, s)
@@ -321,10 +342,12 @@ when hasSomeStackTrace:
       add(s, "Traceback from system (most recent call last)\n")
       auxWriteStackTraceWithBacktrace(s)
     else:
-      add(s, "No stack traceback available\n")
+      add(s, noStacktraceAvailable)
 
   proc rawWriteStackTrace(s: var seq[StackTraceEntry]) =
-    when NimStackTrace:
+    when defined(nimStackTraceOverride):
+      auxWriteStackTraceWithOverride(s)
+    elif NimStackTrace:
       auxWriteStackTrace(framePtr, s)
     else:
       s = @[]
@@ -348,10 +371,10 @@ var onUnhandledException*: (proc (errorMsg: string) {.
   nimcall, gcsafe.}) ## Set this error \
   ## handler to override the existing behaviour on an unhandled exception.
   ##
-  ## The default is to write a stacktrace to ``stderr`` and then call ``quit(1)``.
+  ## The default is to write a stacktrace to `stderr` and then call `quit(1)`.
   ## Unstable API.
 
-proc reportUnhandledErrorAux(e: ref Exception) {.nodestroy.} =
+proc reportUnhandledErrorAux(e: ref Exception) {.nodestroy, gcsafe.} =
   when hasSomeStackTrace:
     var buf = newStringOfCap(2000)
     if e.trace.len == 0:
@@ -359,7 +382,8 @@ proc reportUnhandledErrorAux(e: ref Exception) {.nodestroy.} =
     else:
       var trace = $e.trace
       add(buf, trace)
-      `=destroy`(trace)
+      {.gcsafe.}:
+        `=destroy`(trace)
     add(buf, "Error: unhandled exception: ")
     add(buf, e.msg)
     add(buf, " [")
@@ -369,13 +393,14 @@ proc reportUnhandledErrorAux(e: ref Exception) {.nodestroy.} =
     if onUnhandledException != nil:
       onUnhandledException(buf)
     else:
-      showErrorMessage(buf)
-    `=destroy`(buf)
+      showErrorMessage2(buf)
+    {.gcsafe.}:
+      `=destroy`(buf)
   else:
     # ugly, but avoids heap allocations :-)
     template xadd(buf, s, slen) =
       if L + slen < high(buf):
-        copyMem(addr(buf[L]), cstring(s), slen)
+        copyMem(addr(buf[L]), (when s is cstring: s else: cstring(s)), slen)
         inc L, slen
     template add(buf, s) =
       xadd(buf, s, s.len)
@@ -384,39 +409,34 @@ proc reportUnhandledErrorAux(e: ref Exception) {.nodestroy.} =
     if e.trace.len != 0:
       var trace = $e.trace
       add(buf, trace)
-      `=destroy`(trace)
+      {.gcsafe.}:
+        `=destroy`(trace)
     add(buf, "Error: unhandled exception: ")
     add(buf, e.msg)
     add(buf, " [")
     xadd(buf, e.name, e.name.len)
     add(buf, "]\n")
-    when defined(nimNoArrayToCstringConversion):
-      template tbuf(): untyped = addr buf
-    else:
-      template tbuf(): untyped = buf
-
     if onUnhandledException != nil:
-      onUnhandledException($tbuf())
+      onUnhandledException($cast[cstring](buf.addr))
     else:
-      showErrorMessage(tbuf())
+      showErrorMessage(cast[cstring](buf.addr), L)
 
-proc reportUnhandledError(e: ref Exception) {.nodestroy.} =
+proc reportUnhandledError(e: ref Exception) {.nodestroy, gcsafe.} =
   if unhandledExceptionHook != nil:
     unhandledExceptionHook(e)
   when hostOS != "any":
     reportUnhandledErrorAux(e)
-  else:
-    discard ()
 
-proc nimLeaveFinally() {.compilerRtl.} =
-  when defined(cpp) and not defined(noCppExceptions) and not gotoBasedExceptions:
-    {.emit: "throw;".}
-  else:
-    if excHandler != nil:
-      c_longjmp(excHandler.context, 1)
+when not gotoBasedExceptions:
+  proc nimLeaveFinally() {.compilerRtl.} =
+    when defined(cpp) and not defined(noCppExceptions) and not gotoBasedExceptions:
+      {.emit: "throw;".}
     else:
-      reportUnhandledError(currException)
-      quit(1)
+      if excHandler != nil:
+        c_longjmp(excHandler.context, 1)
+      else:
+        reportUnhandledError(currException)
+        rawQuit(1)
 
 when gotoBasedExceptions:
   var nimInErrorMode {.threadvar.}: bool
@@ -425,19 +445,19 @@ when gotoBasedExceptions:
     result = addr(nimInErrorMode)
 
   proc nimTestErrorFlag() {.compilerRtl.} =
-    ## This proc must be called before ``currException`` is destroyed.
+    ## This proc must be called before `currException` is destroyed.
     ## It also must be called at the end of every thread to ensure no
     ## error is swallowed.
     if nimInErrorMode and currException != nil:
       reportUnhandledError(currException)
       currException = nil
-      quit(1)
+      rawQuit(1)
 
 proc raiseExceptionAux(e: sink(ref Exception)) {.nodestroy.} =
   when defined(nimPanics):
     if e of Defect:
       reportUnhandledError(e)
-      quit(1)
+      rawQuit(1)
 
   if localRaiseHook != nil:
     if not localRaiseHook(e): return
@@ -448,11 +468,9 @@ proc raiseExceptionAux(e: sink(ref Exception)) {.nodestroy.} =
       {.emit: "throw;".}
     else:
       pushCurrentException(e)
-      {.emit: "throw e;".}
-  elif defined(nimQuirky) or gotoBasedExceptions:
-    # XXX This check should likely also be done in the setjmp case below.
-    if e != currException:
-      pushCurrentException(e)
+      {.emit: "throw `e`;".}
+  elif quirkyExceptions or gotoBasedExceptions:
+    pushCurrentException(e)
     when gotoBasedExceptions:
       inc nimInErrorMode
   else:
@@ -461,14 +479,19 @@ proc raiseExceptionAux(e: sink(ref Exception)) {.nodestroy.} =
       c_longjmp(excHandler.context, 1)
     else:
       reportUnhandledError(e)
-      quit(1)
+      rawQuit(1)
 
 proc raiseExceptionEx(e: sink(ref Exception), ename, procname, filename: cstring,
                       line: int) {.compilerRtl, nodestroy.} =
   if e.name.isNil: e.name = ename
   when hasSomeStackTrace:
     when defined(nimStackTraceOverride):
-      e.trace = @[]
+      if e.trace.len == 0:
+        rawWriteStackTrace(e.trace)
+      else:
+        e.trace.add reraisedFrom(reraisedFromBegin)
+        auxWriteStackTraceWithOverride(e.trace)
+        e.trace.add reraisedFrom(reraisedFromEnd)
     elif NimStackTrace:
       if e.trace.len == 0:
         rawWriteStackTrace(e.trace)
@@ -499,22 +522,22 @@ proc threadTrouble() =
     if currException != nil: reportUnhandledError(currException)
   except:
     discard
-  quit 1
+  rawQuit 1
 
 proc writeStackTrace() =
   when hasSomeStackTrace:
     var s = ""
     rawWriteStackTrace(s)
-    cast[proc (s: cstring) {.noSideEffect, tags: [], nimcall, raises: [].}](showErrorMessage)(s)
   else:
-    cast[proc (s: cstring) {.noSideEffect, tags: [], nimcall, raises: [].}](showErrorMessage)("No stack traceback available\n")
+    let s = noStacktraceAvailable
+  cast[proc (s: string) {.noSideEffect, tags: [], nimcall, raises: [].}](showErrorMessage2)(s)
 
 proc getStackTrace(): string =
   when hasSomeStackTrace:
     result = ""
     rawWriteStackTrace(result)
   else:
-    result = "No stack traceback available\n"
+    result = noStacktraceAvailable
 
 proc getStackTrace(e: ref Exception): string =
   if not isNil(e):
@@ -522,13 +545,10 @@ proc getStackTrace(e: ref Exception): string =
   else:
     result = ""
 
-proc getStackTraceEntries*(e: ref Exception): seq[StackTraceEntry] =
-  ## Returns the attached stack trace to the exception ``e`` as
-  ## a ``seq``. This is not yet available for the JS backend.
-  when not defined(nimSeqsV2):
-    shallowCopy(result, e.trace)
-  else:
-    result = move(e.trace)
+proc getStackTraceEntries*(e: ref Exception): lent seq[StackTraceEntry] =
+  ## Returns the attached stack trace to the exception `e` as
+  ## a `seq`. This is not yet available for the JS backend.
+  e.trace
 
 proc getStackTraceEntries*(): seq[StackTraceEntry] =
   ## Returns the stack trace entries for the current stack trace.
@@ -540,11 +560,12 @@ const nimCallDepthLimit {.intdefine.} = 2000
 
 proc callDepthLimitReached() {.noinline.} =
   writeStackTrace()
-  showErrorMessage("Error: call depth limit reached in a debug build (" &
+  let msg = "Error: call depth limit reached in a debug build (" &
       $nimCallDepthLimit & " function calls). You can change it with " &
       "-d:nimCallDepthLimit=<int> but really try to avoid deep " &
-      "recursions instead.\n")
-  quit(1)
+      "recursions instead.\n"
+  showErrorMessage2(msg)
+  rawQuit(1)
 
 proc nimFrame(s: PFrame) {.compilerRtl, inl, raises: [].} =
   if framePtr == nil:
@@ -559,12 +580,13 @@ proc nimFrame(s: PFrame) {.compilerRtl, inl, raises: [].} =
 
 when defined(cpp) and appType != "lib" and not gotoBasedExceptions and
     not defined(js) and not defined(nimscript) and
-    hostOS != "standalone" and not defined(noCppExceptions):
+    hostOS != "standalone" and hostOS != "any" and not defined(noCppExceptions) and
+    not quirkyExceptions:
 
   type
     StdException {.importcpp: "std::exception", header: "<exception>".} = object
 
-  proc what(ex: StdException): cstring {.importcpp: "((char *)#.what())".}
+  proc what(ex: StdException): cstring {.importcpp: "((char *)#.what())", nodecl.}
 
   proc setTerminate(handler: proc() {.noconv.})
     {.importc: "std::set_terminate", header: "<exception>".}
@@ -596,9 +618,12 @@ when defined(cpp) and appType != "lib" and not gotoBasedExceptions and
     else:
       writeToStdErr msg & "\n"
 
-    quit 1
+    rawQuit 1
 
 when not defined(noSignalHandler) and not defined(useNimRtl):
+  type Sighandler = proc (a: cint) {.noconv, benign.}
+    # xxx factor with ansi_c.CSighandlerT, posix.Sighandler
+
   proc signalHandler(sign: cint) {.exportc: "signalHandler", noconv.} =
     template processSignal(s, action: untyped) {.dirty.} =
       if s == SIGINT: action("SIGINT: Interrupted by Ctrl-C.\n")
@@ -626,17 +651,34 @@ when not defined(noSignalHandler) and not defined(useNimRtl):
       var buf = newStringOfCap(2000)
       rawWriteStackTrace(buf)
       processSignal(sign, buf.add) # nice hu? currying a la Nim :-)
-      showErrorMessage(buf)
+      showErrorMessage2(buf)
       when not usesDestructors: GC_enable()
     else:
       var msg: cstring
       template asgn(y) =
         msg = y
       processSignal(sign, asgn)
-      showErrorMessage(msg)
-    quit(1) # always quit when SIGABRT
+      # xxx use string for msg instead of cstring, and here use showErrorMessage2(msg)
+      # unless there's a good reason to use cstring in signal handler to avoid
+      # using gc?
+      showErrorMessage(msg, msg.len)
+
+    when defined(posix):
+      # reset the signal handler to OS default
+      c_signal(sign, SIG_DFL)
+
+      # re-raise the signal, which will arrive once this handler exit.
+      # this lets the OS perform actions like core dumping and will
+      # also return the correct exit code to the shell.
+      discard c_raise(sign)
+    else:
+      rawQuit(1)
+
+  var SIG_IGN {.importc: "SIG_IGN", header: "<signal.h>".}: Sighandler
 
   proc registerSignalHandler() =
+    # xxx `signal` is deprecated and has many caveats, we should use `sigaction` instead, e.g.
+    # https://stackoverflow.com/questions/231912/what-is-the-difference-between-sigaction-and-signal
     c_signal(SIGINT, signalHandler)
     c_signal(SIGSEGV, signalHandler)
     c_signal(SIGABRT, signalHandler)
@@ -645,14 +687,17 @@ when not defined(noSignalHandler) and not defined(useNimRtl):
     when declared(SIGBUS):
       c_signal(SIGBUS, signalHandler)
     when declared(SIGPIPE):
-      c_signal(SIGPIPE, signalHandler)
+      when defined(nimLegacySigpipeHandler):
+        c_signal(SIGPIPE, signalHandler)
+      else:
+        c_signal(SIGPIPE, SIG_IGN)
 
   registerSignalHandler() # call it in initialization section
 
 proc setControlCHook(hook: proc () {.noconv.}) =
   # ugly cast, but should work on all architectures:
-  type SignalHandler = proc (sign: cint) {.noconv, benign.}
-  c_signal(SIGINT, cast[SignalHandler](hook))
+  when declared(Sighandler):
+    c_signal(SIGINT, cast[Sighandler](hook))
 
 when not defined(noSignalHandler) and not defined(useNimRtl):
   proc unsetControlCHook() =
diff --git a/lib/system/fatal.nim b/lib/system/fatal.nim
index 761e0dd69..25c05e52d 100644
--- a/lib/system/fatal.nim
+++ b/lib/system/fatal.nim
@@ -9,46 +9,50 @@
 
 {.push profiler: off.}
 
-when defined(nimHasExceptionsQuery):
-  const gotoBasedExceptions = compileOption("exceptions", "goto")
-else:
-  const gotoBasedExceptions = false
+const
+  gotoBasedExceptions = compileOption("exceptions", "goto")
+  quirkyExceptions = compileOption("exceptions", "quirky")
 
 when hostOS == "standalone":
   include "$projectpath/panicoverride"
 
-  proc sysFatal(exceptn: typedesc, message: string) {.inline.} =
+  func sysFatal(exceptn: typedesc[Defect], message: string) {.inline.} =
     panic(message)
 
-  proc sysFatal(exceptn: typedesc, message, arg: string) {.inline.} =
+  func sysFatal(exceptn: typedesc[Defect], message, arg: string) {.inline.} =
     rawoutput(message)
     panic(arg)
 
-elif (defined(nimQuirky) or defined(nimPanics)) and not defined(nimscript):
+elif quirkyExceptions and not defined(nimscript):
   import ansi_c
 
-  proc name(t: typedesc): string {.magic: "TypeTrait".}
-
-  proc sysFatal(exceptn: typedesc, message, arg: string) {.inline, noreturn.} =
-    writeStackTrace()
-    var buf = newStringOfCap(200)
-    add(buf, "Error: unhandled exception: ")
-    add(buf, message)
-    add(buf, arg)
-    add(buf, " [")
-    add(buf, name exceptn)
-    add(buf, "]\n")
-    cstderr.rawWrite buf
-    quit 1
-
-  proc sysFatal(exceptn: typedesc, message: string) {.inline, noreturn.} =
+  func name(t: typedesc): string {.magic: "TypeTrait".}
+
+  func sysFatal(exceptn: typedesc[Defect], message, arg: string) {.inline, noreturn.} =
+    when nimvm:
+      # TODO when doAssertRaises works in CT, add a test for it
+      raise (ref exceptn)(msg: message & arg)
+    else:
+      {.noSideEffect.}:
+        writeStackTrace()
+        var buf = newStringOfCap(200)
+        add(buf, "Error: unhandled exception: ")
+        add(buf, message)
+        add(buf, arg)
+        add(buf, " [")
+        add(buf, name exceptn)
+        add(buf, "]\n")
+        cstderr.rawWrite buf
+      rawQuit 1
+
+  func sysFatal(exceptn: typedesc[Defect], message: string) {.inline, noreturn.} =
     sysFatal(exceptn, message, "")
 
 else:
-  proc sysFatal(exceptn: typedesc, message: string) {.inline, noreturn.} =
+  func sysFatal(exceptn: typedesc[Defect], message: string) {.inline, noreturn.} =
     raise (ref exceptn)(msg: message)
 
-  proc sysFatal(exceptn: typedesc, message, arg: string) {.inline, noreturn.} =
+  func sysFatal(exceptn: typedesc[Defect], message, arg: string) {.inline, noreturn.} =
     raise (ref exceptn)(msg: message & arg)
 
 {.pop.}
diff --git a/lib/system/formatfloat.nim b/lib/system/formatfloat.nim
index cb46c8c36..70dd857d5 100644
--- a/lib/system/formatfloat.nim
+++ b/lib/system/formatfloat.nim
@@ -1,59 +1,6 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2019 Nim contributors
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-proc c_sprintf(buf, frmt: cstring): cint {.header: "<stdio.h>",
-                                    importc: "sprintf", varargs, noSideEffect.}
-
-proc writeToBuffer(buf: var array[65, char]; value: cstring) =
-  var i = 0
-  while value[i] != '\0':
-    buf[i] = value[i]
-    inc i
-
-proc writeFloatToBuffer*(buf: var array[65, char]; value: BiggestFloat): int =
-  ## This is the implementation to format floats in the Nim
-  ## programming language. The specific format for floating point
-  ## numbers is not specified in the Nim programming language and
-  ## might change slightly in the future, but at least wherever you
-  ## format a float, it should be consistent.
-  ##
-  ## returns the amount of bytes written to `buf` not counting the
-  ## terminating '\0' character.
-  ##
-  ## * `buf` - A buffer to write into. The buffer does not need to be
-  ##           initialized and it will be overridden.
-  ##
-  var n: int = c_sprintf(addr buf, "%.16g", value)
-  var hasDot = false
-  for i in 0..n-1:
-    if buf[i] == ',':
-      buf[i] = '.'
-      hasDot = true
-    elif buf[i] in {'a'..'z', 'A'..'Z', '.'}:
-      hasDot = true
-  if not hasDot:
-    buf[n] = '.'
-    buf[n+1] = '0'
-    buf[n+2] = '\0'
-    result = n + 2
-  else:
-    result = n
-  # On Windows nice numbers like '1.#INF', '-1.#INF' or '1.#NAN' or 'nan(ind)'
-  # of '-1.#IND' are produced.
-  # We want to get rid of these here:
-  if buf[n-1] in {'n', 'N', 'D', 'd', ')'}:
-    writeToBuffer(buf, "nan")
-    result = 3
-  elif buf[n-1] == 'F':
-    if buf[0] == '-':
-      writeToBuffer(buf, "-inf")
-      result = 4
-    else:
-      writeToBuffer(buf, "inf")
-      result = 3
+when not defined(nimPreviewSlimSystem):
+  import std/formatfloat
+  export formatfloat
+  {.deprecated: "use `std/formatfloat`".}
+else:
+  {.error: "use `std/formatfloat`".}
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index 304eda19a..9289c7f55 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -12,6 +12,55 @@
 # Refcounting + Mark&Sweep. Complex algorithms avoided.
 # Been there, done that, didn't work.
 
+#[
+
+A *cell* is anything that is traced by the GC
+(sequences, refs, strings, closures).
+
+The basic algorithm is *Deferrent Reference Counting* with cycle detection.
+References on the stack are not counted for better performance and easier C
+code generation.
+
+Each cell has a header consisting of a RC and a pointer to its type
+descriptor. However the program does not know about these, so they are placed at
+negative offsets. In the GC code the type `PCell` denotes a pointer
+decremented by the right offset, so that the header can be accessed easily. It
+is extremely important that `pointer` is not confused with a `PCell`.
+
+In Nim the compiler cannot always know if a reference
+is stored on the stack or not. This is caused by var parameters.
+Consider this example:
+
+  ```Nim
+  proc setRef(r: var ref TNode) =
+    new(r)
+
+  proc usage =
+    var
+      r: ref TNode
+    setRef(r) # here we should not update the reference counts, because
+              # r is on the stack
+    setRef(r.left) # here we should update the refcounts!
+  ```
+
+We have to decide at runtime whether the reference is on the stack or not.
+The generated code looks roughly like this:
+
+  ```C
+  void setref(TNode** ref) {
+    unsureAsgnRef(ref, newObj(TNode_TI, sizeof(TNode)))
+  }
+  void usage(void) {
+    setRef(&r)
+    setRef(&r->left)
+  }
+  ```
+
+Note that for systems with a continuous stack (which most systems have)
+the check whether the ref is on the stack is very cheap (only two
+comparisons).
+]#
+
 {.push profiler:off.}
 
 const
@@ -29,7 +78,7 @@ when defined(memProfiler):
   proc nimProfile(requestedSize: int) {.benign.}
 
 when hasThreadSupport:
-  import sharedlist
+  import std/sharedlist
 
 const
   rcIncrement = 0b1000 # so that lowest 3 bits are not touched
@@ -47,7 +96,7 @@ type
     waZctDecRef, waPush
     #, waDebug
 
-  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
+  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign, raises: [].}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
 
@@ -114,7 +163,7 @@ template gcAssert(cond: bool, msg: string) =
       writeStackTrace()
       #var x: ptr int
       #echo x[]
-      quit 1
+      rawQuit 1
 
 proc addZCT(s: var CellSeq, c: PCell) {.noinline.} =
   if (c.refcount and ZctFlag) == 0:
@@ -123,11 +172,11 @@ proc addZCT(s: var CellSeq, c: PCell) {.noinline.} =
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
+  result = cast[pointer](cast[int](cell)+%ByteAddress(sizeof(Cell)))
 
 proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
+  result = cast[PCell](cast[int](usr)-%ByteAddress(sizeof(Cell)))
 
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
   # used for code generation concerning debugging
@@ -172,11 +221,11 @@ template gcTrace(cell, state: untyped) =
   when traceGC: traceCell(cell, state)
 
 # forward declarations:
-proc collectCT(gch: var GcHeap) {.benign.}
-proc isOnStack(p: pointer): bool {.noinline, benign.}
-proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
-proc doOperation(p: pointer, op: WalkOp) {.benign.}
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
+proc collectCT(gch: var GcHeap) {.benign, raises: [].}
+proc isOnStack(p: pointer): bool {.noinline, benign, raises: [].}
+proc forAllChildren(cell: PCell, op: WalkOp) {.benign, raises: [].}
+proc doOperation(p: pointer, op: WalkOp) {.benign, raises: [].}
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign, raises: [].}
 # we need the prototype here for debugging purposes
 
 proc incRef(c: PCell) {.inline.} =
@@ -289,7 +338,7 @@ proc cellsetReset(s: var CellSet) =
 {.push stacktrace:off.}
 
 proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
   of nkList:
@@ -309,7 +358,7 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
   of nkNone: sysAssert(false, "forAllSlotsAux")
 
 proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
     case mt.kind
@@ -335,7 +384,7 @@ proc forAllChildren(cell: PCell, op: WalkOp) =
     of tyRef: # common case
       forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
     of tySequence:
-      var d = cast[ByteAddress](cellToUsr(cell))
+      var d = cast[int](cellToUsr(cell))
       var s = cast[PGenericSeq](d)
       if s != nil:
         for i in 0..s.len-1:
@@ -410,7 +459,7 @@ proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   collectCT(gch)
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
   #gcAssert typ.kind in {tyString, tySequence} or size >= typ.base.size, "size too small"
-  gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  gcAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
   setFrameInfo(res)
@@ -435,7 +484,7 @@ proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
   result = rawNewObj(typ, size, gch)
   when defined(memProfiler): nimProfile(size)
 
-proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
+proc newObj(typ: PNimType, size: int): pointer {.compilerRtl, noinline.} =
   result = rawNewObj(typ, size, gch)
   zeroMem(result, size)
   when defined(memProfiler): nimProfile(size)
@@ -450,7 +499,7 @@ proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
   when defined(memProfiler): nimProfile(size)
 {.pop.}
 
-proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
+proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl, noinline.} =
   # generates a new object and sets its reference counter to 1
   incTypeSize typ, size
   sysAssert(allocInv(gch.region), "newObjRC1 begin")
@@ -460,7 +509,7 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
 
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
   sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  sysAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
   setFrameInfo(res)
@@ -502,9 +551,9 @@ proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
 
   var oldsize = align(GenericSeqSize, elemAlign) + cast[PGenericSeq](old).len * elemSize
   copyMem(res, ol, oldsize + sizeof(Cell))
-  zeroMem(cast[pointer](cast[ByteAddress](res) +% oldsize +% sizeof(Cell)),
+  zeroMem(cast[pointer](cast[int](res) +% oldsize +% sizeof(Cell)),
           newsize-oldsize)
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
+  sysAssert((cast[int](res) and (MemAlign-1)) == 0, "growObj: 3")
   # This can be wrong for intermediate temps that are nevertheless on the
   # heap because of lambda lifting:
   #gcAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
@@ -514,35 +563,8 @@ proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
   gcTrace(res, csAllocated)
   track("growObj old", ol, 0)
   track("growObj new", res, newsize)
-  when defined(nimIncrSeqV3):
-    # since we steal the old seq's contents, we set the old length to 0.
-    cast[PGenericSeq](old).len = 0
-  elif reallyDealloc:
-    sysAssert(allocInv(gch.region), "growObj before dealloc")
-    if ol.refcount shr rcShift <=% 1:
-      # free immediately to save space:
-      if (ol.refcount and ZctFlag) != 0:
-        var j = gch.zct.len-1
-        var d = gch.zct.d
-        while j >= 0:
-          if d[j] == ol:
-            d[j] = res
-            break
-          dec(j)
-      beforeDealloc(gch, ol, "growObj stack trash")
-      decTypeSize(ol, ol.typ)
-      rawDealloc(gch.region, ol)
-    else:
-      # we split the old refcount in 2 parts. XXX This is still not entirely
-      # correct if the pointer that receives growObj's result is on the stack.
-      # A better fix would be to emit the location specific write barrier for
-      # 'growObj', but this is lots of more work and who knows what new problems
-      # this would create.
-      res.refcount = rcIncrement
-      decRef(ol)
-  else:
-    sysAssert(ol.typ != nil, "growObj: 5")
-    zeroMem(ol, sizeof(Cell))
+  # since we steal the old seq's contents, we set the old length to 0.
+  cast[PGenericSeq](old).len = 0
   when useCellIds:
     inc gch.idGenerator
     res.id = gch.idGenerator * 1000_000 + gch.gcThreadId
@@ -589,7 +611,7 @@ proc markS(gch: var GcHeap, c: PCell) =
     if not containsOrIncl(gch.marked, d):
       forAllChildren(d, waMarkPrecise)
 
-proc markGlobals(gch: var GcHeap) =
+proc markGlobals(gch: var GcHeap) {.raises: [].} =
   if gch.gcThreadId == 0:
     for i in 0 .. globalMarkersLen-1: globalMarkers[i]()
   for i in 0 .. threadLocalMarkersLen-1: threadLocalMarkers[i]()
@@ -606,7 +628,7 @@ when logGC:
       if cycleCheckA[i] == c: return true
     if cycleCheckALen == len(cycleCheckA):
       gcAssert(false, "cycle detection overflow")
-      quit 1
+      rawQuit 1
     cycleCheckA[cycleCheckALen] = c
     inc cycleCheckALen
 
@@ -644,9 +666,9 @@ proc doOperation(p: pointer, op: WalkOp) =
 proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
   doOperation(d, WalkOp(op))
 
-proc collectZCT(gch: var GcHeap): bool {.benign.}
+proc collectZCT(gch: var GcHeap): bool {.benign, raises: [].}
 
-proc collectCycles(gch: var GcHeap) =
+proc collectCycles(gch: var GcHeap) {.raises: [].} =
   when hasThreadSupport:
     for c in gch.toDispose:
       nimGCunref(c)
@@ -663,16 +685,16 @@ proc collectCycles(gch: var GcHeap) =
 proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
   sysAssert(allocInv(gch.region), "gcMark begin")
-  var cell = usrToCell(p)
-  var c = cast[ByteAddress](cell)
+  var c = cast[int](p)
   if c >% PageSize:
     # fast check: does it look like a cell?
-    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
+    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, p))
     if objStart != nil:
       # mark the cell:
       incRef(objStart)
       add(gch.decStack, objStart)
     when false:
+      let cell = usrToCell(p)
       if isAllocatedPtr(gch.region, cell):
         sysAssert false, "allocated pointer but not interior?"
         # mark the cell:
@@ -753,7 +775,7 @@ proc unmarkStackAndRegisters(gch: var GcHeap) =
     decRef(d[i])
   gch.decStack.len = 0
 
-proc collectCTBody(gch: var GcHeap) =
+proc collectCTBody(gch: var GcHeap) {.raises: [].} =
   when withRealTime:
     let t0 = getticks()
   sysAssert(allocInv(gch.region), "collectCT: begin")
@@ -828,10 +850,10 @@ when withRealTime:
         stack.bottomSaved = stack.bottom
         when stackIncreases:
           stack.bottom = cast[pointer](
-            cast[ByteAddress](stack.pos) - sizeof(pointer) * 6 - stackSize)
+            cast[int](stack.pos) - sizeof(pointer) * 6 - stackSize)
         else:
           stack.bottom = cast[pointer](
-            cast[ByteAddress](stack.pos) + sizeof(pointer) * 6 + stackSize)
+            cast[int](stack.pos) + sizeof(pointer) * 6 + stackSize)
 
     GC_step(gch, us, strongAdvice)
 
@@ -856,7 +878,7 @@ when not defined(useNimRtl):
     gch.cycleThreshold = InitialCycleThreshold
 
   proc GC_disableMarkAndSweep() =
-    gch.cycleThreshold = high(gch.cycleThreshold)-1
+    gch.cycleThreshold = high(typeof(gch.cycleThreshold))-1
     # set to the max value to suppress the cycle detector
 
   proc GC_fullCollect() =
diff --git a/lib/system/gc2.nim b/lib/system/gc2.nim
deleted file mode 100644
index 09388b6e8..000000000
--- a/lib/system/gc2.nim
+++ /dev/null
@@ -1,746 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2017 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-#            Garbage Collector
-#
-# The basic algorithm is an incremental mark
-# and sweep GC to free cycles. It is hard realtime in that if you play
-# according to its rules, no deadline will ever be missed.
-# Since this kind of collector is very bad at recycling dead objects
-# early, Nim's codegen emits ``nimEscape`` calls at strategic
-# places. For this to work even 'unsureAsgnRef' needs to mark things
-# so that only return values need to be considered in ``nimEscape``.
-
-{.push profiler:off.}
-
-const
-  CycleIncrease = 2 # is a multiplicative increase
-  InitialCycleThreshold = 512*1024 # start collecting after 500KB
-  ZctThreshold = 500  # we collect garbage if the ZCT's size
-                      # reaches this threshold
-                      # this seems to be a good value
-  withRealTime = defined(useRealtimeGC)
-
-when withRealTime and not declared(getTicks):
-  include "system/timers"
-when defined(memProfiler):
-  proc nimProfile(requestedSize: int) {.benign.}
-
-when hasThreadSupport:
-  include sharedlist
-
-type
-  ObjectSpaceIter = object
-    state: range[-1..0]
-
-iterToProc(allObjects, ptr ObjectSpaceIter, allObjectsAsProc)
-
-const
-  escapedBit = 0b1000 # so that lowest 3 bits are not touched
-  rcBlackOrig = 0b000
-  rcWhiteOrig = 0b001
-  rcGrey = 0b010   # traditional color for incremental mark&sweep
-  rcUnused = 0b011
-  colorMask = 0b011
-type
-  WalkOp = enum
-    waMarkGlobal,    # part of the backup mark&sweep
-    waMarkGrey,
-    waZctDecRef,
-    waDebug
-
-  Phase {.pure.} = enum
-    None, Marking, Sweeping
-  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
-    # A ref type can have a finalizer that is called before the object's
-    # storage is freed.
-
-  GcStat = object
-    stackScans: int          # number of performed stack scans (for statistics)
-    completedCollections: int    # number of performed full collections
-    maxThreshold: int        # max threshold that has been set
-    maxStackSize: int        # max stack size
-    maxStackCells: int       # max stack cells in ``decStack``
-    cycleTableSize: int      # max entries in cycle table
-    maxPause: int64          # max measured GC pause in nanoseconds
-
-  GcStack {.final, pure.} = object
-    when nimCoroutines:
-      prev: ptr GcStack
-      next: ptr GcStack
-      maxStackSize: int      # Used to track statistics because we can not use
-                             # GcStat.maxStackSize when multiple stacks exist.
-    bottom: pointer
-
-    when withRealTime or nimCoroutines:
-      pos: pointer           # Used with `withRealTime` only for code clarity, see GC_Step().
-    when withRealTime:
-      bottomSaved: pointer
-
-  GcHeap = object # this contains the zero count and
-                  # non-zero count table
-    black, red: int # either 0 or 1.
-    stack: GcStack
-    when nimCoroutines:
-      activeStack: ptr GcStack    # current executing coroutine stack.
-    phase: Phase
-    cycleThreshold: int
-    when useCellIds:
-      idGenerator: int
-    greyStack: CellSeq
-    recGcLock: int           # prevent recursion via finalizers; no thread lock
-    when withRealTime:
-      maxPause: Nanos        # max allowed pause in nanoseconds; active if > 0
-    region: MemRegion        # garbage collected region
-    stat: GcStat
-    additionalRoots: CellSeq # explicit roots for GC_ref/unref
-    spaceIter: ObjectSpaceIter
-    pDumpHeapFile: pointer # File that is used for GC_dumpHeap
-    when hasThreadSupport:
-      toDispose: SharedList[pointer]
-    gcThreadId: int
-
-var
-  gch {.rtlThreadVar.}: GcHeap
-
-when not defined(useNimRtl):
-  instantiateForRegion(gch.region)
-
-# Which color to use for new objects is tricky: When we're marking,
-# they have to be *white* so that everything is marked that is only
-# reachable from them. However, when we are sweeping, they have to
-# be black, so that we don't free them prematuredly. In order to save
-# a comparison gch.phase == Phase.Marking, we use the pseudo-color
-# 'red' for new objects.
-template allocColor(): untyped = gch.red
-
-template gcAssert(cond: bool, msg: string) =
-  when defined(useGcAssert):
-    if not cond:
-      echo "[GCASSERT] ", msg
-      GC_disable()
-      writeStackTrace()
-      quit 1
-
-proc cellToUsr(cell: PCell): pointer {.inline.} =
-  # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
-
-proc usrToCell(usr: pointer): PCell {.inline.} =
-  # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
-
-proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
-  # used for code generation concerning debugging
-  result = usrToCell(c).typ
-
-proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
-  result = 0
-
-# this that has to equals zero, otherwise we have to round up UnitsPerPage:
-when BitsPerPage mod (sizeof(int)*8) != 0:
-  {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
-
-template color(c): untyped = c.refCount and colorMask
-template setColor(c, col) =
-  c.refcount = c.refcount and not colorMask or col
-
-template markAsEscaped(c: PCell) =
-  c.refcount = c.refcount or escapedBit
-
-template didEscape(c: PCell): bool =
-  (c.refCount and escapedBit) != 0
-
-proc writeCell(file: File; msg: cstring, c: PCell) =
-  var kind = -1
-  if c.typ != nil: kind = ord(c.typ.kind)
-  let col = if c.color == rcGrey: 'g'
-            elif c.color == gch.black: 'b'
-            else: 'w'
-  when useCellIds:
-    let id = c.id
-  else:
-    let id = c
-  when defined(nimTypeNames):
-    c_fprintf(file, "%s %p %d escaped=%ld color=%c of type %s\n",
-              msg, id, kind, didEscape(c), col, c.typ.name)
-  elif leakDetector:
-    c_fprintf(file, "%s %p %d escaped=%ld color=%c from %s(%ld)\n",
-              msg, id, kind, didEscape(c), col, c.filename, c.line)
-  else:
-    c_fprintf(file, "%s %p %d escaped=%ld color=%c\n",
-              msg, id, kind, didEscape(c), col)
-
-proc writeCell(msg: cstring, c: PCell) =
-  stdout.writeCell(msg, c)
-
-proc myastToStr[T](x: T): string {.magic: "AstToStr", noSideEffect.}
-
-template gcTrace(cell, state: untyped) =
-  when traceGC: writeCell(myastToStr(state), cell)
-
-# forward declarations:
-proc collectCT(gch: var GcHeap) {.benign.}
-proc isOnStack(p: pointer): bool {.noinline, benign.}
-proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
-proc doOperation(p: pointer, op: WalkOp) {.benign.}
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
-# we need the prototype here for debugging purposes
-
-proc nimGCref(p: pointer) {.compilerproc.} =
-  let cell = usrToCell(p)
-  markAsEscaped(cell)
-  add(gch.additionalRoots, cell)
-
-proc nimGCunref(p: pointer) {.compilerproc.} =
-  let cell = usrToCell(p)
-  var L = gch.additionalRoots.len-1
-  var i = L
-  let d = gch.additionalRoots.d
-  while i >= 0:
-    if d[i] == cell:
-      d[i] = d[L]
-      dec gch.additionalRoots.len
-      break
-    dec(i)
-
-proc nimGCunrefNoCycle(p: pointer) {.compilerproc, inline.} =
-  discard "can we do some freeing here?"
-
-proc nimGCunrefRC1(p: pointer) {.compilerproc, inline.} =
-  discard "can we do some freeing here?"
-
-template markGrey(x: PCell) =
-  if x.color != 1-gch.black and gch.phase == Phase.Marking:
-    if not isAllocatedPtr(gch.region, x):
-      c_fprintf(stdout, "[GC] markGrey proc: %p\n", x)
-      #GC_dumpHeap()
-      sysAssert(false, "wtf")
-    x.setColor(rcGrey)
-    add(gch.greyStack, x)
-
-proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-  # the code generator calls this proc!
-  gcAssert(not isOnStack(dest), "asgnRef")
-  # BUGFIX: first incRef then decRef!
-  if src != nil:
-    let s = usrToCell(src)
-    markAsEscaped(s)
-    markGrey(s)
-  dest[] = src
-
-proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
-  deprecated: "old compiler compat".} = asgnRef(dest, src)
-
-proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc.} =
-  # unsureAsgnRef marks 'src' as grey only if dest is not on the
-  # stack. It is used by the code generator if it cannot decide whether a
-  # reference is in the stack or not (this can happen for var parameters).
-  if src != nil:
-    let s = usrToCell(src)
-    markAsEscaped(s)
-    if not isOnStack(dest): markGrey(s)
-  dest[] = src
-
-proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
-  var d = cast[ByteAddress](dest)
-  case n.kind
-  of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
-  of nkList:
-    for i in 0..n.len-1:
-      forAllSlotsAux(dest, n.sons[i], op)
-  of nkCase:
-    var m = selectBranch(dest, n)
-    if m != nil: forAllSlotsAux(dest, m, op)
-  of nkNone: sysAssert(false, "forAllSlotsAux")
-
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
-  var d = cast[ByteAddress](dest)
-  if dest == nil: return # nothing to do
-  if ntfNoRefs notin mt.flags:
-    case mt.kind
-    of tyRef, tyString, tySequence: # leaf:
-      doOperation(cast[PPointer](d)[], op)
-    of tyObject, tyTuple:
-      forAllSlotsAux(dest, mt.node, op)
-    of tyArray, tyArrayConstr, tyOpenArray:
-      for i in 0..(mt.size div mt.base.size)-1:
-        forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
-    else: discard
-
-proc forAllChildren(cell: PCell, op: WalkOp) =
-  gcAssert(cell != nil, "forAllChildren: 1")
-  gcAssert(isAllocatedPtr(gch.region, cell), "forAllChildren: 2")
-  gcAssert(cell.typ != nil, "forAllChildren: 3")
-  gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 4"
-  let marker = cell.typ.marker
-  if marker != nil:
-    marker(cellToUsr(cell), op.int)
-  else:
-    case cell.typ.kind
-    of tyRef: # common case
-      forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
-    of tySequence:
-      var d = cast[ByteAddress](cellToUsr(cell))
-      var s = cast[PGenericSeq](d)
-      if s != nil:
-        for i in 0..s.len-1:
-          forAllChildrenAux(cast[pointer](d +% align(GenericSeqSize, cell.typ.base.align) +% i *% cell.typ.base.size), cell.typ.base, op)
-    else: discard
-
-{.push stackTrace: off, profiler:off.}
-proc gcInvariant*() =
-  sysAssert(allocInv(gch.region), "injected")
-  when declared(markForDebug):
-    markForDebug(gch)
-{.pop.}
-
-include gc_common
-
-proc initGC() =
-  when not defined(useNimRtl):
-    gch.red = (1-gch.black)
-    gch.cycleThreshold = InitialCycleThreshold
-    gch.stat.stackScans = 0
-    gch.stat.completedCollections = 0
-    gch.stat.maxThreshold = 0
-    gch.stat.maxStackSize = 0
-    gch.stat.maxStackCells = 0
-    gch.stat.cycleTableSize = 0
-    # init the rt
-    init(gch.additionalRoots)
-    init(gch.greyStack)
-    when hasThreadSupport:
-      init(gch.toDispose)
-    gch.gcThreadId = atomicInc(gHeapidGenerator) - 1
-    gcAssert(gch.gcThreadId >= 0, "invalid computed thread ID")
-
-proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
-  # generates a new object and sets its reference counter to 0
-  sysAssert(allocInv(gch.region), "rawNewObj begin")
-  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
-  collectCT(gch)
-  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
-  gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
-  # now it is buffered in the ZCT
-  res.typ = typ
-  when leakDetector and not hasThreadSupport:
-    if framePtr != nil and framePtr.prev != nil:
-      res.filename = framePtr.prev.filename
-      res.line = framePtr.prev.line
-  # refcount is zero, color is black, but mark it to be in the ZCT
-  res.refcount = allocColor()
-  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-  when logGC: writeCell("new cell", res)
-  gcTrace(res, csAllocated)
-  when useCellIds:
-    inc gch.idGenerator
-    res.id = gch.idGenerator
-  result = cellToUsr(res)
-  sysAssert(allocInv(gch.region), "rawNewObj end")
-
-{.pop.}
-
-proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = rawNewObj(typ, size, gch)
-  when defined(memProfiler): nimProfile(size)
-
-proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = rawNewObj(typ, size, gch)
-  zeroMem(result, size)
-  when defined(memProfiler): nimProfile(size)
-
-proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  # `newObj` already uses locks, so no need for them here.
-  let size = addInt(align(GenericSeqSize, typ.base.align), mulInt(len, typ.base.size))
-  result = newObj(typ, size)
-  cast[PGenericSeq](result).len = len
-  cast[PGenericSeq](result).reserved = len
-  when defined(memProfiler): nimProfile(size)
-
-proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = newObj(typ, size)
-
-proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  result = newSeq(typ, len)
-
-proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
-  collectCT(gch)
-  var ol = usrToCell(old)
-  sysAssert(ol.typ != nil, "growObj: 1")
-  gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
-
-  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
-  var elemSize, elemAlign = 1
-  if ol.typ.kind != tyString:
-    elemSize = ol.typ.base.size
-    elemAlign = ol.typ.base.align
-  incTypeSize ol.typ, newsize
-
-  var oldsize = align(GenericSeqSize, elemAlign) + cast[PGenericSeq](old).len*elemSize
-  copyMem(res, ol, oldsize + sizeof(Cell))
-  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
-          newsize-oldsize)
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
-  when false:
-    # this is wrong since seqs can be shared via 'shallow':
-    when reallyDealloc: rawDealloc(gch.region, ol)
-    else:
-      zeroMem(ol, sizeof(Cell))
-  when useCellIds:
-    inc gch.idGenerator
-    res.id = gch.idGenerator
-  result = cellToUsr(res)
-  when defined(memProfiler): nimProfile(newsize-oldsize)
-
-proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
-  result = growObj(old, newsize, gch)
-
-{.push profiler:off.}
-
-
-template takeStartTime(workPackageSize) {.dirty.} =
-  const workPackage = workPackageSize
-  var debugticker = 1000
-  when withRealTime:
-    var steps = workPackage
-    var t0: Ticks
-    if gch.maxPause > 0: t0 = getticks()
-
-template takeTime {.dirty.} =
-  when withRealTime: dec steps
-  dec debugticker
-
-template checkTime {.dirty.} =
-  if debugticker <= 0:
-    #echo "in loop"
-    debugticker = 1000
-  when withRealTime:
-    if steps == 0:
-      steps = workPackage
-      if gch.maxPause > 0:
-        let duration = getticks() - t0
-        # the GC's measuring is not accurate and needs some cleanup actions
-        # (stack unmarking), so subtract some short amount of time in
-        # order to miss deadlines less often:
-        if duration >= gch.maxPause - 50_000:
-          return false
-
-# ---------------- dump heap ----------------
-
-template dumpHeapFile(gch: var GcHeap): File =
-  cast[File](gch.pDumpHeapFile)
-
-proc debugGraph(s: PCell) =
-  c_fprintf(gch.dumpHeapFile, "child %p\n", s)
-
-proc dumpRoot(gch: var GcHeap; s: PCell) =
-  if isAllocatedPtr(gch.region, s):
-    c_fprintf(gch.dumpHeapFile, "global_root %p\n", s)
-  else:
-    c_fprintf(gch.dumpHeapFile, "global_root_invalid %p\n", s)
-
-proc GC_dumpHeap*(file: File) =
-  ## Dumps the GCed heap's content to a file. Can be useful for
-  ## debugging. Produces an undocumented text file format that
-  ## can be translated into "dot" syntax via the "heapdump2dot" tool.
-  gch.pDumpHeapFile = file
-  var spaceIter: ObjectSpaceIter
-  when false:
-    var d = gch.decStack.d
-    for i in 0 .. gch.decStack.len-1:
-      if isAllocatedPtr(gch.region, d[i]):
-        c_fprintf(file, "onstack %p\n", d[i])
-      else:
-        c_fprintf(file, "onstack_invalid %p\n", d[i])
-  if gch.gcThreadId == 0:
-    for i in 0 .. globalMarkersLen-1: globalMarkers[i]()
-  for i in 0 .. threadLocalMarkersLen-1: threadLocalMarkers[i]()
-  while true:
-    let x = allObjectsAsProc(gch.region, addr spaceIter)
-    if spaceIter.state < 0: break
-    if isCell(x):
-      # cast to PCell is correct here:
-      var c = cast[PCell](x)
-      writeCell(file, "cell ", c)
-      forAllChildren(c, waDebug)
-      c_fprintf(file, "end\n")
-  gch.pDumpHeapFile = nil
-
-proc GC_dumpHeap() =
-  var f: File
-  if open(f, "heap.txt", fmWrite):
-    GC_dumpHeap(f)
-    f.close()
-  else:
-    c_fprintf(stdout, "cannot write heap.txt")
-
-# ---------------- cycle collector -------------------------------------------
-
-proc freeCyclicCell(gch: var GcHeap, c: PCell) =
-  gcAssert(isAllocatedPtr(gch.region, c), "freeCyclicCell: freed pointer?")
-  prepareDealloc(c)
-  gcTrace(c, csCycFreed)
-  when logGC: writeCell("cycle collector dealloc cell", c)
-  when reallyDealloc:
-    sysAssert(allocInv(gch.region), "free cyclic cell")
-    rawDealloc(gch.region, c)
-  else:
-    gcAssert(c.typ != nil, "freeCyclicCell")
-    zeroMem(c, sizeof(Cell))
-
-proc sweep(gch: var GcHeap): bool =
-  takeStartTime(100)
-  #echo "loop start"
-  let white = 1-gch.black
-  #c_fprintf(stdout, "black is %d\n", black)
-  while true:
-    let x = allObjectsAsProc(gch.region, addr gch.spaceIter)
-    if gch.spaceIter.state < 0: break
-    takeTime()
-    if isCell(x):
-      # cast to PCell is correct here:
-      var c = cast[PCell](x)
-      gcAssert c.color != rcGrey, "cell is still grey?"
-      if c.color == white: freeCyclicCell(gch, c)
-      # Since this is incremental, we MUST not set the object to 'white' here.
-      # We could set all the remaining objects to white after the 'sweep'
-      # completed but instead we flip the meaning of black/white to save one
-      # traversal over the heap!
-    checkTime()
-  # prepare for next iteration:
-  #echo "loop end"
-  gch.spaceIter = ObjectSpaceIter()
-  result = true
-
-proc markRoot(gch: var GcHeap, c: PCell) {.inline.} =
-  if c.color == 1-gch.black:
-    c.setColor(rcGrey)
-    add(gch.greyStack, c)
-
-proc markIncremental(gch: var GcHeap): bool =
-  var L = addr(gch.greyStack.len)
-  takeStartTime(100)
-  while L[] > 0:
-    var c = gch.greyStack.d[0]
-    if not isAllocatedPtr(gch.region, c):
-      c_fprintf(stdout, "[GC] not allocated anymore: %p\n", c)
-      #GC_dumpHeap()
-      sysAssert(false, "wtf")
-
-    #sysAssert(isAllocatedPtr(gch.region, c), "markIncremental: isAllocatedPtr")
-    gch.greyStack.d[0] = gch.greyStack.d[L[] - 1]
-    dec(L[])
-    takeTime()
-    if c.color == rcGrey:
-      c.setColor(gch.black)
-      forAllChildren(c, waMarkGrey)
-    elif c.color == (1-gch.black):
-      gcAssert false, "wtf why are there white objects in the greystack?"
-    checkTime()
-  gcAssert gch.greyStack.len == 0, "markIncremental: greystack not empty "
-  result = true
-
-proc markGlobals(gch: var GcHeap) =
-  if gch.gcThreadId == 0:
-    for i in 0 .. globalMarkersLen-1: globalMarkers[i]()
-  for i in 0 .. threadLocalMarkersLen-1: threadLocalMarkers[i]()
-
-proc doOperation(p: pointer, op: WalkOp) =
-  if p == nil: return
-  var c: PCell = usrToCell(p)
-  gcAssert(c != nil, "doOperation: 1")
-  # the 'case' should be faster than function pointers because of easy
-  # prediction:
-  case op
-  of waZctDecRef:
-    #if not isAllocatedPtr(gch.region, c):
-    #  c_fprintf(stdout, "[GC] decref bug: %p", c)
-    gcAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
-    discard "use me for nimEscape?"
-  of waMarkGlobal:
-    template handleRoot =
-      if gch.dumpHeapFile.isNil:
-        markRoot(gch, c)
-      else:
-        dumpRoot(gch, c)
-    handleRoot()
-    discard allocInv(gch.region)
-  of waMarkGrey:
-    when false:
-      if not isAllocatedPtr(gch.region, c):
-        c_fprintf(stdout, "[GC] not allocated anymore: MarkGrey %p\n", c)
-        #GC_dumpHeap()
-        sysAssert(false, "wtf")
-    if c.color == 1-gch.black:
-      c.setColor(rcGrey)
-      add(gch.greyStack, c)
-  of waDebug: debugGraph(c)
-
-proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
-  doOperation(d, WalkOp(op))
-
-proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
-  # the addresses are not as cells on the stack, so turn them to cells:
-  sysAssert(allocInv(gch.region), "gcMark begin")
-  var cell = usrToCell(p)
-  var c = cast[ByteAddress](cell)
-  if c >% PageSize:
-    # fast check: does it look like a cell?
-    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
-    if objStart != nil:
-      # mark the cell:
-      markRoot(gch, objStart)
-  sysAssert(allocInv(gch.region), "gcMark end")
-
-proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-  forEachStackSlot(gch, gcMark)
-
-proc collectALittle(gch: var GcHeap): bool =
-  case gch.phase
-  of Phase.None:
-    if getOccupiedMem(gch.region) >= gch.cycleThreshold:
-      gch.phase = Phase.Marking
-      markGlobals(gch)
-      result = collectALittle(gch)
-      #when false: c_fprintf(stdout, "collectALittle: introduced bug E %ld\n", gch.phase)
-      #discard allocInv(gch.region)
-  of Phase.Marking:
-    when hasThreadSupport:
-      for c in gch.toDispose:
-        nimGCunref(c)
-    prepareForInteriorPointerChecking(gch.region)
-    markStackAndRegisters(gch)
-    inc(gch.stat.stackScans)
-    if markIncremental(gch):
-      gch.phase = Phase.Sweeping
-      gch.red = 1 - gch.red
-  of Phase.Sweeping:
-    gcAssert gch.greyStack.len == 0, "greystack not empty"
-    when hasThreadSupport:
-      for c in gch.toDispose:
-        nimGCunref(c)
-    if sweep(gch):
-      gch.phase = Phase.None
-      # flip black/white meanings:
-      gch.black = 1 - gch.black
-      gcAssert gch.red == 1 - gch.black, "red color is wrong"
-      inc(gch.stat.completedCollections)
-      result = true
-
-proc collectCTBody(gch: var GcHeap) =
-  when withRealTime:
-    let t0 = getticks()
-  sysAssert(allocInv(gch.region), "collectCT: begin")
-
-  when not nimCoroutines:
-    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
-  #gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
-  if collectALittle(gch):
-    gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
-                              CycleIncrease)
-    gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
-  sysAssert(allocInv(gch.region), "collectCT: end")
-  when withRealTime:
-    let duration = getticks() - t0
-    gch.stat.maxPause = max(gch.stat.maxPause, duration)
-    when defined(reportMissedDeadlines):
-      if gch.maxPause > 0 and duration > gch.maxPause:
-        c_fprintf(stdout, "[GC] missed deadline: %ld\n", duration)
-
-when nimCoroutines:
-  proc currentStackSizes(): int =
-    for stack in items(gch.stack):
-      result = result + stack.stackSize()
-
-proc collectCT(gch: var GcHeap) =
-  # stackMarkCosts prevents some pathological behaviour: Stack marking
-  # becomes more expensive with large stacks and large stacks mean that
-  # cells with RC=0 are more likely to be kept alive by the stack.
-  when nimCoroutines:
-    let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold)
-  else:
-    let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
-  if (gch.greyStack.len >= stackMarkCosts or (cycleGC and
-      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
-      gch.recGcLock == 0:
-    collectCTBody(gch)
-
-when withRealTime:
-  proc toNano(x: int): Nanos {.inline.} =
-    result = x * 1000
-
-  proc GC_setMaxPause*(MaxPauseInUs: int) =
-    gch.maxPause = MaxPauseInUs.toNano
-
-  proc GC_step(gch: var GcHeap, us: int, strongAdvice: bool) =
-    gch.maxPause = us.toNano
-    #if (getOccupiedMem(gch.region)>=gch.cycleThreshold) or
-    #    alwaysGC or strongAdvice:
-    collectCTBody(gch)
-
-  proc GC_step*(us: int, strongAdvice = false, stackSize = -1) {.noinline.} =
-    if stackSize >= 0:
-      var stackTop {.volatile.}: pointer
-      gch.getActiveStack().pos = addr(stackTop)
-
-      for stack in gch.stack.items():
-        stack.bottomSaved = stack.bottom
-        when stackIncreases:
-          stack.bottom = cast[pointer](
-            cast[ByteAddress](stack.pos) - sizeof(pointer) * 6 - stackSize)
-        else:
-          stack.bottom = cast[pointer](
-            cast[ByteAddress](stack.pos) + sizeof(pointer) * 6 + stackSize)
-
-    GC_step(gch, us, strongAdvice)
-
-    if stackSize >= 0:
-      for stack in gch.stack.items():
-        stack.bottom = stack.bottomSaved
-
-when not defined(useNimRtl):
-  proc GC_disable() =
-    inc(gch.recGcLock)
-  proc GC_enable() =
-    if gch.recGcLock > 0:
-      dec(gch.recGcLock)
-
-  proc GC_setStrategy(strategy: GC_Strategy) =
-    discard
-
-  proc GC_enableMarkAndSweep() = discard
-  proc GC_disableMarkAndSweep() = discard
-
-  proc GC_fullCollect() =
-    var oldThreshold = gch.cycleThreshold
-    gch.cycleThreshold = 0 # forces cycle collection
-    collectCT(gch)
-    gch.cycleThreshold = oldThreshold
-
-  proc GC_getStatistics(): string =
-    GC_disable()
-    result = "[GC] total memory: " & $(getTotalMem()) & "\n" &
-             "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" &
-             "[GC] stack scans: " & $gch.stat.stackScans & "\n" &
-             "[GC] stack cells: " & $gch.stat.maxStackCells & "\n" &
-             "[GC] completed collections: " & $gch.stat.completedCollections & "\n" &
-             "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
-             "[GC] grey stack capacity: " & $gch.greyStack.cap & "\n" &
-             "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) & "\n"
-    when nimCoroutines:
-      result.add "[GC] number of stacks: " & $gch.stack.len & "\n"
-      for stack in items(gch.stack):
-        result.add "[GC]   stack " & stack.bottom.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
-    else:
-      result.add "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
-    GC_enable()
-
-{.pop.}
diff --git a/lib/system/gc_common.nim b/lib/system/gc_common.nim
index 7f6c41e1b..eb0884560 100644
--- a/lib/system/gc_common.nim
+++ b/lib/system/gc_common.nim
@@ -166,16 +166,16 @@ when defined(nimdoc):
     ## this thread will only be initialized once per thread, no matter how often
     ## it is called.
     ##
-    ## This function is available only when ``--threads:on`` and ``--tlsEmulation:off``
+    ## This function is available only when `--threads:on` and `--tlsEmulation:off`
     ## switches are used
     discard
 
   proc tearDownForeignThreadGc*() {.gcsafe.} =
-    ## Call this to tear down the GC, previously initialized by ``setupForeignThreadGc``.
+    ## Call this to tear down the GC, previously initialized by `setupForeignThreadGc`.
     ## If GC has not been previously initialized, or has already been torn down, the
     ## call does nothing.
     ##
-    ## This function is available only when ``--threads:on`` and ``--tlsEmulation:off``
+    ## This function is available only when `--threads:on` and `--tlsEmulation:off`
     ## switches are used
     discard
 elif declared(threadType):
@@ -222,9 +222,9 @@ proc stackSize(stack: ptr GcStack): int {.noinline.} =
 
   if pos != nil:
     when stackIncreases:
-      result = cast[ByteAddress](pos) -% cast[ByteAddress](stack.bottom)
+      result = cast[int](pos) -% cast[int](stack.bottom)
     else:
-      result = cast[ByteAddress](stack.bottom) -% cast[ByteAddress](pos)
+      result = cast[int](stack.bottom) -% cast[int](pos)
   else:
     result = 0
 
@@ -273,6 +273,9 @@ when nimCoroutines:
     gch.activeStack = gch.stack.find(bottom)
     gch.activeStack.setPosition(addr(sp))
 
+  proc GC_getActiveStack() : pointer {.cdecl, exportc.} =
+    return gch.activeStack.bottom
+
 when not defined(useNimRtl):
   proc nimGC_setStackBottom(theStackBottom: pointer) =
     # Initializes main stack of the thread.
@@ -292,25 +295,28 @@ when not defined(useNimRtl):
       # the first init must be the one that defines the stack bottom:
       gch.stack.bottom = theStackBottom
     elif theStackBottom != gch.stack.bottom:
-      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[ByteAddress](gch.stack.bottom)
+      var a = cast[int](theStackBottom) # and not PageMask - PageSize*2
+      var b = cast[int](gch.stack.bottom)
       #c_fprintf(stdout, "old: %p new: %p;\n",gch.stack.bottom,theStackBottom)
       when stackIncreases:
         gch.stack.bottom = cast[pointer](min(a, b))
       else:
         gch.stack.bottom = cast[pointer](max(a, b))
 
+    when nimCoroutines:
+      if theStackBottom != nil: gch.stack.bottom = theStackBottom
+
     gch.stack.setPosition(theStackBottom)
 {.pop.}
 
 proc isOnStack(p: pointer): bool =
   var stackTop {.volatile, noinit.}: pointer
   stackTop = addr(stackTop)
-  var a = cast[ByteAddress](gch.getActiveStack().bottom)
-  var b = cast[ByteAddress](stackTop)
+  var a = cast[int](gch.getActiveStack().bottom)
+  var b = cast[int](stackTop)
   when not stackIncreases:
     swap(a, b)
-  var x = cast[ByteAddress](p)
+  var x = cast[int](p)
   result = a <=% x and x <=% b
 
 when defined(sparc): # For SPARC architecture.
@@ -331,7 +337,7 @@ when defined(sparc): # For SPARC architecture.
     # Addresses decrease as the stack grows.
     while sp <= max:
       gcMark(gch, sp[])
-      sp = cast[PPointer](cast[ByteAddress](sp) +% sizeof(pointer))
+      sp = cast[PPointer](cast[int](sp) +% sizeof(pointer))
 
 elif defined(ELATE):
   {.error: "stack marking code is to be written for this architecture".}
@@ -348,8 +354,8 @@ elif stackIncreases:
 
   template forEachStackSlotAux(gch, gcMark: untyped) {.dirty.} =
     for stack in gch.stack.items():
-      var max = cast[ByteAddress](gch.stack.bottom)
-      var sp = cast[ByteAddress](addr(registers)) -% sizeof(pointer)
+      var max = cast[int](gch.stack.bottom)
+      var sp = cast[int](addr(registers)) -% sizeof(pointer)
       while sp >=% max:
         gcMark(gch, cast[PPointer](sp)[])
         sp = sp -% sizeof(pointer)
@@ -377,15 +383,14 @@ else:
     gch.getActiveStack().setPosition(addr(registers))
     if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
       for stack in gch.stack.items():
-        var max = cast[ByteAddress](stack.bottom)
-        var sp = cast[ByteAddress](addr(registers))
+        var max = cast[int](stack.bottom)
+        var sp = cast[int](addr(registers))
         when defined(amd64):
           if stack.isActiveStack():
             # words within the jmp_buf structure may not be properly aligned.
             let regEnd = sp +% sizeof(registers)
             while sp <% regEnd:
               gcMark(gch, cast[PPointer](sp)[])
-              gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
               sp = sp +% sizeof(pointer)
         # Make sure sp is word-aligned
         sp = sp and not (sizeof(pointer) - 1)
@@ -409,7 +414,7 @@ else:
 # end of non-portable code
 # ----------------------------------------------------------------------------
 
-proc prepareDealloc(cell: PCell) =
+proc prepareDealloc(cell: PCell) {.raises: [].} =
   when declared(useMarkForDebug):
     when useMarkForDebug:
       gcAssert(cell notin gch.marked, "Cell still alive!")
@@ -426,10 +431,10 @@ proc prepareDealloc(cell: PCell) =
   decTypeSize(cell, t)
 
 proc deallocHeap*(runFinalizers = true; allowGcAfterwards = true) =
-  ## Frees the thread local heap. Runs every finalizer if ``runFinalizers``
-  ## is true. If ``allowGcAfterwards`` is true, a minimal amount of allocation
+  ## Frees the thread local heap. Runs every finalizer if `runFinalizers`
+  ## is true. If `allowGcAfterwards` is true, a minimal amount of allocation
   ## happens to ensure the GC can continue to work after the call
-  ## to ``deallocHeap``.
+  ## to `deallocHeap`.
   template deallocCell(x) =
     if isCell(x):
       # cast to PCell is correct here:
@@ -452,7 +457,7 @@ proc deallocHeap*(runFinalizers = true; allowGcAfterwards = true) =
     initGC()
 
 type
-  GlobalMarkerProc = proc () {.nimcall, benign.}
+  GlobalMarkerProc = proc () {.nimcall, benign, raises: [].}
 var
   globalMarkersLen {.exportc.}: int
   globalMarkers {.exportc.}: array[0..3499, GlobalMarkerProc]
@@ -466,7 +471,7 @@ proc nimRegisterGlobalMarker(markerProc: GlobalMarkerProc) {.compilerproc.} =
     inc globalMarkersLen
   else:
     cstderr.rawWrite("[GC] cannot register global variable; too many global variables")
-    quit 1
+    rawQuit 1
 
 proc nimRegisterThreadLocalMarker(markerProc: GlobalMarkerProc) {.compilerproc.} =
   if threadLocalMarkersLen <= high(threadLocalMarkers):
@@ -474,4 +479,4 @@ proc nimRegisterThreadLocalMarker(markerProc: GlobalMarkerProc) {.compilerproc.}
     inc threadLocalMarkersLen
   else:
     cstderr.rawWrite("[GC] cannot register thread local variable; too many thread local variables")
-    quit 1
+    rawQuit 1
diff --git a/lib/system/gc_hooks.nim b/lib/system/gc_hooks.nim
index 70f02e657..ace62eea0 100644
--- a/lib/system/gc_hooks.nim
+++ b/lib/system/gc_hooks.nim
@@ -24,7 +24,7 @@ proc nimRegisterGlobalMarker(markerProc: GlobalMarkerProc) {.compilerproc.} =
     inc globalMarkersLen
   else:
     cstderr.rawWrite("[GC] cannot register global variable; too many global variables")
-    quit 1
+    rawQuit 1
 
 proc nimRegisterThreadLocalMarker(markerProc: GlobalMarkerProc) {.compilerproc.} =
   if threadLocalMarkersLen <= high(threadLocalMarkers):
@@ -32,7 +32,7 @@ proc nimRegisterThreadLocalMarker(markerProc: GlobalMarkerProc) {.compilerproc.}
     inc threadLocalMarkersLen
   else:
     cstderr.rawWrite("[GC] cannot register thread local variable; too many thread local variables")
-    quit 1
+    rawQuit 1
 
 proc traverseGlobals*() =
   for i in 0..globalMarkersLen-1:
diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim
index 6683b9e34..c885a6893 100644
--- a/lib/system/gc_ms.nim
+++ b/lib/system/gc_ms.nim
@@ -27,7 +27,7 @@ when defined(memProfiler):
   proc nimProfile(requestedSize: int)
 
 when hasThreadSupport:
-  import sharedlist
+  import std/sharedlist
 
 type
   WalkOp = enum
@@ -36,7 +36,7 @@ type
                    # local
     waMarkPrecise  # fast precise marking
 
-  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
+  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign, raises: [].}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
 
@@ -90,15 +90,15 @@ template gcAssert(cond: bool, msg: string) =
     if not cond:
       cstderr.rawWrite "[GCASSERT] "
       cstderr.rawWrite msg
-      quit 1
+      rawQuit 1
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
+  result = cast[pointer](cast[int](cell)+%ByteAddress(sizeof(Cell)))
 
 proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
+  result = cast[PCell](cast[int](usr)-%ByteAddress(sizeof(Cell)))
 
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
   # used for code generation concerning debugging
@@ -115,10 +115,10 @@ when BitsPerPage mod (sizeof(int)*8) != 0:
   {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
 
 # forward declarations:
-proc collectCT(gch: var GcHeap; size: int) {.benign.}
-proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
-proc doOperation(p: pointer, op: WalkOp) {.benign.}
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
+proc collectCT(gch: var GcHeap; size: int) {.benign, raises: [].}
+proc forAllChildren(cell: PCell, op: WalkOp) {.benign, raises: [].}
+proc doOperation(p: pointer, op: WalkOp) {.benign, raises: [].}
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign, raises: [].}
 # we need the prototype here for debugging purposes
 
 when defined(nimGcRefLeak):
@@ -217,7 +217,7 @@ proc initGC() =
     gcAssert(gch.gcThreadId >= 0, "invalid computed thread ID")
 
 proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
   of nkList:
@@ -229,7 +229,7 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
   of nkNone: sysAssert(false, "forAllSlotsAux")
 
 proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
     case mt.kind
@@ -255,7 +255,7 @@ proc forAllChildren(cell: PCell, op: WalkOp) =
       forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
     of tySequence:
       when not defined(nimSeqsV2):
-        var d = cast[ByteAddress](cellToUsr(cell))
+        var d = cast[int](cellToUsr(cell))
         var s = cast[PGenericSeq](d)
         if s != nil:
           for i in 0..s.len-1:
@@ -268,7 +268,7 @@ proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch, size + sizeof(Cell))
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
-  gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  gcAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
   when leakDetector and not hasThreadSupport:
@@ -336,9 +336,9 @@ when not defined(nimSeqsV2):
 
     var oldsize = align(GenericSeqSize, elemAlign) + cast[PGenericSeq](old).len*elemSize
     copyMem(res, ol, oldsize + sizeof(Cell))
-    zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
+    zeroMem(cast[pointer](cast[int](res)+% oldsize +% sizeof(Cell)),
             newsize-oldsize)
-    sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
+    sysAssert((cast[int](res) and (MemAlign-1)) == 0, "growObj: 3")
     when withBitvectors: incl(gch.allocated, res)
     when useCellIds:
       inc gch.idGenerator
@@ -428,15 +428,6 @@ proc sweep(gch: var GcHeap) =
         if c.refcount == rcBlack: c.refcount = rcWhite
         else: freeCyclicCell(gch, c)
 
-when false:
-  proc newGcInvariant*() =
-    for x in allObjects(gch.region):
-      if isCell(x):
-        var c = cast[PCell](x)
-        if c.typ == nil:
-          writeStackTrace()
-          quit 1
-
 proc markGlobals(gch: var GcHeap) =
   if gch.gcThreadId == 0:
     when defined(nimTracing):
@@ -455,11 +446,10 @@ proc markGlobals(gch: var GcHeap) =
 
 proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
-  var cell = usrToCell(p)
-  var c = cast[ByteAddress](cell)
+  var c = cast[int](p)
   if c >% PageSize:
     # fast check: does it look like a cell?
-    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
+    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, p))
     if objStart != nil:
       mark(gch, objStart)
 
@@ -507,7 +497,7 @@ when not defined(useNimRtl):
     gch.cycleThreshold = InitialThreshold
 
   proc GC_disableMarkAndSweep() =
-    gch.cycleThreshold = high(gch.cycleThreshold)-1
+    gch.cycleThreshold = high(typeof(gch.cycleThreshold))-1
     # set to the max value to suppress the cycle detector
 
   when defined(nimTracing):
diff --git a/lib/system/gc_regions.nim b/lib/system/gc_regions.nim
index 4f802c812..d96de7eac 100644
--- a/lib/system/gc_regions.nim
+++ b/lib/system/gc_regions.nim
@@ -7,6 +7,10 @@
 #
 
 # "Stack GC" for embedded devices or ultra performance requirements.
+import std/private/syslocks
+
+when defined(memProfiler):
+  proc nimProfile(requestedSize: int) {.benign.}
 
 when defined(useMalloc):
   proc roundup(x, v: int): int {.inline.} =
@@ -84,16 +88,16 @@ type
     region: ptr MemRegion
 
 var
-  tlRegion {.threadVar.}: MemRegion
-#  tempStrRegion {.threadVar.}: MemRegion  # not yet used
+  tlRegion {.threadvar.}: MemRegion
+#  tempStrRegion {.threadvar.}: MemRegion  # not yet used
 
-template withRegion*(r: MemRegion; body: untyped) =
+template withRegion*(r: var MemRegion; body: untyped) =
   let oldRegion = tlRegion
   tlRegion = r
   try:
     body
   finally:
-    #r = tlRegion
+    r = tlRegion
     tlRegion = oldRegion
 
 template inc(p: pointer, s: int) =
@@ -262,14 +266,13 @@ when false:
     setObstackPtr(obs)
 
 template withScratchRegion*(body: untyped) =
-  var scratch: MemRegion
   let oldRegion = tlRegion
-  tlRegion = scratch
+  tlRegion = MemRegion()
   try:
     body
   finally:
+    deallocAll()
     tlRegion = oldRegion
-    deallocAll(scratch)
 
 when false:
   proc joinRegion*(dest: var MemRegion; src: MemRegion) =
@@ -435,5 +438,5 @@ proc getTotalMem*(r: MemRegion): int =
 
 proc nimGC_setStackBottom(theStackBottom: pointer) = discard
 
-proc nimGCref(x: pointer) {.compilerProc.} = discard
-proc nimGCunref(x: pointer) {.compilerProc.} = discard
+proc nimGCref(x: pointer) {.compilerproc.} = discard
+proc nimGCunref(x: pointer) {.compilerproc.} = discard
diff --git a/lib/system/hti.nim b/lib/system/hti.nim
index b77f7ccde..a26aff982 100644
--- a/lib/system/hti.nim
+++ b/lib/system/hti.nim
@@ -40,7 +40,7 @@ type
     tyPointer,
     tyOpenArray,
     tyString,
-    tyCString,
+    tyCstring,
     tyForward,
     tyInt,
     tyInt8,
@@ -59,7 +59,7 @@ type
     tyOwned, tyUnused1, tyUnused2,
     tyVarargsHidden,
     tyUncheckedArray,
-    tyProxyHidden,
+    tyErrorHidden,
     tyBuiltInTypeClassHidden,
     tyUserTypeClassHidden,
     tyUserTypeClassInstHidden,
@@ -98,6 +98,8 @@ type
     finalizer*: pointer # the finalizer for the type
     marker*: proc (p: pointer, op: int) {.nimcall, benign, tags: [], raises: [].} # marker proc for GC
     deepcopy: proc (p: pointer): pointer {.nimcall, benign, tags: [], raises: [].}
+    when defined(nimSeqsV2):
+      typeInfoV2*: pointer
     when defined(nimTypeNames):
       name: cstring
       nextType: ptr TNimType
diff --git a/lib/system/inclrtl.nim b/lib/system/inclrtl.nim
index 4193f2bdd..3bf0b9893 100644
--- a/lib/system/inclrtl.nim
+++ b/lib/system/inclrtl.nim
@@ -30,12 +30,13 @@ when defined(createNimRtl):
   {.pragma: inl.}
   {.pragma: compilerRtl, compilerproc, exportc: "nimrtl_$1", dynlib.}
 elif defined(useNimRtl):
-  when defined(windows):
-    const nimrtl* = "nimrtl.dll"
-  elif defined(macosx):
-    const nimrtl* = "libnimrtl.dylib"
-  else:
-    const nimrtl* = "libnimrtl.so"
+  #[
+  `{.rtl.}` should only be used for non-generic procs.
+  ]#
+  const nimrtl* =
+    when defined(windows): "nimrtl.dll"
+    elif defined(macosx): "libnimrtl.dylib"
+    else: "libnimrtl.so"
   {.pragma: rtl, importc: "nimrtl_$1", dynlib: nimrtl, gcsafe.}
   {.pragma: inl.}
   {.pragma: compilerRtl, compilerproc, importc: "nimrtl_$1", dynlib: nimrtl.}
@@ -44,7 +45,6 @@ else:
   {.pragma: inl, inline.}
   {.pragma: compilerRtl, compilerproc.}
 
-when defined(nimlocks):
-  {.pragma: benign, gcsafe, locks: 0.}
-else:
-  {.pragma: benign, gcsafe.}
+{.pragma: benign, gcsafe.}
+
+{.push sinkInference: on.}
diff --git a/lib/system/indexerrors.nim b/lib/system/indexerrors.nim
index 1b91789bd..6a8cb8a0a 100644
--- a/lib/system/indexerrors.nim
+++ b/lib/system/indexerrors.nim
@@ -1,4 +1,5 @@
 # imported by other modules, unlike helpers.nim which is included
+# xxx this is now included instead of imported, we should import instead
 
 template formatErrorIndexBound*[T](i, a, b: T): string =
   when defined(standalone):
@@ -9,3 +10,6 @@ template formatErrorIndexBound*[T](i, a, b: T): string =
 
 template formatErrorIndexBound*[T](i, n: T): string =
   formatErrorIndexBound(i, 0, n)
+
+template formatFieldDefect*(f, discVal): string =
+  f & discVal & "'"
diff --git a/lib/system/indices.nim b/lib/system/indices.nim
new file mode 100644
index 000000000..f2bad2528
--- /dev/null
+++ b/lib/system/indices.nim
@@ -0,0 +1,164 @@
+when not defined(nimHasSystemRaisesDefect):
+  {.pragma: systemRaisesDefect.}
+
+type
+  BackwardsIndex* = distinct int ## Type that is constructed by `^` for
+                                 ## reversed array accesses.
+                                 ## (See `^ template <#^.t,int>`_)
+
+template `^`*(x: int): BackwardsIndex = BackwardsIndex(x)
+  ## Builtin `roof`:idx: operator that can be used for convenient array access.
+  ## `a[^x]` is a shortcut for `a[a.len-x]`.
+  ##
+  ##   ```nim
+  ##   let
+  ##     a = [1, 3, 5, 7, 9]
+  ##     b = "abcdefgh"
+  ##
+  ##   echo a[^1] # => 9
+  ##   echo b[^2] # => g
+  ##   ```
+
+proc `[]`*[T](s: openArray[T]; i: BackwardsIndex): T {.inline, systemRaisesDefect.} =
+  system.`[]`(s, s.len - int(i))
+
+proc `[]`*[Idx, T](a: array[Idx, T]; i: BackwardsIndex): T {.inline, systemRaisesDefect.} =
+  a[Idx(a.len - int(i) + int low(a))]
+proc `[]`*(s: string; i: BackwardsIndex): char {.inline, systemRaisesDefect.} = s[s.len - int(i)]
+
+proc `[]`*[T](s: var openArray[T]; i: BackwardsIndex): var T {.inline, systemRaisesDefect.} =
+  system.`[]`(s, s.len - int(i))
+proc `[]`*[Idx, T](a: var array[Idx, T]; i: BackwardsIndex): var T {.inline, systemRaisesDefect.} =
+  a[Idx(a.len - int(i) + int low(a))]
+proc `[]`*(s: var string; i: BackwardsIndex): var char {.inline, systemRaisesDefect.} = s[s.len - int(i)]
+
+proc `[]=`*[T](s: var openArray[T]; i: BackwardsIndex; x: T) {.inline, systemRaisesDefect.} =
+  system.`[]=`(s, s.len - int(i), x)
+proc `[]=`*[Idx, T](a: var array[Idx, T]; i: BackwardsIndex; x: T) {.inline, systemRaisesDefect.} =
+  a[Idx(a.len - int(i) + int low(a))] = x
+proc `[]=`*(s: var string; i: BackwardsIndex; x: char) {.inline, systemRaisesDefect.} =
+  s[s.len - int(i)] = x
+
+template `..^`*(a, b: untyped): untyped =
+  ## A shortcut for `.. ^` to avoid the common gotcha that a space between
+  ## '..' and '^' is required.
+  a .. ^b
+
+template `..<`*(a, b: untyped): untyped =
+  ## A shortcut for `a .. pred(b)`.
+  ##   ```nim
+  ##   for i in 5 ..< 9:
+  ##     echo i # => 5; 6; 7; 8
+  ##   ```
+  a .. (when b is BackwardsIndex: succ(b) else: pred(b))
+
+template `[]`*(s: string; i: int): char = arrGet(s, i)
+template `[]=`*(s: string; i: int; val: char) = arrPut(s, i, val)
+
+template `^^`(s, i: untyped): untyped =
+  (when i is BackwardsIndex: s.len - int(i) else: int(i))
+
+template spliceImpl(s, a, L, b: typed): untyped =
+  # make room for additional elements or cut:
+  var shift = b.len - max(0,L)  # ignore negative slice size
+  var newLen = s.len + shift
+  if shift > 0:
+    # enlarge:
+    setLen(s, newLen)
+    for i in countdown(newLen-1, a+b.len): movingCopy(s[i], s[i-shift])
+  else:
+    for i in countup(a+b.len, newLen-1): movingCopy(s[i], s[i-shift])
+    # cut down:
+    setLen(s, newLen)
+  # fill the hole:
+  for i in 0 ..< b.len: s[a+i] = b[i]
+
+proc `[]`*[T, U: Ordinal](s: string, x: HSlice[T, U]): string {.inline, systemRaisesDefect.} =
+  ## Slice operation for strings.
+  ## Returns the inclusive range `[s[x.a], s[x.b]]`:
+  ##   ```nim
+  ##   var s = "abcdef"
+  ##   assert s[1..3] == "bcd"
+  ##   ```
+  let a = s ^^ x.a
+  let L = (s ^^ x.b) - a + 1
+  result = newString(L)
+  for i in 0 ..< L: result[i] = s[i + a]
+
+proc `[]=`*[T, U: Ordinal](s: var string, x: HSlice[T, U], b: string) {.systemRaisesDefect.} =
+  ## Slice assignment for strings.
+  ##
+  ## If `b.len` is not exactly the number of elements that are referred to
+  ## by `x`, a `splice`:idx: is performed:
+  ##
+  runnableExamples:
+    var s = "abcdefgh"
+    s[1 .. ^2] = "xyz"
+    assert s == "axyzh"
+
+  var a = s ^^ x.a
+  var L = (s ^^ x.b) - a + 1
+  if L == b.len:
+    for i in 0..<L: s[i+a] = b[i]
+  else:
+    spliceImpl(s, a, L, b)
+
+proc `[]`*[Idx, T; U, V: Ordinal](a: array[Idx, T], x: HSlice[U, V]): seq[T] {.systemRaisesDefect.} =
+  ## Slice operation for arrays.
+  ## Returns the inclusive range `[a[x.a], a[x.b]]`:
+  ##   ```nim
+  ##   var a = [1, 2, 3, 4]
+  ##   assert a[0..2] == @[1, 2, 3]
+  ##   ```
+  ##
+  ## See also:
+  ## * `toOpenArray(array[I, T];I,I) <#toOpenArray,array[I,T],I,I>`_
+  let xa = a ^^ x.a
+  let L = (a ^^ x.b) - xa + 1
+  result = newSeq[T](L)
+  for i in 0..<L: result[i] = a[Idx(i + xa)]
+
+proc `[]=`*[Idx, T; U, V: Ordinal](a: var array[Idx, T], x: HSlice[U, V], b: openArray[T]) {.systemRaisesDefect.} =
+  ## Slice assignment for arrays.
+  ##   ```nim
+  ##   var a = [10, 20, 30, 40, 50]
+  ##   a[1..2] = @[99, 88]
+  ##   assert a == [10, 99, 88, 40, 50]
+  ##   ```
+  let xa = a ^^ x.a
+  let L = (a ^^ x.b) - xa + 1
+  if L == b.len:
+    for i in 0..<L: a[Idx(i + xa)] = b[i]
+  else:
+    sysFatal(RangeDefect, "different lengths for slice assignment")
+
+proc `[]`*[T; U, V: Ordinal](s: openArray[T], x: HSlice[U, V]): seq[T] {.systemRaisesDefect.} =
+  ## Slice operation for sequences.
+  ## Returns the inclusive range `[s[x.a], s[x.b]]`:
+  ##   ```nim
+  ##   var s = @[1, 2, 3, 4]
+  ##   assert s[0..2] == @[1, 2, 3]
+  ##   ```
+  ##
+  ## See also:
+  ## * `toOpenArray(openArray[T];int,int) <#toOpenArray,openArray[T],int,int>`_
+  let a = s ^^ x.a
+  let L = (s ^^ x.b) - a + 1
+  newSeq(result, L)
+  for i in 0 ..< L: result[i] = s[i + a]
+
+proc `[]=`*[T; U, V: Ordinal](s: var seq[T], x: HSlice[U, V], b: openArray[T]) {.systemRaisesDefect.} =
+  ## Slice assignment for sequences.
+  ##
+  ## If `b.len` is not exactly the number of elements that are referred to
+  ## by `x`, a `splice`:idx: is performed.
+  runnableExamples:
+    var s = @"abcdefgh"
+    s[1 .. ^2] = @"xyz"
+    assert s == @"axyzh"
+  let a = s ^^ x.a
+  let L = (s ^^ x.b) - a + 1
+  if L == b.len:
+    for i in 0 ..< L: s[i+a] = b[i]
+  else:
+    spliceImpl(s, a, L, b)
diff --git a/lib/system/io.nim b/lib/system/io.nim
deleted file mode 100644
index 482057214..000000000
--- a/lib/system/io.nim
+++ /dev/null
@@ -1,852 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2019 Nim contributors
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## This is a part of ``system.nim``, you should not manually import it.
-
-
-include inclrtl
-import std/private/since
-import formatfloat
-
-# ----------------- IO Part ------------------------------------------------
-type
-  CFile {.importc: "FILE", header: "<stdio.h>",
-          incompleteStruct.} = object
-  File* = ptr CFile ## The type representing a file handle.
-
-  FileMode* = enum           ## The file mode when opening a file.
-    fmRead,                   ## Open the file for read access only.
-    fmWrite,                  ## Open the file for write access only.
-                              ## If the file does not exist, it will be
-                              ## created. Existing files will be cleared!
-    fmReadWrite,              ## Open the file for read and write access.
-                              ## If the file does not exist, it will be
-                              ## created. Existing files will be cleared!
-    fmReadWriteExisting,      ## Open the file for read and write access.
-                              ## If the file does not exist, it will not be
-                              ## created. The existing file will not be cleared.
-    fmAppend                  ## Open the file for writing only; append data
-                              ## at the end.
-
-  FileHandle* = cint ## type that represents an OS file handle; this is
-                      ## useful for low-level file access
-
-# text file handling:
-when not defined(nimscript) and not defined(js):
-  # duplicated between io and ansi_c
-  const stdioUsesMacros = (defined(osx) or defined(freebsd) or defined(dragonfly)) and not defined(emscripten)
-  const stderrName = when stdioUsesMacros: "__stderrp" else: "stderr"
-  const stdoutName = when stdioUsesMacros: "__stdoutp" else: "stdout"
-  const stdinName = when stdioUsesMacros: "__stdinp" else: "stdin"
-
-  var
-    stdin* {.importc: stdinName, header: "<stdio.h>".}: File
-      ## The standard input stream.
-    stdout* {.importc: stdoutName, header: "<stdio.h>".}: File
-      ## The standard output stream.
-    stderr* {.importc: stderrName, header: "<stdio.h>".}: File
-      ## The standard error stream.
-
-when defined(useStdoutAsStdmsg):
-  template stdmsg*: File = stdout
-else:
-  template stdmsg*: File = stderr
-    ## Template which expands to either stdout or stderr depending on
-    ## `useStdoutAsStdmsg` compile-time switch.
-
-when defined(windows):
-  proc c_fileno(f: File): cint {.
-    importc: "_fileno", header: "<stdio.h>".}
-else:
-  proc c_fileno(f: File): cint {.
-    importc: "fileno", header: "<fcntl.h>".}
-
-when defined(windows):
-  proc c_fdopen(filehandle: cint, mode: cstring): File {.
-    importc: "_fdopen", header: "<stdio.h>".}
-else:
-  proc c_fdopen(filehandle: cint, mode: cstring): File {.
-    importc: "fdopen", header: "<stdio.h>".}
-proc c_fputs(c: cstring, f: File): cint {.
-  importc: "fputs", header: "<stdio.h>", tags: [WriteIOEffect].}
-proc c_fgets(c: cstring, n: cint, f: File): cstring {.
-  importc: "fgets", header: "<stdio.h>", tags: [ReadIOEffect].}
-proc c_fgetc(stream: File): cint {.
-  importc: "fgetc", header: "<stdio.h>", tags: [ReadIOEffect].}
-proc c_ungetc(c: cint, f: File): cint {.
-  importc: "ungetc", header: "<stdio.h>", tags: [].}
-proc c_putc(c: cint, stream: File): cint {.
-  importc: "putc", header: "<stdio.h>", tags: [WriteIOEffect].}
-proc c_fflush(f: File): cint {.
-  importc: "fflush", header: "<stdio.h>".}
-proc c_fclose(f: File): cint {.
-  importc: "fclose", header: "<stdio.h>".}
-proc c_clearerr(f: File) {.
-  importc: "clearerr", header: "<stdio.h>".}
-proc c_feof(f: File): cint {.
-  importc: "feof", header: "<stdio.h>".}
-
-when not declared(c_fwrite):
-  proc c_fwrite(buf: pointer, size, n: csize_t, f: File): cint {.
-    importc: "fwrite", header: "<stdio.h>".}
-
-# C routine that is used here:
-proc c_fread(buf: pointer, size, n: csize_t, f: File): csize_t {.
-  importc: "fread", header: "<stdio.h>", tags: [ReadIOEffect].}
-when defined(windows):
-  when not defined(amd64):
-    proc c_fseek(f: File, offset: int64, whence: cint): cint {.
-      importc: "fseek", header: "<stdio.h>", tags: [].}
-    proc c_ftell(f: File): int64 {.
-      importc: "ftell", header: "<stdio.h>", tags: [].}
-  else:
-    proc c_fseek(f: File, offset: int64, whence: cint): cint {.
-      importc: "_fseeki64", header: "<stdio.h>", tags: [].}
-    proc c_ftell(f: File): int64 {.
-      importc: "_ftelli64", header: "<stdio.h>", tags: [].}
-else:
-  proc c_fseek(f: File, offset: int64, whence: cint): cint {.
-    importc: "fseeko", header: "<stdio.h>", tags: [].}
-  proc c_ftell(f: File): int64 {.
-    importc: "ftello", header: "<stdio.h>", tags: [].}
-proc c_ferror(f: File): cint {.
-  importc: "ferror", header: "<stdio.h>", tags: [].}
-proc c_setvbuf(f: File, buf: pointer, mode: cint, size: csize_t): cint {.
-  importc: "setvbuf", header: "<stdio.h>", tags: [].}
-
-proc c_fprintf(f: File, frmt: cstring): cint {.
-  importc: "fprintf", header: "<stdio.h>", varargs, discardable.}
-proc c_fputc(c: char, f: File): cint {.
-  importc: "fputc", header: "<stdio.h>".}
-
-# When running nim in android app, stdout goes nowhere, so echo gets ignored
-# To redirect echo to the android logcat, use -d:androidNDK
-when defined(androidNDK):
-  const ANDROID_LOG_VERBOSE = 2.cint
-  proc android_log_print(prio: cint, tag: cstring, fmt: cstring): cint
-    {.importc: "__android_log_print", header: "<android/log.h>", varargs, discardable.}
-
-template sysFatal(exc, msg) =
-  raise newException(exc, msg)
-
-proc raiseEIO(msg: string) {.noinline, noreturn.} =
-  sysFatal(IOError, msg)
-
-proc raiseEOF() {.noinline, noreturn.} =
-  sysFatal(EOFError, "EOF reached")
-
-proc strerror(errnum: cint): cstring {.importc, header: "<string.h>".}
-
-when not defined(NimScript):
-  var
-    errno {.importc, header: "<errno.h>".}: cint ## error variable
-    EINTR {.importc: "EINTR", header: "<errno.h>".}: cint
-
-proc checkErr(f: File) =
-  when not defined(NimScript):
-    if c_ferror(f) != 0:
-      let msg = "errno: " & $errno & " `" & $strerror(errno) & "`"
-      c_clearerr(f)
-      raiseEIO(msg)
-  else:
-    # shouldn't happen
-    quit(1)
-
-{.push stackTrace:off, profiler:off.}
-proc readBuffer*(f: File, buffer: pointer, len: Natural): int {.
-  tags: [ReadIOEffect], benign.} =
-  ## reads `len` bytes into the buffer pointed to by `buffer`. Returns
-  ## the actual number of bytes that have been read which may be less than
-  ## `len` (if not as many bytes are remaining), but not greater.
-  result = cast[int](c_fread(buffer, 1, cast[csize_t](len), f))
-  if result != len: checkErr(f)
-
-proc readBytes*(f: File, a: var openArray[int8|uint8], start, len: Natural): int {.
-  tags: [ReadIOEffect], benign.} =
-  ## reads `len` bytes into the buffer `a` starting at ``a[start]``. Returns
-  ## the actual number of bytes that have been read which may be less than
-  ## `len` (if not as many bytes are remaining), but not greater.
-  result = readBuffer(f, addr(a[start]), len)
-
-proc readChars*(f: File, a: var openArray[char], start, len: Natural): int {.
-  tags: [ReadIOEffect], benign.} =
-  ## reads `len` bytes into the buffer `a` starting at ``a[start]``. Returns
-  ## the actual number of bytes that have been read which may be less than
-  ## `len` (if not as many bytes are remaining), but not greater.
-  ##
-  ## **Warning:** The buffer `a` must be pre-allocated. This can be done
-  ## using, for example, ``newString``.
-  if (start + len) > len(a):
-    raiseEIO("buffer overflow: (start+len) > length of openarray buffer")
-  result = readBuffer(f, addr(a[start]), len)
-
-proc write*(f: File, c: cstring) {.tags: [WriteIOEffect], benign.} =
-  ## Writes a value to the file `f`. May throw an IO exception.
-  discard c_fputs(c, f)
-  checkErr(f)
-
-proc writeBuffer*(f: File, buffer: pointer, len: Natural): int {.
-  tags: [WriteIOEffect], benign.} =
-  ## writes the bytes of buffer pointed to by the parameter `buffer` to the
-  ## file `f`. Returns the number of actual written bytes, which may be less
-  ## than `len` in case of an error.
-  result = cast[int](c_fwrite(buffer, 1, cast[csize_t](len), f))
-  checkErr(f)
-
-proc writeBytes*(f: File, a: openArray[int8|uint8], start, len: Natural): int {.
-  tags: [WriteIOEffect], benign.} =
-  ## writes the bytes of ``a[start..start+len-1]`` to the file `f`. Returns
-  ## the number of actual written bytes, which may be less than `len` in case
-  ## of an error.
-  var x = cast[ptr UncheckedArray[int8]](a)
-  result = writeBuffer(f, addr(x[int(start)]), len)
-
-proc writeChars*(f: File, a: openArray[char], start, len: Natural): int {.
-  tags: [WriteIOEffect], benign.} =
-  ## writes the bytes of ``a[start..start+len-1]`` to the file `f`. Returns
-  ## the number of actual written bytes, which may be less than `len` in case
-  ## of an error.
-  var x = cast[ptr UncheckedArray[int8]](a)
-  result = writeBuffer(f, addr(x[int(start)]), len)
-
-when defined(windows):
-  proc writeWindows(f: File; s: string; doRaise = false) =
-    # Don't ask why but the 'printf' family of function is the only thing
-    # that writes utf-8 strings reliably on Windows. At least on my Win 10
-    # machine. We also enable `setConsoleOutputCP(65001)` now by default.
-    # But we cannot call printf directly as the string might contain \0.
-    # So we have to loop over all the sections separated by potential \0s.
-    var i = c_fprintf(f, "%s", s)
-    while i < s.len:
-      if s[i] == '\0':
-        let w = c_fputc('\0', f)
-        if w != 0:
-          if doRaise: raiseEIO("cannot write string to file")
-          break
-        inc i
-      else:
-        let w = c_fprintf(f, "%s", unsafeAddr s[i])
-        if w <= 0:
-          if doRaise: raiseEIO("cannot write string to file")
-          break
-        inc i, w
-
-proc write*(f: File, s: string) {.tags: [WriteIOEffect], benign.} =
-  when defined(windows):
-    writeWindows(f, s, doRaise = true)
-  else:
-    if writeBuffer(f, cstring(s), s.len) != s.len:
-      raiseEIO("cannot write string to file")
-{.pop.}
-
-when NoFakeVars:
-  when defined(windows):
-    const
-      IOFBF = cint(0)
-      IONBF = cint(4)
-  else:
-    # On all systems I could find, including Linux, Mac OS X, and the BSDs
-    const
-      IOFBF = cint(0)
-      IONBF = cint(2)
-else:
-  var
-    IOFBF {.importc: "_IOFBF", nodecl.}: cint
-    IONBF {.importc: "_IONBF", nodecl.}: cint
-
-const SupportIoctlInheritCtl = (defined(linux) or defined(bsd)) and
-                              not defined(nimscript)
-when SupportIoctlInheritCtl:
-  var
-    FIOCLEX {.importc, header: "<sys/ioctl.h>".}: cint
-    FIONCLEX {.importc, header: "<sys/ioctl.h>".}: cint
-
-  proc c_ioctl(fd: cint, request: cint): cint {.
-    importc: "ioctl", header: "<sys/ioctl.h>", varargs.}
-elif defined(posix) and not defined(nimscript):
-  var
-    F_GETFD {.importc, header: "<fcntl.h>".}: cint
-    F_SETFD {.importc, header: "<fcntl.h>".}: cint
-    FD_CLOEXEC {.importc, header: "<fcntl.h>".}: cint
-
-  proc c_fcntl(fd: cint, cmd: cint): cint {.
-    importc: "fcntl", header: "<fcntl.h>", varargs.}
-elif defined(windows):
-  const HANDLE_FLAG_INHERIT = culong 0x1
-  proc getOsfhandle(fd: cint): int {.
-    importc: "_get_osfhandle", header: "<io.h>".}
-
-  proc setHandleInformation(handle: int, mask, flags: culong): cint {.
-    importc: "SetHandleInformation", header: "<handleapi.h>".}
-
-const
-  BufSize = 4000
-
-proc close*(f: File) {.tags: [], gcsafe.} =
-  ## Closes the file.
-  if not f.isNil:
-    discard c_fclose(f)
-
-proc readChar*(f: File): char {.tags: [ReadIOEffect].} =
-  ## Reads a single character from the stream `f`. Should not be used in
-  ## performance sensitive code.
-  let x = c_fgetc(f)
-  if x < 0:
-    checkErr(f)
-    raiseEOF()
-  result = char(x)
-
-proc flushFile*(f: File) {.tags: [WriteIOEffect].} =
-  ## Flushes `f`'s buffer.
-  discard c_fflush(f)
-
-proc getFileHandle*(f: File): FileHandle =
-  ## returns the file handle of the file ``f``. This is only useful for
-  ## platform specific programming.
-  ## Note that on Windows this doesn't return the Windows-specific handle,
-  ## but the C library's notion of a handle, whatever that means.
-  ## Use `getOsFileHandle` instead.
-  c_fileno(f)
-
-proc getOsFileHandle*(f: File): FileHandle =
-  ## returns the OS file handle of the file ``f``. This is only useful for
-  ## platform specific programming.
-  when defined(windows):
-    result = FileHandle getOsfhandle(cint getFileHandle(f))
-  else:
-    result = c_fileno(f)
-
-when defined(nimdoc) or (defined(posix) and not defined(nimscript)) or defined(windows):
-  proc setInheritable*(f: FileHandle, inheritable: bool): bool =
-    ## control whether a file handle can be inherited by child processes. Returns
-    ## ``true`` on success. This requires the OS file handle, which can be
-    ## retrieved via `getOsFileHandle <#getOsFileHandle,File>`_.
-    ##
-    ## This procedure is not guaranteed to be available for all platforms. Test for
-    ## availability with `declared() <system.html#declared,untyped>`.
-    when SupportIoctlInheritCtl:
-      result = c_ioctl(f, if inheritable: FIONCLEX else: FIOCLEX) != -1
-    elif defined(posix):
-      var flags = c_fcntl(f, F_GETFD)
-      if flags == -1:
-        return false
-      flags = if inheritable: flags and not FD_CLOEXEC else: flags or FD_CLOEXEC
-      result = c_fcntl(f, F_SETFD, flags) != -1
-    else:
-      result = setHandleInformation(f.int, HANDLE_FLAG_INHERIT, culong inheritable) != 0
-
-proc readLine*(f: File, line: var TaintedString): bool {.tags: [ReadIOEffect],
-              benign.} =
-  ## reads a line of text from the file `f` into `line`. May throw an IO
-  ## exception.
-  ## A line of text may be delimited by ``LF`` or ``CRLF``. The newline
-  ## character(s) are not part of the returned string. Returns ``false``
-  ## if the end of the file has been reached, ``true`` otherwise. If
-  ## ``false`` is returned `line` contains no new data.
-  proc c_memchr(s: pointer, c: cint, n: csize_t): pointer {.
-    importc: "memchr", header: "<string.h>".}
-
-  var pos = 0
-
-  # Use the currently reserved space for a first try
-  var sp = max(line.string.len, 80)
-  line.string.setLen(sp)
-
-  while true:
-    # memset to \L so that we can tell how far fgets wrote, even on EOF, where
-    # fgets doesn't append an \L
-    for i in 0..<sp: line.string[pos+i] = '\L'
-
-    var fgetsSuccess: bool
-    while true:
-      # fixes #9634; this pattern may need to be abstracted as a template if reused;
-      # likely other io procs need this for correctness.
-      fgetsSuccess = c_fgets(addr line.string[pos], sp.cint, f) != nil
-      if fgetsSuccess: break
-      when not defined(NimScript):
-        if errno == EINTR:
-          errno = 0
-          c_clearerr(f)
-          continue
-      checkErr(f)
-      break
-
-    let m = c_memchr(addr line.string[pos], '\L'.ord, cast[csize_t](sp))
-    if m != nil:
-      # \l found: Could be our own or the one by fgets, in any case, we're done
-      var last = cast[ByteAddress](m) - cast[ByteAddress](addr line.string[0])
-      if last > 0 and line.string[last-1] == '\c':
-        line.string.setLen(last-1)
-        return last > 1 or fgetsSuccess
-        # We have to distinguish between two possible cases:
-        # \0\l\0 => line ending in a null character.
-        # \0\l\l => last line without newline, null was put there by fgets.
-      elif last > 0 and line.string[last-1] == '\0':
-        if last < pos + sp - 1 and line.string[last+1] != '\0':
-          dec last
-      line.string.setLen(last)
-      return last > 0 or fgetsSuccess
-    else:
-      # fgets will have inserted a null byte at the end of the string.
-      dec sp
-    # No \l found: Increase buffer and read more
-    inc pos, sp
-    sp = 128 # read in 128 bytes at a time
-    line.string.setLen(pos+sp)
-
-proc readLine*(f: File): TaintedString  {.tags: [ReadIOEffect], benign.} =
-  ## reads a line of text from the file `f`. May throw an IO exception.
-  ## A line of text may be delimited by ``LF`` or ``CRLF``. The newline
-  ## character(s) are not part of the returned string.
-  result = TaintedString(newStringOfCap(80))
-  if not readLine(f, result): raiseEOF()
-
-proc write*(f: File, i: int) {.tags: [WriteIOEffect], benign.} =
-  when sizeof(int) == 8:
-    if c_fprintf(f, "%lld", i) < 0: checkErr(f)
-  else:
-    if c_fprintf(f, "%ld", i) < 0: checkErr(f)
-
-proc write*(f: File, i: BiggestInt) {.tags: [WriteIOEffect], benign.} =
-  when sizeof(BiggestInt) == 8:
-    if c_fprintf(f, "%lld", i) < 0: checkErr(f)
-  else:
-    if c_fprintf(f, "%ld", i) < 0: checkErr(f)
-
-proc write*(f: File, b: bool) {.tags: [WriteIOEffect], benign.} =
-  if b: write(f, "true")
-  else: write(f, "false")
-
-proc write*(f: File, r: float32) {.tags: [WriteIOEffect], benign.} =
-  var buffer {.noinit.}: array[65, char]
-  discard writeFloatToBuffer(buffer, r)
-  if c_fprintf(f, "%s", buffer[0].addr) < 0: checkErr(f)
-
-proc write*(f: File, r: BiggestFloat) {.tags: [WriteIOEffect], benign.} =
-  var buffer {.noinit.}: array[65, char]
-  discard writeFloatToBuffer(buffer, r)
-  if c_fprintf(f, "%s", buffer[0].addr) < 0: checkErr(f)
-
-proc write*(f: File, c: char) {.tags: [WriteIOEffect], benign.} =
-  discard c_putc(cint(c), f)
-
-proc write*(f: File, a: varargs[string, `$`]) {.tags: [WriteIOEffect], benign.} =
-  for x in items(a): write(f, x)
-
-proc readAllBuffer(file: File): string =
-  # This proc is for File we want to read but don't know how many
-  # bytes we need to read before the buffer is empty.
-  result = ""
-  var buffer = newString(BufSize)
-  while true:
-    var bytesRead = readBuffer(file, addr(buffer[0]), BufSize)
-    if bytesRead == BufSize:
-      result.add(buffer)
-    else:
-      buffer.setLen(bytesRead)
-      result.add(buffer)
-      break
-
-proc rawFileSize(file: File): int64 =
-  # this does not raise an error opposed to `getFileSize`
-  var oldPos = c_ftell(file)
-  discard c_fseek(file, 0, 2) # seek the end of the file
-  result = c_ftell(file)
-  discard c_fseek(file, oldPos, 0)
-
-proc endOfFile*(f: File): bool {.tags: [], benign.} =
-  ## Returns true if `f` is at the end.
-  var c = c_fgetc(f)
-  discard c_ungetc(c, f)
-  return c < 0'i32
-  #result = c_feof(f) != 0
-
-proc readAllFile(file: File, len: int64): string =
-  # We acquire the filesize beforehand and hope it doesn't change.
-  # Speeds things up.
-  result = newString(len)
-  let bytes = readBuffer(file, addr(result[0]), len)
-  if endOfFile(file):
-    if bytes < len:
-      result.setLen(bytes)
-  else:
-    # We read all the bytes but did not reach the EOF
-    # Try to read it as a buffer
-    result.add(readAllBuffer(file))
-
-proc readAllFile(file: File): string =
-  var len = rawFileSize(file)
-  result = readAllFile(file, len)
-
-proc readAll*(file: File): TaintedString {.tags: [ReadIOEffect], benign.} =
-  ## Reads all data from the stream `file`.
-  ##
-  ## Raises an IO exception in case of an error. It is an error if the
-  ## current file position is not at the beginning of the file.
-
-  # Separate handling needed because we need to buffer when we
-  # don't know the overall length of the File.
-  when declared(stdin):
-    let len = if file != stdin: rawFileSize(file) else: -1
-  else:
-    let len = rawFileSize(file)
-  if len > 0:
-    result = readAllFile(file, len).TaintedString
-  else:
-    result = readAllBuffer(file).TaintedString
-
-proc writeLine*[Ty](f: File, x: varargs[Ty, `$`]) {.inline,
-                          tags: [WriteIOEffect], benign.} =
-  ## writes the values `x` to `f` and then writes "\\n".
-  ## May throw an IO exception.
-  for i in items(x):
-    write(f, i)
-  write(f, "\n")
-
-# interface to the C procs:
-
-when defined(windows) and not defined(useWinAnsi):
-  when defined(cpp):
-    proc wfopen(filename, mode: WideCString): pointer {.
-      importcpp: "_wfopen((const wchar_t*)#, (const wchar_t*)#)", nodecl.}
-    proc wfreopen(filename, mode: WideCString, stream: File): File {.
-      importcpp: "_wfreopen((const wchar_t*)#, (const wchar_t*)#, #)", nodecl.}
-  else:
-    proc wfopen(filename, mode: WideCString): pointer {.
-      importc: "_wfopen", nodecl.}
-    proc wfreopen(filename, mode: WideCString, stream: File): File {.
-      importc: "_wfreopen", nodecl.}
-
-  proc fopen(filename, mode: cstring): pointer =
-    var f = newWideCString(filename)
-    var m = newWideCString(mode)
-    result = wfopen(f, m)
-
-  proc freopen(filename, mode: cstring, stream: File): File =
-    var f = newWideCString(filename)
-    var m = newWideCString(mode)
-    result = wfreopen(f, m, stream)
-
-else:
-  proc fopen(filename, mode: cstring): pointer {.importc: "fopen", nodecl.}
-  proc freopen(filename, mode: cstring, stream: File): File {.
-    importc: "freopen", nodecl.}
-
-const
-  NoInheritFlag =
-    # Platform specific flag for creating a File without inheritance.
-    when not defined(nimInheritHandles):
-      when defined(windows):
-        "N"
-      elif defined(linux) or defined(bsd):
-        "e"
-      else:
-        ""
-    else:
-      ""
-  FormatOpen: array[FileMode, string] = [
-    "rb" & NoInheritFlag, "wb" & NoInheritFlag, "w+b" & NoInheritFlag,
-    "r+b" & NoInheritFlag, "ab" & NoInheritFlag
-  ]
-    #"rt", "wt", "w+t", "r+t", "at"
-    # we always use binary here as for Nim the OS line ending
-    # should not be translated.
-
-when defined(posix) and not defined(nimscript):
-  when defined(linux) and defined(amd64):
-    type
-      Mode {.importc: "mode_t", header: "<sys/types.h>".} = cint
-
-      # fillers ensure correct size & offsets
-      Stat {.importc: "struct stat",
-              header: "<sys/stat.h>", final, pure.} = object ## struct stat
-        filler_1: array[24, char]
-        st_mode: Mode        ## Mode of file
-        filler_2: array[144 - 24 - 4, char]
-
-    proc modeIsDir(m: Mode): bool =
-      ## Test for a directory.
-      (m and 0o170000) == 0o40000
-
-  else:
-    type
-      Mode {.importc: "mode_t", header: "<sys/types.h>".} = cint
-
-      Stat {.importc: "struct stat",
-               header: "<sys/stat.h>", final, pure.} = object ## struct stat
-        st_mode: Mode        ## Mode of file
-
-    proc modeIsDir(m: Mode): bool {.importc: "S_ISDIR", header: "<sys/stat.h>".}
-      ## Test for a directory.
-
-  proc c_fstat(a1: cint, a2: var Stat): cint {.
-    importc: "fstat", header: "<sys/stat.h>".}
-
-
-proc open*(f: var File, filename: string,
-          mode: FileMode = fmRead,
-          bufSize: int = -1): bool {.tags: [], raises: [], benign.} =
-  ## Opens a file named `filename` with given `mode`.
-  ##
-  ## Default mode is readonly. Returns true if the file could be opened.
-  ## This throws no exception if the file could not be opened.
-  ##
-  ## The file handle associated with the resulting ``File`` is not inheritable.
-  var p = fopen(filename, FormatOpen[mode])
-  if p != nil:
-    var f2 = cast[File](p)
-    when defined(posix) and not defined(nimscript):
-      # How `fopen` handles opening a directory is not specified in ISO C and
-      # POSIX. We do not want to handle directories as regular files that can
-      # be opened.
-      var res {.noinit.}: Stat
-      if c_fstat(getFileHandle(f2), res) >= 0'i32 and modeIsDir(res.st_mode):
-        close(f2)
-        return false
-    when not defined(nimInheritHandles) and declared(setInheritable) and
-         NoInheritFlag.len == 0:
-      if not setInheritable(getOsFileHandle(f2), false):
-        close(f2)
-        return false
-
-    result = true
-    f = cast[File](p)
-    if bufSize > 0 and bufSize <= high(cint).int:
-      discard c_setvbuf(f, nil, IOFBF, cast[csize_t](bufSize))
-    elif bufSize == 0:
-      discard c_setvbuf(f, nil, IONBF, 0)
-
-proc reopen*(f: File, filename: string, mode: FileMode = fmRead): bool {.
-  tags: [], benign.} =
-  ## reopens the file `f` with given `filename` and `mode`. This
-  ## is often used to redirect the `stdin`, `stdout` or `stderr`
-  ## file variables.
-  ##
-  ## Default mode is readonly. Returns true if the file could be reopened.
-  ##
-  ## The file handle associated with `f` won't be inheritable.
-  if freopen(filename, FormatOpen[mode], f) != nil:
-    when not defined(nimInheritHandles) and declared(setInheritable) and
-         NoInheritFlag.len == 0:
-      if not setInheritable(getOsFileHandle(f), false):
-        close(f)
-        return false
-    result = true
-
-proc open*(f: var File, filehandle: FileHandle,
-           mode: FileMode = fmRead): bool {.tags: [], raises: [], benign.} =
-  ## Creates a ``File`` from a `filehandle` with given `mode`.
-  ##
-  ## Default mode is readonly. Returns true if the file could be opened.
-  ##
-  ## The passed file handle will no longer be inheritable.
-  when not defined(nimInheritHandles) and declared(setInheritable):
-    let oshandle = when defined(windows): FileHandle getOsfhandle(filehandle) else: filehandle
-    if not setInheritable(oshandle, false):
-      return false
-  f = c_fdopen(filehandle, FormatOpen[mode])
-  result = f != nil
-
-proc open*(filename: string,
-            mode: FileMode = fmRead, bufSize: int = -1): File =
-  ## Opens a file named `filename` with given `mode`.
-  ##
-  ## Default mode is readonly. Raises an ``IOError`` if the file
-  ## could not be opened.
-  ##
-  ## The file handle associated with the resulting ``File`` is not inheritable.
-  if not open(result, filename, mode, bufSize):
-    sysFatal(IOError, "cannot open: " & filename)
-
-proc setFilePos*(f: File, pos: int64, relativeTo: FileSeekPos = fspSet) {.benign.} =
-  ## sets the position of the file pointer that is used for read/write
-  ## operations. The file's first byte has the index zero.
-  if c_fseek(f, pos, cint(relativeTo)) != 0:
-    raiseEIO("cannot set file position")
-
-proc getFilePos*(f: File): int64 {.benign.} =
-  ## retrieves the current position of the file pointer that is used to
-  ## read from the file `f`. The file's first byte has the index zero.
-  result = c_ftell(f)
-  if result < 0: raiseEIO("cannot retrieve file position")
-
-proc getFileSize*(f: File): int64 {.tags: [ReadIOEffect], benign.} =
-  ## retrieves the file size (in bytes) of `f`.
-  let oldPos = getFilePos(f)
-  discard c_fseek(f, 0, 2) # seek the end of the file
-  result = getFilePos(f)
-  setFilePos(f, oldPos)
-
-proc setStdIoUnbuffered*() {.tags: [], benign.} =
-  ## Configures `stdin`, `stdout` and `stderr` to be unbuffered.
-  when declared(stdout):
-    discard c_setvbuf(stdout, nil, IONBF, 0)
-  when declared(stderr):
-    discard c_setvbuf(stderr, nil, IONBF, 0)
-  when declared(stdin):
-    discard c_setvbuf(stdin, nil, IONBF, 0)
-
-when declared(stdout):
-  when defined(windows) and compileOption("threads"):
-    const insideRLocksModule = false
-    include "system/syslocks"
-
-    var echoLock: SysLock
-    initSysLock echoLock
-
-  proc echoBinSafe(args: openArray[string]) {.compilerproc.} =
-    when defined(androidNDK):
-      var s = ""
-      for arg in args:
-        s.add arg
-      android_log_print(ANDROID_LOG_VERBOSE, "nim", s)
-    else:
-      # flockfile deadlocks some versions of Android 5.x.x
-      when not defined(windows) and not defined(android) and not defined(nintendoswitch) and hostOS != "any":
-        proc flockfile(f: File) {.importc, nodecl.}
-        proc funlockfile(f: File) {.importc, nodecl.}
-        flockfile(stdout)
-      when defined(windows) and compileOption("threads"):
-        acquireSys echoLock
-      for s in args:
-        when defined(windows):
-          writeWindows(stdout, s)
-        else:
-          discard c_fwrite(s.cstring, cast[csize_t](s.len), 1, stdout)
-      const linefeed = "\n"
-      discard c_fwrite(linefeed.cstring, linefeed.len, 1, stdout)
-      discard c_fflush(stdout)
-      when not defined(windows) and not defined(android) and not defined(nintendoswitch) and hostOS != "any":
-        funlockfile(stdout)
-      when defined(windows) and compileOption("threads"):
-        releaseSys echoLock
-
-
-when defined(windows) and not defined(nimscript) and not defined(js):
-  # work-around C's sucking abstraction:
-  # BUGFIX: stdin and stdout should be binary files!
-  proc c_setmode(handle, mode: cint) {.
-    importc: when defined(bcc): "setmode" else: "_setmode",
-    header: "<io.h>".}
-  var
-    O_BINARY {.importc: "_O_BINARY", header: "<fcntl.h>".}: cint
-
-  # we use binary mode on Windows:
-  c_setmode(c_fileno(stdin), O_BINARY)
-  c_setmode(c_fileno(stdout), O_BINARY)
-  c_setmode(c_fileno(stderr), O_BINARY)
-
-when defined(windows) and appType == "console" and
-    not defined(nimDontSetUtf8CodePage) and not defined(nimscript):
-  proc setConsoleOutputCP(codepage: cuint): int32 {.stdcall, dynlib: "kernel32",
-    importc: "SetConsoleOutputCP".}
-  proc setConsoleCP(wCodePageID: cuint): int32 {.stdcall, dynlib: "kernel32",
-    importc: "SetConsoleCP".}
-
-  const Utf8codepage = 65001
-  discard setConsoleOutputCP(Utf8codepage)
-  discard setConsoleCP(Utf8codepage)
-
-proc readFile*(filename: string): TaintedString {.tags: [ReadIOEffect], benign.} =
-  ## Opens a file named `filename` for reading, calls `readAll
-  ## <#readAll,File>`_ and closes the file afterwards. Returns the string.
-  ## Raises an IO exception in case of an error. If you need to call
-  ## this inside a compile time macro you can use `staticRead
-  ## <system.html#staticRead,string>`_.
-  var f: File = nil
-  if open(f, filename):
-    try:
-      result = readAll(f)
-    finally:
-      close(f)
-  else:
-    sysFatal(IOError, "cannot open: " & filename)
-
-proc writeFile*(filename, content: string) {.tags: [WriteIOEffect], benign.} =
-  ## Opens a file named `filename` for writing. Then writes the
-  ## `content` completely to the file and closes the file afterwards.
-  ## Raises an IO exception in case of an error.
-  var f: File = nil
-  if open(f, filename, fmWrite):
-    try:
-      f.write(content)
-    finally:
-      close(f)
-  else:
-    sysFatal(IOError, "cannot open: " & filename)
-
-proc writeFile*(filename: string, content: openArray[byte]) {.since: (1, 1).} =
-  ## Opens a file named `filename` for writing. Then writes the
-  ## `content` completely to the file and closes the file afterwards.
-  ## Raises an IO exception in case of an error.
-  var f: File = nil
-  if open(f, filename, fmWrite):
-    try:
-      f.writeBuffer(unsafeAddr content[0], content.len)
-    finally:
-      close(f)
-  else:
-    raise newException(IOError, "cannot open: " & filename)
-
-proc readLines*(filename: string, n: Natural): seq[TaintedString] =
-  ## read `n` lines from the file named `filename`. Raises an IO exception
-  ## in case of an error. Raises EOF if file does not contain at least `n` lines.
-  ## Available at compile time. A line of text may be delimited by ``LF`` or ``CRLF``.
-  ## The newline character(s) are not part of the returned strings.
-  var f: File = nil
-  if open(f, filename):
-    try:
-      result = newSeq[TaintedString](n)
-      for i in 0 .. n - 1:
-        if not readLine(f, result[i]):
-          raiseEOF()
-    finally:
-      close(f)
-  else:
-    sysFatal(IOError, "cannot open: " & filename)
-
-template readLines*(filename: string): seq[TaintedString] {.deprecated: "use readLines with two arguments".} =
-  readLines(filename, 1)
-
-iterator lines*(filename: string): TaintedString {.tags: [ReadIOEffect].} =
-  ## Iterates over any line in the file named `filename`.
-  ##
-  ## If the file does not exist `IOError` is raised. The trailing newline
-  ## character(s) are removed from the iterated lines. Example:
-  ##
-  ## .. code-block:: nim
-  ##   import strutils
-  ##
-  ##   proc transformLetters(filename: string) =
-  ##     var buffer = ""
-  ##     for line in filename.lines:
-  ##       buffer.add(line.replace("a", "0") & '\x0A')
-  ##     writeFile(filename, buffer)
-  var f = open(filename, bufSize=8000)
-  try:
-    var res = TaintedString(newStringOfCap(80))
-    while f.readLine(res): yield res
-  finally:
-    close(f)
-
-iterator lines*(f: File): TaintedString {.tags: [ReadIOEffect].} =
-  ## Iterate over any line in the file `f`.
-  ##
-  ## The trailing newline character(s) are removed from the iterated lines.
-  ## Example:
-  ##
-  ## .. code-block:: nim
-  ##   proc countZeros(filename: File): tuple[lines, zeros: int] =
-  ##     for line in filename.lines:
-  ##       for letter in line:
-  ##         if letter == '0':
-  ##           result.zeros += 1
-  ##       result.lines += 1
-  var res = TaintedString(newStringOfCap(80))
-  while f.readLine(res): yield res
diff --git a/lib/system/iterators.nim b/lib/system/iterators.nim
index 99ad6eef6..125bee98f 100644
--- a/lib/system/iterators.nim
+++ b/lib/system/iterators.nim
@@ -1,14 +1,24 @@
-when defined(nimHasLentIterators) and not defined(nimWorkaround14447):
+## Default iterators for some Nim types.
+
+when defined(nimPreviewSlimSystem):
+  import std/assertions
+
+when not defined(nimNoLentIterators):
   template lent2(T): untyped = lent T
 else:
   template lent2(T): untyped = T
 
+template unCheckedInc(x) =
+  {.push overflowChecks: off.}
+  inc(x)
+  {.pop.}
+
 iterator items*[T: not char](a: openArray[T]): lent2 T {.inline.} =
   ## Iterates over each item of `a`.
   var i = 0
   while i < len(a):
     yield a[i]
-    inc(i)
+    unCheckedInc(i)
 
 iterator items*[T: char](a: openArray[T]): T {.inline.} =
   ## Iterates over each item of `a`.
@@ -18,32 +28,32 @@ iterator items*[T: char](a: openArray[T]): T {.inline.} =
   var i = 0
   while i < len(a):
     yield a[i]
-    inc(i)
+    unCheckedInc(i)
 
 iterator mitems*[T](a: var openArray[T]): var T {.inline.} =
   ## Iterates over each item of `a` so that you can modify the yielded value.
   var i = 0
   while i < len(a):
     yield a[i]
-    inc(i)
+    unCheckedInc(i)
 
 iterator items*[IX, T](a: array[IX, T]): T {.inline.} =
   ## Iterates over each item of `a`.
-  var i = low(IX)
-  if i <= high(IX):
+  when a.len > 0:
+    var i = low(IX)
     while true:
       yield a[i]
       if i >= high(IX): break
-      inc(i)
+      unCheckedInc(i)
 
 iterator mitems*[IX, T](a: var array[IX, T]): var T {.inline.} =
   ## Iterates over each item of `a` so that you can modify the yielded value.
-  var i = low(IX)
-  if i <= high(IX):
+  when a.len > 0:
+    var i = low(IX)
     while true:
       yield a[i]
       if i >= high(IX): break
-      inc(i)
+      unCheckedInc(i)
 
 iterator items*[T](a: set[T]): T {.inline.} =
   ## Iterates over each element of `a`. `items` iterates only over the
@@ -51,148 +61,188 @@ iterator items*[T](a: set[T]): T {.inline.} =
   ## able to hold).
   var i = low(T).int
   while i <= high(T).int:
-    if T(i) in a: yield T(i)
-    inc(i)
+    when T is enum and not defined(js):
+      if cast[T](i) in a: yield cast[T](i)
+    else:
+      if T(i) in a: yield T(i)
+    unCheckedInc(i)
 
 iterator items*(a: cstring): char {.inline.} =
   ## Iterates over each item of `a`.
-  when defined(js):
+  runnableExamples:
+    from std/sequtils import toSeq
+    assert toSeq("abc\0def".cstring) == @['a', 'b', 'c']
+    assert toSeq("abc".cstring) == @['a', 'b', 'c']
+  #[
+  assert toSeq(nil.cstring) == @[] # xxx fails with SIGSEGV
+  this fails with SIGSEGV; unclear whether we want to instead yield nothing
+  or pay a small price to check for `nil`, a benchmark is needed. Note that
+  other procs support `nil`.
+  ]#
+  template impl() =
     var i = 0
-    var L = len(a)
-    while i < L:
+    let n = len(a)
+    while i < n:
       yield a[i]
-      inc(i)
+      unCheckedInc(i)
+  when defined(js): impl()
   else:
-    var i = 0
-    while a[i] != '\0':
-      yield a[i]
-      inc(i)
+    when nimvm:
+      # xxx `cstring` should behave like c backend instead.
+      impl()
+    else:
+      var i = 0
+      while a[i] != '\0':
+        yield a[i]
+        unCheckedInc(i)
 
 iterator mitems*(a: var cstring): var char {.inline.} =
   ## Iterates over each item of `a` so that you can modify the yielded value.
-  when defined(js):
+  # xxx this should give CT error in js RT.
+  runnableExamples:
+    from std/sugar import collect
+    var a = "abc\0def"
+    prepareMutation(a)
+    var b = a.cstring
+    let s = collect:
+      for bi in mitems(b):
+        if bi == 'b': bi = 'B'
+        bi
+    assert s == @['a', 'B', 'c']
+    assert b == "aBc"
+    assert a == "aBc\0def"
+
+  template impl() =
     var i = 0
-    var L = len(a)
-    while i < L:
+    let n = len(a)
+    while i < n:
       yield a[i]
-      inc(i)
+      unCheckedInc(i)
+  when defined(js): impl()
   else:
-    var i = 0
-    while a[i] != '\0':
-      yield a[i]
-      inc(i)
+    when nimvm: impl()
+    else:
+      var i = 0
+      while a[i] != '\0':
+        yield a[i]
+        unCheckedInc(i)
 
-iterator items*(E: typedesc[enum]): E =
-  ## Iterates over the values of the enum ``E``.
+iterator items*[T: enum and Ordinal](E: typedesc[T]): T =
+  ## Iterates over the values of `E`.
+  ## See also `enumutils.items` for enums with holes.
+  runnableExamples:
+    type Goo = enum g0 = 2, g1, g2
+    from std/sequtils import toSeq
+    assert Goo.toSeq == [g0, g1, g2]
   for v in low(E) .. high(E):
     yield v
 
-iterator items*[T](s: HSlice[T, T]): T =
+iterator items*[T: Ordinal](s: Slice[T]): T =
   ## Iterates over the slice `s`, yielding each value between `s.a` and `s.b`
   ## (inclusively).
   for x in s.a .. s.b:
     yield x
 
 iterator pairs*[T](a: openArray[T]): tuple[key: int, val: T] {.inline.} =
-  ## Iterates over each item of `a`. Yields ``(index, a[index])`` pairs.
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
   var i = 0
   while i < len(a):
     yield (i, a[i])
-    inc(i)
+    unCheckedInc(i)
 
 iterator mpairs*[T](a: var openArray[T]): tuple[key: int, val: var T]{.inline.} =
-  ## Iterates over each item of `a`. Yields ``(index, a[index])`` pairs.
-  ## ``a[index]`` can be modified.
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  ## `a[index]` can be modified.
   var i = 0
   while i < len(a):
     yield (i, a[i])
-    inc(i)
+    unCheckedInc(i)
 
 iterator pairs*[IX, T](a: array[IX, T]): tuple[key: IX, val: T] {.inline.} =
-  ## Iterates over each item of `a`. Yields ``(index, a[index])`` pairs.
-  var i = low(IX)
-  if i <= high(IX):
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  when a.len > 0:
+    var i = low(IX)
     while true:
       yield (i, a[i])
       if i >= high(IX): break
-      inc(i)
+      unCheckedInc(i)
 
 iterator mpairs*[IX, T](a: var array[IX, T]): tuple[key: IX, val: var T] {.inline.} =
-  ## Iterates over each item of `a`. Yields ``(index, a[index])`` pairs.
-  ## ``a[index]`` can be modified.
-  var i = low(IX)
-  if i <= high(IX):
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  ## `a[index]` can be modified.
+  when a.len > 0:
+    var i = low(IX)
     while true:
       yield (i, a[i])
       if i >= high(IX): break
-      inc(i)
+      unCheckedInc(i)
 
 iterator pairs*[T](a: seq[T]): tuple[key: int, val: T] {.inline.} =
-  ## Iterates over each item of `a`. Yields ``(index, a[index])`` pairs.
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
   var i = 0
   let L = len(a)
   while i < L:
     yield (i, a[i])
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the seq changed while iterating over it")
 
 iterator mpairs*[T](a: var seq[T]): tuple[key: int, val: var T] {.inline.} =
-  ## Iterates over each item of `a`. Yields ``(index, a[index])`` pairs.
-  ## ``a[index]`` can be modified.
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  ## `a[index]` can be modified.
   var i = 0
   let L = len(a)
   while i < L:
     yield (i, a[i])
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the seq changed while iterating over it")
 
 iterator pairs*(a: string): tuple[key: int, val: char] {.inline.} =
-  ## Iterates over each item of `a`. Yields ``(index, a[index])`` pairs.
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
   var i = 0
   let L = len(a)
   while i < L:
     yield (i, a[i])
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the string changed while iterating over it")
 
 iterator mpairs*(a: var string): tuple[key: int, val: var char] {.inline.} =
-  ## Iterates over each item of `a`. Yields ``(index, a[index])`` pairs.
-  ## ``a[index]`` can be modified.
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  ## `a[index]` can be modified.
   var i = 0
   let L = len(a)
   while i < L:
     yield (i, a[i])
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the string changed while iterating over it")
 
 iterator pairs*(a: cstring): tuple[key: int, val: char] {.inline.} =
-  ## Iterates over each item of `a`. Yields ``(index, a[index])`` pairs.
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
   when defined(js):
     var i = 0
     var L = len(a)
     while i < L:
       yield (i, a[i])
-      inc(i)
+      unCheckedInc(i)
   else:
     var i = 0
     while a[i] != '\0':
       yield (i, a[i])
-      inc(i)
+      unCheckedInc(i)
 
 iterator mpairs*(a: var cstring): tuple[key: int, val: var char] {.inline.} =
-  ## Iterates over each item of `a`. Yields ``(index, a[index])`` pairs.
-  ## ``a[index]`` can be modified.
+  ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
+  ## `a[index]` can be modified.
   when defined(js):
     var i = 0
     var L = len(a)
     while i < L:
       yield (i, a[i])
-      inc(i)
+      unCheckedInc(i)
   else:
     var i = 0
     while a[i] != '\0':
       yield (i, a[i])
-      inc(i)
+      unCheckedInc(i)
 
 iterator items*[T](a: seq[T]): lent2 T {.inline.} =
   ## Iterates over each item of `a`.
@@ -200,7 +250,7 @@ iterator items*[T](a: seq[T]): lent2 T {.inline.} =
   let L = len(a)
   while i < L:
     yield a[i]
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the seq changed while iterating over it")
 
 iterator mitems*[T](a: var seq[T]): var T {.inline.} =
@@ -209,7 +259,7 @@ iterator mitems*[T](a: var seq[T]): var T {.inline.} =
   let L = len(a)
   while i < L:
     yield a[i]
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the seq changed while iterating over it")
 
 iterator items*(a: string): char {.inline.} =
@@ -218,7 +268,7 @@ iterator items*(a: string): char {.inline.} =
   let L = len(a)
   while i < L:
     yield a[i]
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the string changed while iterating over it")
 
 iterator mitems*(a: var string): var char {.inline.} =
@@ -227,7 +277,7 @@ iterator mitems*(a: var string): var char {.inline.} =
   let L = len(a)
   while i < L:
     yield a[i]
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the string changed while iterating over it")
 
 
@@ -235,24 +285,24 @@ iterator fields*[T: tuple|object](x: T): RootObj {.
   magic: "Fields", noSideEffect.} =
   ## Iterates over every field of `x`.
   ##
-  ## **Warning**: This really transforms the 'for' and unrolls the loop.
-  ## The current implementation also has a bug
-  ## that affects symbol binding in the loop body.
+  ## .. warning:: This really transforms the 'for' and unrolls the loop.
+  ##   The current implementation also has a bug
+  ##   that affects symbol binding in the loop body.
   runnableExamples:
     var t = (1, "foo")
-    for v in fields(t): v = default(type(v))
+    for v in fields(t): v = default(typeof(v))
     doAssert t == (0, "")
 
 iterator fields*[S:tuple|object, T:tuple|object](x: S, y: T): tuple[key: string, val: RootObj] {.
   magic: "Fields", noSideEffect.} =
   ## Iterates over every field of `x` and `y`.
   ##
-  ## **Warning**: This really transforms the 'for' and unrolls the loop.
-  ## The current implementation also has a bug that affects symbol binding
-  ## in the loop body.
+  ## .. warning:: This really transforms the 'for' and unrolls the loop.
+  ##   The current implementation also has a bug that affects symbol binding
+  ##   in the loop body.
   runnableExamples:
     var t1 = (1, "foo")
-    var t2 = default(type(t1))
+    var t2 = default(typeof(t1))
     for v1, v2 in fields(t1, t2): v2 = v1
     doAssert t1 == t2
 
@@ -261,16 +311,16 @@ iterator fieldPairs*[T: tuple|object](x: T): tuple[key: string, val: RootObj] {.
   ## Iterates over every field of `x` returning their name and value.
   ##
   ## When you iterate over objects with different field types you have to use
-  ## the compile time ``when`` instead of a runtime ``if`` to select the code
+  ## the compile time `when` instead of a runtime `if` to select the code
   ## you want to run for each type. To perform the comparison use the `is
   ## operator <manual.html#generics-is-operator>`_.
-  ## Another way to do the same without ``when`` is to leave the task of
+  ## Another way to do the same without `when` is to leave the task of
   ## picking the appropriate code to a secondary proc which you overload for
   ## each field type and pass the `value` to.
   ##
-  ## **Warning**: This really transforms the 'for' and unrolls the loop. The
-  ## current implementation also has a bug that affects symbol binding in the
-  ## loop body.
+  ## .. warning:: This really transforms the 'for' and unrolls the loop. The
+  ##   current implementation also has a bug that affects symbol binding in the
+  ##   loop body.
   runnableExamples:
     type
       Custom = object
@@ -289,9 +339,9 @@ iterator fieldPairs*[S: tuple|object, T: tuple|object](x: S, y: T): tuple[
   magic: "FieldPairs", noSideEffect.} =
   ## Iterates over every field of `x` and `y`.
   ##
-  ## **Warning**: This really transforms the 'for' and unrolls the loop.
-  ## The current implementation also has a bug that affects symbol binding
-  ## in the loop body.
+  ## .. warning:: This really transforms the 'for' and unrolls the loop.
+  ##   The current implementation also has a bug that affects symbol binding
+  ##   in the loop body.
   runnableExamples:
     type Foo = object
       x1: int
diff --git a/lib/system/iterators_1.nim b/lib/system/iterators_1.nim
index 262bd6aab..d00e3f823 100644
--- a/lib/system/iterators_1.nim
+++ b/lib/system/iterators_1.nim
@@ -9,15 +9,20 @@ iterator countdown*[T](a, b: T, step: Positive = 1): T {.inline.} =
   ##
   ## `T` may be any ordinal type, `step` may only be positive.
   ##
-  ## **Note**: This fails to count to ``low(int)`` if T = int for
+  ## **Note**: This fails to count to `low(int)` if T = int for
   ## efficiency reasons.
-  ##
-  ## .. code-block:: Nim
-  ##   for i in countdown(7, 3):
-  ##     echo i # => 7; 6; 5; 4; 3
-  ##
-  ##   for i in countdown(9, 2, 3):
-  ##     echo i # => 9; 6; 3
+  runnableExamples:
+    import std/sugar
+    let x = collect(newSeq):
+      for i in countdown(7, 3):
+        i
+
+    assert x == @[7, 6, 5, 4, 3]
+
+    let y = collect(newseq):
+      for i in countdown(9, 2, 3):
+        i
+    assert y == @[9, 6, 3]
   when T is (uint|uint64):
     var res = a
     while res >= b:
@@ -27,7 +32,10 @@ iterator countdown*[T](a, b: T, step: Positive = 1): T {.inline.} =
   elif T is IntLikeForCount and T is Ordinal:
     var res = int(a)
     while res >= int(b):
-      yield T(res)
+      when defined(nimHasCastExtendedVm):
+        yield cast[T](res)
+      else:
+        yield T(res)
       dec(res, step)
   else:
     var res = a
@@ -35,148 +43,106 @@ iterator countdown*[T](a, b: T, step: Positive = 1): T {.inline.} =
       yield res
       dec(res, step)
 
-when defined(nimNewRoof):
-  iterator countup*[T](a, b: T, step: Positive = 1): T {.inline.} =
-    ## Counts from ordinal value `a` to `b` (inclusive) with the given
-    ## step count.
-    ##
-    ## `T` may be any ordinal type, `step` may only be positive.
-    ##
-    ## **Note**: This fails to count to ``high(int)`` if T = int for
-    ## efficiency reasons.
-    ##
-    ## .. code-block:: Nim
-    ##   for i in countup(3, 7):
-    ##     echo i # => 3; 4; 5; 6; 7
-    ##
-    ##   for i in countup(2, 9, 3):
-    ##     echo i # => 2; 5; 8
-    mixin inc
-    when T is IntLikeForCount and T is Ordinal:
-      var res = int(a)
-      while res <= int(b):
-        yield T(res)
-        inc(res, step)
-    else:
-      var res = a
-      while res <= b:
-        yield res
-        inc(res, step)
+iterator countup*[T](a, b: T, step: Positive = 1): T {.inline.} =
+  ## Counts from ordinal value `a` to `b` (inclusive) with the given
+  ## step count.
+  ##
+  ## `T` may be any ordinal type, `step` may only be positive.
+  ##
+  ## **Note**: This fails to count to `high(int)` if T = int for
+  ## efficiency reasons.
+  runnableExamples:
+    import std/sugar
+    let x = collect(newSeq):
+      for i in countup(3, 7):
+        i
+    
+    assert x == @[3, 4, 5, 6, 7]
 
-  iterator `..`*[T](a, b: T): T {.inline.} =
-    ## An alias for `countup(a, b, 1)`.
-    ##
-    ## See also:
-    ## * [..<](#..<.i,T,T)
-    ##
-    ## .. code-block:: Nim
-    ##   for i in 3 .. 7:
-    ##     echo i # => 3; 4; 5; 6; 7
-    mixin inc
-    when T is IntLikeForCount and T is Ordinal:
-      var res = int(a)
-      while res <= int(b):
+    let y = collect(newseq):
+      for i in countup(2, 9, 3):
+        i
+    assert y == @[2, 5, 8]
+  mixin inc
+  when T is IntLikeForCount and T is Ordinal:
+    var res = int(a)
+    while res <= int(b):
+      when defined(nimHasCastExtendedVm):
+        yield cast[T](res)
+      else:
         yield T(res)
-        inc(res)
-    else:
-      var res = a
-      while res <= b:
-        yield res
-        inc(res)
-
-  template dotdotImpl(t) {.dirty.} =
-    iterator `..`*(a, b: t): t {.inline.} =
-      ## A type specialized version of ``..`` for convenience so that
-      ## mixing integer types works better.
-      ##
-      ## See also:
-      ## * [..<](#..<.i,T,T)
-      var res = a
-      while res <= b:
-        yield res
-        inc(res)
-
-  dotdotImpl(int64)
-  dotdotImpl(int32)
-  dotdotImpl(uint64)
-  dotdotImpl(uint32)
-
-  iterator `..<`*[T](a, b: T): T {.inline.} =
-    mixin inc
-    var i = a
-    while i < b:
-      yield i
-      inc i
+      inc(res, step)
+  else:
+    var res = a
+    while res <= b:
+      yield res
+      inc(res, step)
 
-  template dotdotLessImpl(t) {.dirty.} =
-    iterator `..<`*(a, b: t): t {.inline.} =
-      ## A type specialized version of ``..<`` for convenience so that
-      ## mixing integer types works better.
-      var res = a
-      while res < b:
-        yield res
-        inc(res)
+iterator `..`*[T](a, b: T): T {.inline.} =
+  ## An alias for `countup(a, b, 1)`.
+  ##
+  ## See also:
+  ## * [..<](#..<.i,T,T)
+  runnableExamples:
+    import std/sugar
 
-  dotdotLessImpl(int64)
-  dotdotLessImpl(int32)
-  dotdotLessImpl(uint64)
-  dotdotLessImpl(uint32)
+    let x = collect(newSeq):
+      for i in 3 .. 7:
+        i
 
-else: # not defined(nimNewRoof)
-  iterator countup*[S, T](a: S, b: T, step = 1): T {.inline.} =
-    ## Counts from ordinal value `a` up to `b` (inclusive) with the given
-    ## step count.
-    ##
-    ## `S`, `T` may be any ordinal type, `step` may only be positive.
-    ##
-    ## **Note**: This fails to count to ``high(int)`` if T = int for
-    ## efficiency reasons.
-    ##
-    ## .. code-block:: Nim
-    ##   for i in countup(3, 7):
-    ##     echo i # => 3; 4; 5; 6; 7
-    ##
-    ##   for i in countup(2, 9, 3):
-    ##     echo i # => 2; 5; 8
-    when T is IntLikeForCount and T is Ordinal:
-      var res = int(a)
-      while res <= int(b):
+    assert x == @[3, 4, 5, 6, 7]
+  mixin inc
+  when T is IntLikeForCount and T is Ordinal:
+    var res = int(a)
+    while res <= int(b):
+      when defined(nimHasCastExtendedVm):
+        yield cast[T](res)
+      else:
         yield T(res)
-        inc(res, step)
-    else:
-      var res = T(a)
-      while res <= b:
-        yield res
-        inc(res, step)
+      inc(res)
+  else:
+    var res = a
+    while res <= b:
+      yield res
+      inc(res)
 
-  iterator `..`*[S, T](a: S, b: T): T {.inline.} =
-    ## An alias for `countup(a, b, 1)`.
+template dotdotImpl(t) {.dirty.} =
+  iterator `..`*(a, b: t): t {.inline.} =
+    ## A type specialized version of `..` for convenience so that
+    ## mixing integer types works better.
     ##
     ## See also:
     ## * [..<](#..<.i,T,T)
-    ##
-    ## .. code-block:: Nim
-    ##   for i in 3 .. 7:
-    ##     echo i # => 3; 4; 5; 6; 7
-    mixin inc
-    when T is IntLikeForCount and T is Ordinal:
-      var res = int(a)
-      while res <= int(b):
-        yield T(res)
-        inc(res)
-    else:
-      var res = T(a)
-      while res <= b:
-        yield res
-        inc(res)
+    var res = a
+    while res <= b:
+      yield res
+      inc(res)
+
+dotdotImpl(int64)
+dotdotImpl(int32)
+dotdotImpl(uint64)
+dotdotImpl(uint32)
+
+iterator `..<`*[T](a, b: T): T {.inline.} =
+  mixin inc
+  var i = a
+  while i < b:
+    yield i
+    inc i
 
-  iterator `..<`*[S, T](a: S, b: T): T {.inline.} =
-    mixin inc
-    var i = T(a)
-    while i < b:
-      yield i
-      inc i
+template dotdotLessImpl(t) {.dirty.} =
+  iterator `..<`*(a, b: t): t {.inline.} =
+    ## A type specialized version of `..<` for convenience so that
+    ## mixing integer types works better.
+    var res = a
+    while res < b:
+      yield res
+      inc(res)
 
+dotdotLessImpl(int64)
+dotdotLessImpl(int32)
+dotdotLessImpl(uint64)
+dotdotLessImpl(uint32)
 
 iterator `||`*[S, T](a: S, b: T, annotation: static string = "parallel for"): T {.
   inline, magic: "OmpParFor", sideEffect.} =
@@ -189,9 +155,9 @@ iterator `||`*[S, T](a: S, b: T, annotation: static string = "parallel for"): T
   ## for further information.
   ##
   ## Note that the compiler maps that to
-  ## the ``#pragma omp parallel for`` construct of `OpenMP`:idx: and as
+  ## the `#pragma omp parallel for` construct of `OpenMP`:idx: and as
   ## such isn't aware of the parallelism in your code! Be careful! Later
-  ## versions of ``||`` will get proper support by Nim's code generator
+  ## versions of `||` will get proper support by Nim's code generator
   ## and GC.
   discard
 
@@ -207,8 +173,8 @@ iterator `||`*[S, T](a: S, b: T, step: Positive, annotation: static string = "pa
   ## for further information.
   ##
   ## Note that the compiler maps that to
-  ## the ``#pragma omp parallel for`` construct of `OpenMP`:idx: and as
+  ## the `#pragma omp parallel for` construct of `OpenMP`:idx: and as
   ## such isn't aware of the parallelism in your code! Be careful! Later
-  ## versions of ``||`` will get proper support by Nim's code generator
+  ## versions of `||` will get proper support by Nim's code generator
   ## and GC.
   discard
diff --git a/lib/system/jssys.nim b/lib/system/jssys.nim
index 705a6a208..5599240fd 100644
--- a/lib/system/jssys.nim
+++ b/lib/system/jssys.nim
@@ -49,7 +49,7 @@ proc nimCharToStr(x: char): string {.compilerproc.} =
   result[0] = x
 
 proc isNimException(): bool {.asmNoStackFrame.} =
-  asm "return `lastJSError` && `lastJSError`.m_type;"
+  {.emit: "return `lastJSError` && `lastJSError`.m_type;".}
 
 proc getCurrentException*(): ref Exception {.compilerRtl, benign.} =
   if isNimException(): result = cast[ref Exception](lastJSError)
@@ -69,6 +69,13 @@ proc getCurrentExceptionMsg*(): string =
         return $msg
   return ""
 
+proc setCurrentException*(exc: ref Exception) =
+  lastJSError = cast[PJSError](exc)
+
+proc closureIterSetupExc(e: ref Exception) {.compilerproc, inline.} =
+  ## Used to set up exception handling for closure iterators
+  setCurrentException(e)
+
 proc auxWriteStackTrace(f: PCallFrame): string =
   type
     TempFrame = tuple[procname: cstring, line: int, filename: cstring]
@@ -105,6 +112,11 @@ proc rawWriteStackTrace(): string =
   else:
     result = "No stack traceback available\n"
 
+proc writeStackTrace() =
+  var trace = rawWriteStackTrace()
+  trace.setLen(trace.len - 1)
+  echo trace
+
 proc getStackTrace*(): string = rawWriteStackTrace()
 proc getStackTrace*(e: ref Exception): string = e.trace
 
@@ -122,7 +134,8 @@ proc unhandledException(e: ref Exception) {.
   when NimStackTrace:
     add(buf, rawWriteStackTrace())
   let cbuf = cstring(buf)
-  framePtr = nil
+  when NimStackTrace:
+    framePtr = nil
   {.emit: """
   if (typeof(Error) !== "undefined") {
     throw new Error(`cbuf`);
@@ -139,7 +152,7 @@ proc raiseException(e: ref Exception, ename: cstring) {.
     unhandledException(e)
   when NimStackTrace:
     e.trace = rawWriteStackTrace()
-  asm "throw `e`;"
+  {.emit: "throw `e`;".}
 
 proc reraiseException() {.compilerproc, asmNoStackFrame.} =
   if lastJSError == nil:
@@ -149,7 +162,7 @@ proc reraiseException() {.compilerproc, asmNoStackFrame.} =
       if isNimException():
         unhandledException(cast[ref Exception](lastJSError))
 
-    asm "throw lastJSError;"
+    {.emit: "throw lastJSError;".}
 
 proc raiseOverflow {.exportc: "raiseOverflow", noreturn, compilerproc.} =
   raise newException(OverflowDefect, "over- or underflow")
@@ -163,11 +176,11 @@ proc raiseRangeError() {.compilerproc, noreturn.} =
 proc raiseIndexError(i, a, b: int) {.compilerproc, noreturn.} =
   raise newException(IndexDefect, formatErrorIndexBound(int(i), int(a), int(b)))
 
-proc raiseFieldError(f: string) {.compilerproc, noreturn.} =
-  raise newException(FieldDefect, f)
+proc raiseFieldError2(f: string, discVal: string) {.compilerproc, noreturn.} =
+  raise newException(FieldDefect, formatFieldDefect(f, discVal))
 
 proc setConstr() {.varargs, asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     var result = {};
     for (var i = 0; i < arguments.length; ++i) {
       var x = arguments[i];
@@ -180,13 +193,12 @@ proc setConstr() {.varargs, asmNoStackFrame, compilerproc.} =
       }
     }
     return result;
-  """
+  """.}
 
 proc makeNimstrLit(c: cstring): string {.asmNoStackFrame, compilerproc.} =
   {.emit: """
-  var ln = `c`.length;
-  var result = new Array(ln);
-  for (var i = 0; i < ln; ++i) {
+  var result = [];
+  for (var i = 0; i < `c`.length; ++i) {
     result[i] = `c`.charCodeAt(i);
   }
   return result;
@@ -269,62 +281,64 @@ proc toJSStr(s: string): cstring {.compilerproc.} =
   result = join(res)
 
 proc mnewString(len: int): string {.asmNoStackFrame, compilerproc.} =
-  asm """
-    return new Array(`len`);
-  """
+  {.emit: """
+    var result = new Array(`len`);
+    for (var i = 0; i < `len`; i++) {result[i] = 0;}
+    return result;
+  """.}
 
 proc SetCard(a: int): int {.compilerproc, asmNoStackFrame.} =
   # argument type is a fake
-  asm """
+  {.emit: """
     var result = 0;
     for (var elem in `a`) { ++result; }
     return result;
-  """
+  """.}
 
 proc SetEq(a, b: int): bool {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     for (var elem in `a`) { if (!`b`[elem]) return false; }
     for (var elem in `b`) { if (!`a`[elem]) return false; }
     return true;
-  """
+  """.}
 
 proc SetLe(a, b: int): bool {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     for (var elem in `a`) { if (!`b`[elem]) return false; }
     return true;
-  """
+  """.}
 
 proc SetLt(a, b: int): bool {.compilerproc.} =
   result = SetLe(a, b) and not SetEq(a, b)
 
 proc SetMul(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     var result = {};
     for (var elem in `a`) {
       if (`b`[elem]) { result[elem] = true; }
     }
     return result;
-  """
+  """.}
 
 proc SetPlus(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     var result = {};
     for (var elem in `a`) { result[elem] = true; }
     for (var elem in `b`) { result[elem] = true; }
     return result;
-  """
+  """.}
 
 proc SetMinus(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     var result = {};
     for (var elem in `a`) {
       if (!`b`[elem]) { result[elem] = true; }
     }
     return result;
-  """
+  """.}
 
 proc cmpStrings(a, b: string): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     if (`a` == `b`) return 0;
     if (!`a`) return -1;
     if (!`b`) return 1;
@@ -333,13 +347,18 @@ proc cmpStrings(a, b: string): int {.asmNoStackFrame, compilerproc.} =
       if (result != 0) return result;
     }
     return `a`.length - `b`.length;
-  """
+  """.}
 
 proc cmp(x, y: string): int =
-  return cmpStrings(x, y)
+  when nimvm:
+    if x == y: result = 0
+    elif x < y: result = -1
+    else: result = 1
+  else:
+    result = cmpStrings(x, y)
 
 proc eqStrings(a, b: string): bool {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     if (`a` == `b`) return true;
     if (`a` === null && `b`.length == 0) return true;
     if (`b` === null && `a`.length == 0) return true;
@@ -349,29 +368,29 @@ proc eqStrings(a, b: string): bool {.asmNoStackFrame, compilerproc.} =
     for (var i = 0; i < alen; ++i)
       if (`a`[i] != `b`[i]) return false;
     return true;
-  """
+  """.}
 
 when defined(kwin):
   proc rawEcho {.compilerproc, asmNoStackFrame.} =
-    asm """
+    {.emit: """
       var buf = "";
       for (var i = 0; i < arguments.length; ++i) {
         buf += `toJSStr`(arguments[i]);
       }
       print(buf);
-    """
+    """.}
 
 elif not defined(nimOldEcho):
   proc ewriteln(x: cstring) = log(x)
 
   proc rawEcho {.compilerproc, asmNoStackFrame.} =
-    asm """
+    {.emit: """
       var buf = "";
       for (var i = 0; i < arguments.length; ++i) {
         buf += `toJSStr`(arguments[i]);
       }
       console.log(buf);
-    """
+    """.}
 
 else:
   proc ewriteln(x: cstring) =
@@ -399,84 +418,84 @@ else:
 
 # Arithmetic:
 proc checkOverflowInt(a: int) {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     if (`a` > 2147483647 || `a` < -2147483648) `raiseOverflow`();
-  """
+  """.}
 
 proc addInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     var result = `a` + `b`;
     `checkOverflowInt`(result);
     return result;
-  """
+  """.}
 
 proc subInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     var result = `a` - `b`;
     `checkOverflowInt`(result);
     return result;
-  """
+  """.}
 
 proc mulInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     var result = `a` * `b`;
     `checkOverflowInt`(result);
     return result;
-  """
+  """.}
 
 proc divInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     if (`b` == 0) `raiseDivByZero`();
     if (`b` == -1 && `a` == 2147483647) `raiseOverflow`();
     return Math.trunc(`a` / `b`);
-  """
+  """.}
 
 proc modInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     if (`b` == 0) `raiseDivByZero`();
     if (`b` == -1 && `a` == 2147483647) `raiseOverflow`();
     return Math.trunc(`a` % `b`);
-  """
+  """.}
 
-proc checkOverflowInt64(a: int) {.asmNoStackFrame, compilerproc.} =
-  asm """
-    if (`a` > 9223372036854775807 || `a` < -9223372036854775808) `raiseOverflow`();
-  """
+proc checkOverflowInt64(a: int64) {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    if (`a` > 9223372036854775807n || `a` < -9223372036854775808n) `raiseOverflow`();
+  """.}
 
-proc addInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+proc addInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` + `b`;
     `checkOverflowInt64`(result);
     return result;
-  """
+  """.}
 
-proc subInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+proc subInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` - `b`;
     `checkOverflowInt64`(result);
     return result;
-  """
+  """.}
 
-proc mulInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+proc mulInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` * `b`;
     `checkOverflowInt64`(result);
     return result;
-  """
+  """.}
 
-proc divInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
-    if (`b` == 0) `raiseDivByZero`();
-    if (`b` == -1 && `a` == 9223372036854775807) `raiseOverflow`();
-    return Math.trunc(`a` / `b`);
-  """
+proc divInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    if (`b` == 0n) `raiseDivByZero`();
+    if (`b` == -1n && `a` == 9223372036854775807n) `raiseOverflow`();
+    return `a` / `b`;
+  """.}
 
-proc modInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
-    if (`b` == 0) `raiseDivByZero`();
-    if (`b` == -1 && `a` == 9223372036854775807) `raiseOverflow`();
-    return Math.trunc(`a` % `b`);
-  """
+proc modInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    if (`b` == 0n) `raiseDivByZero`();
+    if (`b` == -1n && `a` == 9223372036854775807n) `raiseOverflow`();
+    return `a` % `b`;
+  """.}
 
 proc negInt(a: int): int {.compilerproc.} =
   result = a*(-1)
@@ -484,53 +503,16 @@ proc negInt(a: int): int {.compilerproc.} =
 proc negInt64(a: int64): int64 {.compilerproc.} =
   result = a*(-1)
 
-proc nimFloatToString(a: float): cstring {.compilerproc.} =
-  ## ensures the result doesn't print like an integer, ie return 2.0, not 2
-  asm """
-    function nimOnlyDigitsOrMinus(n) {
-      return n.toString().match(/^-?\d+$/);
-    }
-    if (Number.isSafeInteger(`a`)) `result` =  `a`+".0"
-    else {
-      `result` = `a`+""
-      if(nimOnlyDigitsOrMinus(`result`)){
-        `result` = `a`+".0"
-      }
-    }
-  """
-
 proc absInt(a: int): int {.compilerproc.} =
   result = if a < 0: a*(-1) else: a
 
 proc absInt64(a: int64): int64 {.compilerproc.} =
   result = if a < 0: a*(-1) else: a
 
-when not defined(nimNoZeroExtendMagic):
-  proc ze*(a: int): int {.compilerproc.} =
-    result = a
-
-  proc ze64*(a: int64): int64 {.compilerproc.} =
-    result = a
-
-  proc toU8*(a: int): int8 {.asmNoStackFrame, compilerproc.} =
-    asm """
-      return `a`;
-    """
-
-  proc toU16*(a: int): int16 {.asmNoStackFrame, compilerproc.} =
-    asm """
-      return `a`;
-    """
-
-  proc toU32*(a: int64): int32 {.asmNoStackFrame, compilerproc.} =
-    asm """
-      return `a`;
-    """
-
 proc nimMin(a, b: int): int {.compilerproc.} = return if a <= b: a else: b
 proc nimMax(a, b: int): int {.compilerproc.} = return if a >= b: a else: b
 
-proc chckNilDisp(p: pointer) {.compilerproc.} =
+proc chckNilDisp(p: JSRef) {.compilerproc.} =
   if p == nil:
     sysFatal(NilAccessDefect, "cannot dispatch; dispatcher is nil")
 
@@ -548,22 +530,22 @@ proc nimCopyAux(dest, src: JSRef, n: ptr TNimNode) {.compilerproc.} =
   case n.kind
   of nkNone: sysAssert(false, "nimCopyAux")
   of nkSlot:
-    asm """
+    {.emit: """
       `dest`[`n`.offset] = nimCopy(`dest`[`n`.offset], `src`[`n`.offset], `n`.typ);
-    """
+    """.}
   of nkList:
-    asm """
+    {.emit: """
     for (var i = 0; i < `n`.sons.length; i++) {
       nimCopyAux(`dest`, `src`, `n`.sons[i]);
     }
-    """
+    """.}
   of nkCase:
-    asm """
+    {.emit: """
       `dest`[`n`.offset] = nimCopy(`dest`[`n`.offset], `src`[`n`.offset], `n`.typ);
       for (var i = 0; i < `n`.sons.length; ++i) {
         nimCopyAux(`dest`, `src`, `n`.sons[i][1]);
       }
-    """
+    """.}
 
 proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
   case ti.kind
@@ -571,9 +553,9 @@ proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
     if not isFatPointer(ti):
       result = src
     else:
-      asm "`result` = [`src`[0], `src`[1]];"
+      {.emit: "`result` = [`src`[0], `src`[1]];".}
   of tySet:
-    asm """
+    {.emit: """
       if (`dest` === null || `dest` === undefined) {
         `dest` = {};
       }
@@ -582,80 +564,72 @@ proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
       }
       for (var key in `src`) { `dest`[key] = `src`[key]; }
       `result` = `dest`;
-    """
+    """.}
   of tyTuple, tyObject:
     if ti.base != nil: result = nimCopy(dest, src, ti.base)
     elif ti.kind == tyObject:
-      asm "`result` = (`dest` === null || `dest` === undefined) ? {m_type: `ti`} : `dest`;"
+      {.emit: "`result` = (`dest` === null || `dest` === undefined) ? {m_type: `ti`} : `dest`;".}
     else:
-      asm "`result` = (`dest` === null || `dest` === undefined) ? {} : `dest`;"
+      {.emit: "`result` = (`dest` === null || `dest` === undefined) ? {} : `dest`;".}
     nimCopyAux(result, src, ti.node)
-  of tySequence, tyArrayConstr, tyOpenArray, tyArray:
-    asm """
+  of tyArrayConstr, tyArray:
+    # In order to prevent a type change (TypedArray -> Array) and to have better copying performance,
+    # arrays constructors are considered separately
+    {.emit: """
+      if(ArrayBuffer.isView(`src`)) { 
+        if(`dest` === null || `dest` === undefined || `dest`.length != `src`.length) {
+          `dest` = new `src`.constructor(`src`);
+        } else {
+          `dest`.set(`src`, 0);
+        }
+        `result` = `dest`;
+      } else {
+        if (`src` === null) {
+          `result` = null;
+        }
+        else {
+          if (`dest` === null || `dest` === undefined || `dest`.length != `src`.length) {
+            `dest` = new Array(`src`.length);
+          }
+          `result` = `dest`;
+          for (var i = 0; i < `src`.length; ++i) {
+            `result`[i] = nimCopy(`result`[i], `src`[i], `ti`.base);
+          }
+        }
+      }
+    """.}
+  of tySequence, tyOpenArray:
+    {.emit: """
       if (`src` === null) {
         `result` = null;
       }
       else {
-        if (`dest` === null || `dest` === undefined) {
+        if (`dest` === null || `dest` === undefined || `dest`.length != `src`.length) {
           `dest` = new Array(`src`.length);
         }
-        else {
-          `dest`.length = `src`.length;
-        }
         `result` = `dest`;
         for (var i = 0; i < `src`.length; ++i) {
           `result`[i] = nimCopy(`result`[i], `src`[i], `ti`.base);
         }
       }
-    """
+    """.}
   of tyString:
-    asm """
+    {.emit: """
       if (`src` !== null) {
         `result` = `src`.slice(0);
       }
-    """
+    """.}
   else:
     result = src
 
-proc genericReset(x: JSRef, ti: PNimType): JSRef {.compilerproc.} =
-  asm "`result` = null;"
-  case ti.kind
-  of tyPtr, tyRef, tyVar, tyNil:
-    if isFatPointer(ti):
-      asm """
-        `result` = [null, 0];
-      """
-  of tySet:
-    asm """
-      `result` = {};
-    """
-  of tyTuple, tyObject:
-    if ti.kind == tyObject:
-      asm "`result` = {m_type: `ti`};"
-    else:
-      asm "`result` = {};"
-  of tySequence, tyOpenArray:
-    asm """
-      `result` = [];
-    """
-  of tyArrayConstr, tyArray:
-    asm """
-      `result` = new Array(`x`.length);
-      for (var i = 0; i < `x`.length; ++i) {
-        `result`[i] = genericReset(`x`[i], `ti`.base);
-      }
-    """
-  else:
-    discard
-
 proc arrayConstr(len: int, value: JSRef, typ: PNimType): JSRef {.
                 asmNoStackFrame, compilerproc.} =
   # types are fake
-  asm """
+  {.emit: """
     var result = new Array(`len`);
     for (var i = 0; i < `len`; ++i) result[i] = nimCopy(null, `value`, `typ`);
     return result;
-  """
+  """.}
 
 proc chckIndx(i, a, b: int): int {.compilerproc.} =
   if i >= a and i <= b: return i
@@ -684,11 +658,12 @@ proc isObj(obj, subclass: PNimType): bool {.compilerproc.} =
   return true
 
 proc addChar(x: string, c: char) {.compilerproc, asmNoStackFrame.} =
-  asm "`x`.push(`c`);"
+  {.emit: "`x`.push(`c`);".}
 
 {.pop.}
 
 proc tenToThePowerOf(b: int): BiggestFloat =
+  # xxx deadcode
   var b = b
   var a = 10.0
   result = 1.0
@@ -702,86 +677,75 @@ proc tenToThePowerOf(b: int): BiggestFloat =
 const
   IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
 
-# XXX use JS's native way here
-proc nimParseBiggestFloat(s: string, number: var BiggestFloat, start = 0): int {.
-                          compilerproc.} =
-  var
-    esign = 1.0
-    sign = 1.0
-    i = start
-    exponent: int
-    flags: int
-  number = 0.0
+
+proc parseFloatNative(a: openarray[char]): float =
+  var str = ""
+  for x in a:
+    str.add x
+
+  let cstr = cstring str
+
+  {.emit: """
+  `result` = Number(`cstr`);
+  """.}
+
+proc nimParseBiggestFloat(s: openarray[char], number: var BiggestFloat): int {.compilerproc.} =
+  var sign: bool
+  var i = 0
   if s[i] == '+': inc(i)
   elif s[i] == '-':
-    sign = -1.0
+    sign = true
     inc(i)
   if s[i] == 'N' or s[i] == 'n':
     if s[i+1] == 'A' or s[i+1] == 'a':
       if s[i+2] == 'N' or s[i+2] == 'n':
         if s[i+3] notin IdentChars:
           number = NaN
-          return i+3 - start
+          return i+3
     return 0
   if s[i] == 'I' or s[i] == 'i':
     if s[i+1] == 'N' or s[i+1] == 'n':
       if s[i+2] == 'F' or s[i+2] == 'f':
         if s[i+3] notin IdentChars:
-          number = Inf*sign
-          return i+3 - start
+          number = if sign: -Inf else: Inf
+          return i+3
     return 0
-  while s[i] in {'0'..'9'}:
-    # Read integer part
-    flags = flags or 1
-    number = number * 10.0 + toFloat(ord(s[i]) - ord('0'))
+
+  var buf: string
+    # we could also use an `array[char, N]` buffer to avoid reallocs, or
+    # use a 2-pass algorithm that first computes the length.
+  if sign: buf.add '-'
+  template addInc =
+    buf.add s[i]
     inc(i)
+  template eatUnderscores =
     while s[i] == '_': inc(i)
-  # Decimal?
-  if s[i] == '.':
-    var hd = 1.0
+  while s[i] in {'0'..'9'}: # Read integer part
+    buf.add s[i]
     inc(i)
-    while s[i] in {'0'..'9'}:
-      # Read fractional part
-      flags = flags or 2
-      number = number * 10.0 + toFloat(ord(s[i]) - ord('0'))
-      hd = hd * 10.0
-      inc(i)
-      while s[i] == '_': inc(i)
-    number = number / hd # this complicated way preserves precision
+    eatUnderscores()
+  if s[i] == '.': # Decimal?
+    addInc()
+    while s[i] in {'0'..'9'}: # Read fractional part
+      addInc()
+      eatUnderscores()
   # Again, read integer and fractional part
-  if flags == 0: return 0
-  # Exponent?
-  if s[i] in {'e', 'E'}:
-    inc(i)
-    if s[i] == '+':
-      inc(i)
-    elif s[i] == '-':
-      esign = -1.0
-      inc(i)
-    if s[i] notin {'0'..'9'}:
-      return 0
+  if buf.len == ord(sign): return 0
+  if s[i] in {'e', 'E'}: # Exponent?
+    addInc()
+    if s[i] == '+': inc(i)
+    elif s[i] == '-': addInc()
+    if s[i] notin {'0'..'9'}: return 0
     while s[i] in {'0'..'9'}:
-      exponent = exponent * 10 + ord(s[i]) - ord('0')
-      inc(i)
-      while s[i] == '_': inc(i)
-  # Calculate Exponent
-  let hd = tenToThePowerOf(exponent)
-  if esign > 0.0: number = number * hd
-  else:           number = number / hd
-  # evaluate sign
-  number = number * sign
-  result = i - start
-
-when defined(nodejs):
-  # Deprecated. Use `alert` defined in dom.nim
-  proc alert*(s: cstring) {.importc: "console.log", nodecl, deprecated.}
-else:
-  # Deprecated. Use `alert` defined in dom.nim
-  proc alert*(s: cstring) {.importc, nodecl, deprecated.}
+      addInc()
+      eatUnderscores()
+  number = parseFloatNative(buf)
+  result = i
 
 # Workaround for IE, IE up to version 11 lacks 'Math.trunc'. We produce
 # 'Math.trunc' for Nim's ``div`` and ``mod`` operators:
-const jsMathTrunc = """
+when defined(nimJsMathTruncPolyfill):
+  {.emit: """
 if (!Math.trunc) {
   Math.trunc = function(v) {
     v = +v;
@@ -789,5 +753,16 @@ if (!Math.trunc) {
     return (v - v % 1) || (v < 0 ? -0 : v === 0 ? v : 0);
   };
 }
-"""
-when not defined(nodejs): {.emit: jsMathTrunc .}
+""".}
+
+proc cmpClosures(a, b: JSRef): bool {.compilerproc, asmNoStackFrame.} =
+  # Both `a` and `b` need to be a closure
+  {.emit: """
+    if (`a` !== null && `a`.ClP_0 !== undefined &&
+        `b` !== null && `b`.ClP_0 !== undefined) {
+      return `a`.ClP_0 == `b`.ClP_0 && `a`.ClE_0 == `b`.ClE_0;
+    } else {
+      return `a` == `b`;
+    }
+  """
+  .}
diff --git a/lib/system/memalloc.nim b/lib/system/memalloc.nim
index 178d199b8..a94d0995c 100644
--- a/lib/system/memalloc.nim
+++ b/lib/system/memalloc.nim
@@ -1,38 +1,51 @@
 when notJSnotNims:
   proc zeroMem*(p: pointer, size: Natural) {.inline, noSideEffect,
-    tags: [], locks: 0, raises: [].}
-    ## Overwrites the contents of the memory at ``p`` with the value 0.
+    tags: [], raises: [].}
+    ## Overwrites the contents of the memory at `p` with the value 0.
     ##
-    ## Exactly ``size`` bytes will be overwritten. Like any procedure
+    ## Exactly `size` bytes will be overwritten. Like any procedure
     ## dealing with raw memory this is **unsafe**.
 
   proc copyMem*(dest, source: pointer, size: Natural) {.inline, benign,
-    tags: [], locks: 0, raises: [].}
-    ## Copies the contents from the memory at ``source`` to the memory
-    ## at ``dest``.
-    ## Exactly ``size`` bytes will be copied. The memory
+    tags: [], raises: [].}
+    ## Copies the contents from the memory at `source` to the memory
+    ## at `dest`.
+    ## Exactly `size` bytes will be copied. The memory
     ## regions may not overlap. Like any procedure dealing with raw
     ## memory this is **unsafe**.
 
   proc moveMem*(dest, source: pointer, size: Natural) {.inline, benign,
-    tags: [], locks: 0, raises: [].}
-    ## Copies the contents from the memory at ``source`` to the memory
-    ## at ``dest``.
+    tags: [], raises: [].}
+    ## Copies the contents from the memory at `source` to the memory
+    ## at `dest`.
     ##
-    ## Exactly ``size`` bytes will be copied. The memory
-    ## regions may overlap, ``moveMem`` handles this case appropriately
-    ## and is thus somewhat more safe than ``copyMem``. Like any procedure
+    ## Exactly `size` bytes will be copied. The memory
+    ## regions may overlap, `moveMem` handles this case appropriately
+    ## and is thus somewhat more safe than `copyMem`. Like any procedure
     ## dealing with raw memory this is still **unsafe**, though.
 
   proc equalMem*(a, b: pointer, size: Natural): bool {.inline, noSideEffect,
-    tags: [], locks: 0, raises: [].}
-    ## Compares the memory blocks ``a`` and ``b``. ``size`` bytes will
+    tags: [], raises: [].}
+    ## Compares the memory blocks `a` and `b`. `size` bytes will
     ## be compared.
     ##
     ## If the blocks are equal, `true` is returned, `false`
     ## otherwise. Like any procedure dealing with raw memory this is
     ## **unsafe**.
 
+  proc cmpMem*(a, b: pointer, size: Natural): int {.inline, noSideEffect,
+    tags: [], raises: [].}
+    ## Compares the memory blocks `a` and `b`. `size` bytes will
+    ## be compared.
+    ##
+    ## Returns:
+    ## * a value less than zero, if `a < b`
+    ## * a value greater than zero, if `a > b`
+    ## * zero, if `a == b`
+    ##
+    ## Like any procedure dealing with raw memory this is
+    ## **unsafe**.
+
 when hasAlloc and not defined(js):
 
   proc allocImpl*(size: Natural): pointer {.noconv, rtl, tags: [], benign, raises: [].}
@@ -67,7 +80,7 @@ when hasAlloc and not defined(js):
 
   when defined(nimAllocStats):
     var stats: AllocStats
-    template incStat(what: untyped) = inc stats.what
+    template incStat(what: untyped) = atomicInc stats.what
     proc getAllocStats*(): AllocStats = stats
 
   else:
@@ -75,23 +88,23 @@ when hasAlloc and not defined(js):
     proc getAllocStats*(): AllocStats = discard
 
   template alloc*(size: Natural): pointer =
-    ## Allocates a new memory block with at least ``size`` bytes.
+    ## Allocates a new memory block with at least `size` bytes.
     ##
-    ## The block has to be freed with `realloc(block, 0) <#realloc,pointer,Natural>`_
+    ## The block has to be freed with `realloc(block, 0) <#realloc.t,pointer,Natural>`_
     ## or `dealloc(block) <#dealloc,pointer>`_.
     ## The block is not initialized, so reading
     ## from it before writing to it is undefined behaviour!
     ##
     ## The allocated memory belongs to its allocating thread!
-    ## Use `allocShared <#allocShared,Natural>`_ to allocate from a shared heap.
+    ## Use `allocShared <#allocShared.t,Natural>`_ to allocate from a shared heap.
     ##
     ## See also:
-    ## * `alloc0 <#alloc0,Natural>`_
+    ## * `alloc0 <#alloc0.t,Natural>`_
     incStat(allocCount)
     allocImpl(size)
 
   proc createU*(T: typedesc, size = 1.Positive): ptr T {.inline, benign, raises: [].} =
-    ## Allocates a new memory block with at least ``T.sizeof * size`` bytes.
+    ## Allocates a new memory block with at least `T.sizeof * size` bytes.
     ##
     ## The block has to be freed with `resize(block, 0) <#resize,ptr.T,Natural>`_
     ## or `dealloc(block) <#dealloc,pointer>`_.
@@ -106,20 +119,20 @@ when hasAlloc and not defined(js):
     cast[ptr T](alloc(T.sizeof * size))
 
   template alloc0*(size: Natural): pointer =
-    ## Allocates a new memory block with at least ``size`` bytes.
+    ## Allocates a new memory block with at least `size` bytes.
     ##
-    ## The block has to be freed with `realloc(block, 0) <#realloc,pointer,Natural>`_
+    ## The block has to be freed with `realloc(block, 0) <#realloc.t,pointer,Natural>`_
     ## or `dealloc(block) <#dealloc,pointer>`_.
     ## The block is initialized with all bytes containing zero, so it is
-    ## somewhat safer than  `alloc <#alloc,Natural>`_.
+    ## somewhat safer than  `alloc <#alloc.t,Natural>`_.
     ##
     ## The allocated memory belongs to its allocating thread!
-    ## Use `allocShared0 <#allocShared0,Natural>`_ to allocate from a shared heap.
+    ## Use `allocShared0 <#allocShared0.t,Natural>`_ to allocate from a shared heap.
     incStat(allocCount)
     alloc0Impl(size)
 
   proc create*(T: typedesc, size = 1.Positive): ptr T {.inline, benign, raises: [].} =
-    ## Allocates a new memory block with at least ``T.sizeof * size`` bytes.
+    ## Allocates a new memory block with at least `T.sizeof * size` bytes.
     ##
     ## The block has to be freed with `resize(block, 0) <#resize,ptr.T,Natural>`_
     ## or `dealloc(block) <#dealloc,pointer>`_.
@@ -134,13 +147,13 @@ when hasAlloc and not defined(js):
     ## Grows or shrinks a given memory block.
     ##
     ## If `p` is **nil** then a new memory block is returned.
-    ## In either way the block has at least ``newSize`` bytes.
-    ## If ``newSize == 0`` and `p` is not **nil** ``realloc`` calls ``dealloc(p)``.
+    ## In either way the block has at least `newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `realloc` calls `dealloc(p)`.
     ## In other cases the block has to be freed with
     ## `dealloc(block) <#dealloc,pointer>`_.
     ##
     ## The allocated memory belongs to its allocating thread!
-    ## Use `reallocShared <#reallocShared,pointer,Natural>`_ to reallocate
+    ## Use `reallocShared <#reallocShared.t,pointer,Natural>`_ to reallocate
     ## from a shared heap.
     reallocImpl(p, newSize)
 
@@ -148,8 +161,8 @@ when hasAlloc and not defined(js):
     ## Grows or shrinks a given memory block.
     ##
     ## If `p` is **nil** then a new memory block is returned.
-    ## In either way the block has at least ``newSize`` bytes.
-    ## If ``newSize == 0`` and `p` is not **nil** ``realloc`` calls ``dealloc(p)``.
+    ## In either way the block has at least `newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `realloc` calls `dealloc(p)`.
     ## In other cases the block has to be freed with
     ## `dealloc(block) <#dealloc,pointer>`_.
     ##
@@ -157,7 +170,7 @@ when hasAlloc and not defined(js):
     ## somewhat safer then realloc
     ##
     ## The allocated memory belongs to its allocating thread!
-    ## Use `reallocShared <#reallocShared,pointer,Natural>`_ to reallocate
+    ## Use `reallocShared <#reallocShared.t,pointer,Natural>`_ to reallocate
     ## from a shared heap.
     realloc0Impl(p, oldSize, newSize)
 
@@ -165,18 +178,18 @@ when hasAlloc and not defined(js):
     ## Grows or shrinks a given memory block.
     ##
     ## If `p` is **nil** then a new memory block is returned.
-    ## In either way the block has at least ``T.sizeof * newSize`` bytes.
-    ## If ``newSize == 0`` and `p` is not **nil** ``resize`` calls ``dealloc(p)``.
-    ## In other cases the block has to be freed with ``free``.
+    ## In either way the block has at least `T.sizeof * newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `resize` calls `dealloc(p)`.
+    ## In other cases the block has to be freed with `free`.
     ##
     ## The allocated memory belongs to its allocating thread!
     ## Use `resizeShared <#resizeShared,ptr.T,Natural>`_ to reallocate
     ## from a shared heap.
     cast[ptr T](realloc(p, T.sizeof * newSize))
 
-  template dealloc*(p: pointer) =
-    ## Frees the memory allocated with ``alloc``, ``alloc0`` or
-    ## ``realloc``.
+  proc dealloc*(p: pointer) {.noconv, compilerproc, rtl, benign, raises: [], tags: [].} =
+    ## Frees the memory allocated with `alloc`, `alloc0`,
+    ## `realloc`, `create` or `createU`.
     ##
     ## **This procedure is dangerous!**
     ## If one forgets to free the memory a leak occurs; if one tries to
@@ -190,24 +203,24 @@ when hasAlloc and not defined(js):
 
   template allocShared*(size: Natural): pointer =
     ## Allocates a new memory block on the shared heap with at
-    ## least ``size`` bytes.
+    ## least `size` bytes.
     ##
     ## The block has to be freed with
-    ## `reallocShared(block, 0) <#reallocShared,pointer,Natural>`_
+    ## `reallocShared(block, 0) <#reallocShared.t,pointer,Natural>`_
     ## or `deallocShared(block) <#deallocShared,pointer>`_.
     ##
     ## The block is not initialized, so reading from it before writing
     ## to it is undefined behaviour!
     ##
     ## See also:
-    ## `allocShared0 <#allocShared0,Natural>`_.
+    ## * `allocShared0 <#allocShared0.t,Natural>`_.
     incStat(allocCount)
     allocSharedImpl(size)
 
   proc createSharedU*(T: typedesc, size = 1.Positive): ptr T {.inline, tags: [],
                                                                benign, raises: [].} =
     ## Allocates a new memory block on the shared heap with at
-    ## least ``T.sizeof * size`` bytes.
+    ## least `T.sizeof * size` bytes.
     ##
     ## The block has to be freed with
     ## `resizeShared(block, 0) <#resizeShared,ptr.T,Natural>`_ or
@@ -222,21 +235,21 @@ when hasAlloc and not defined(js):
 
   template allocShared0*(size: Natural): pointer =
     ## Allocates a new memory block on the shared heap with at
-    ## least ``size`` bytes.
+    ## least `size` bytes.
     ##
     ## The block has to be freed with
-    ## `reallocShared(block, 0) <#reallocShared,pointer,Natural>`_
+    ## `reallocShared(block, 0) <#reallocShared.t,pointer,Natural>`_
     ## or `deallocShared(block) <#deallocShared,pointer>`_.
     ##
     ## The block is initialized with all bytes
     ## containing zero, so it is somewhat safer than
-    ## `allocShared <#allocShared,Natural>`_.
+    ## `allocShared <#allocShared.t,Natural>`_.
     incStat(allocCount)
     allocShared0Impl(size)
 
   proc createShared*(T: typedesc, size = 1.Positive): ptr T {.inline.} =
     ## Allocates a new memory block on the shared heap with at
-    ## least ``T.sizeof * size`` bytes.
+    ## least `T.sizeof * size` bytes.
     ##
     ## The block has to be freed with
     ## `resizeShared(block, 0) <#resizeShared,ptr.T,Natural>`_ or
@@ -251,9 +264,9 @@ when hasAlloc and not defined(js):
     ## Grows or shrinks a given memory block on the heap.
     ##
     ## If `p` is **nil** then a new memory block is returned.
-    ## In either way the block has at least ``newSize`` bytes.
-    ## If ``newSize == 0`` and `p` is not **nil** ``reallocShared`` calls
-    ## ``deallocShared(p)``.
+    ## In either way the block has at least `newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `reallocShared` calls
+    ## `deallocShared(p)`.
     ## In other cases the block has to be freed with
     ## `deallocShared <#deallocShared,pointer>`_.
     reallocSharedImpl(p, newSize)
@@ -265,9 +278,9 @@ when hasAlloc and not defined(js):
     ## containing zero, so it is somewhat safer then reallocShared
     ##
     ## If `p` is **nil** then a new memory block is returned.
-    ## In either way the block has at least ``newSize`` bytes.
-    ## If ``newSize == 0`` and `p` is not **nil** ``reallocShared`` calls
-    ## ``deallocShared(p)``.
+    ## In either way the block has at least `newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `reallocShared` calls
+    ## `deallocShared(p)`.
     ## In other cases the block has to be freed with
     ## `deallocShared <#deallocShared,pointer>`_.
     reallocShared0Impl(p, oldSize, newSize)
@@ -276,16 +289,16 @@ when hasAlloc and not defined(js):
     ## Grows or shrinks a given memory block on the heap.
     ##
     ## If `p` is **nil** then a new memory block is returned.
-    ## In either way the block has at least ``T.sizeof * newSize`` bytes.
-    ## If ``newSize == 0`` and `p` is not **nil** ``resizeShared`` calls
-    ## ``freeShared(p)``.
+    ## In either way the block has at least `T.sizeof * newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `resizeShared` calls
+    ## `freeShared(p)`.
     ## In other cases the block has to be freed with
     ## `freeShared <#freeShared,ptr.T>`_.
     cast[ptr T](reallocShared(p, T.sizeof * newSize))
 
   proc deallocShared*(p: pointer) {.noconv, compilerproc, rtl, benign, raises: [], tags: [].} =
-    ## Frees the memory allocated with ``allocShared``, ``allocShared0`` or
-    ## ``reallocShared``.
+    ## Frees the memory allocated with `allocShared`, `allocShared0` or
+    ## `reallocShared`.
     ##
     ## **This procedure is dangerous!**
     ## If one forgets to free the memory a leak occurs; if one tries to
@@ -295,8 +308,8 @@ when hasAlloc and not defined(js):
     deallocSharedImpl(p)
 
   proc freeShared*[T](p: ptr T) {.inline, benign, raises: [].} =
-    ## Frees the memory allocated with ``createShared``, ``createSharedU`` or
-    ## ``resizeShared``.
+    ## Frees the memory allocated with `createShared`, `createSharedU` or
+    ## `resizeShared`.
     ##
     ## **This procedure is dangerous!**
     ## If one forgets to free the memory a leak occurs; if one tries to
@@ -304,6 +317,87 @@ when hasAlloc and not defined(js):
     ## or other memory may be corrupted.
     deallocShared(p)
 
+  include bitmasks
+
+  template `+!`(p: pointer, s: SomeInteger): pointer =
+    cast[pointer](cast[int](p) +% int(s))
+
+  template `-!`(p: pointer, s: SomeInteger): pointer =
+    cast[pointer](cast[int](p) -% int(s))
+
+  proc alignedAlloc(size, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = allocShared(size)
+      else:
+        result = alloc(size)
+    else:
+      # allocate (size + align - 1) necessary for alignment,
+      # plus 2 bytes to store offset
+      when compileOption("threads"):
+        let base = allocShared(size + align - 1 + sizeof(uint16))
+      else:
+        let base = alloc(size + align - 1 + sizeof(uint16))
+      # memory layout: padding + offset (2 bytes) + user_data
+      # in order to deallocate: read offset at user_data - 2 bytes,
+      # then deallocate user_data - offset
+      let offset = align - (cast[int](base) and (align - 1))
+      cast[ptr uint16](base +! (offset - sizeof(uint16)))[] = uint16(offset)
+      result = base +! offset
+
+  proc alignedAlloc0(size, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = allocShared0(size)
+      else:
+        result = alloc0(size)
+    else:
+      # see comments for alignedAlloc
+      when compileOption("threads"):
+        let base = allocShared0(size + align - 1 + sizeof(uint16))
+      else:
+        let base = alloc0(size + align - 1 + sizeof(uint16))
+      let offset = align - (cast[int](base) and (align - 1))
+      cast[ptr uint16](base +! (offset - sizeof(uint16)))[] = uint16(offset)
+      result = base +! offset
+
+  proc alignedDealloc(p: pointer, align: int) {.compilerproc.} =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        deallocShared(p)
+      else:
+        dealloc(p)
+    else:
+      # read offset at p - 2 bytes, then deallocate (p - offset) pointer
+      let offset = cast[ptr uint16](p -! sizeof(uint16))[]
+      when compileOption("threads"):
+        deallocShared(p -! offset)
+      else:
+        dealloc(p -! offset)
+
+  proc alignedRealloc(p: pointer, oldSize, newSize, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = reallocShared(p, newSize)
+      else:
+        result = realloc(p, newSize)
+    else:
+      result = alignedAlloc(newSize, align)
+      copyMem(result, p, oldSize)
+      alignedDealloc(p, align)
+
+  proc alignedRealloc0(p: pointer, oldSize, newSize, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = reallocShared0(p, oldSize, newSize)
+      else:
+        result = realloc0(p, oldSize, newSize)
+    else:
+      result = alignedAlloc(newSize, align)
+      copyMem(result, p, oldSize)
+      zeroMem(result +! oldSize, newSize - oldSize)
+      alignedDealloc(p, align)
+
   {.pop.}
 
 # GC interface:
diff --git a/lib/system/memory.nim b/lib/system/memory.nim
index 50faa3d86..156773c48 100644
--- a/lib/system/memory.nim
+++ b/lib/system/memory.nim
@@ -2,9 +2,6 @@
 
 const useLibC = not defined(nimNoLibc)
 
-when not defined(nimHasHotCodeReloading):
-  {.pragma: nonReloadable.}
-
 when useLibC:
   import ansi_c
 
@@ -46,6 +43,7 @@ proc nimCmpMem*(a, b: pointer, size: Natural): cint {.compilerproc, nonReloadabl
       inc i
 
 proc nimCStrLen*(a: cstring): int {.compilerproc, nonReloadable, inline.} =
+  if a.isNil: return 0
   when useLibC:
     cast[int](c_strlen(a))
   else:
diff --git a/lib/system/memtracker.nim b/lib/system/memtracker.nim
index 115d4a973..289f4e024 100644
--- a/lib/system/memtracker.nim
+++ b/lib/system/memtracker.nim
@@ -35,7 +35,7 @@ type
     count*: int
     disabled: bool
     data*: array[400, LogEntry]
-  TrackLogger* = proc (log: TrackLog) {.nimcall, tags: [], locks: 0, gcsafe.}
+  TrackLogger* = proc (log: TrackLog) {.nimcall, tags: [], gcsafe.}
 
 var
   gLog*: TrackLog
@@ -70,9 +70,9 @@ proc addEntry(entry: LogEntry) =
     if interesting:
       gLog.disabled = true
       cprintf("interesting %s:%ld %s\n", entry.file, entry.line, entry.op)
-      let x = cast[proc() {.nimcall, tags: [], gcsafe, locks: 0, raises: [].}](writeStackTrace)
+      let x = cast[proc() {.nimcall, tags: [], gcsafe, raises: [].}](writeStackTrace)
       x()
-      quit 1
+      rawQuit 1
       #if gLog.count > high(gLog.data):
       #  gLogger(gLog)
       #  gLog.count = 0
@@ -85,7 +85,7 @@ proc memTrackerWrite(address: pointer; size: int; file: cstring; line: int) {.co
       size: size, file: file, line: line, thread: myThreadId())
 
 proc memTrackerOp*(op: cstring; address: pointer; size: int) {.tags: [],
-         locks: 0, gcsafe.} =
+         gcsafe.} =
   addEntry LogEntry(op: op, address: address, size: size,
       file: "", line: 0, thread: myThreadId())
 
@@ -100,6 +100,7 @@ proc logPendingOps() {.noconv.} =
   gLogger(gLog)
   gLog.count = 0
 
-addQuitProc logPendingOps
+import std/exitprocs
+addExitProc logPendingOps
 
 {.pop.}
diff --git a/lib/system/mm/go.nim b/lib/system/mm/go.nim
index b6d5a1a44..8f3aeb964 100644
--- a/lib/system/mm/go.nim
+++ b/lib/system/mm/go.nim
@@ -35,8 +35,6 @@ proc goMalloc(size: uint): pointer {.importc: "go_malloc", dynlib: goLib.}
 proc goSetFinalizer(obj: pointer, f: pointer) {.importc: "set_finalizer", codegenDecl:"$1 $2$3 __asm__ (\"main.Set_finalizer\");\n$1 $2$3", dynlib: goLib.}
 proc writebarrierptr(dest: PPointer, src: pointer) {.importc: "writebarrierptr", codegenDecl:"$1 $2$3 __asm__ (\"main.Atomic_store_pointer\");\n$1 $2$3", dynlib: goLib.}
 
-proc `$`*(x: uint64): string {.noSideEffect, raises: [].}
-
 proc GC_getStatistics(): string =
   var mstats = goMemStats()
   result = "[GC] total allocated memory: " & $(mstats.total_alloc) & "\n" &
@@ -111,7 +109,7 @@ proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
   writebarrierptr(addr(result), newSeq(typ, len))
 
 proc nimNewSeqOfCap(typ: PNimType, cap: int): pointer {.compilerproc.} =
-  result = newObj(typ, align(GenericSeqSize, typ.base.align) + cap * typ.base.size)
+  result = newObjNoInit(typ, align(GenericSeqSize, typ.base.align) + cap * typ.base.size)
   cast[PGenericSeq](result).len = 0
   cast[PGenericSeq](result).reserved = cap
   cast[PGenericSeq](result).elemSize = typ.base.size
@@ -132,8 +130,8 @@ proc growObj(old: pointer, newsize: int): pointer =
 
 proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
 proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
-proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} = discard
-proc nimGCunrefRC1(p: pointer) {.compilerProc, inline.} = discard
+proc nimGCunrefNoCycle(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCunrefRC1(p: pointer) {.compilerproc, inline.} = discard
 proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} = discard
 
 proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
diff --git a/lib/system/mm/malloc.nim b/lib/system/mm/malloc.nim
index 3dad98e93..47f1a95ae 100644
--- a/lib/system/mm/malloc.nim
+++ b/lib/system/mm/malloc.nim
@@ -2,18 +2,27 @@
 {.push stackTrace: off.}
 
 proc allocImpl(size: Natural): pointer =
-  c_malloc(size.csize_t)
+  result = c_malloc(size.csize_t)
+  when defined(zephyr):
+    if result == nil:
+      raiseOutOfMem()
 
 proc alloc0Impl(size: Natural): pointer =
-  c_calloc(size.csize_t, 1)
+  result = c_calloc(size.csize_t, 1)
+  when defined(zephyr):
+    if result == nil:
+      raiseOutOfMem()
 
 proc reallocImpl(p: pointer, newSize: Natural): pointer =
-  c_realloc(p, newSize.csize_t)
+  result = c_realloc(p, newSize.csize_t)
+  when defined(zephyr):
+    if result == nil:
+      raiseOutOfMem()
 
 proc realloc0Impl(p: pointer, oldsize, newSize: Natural): pointer =
   result = realloc(p, newSize.csize_t)
   if newSize > oldSize:
-    zeroMem(cast[pointer](cast[int](result) + oldSize), newSize - oldSize)
+    zeroMem(cast[pointer](cast[uint](result) + uint(oldSize)), newSize - oldSize)
 
 proc deallocImpl(p: pointer) =
   c_free(p)
@@ -65,8 +74,10 @@ proc growObj(old: pointer, newsize: int): pointer =
 proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
 proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
 
-proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-  dest[] = src
+when not defined(gcDestructors):
+  proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+    dest[] = src
+
 proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
   dest[] = src
 proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
@@ -77,7 +88,7 @@ type
 
 proc alloc(r: var MemRegion, size: int): pointer =
   result = alloc(size)
-proc alloc0Impl(r: var MemRegion, size: int): pointer =
+proc alloc0(r: var MemRegion, size: int): pointer =
   result = alloc0Impl(size)
 proc dealloc(r: var MemRegion, p: pointer) = dealloc(p)
 proc deallocOsPages(r: var MemRegion) = discard
diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim
index 5fe1960d1..26f2f0bbf 100644
--- a/lib/system/mmdisp.nim
+++ b/lib/system/mmdisp.nim
@@ -17,10 +17,10 @@ const
   debugGC = false # we wish to debug the GC...
   logGC = false
   traceGC = false # extensive debugging
-  alwaysCycleGC = defined(smokeCycles)
-  alwaysGC = defined(fulldebug) # collect after every memory
+  alwaysCycleGC = defined(nimSmokeCycles)
+  alwaysGC = defined(nimFulldebug) # collect after every memory
                                 # allocation (for debugging)
-  leakDetector = defined(leakDetector)
+  leakDetector = defined(nimLeakDetector)
   overwriteFree = defined(nimBurnFree) # overwrite memory with 0xFF before free
   trackAllocationSource = leakDetector
 
@@ -30,7 +30,7 @@ const
   coalescRight = true
   coalescLeft = true
   logAlloc = false
-  useCellIds = defined(corruption)
+  useCellIds = defined(nimCorruption)
 
 type
   PPointer = ptr pointer
@@ -46,7 +46,7 @@ else:
 proc raiseOutOfMem() {.noinline.} =
   if outOfMemHook != nil: outOfMemHook()
   cstderr.rawWrite("out of memory\n")
-  quit(1)
+  rawQuit(1)
 
 when defined(boehmgc):
   include system / mm / boehm
@@ -57,6 +57,16 @@ elif defined(gogc):
 elif (defined(nogc) or defined(gcDestructors)) and defined(useMalloc):
   include system / mm / malloc
 
+  when defined(nogc):
+    proc GC_getStatistics(): string = ""
+    proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
+      result = alloc0(size)
+
+    proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
+      result = newObj(typ, align(GenericSeqSize, typ.align) + len * typ.base.size)
+      cast[PGenericSeq](result).len = len
+      cast[PGenericSeq](result).reserved = len
+
 elif defined(nogc):
   include system / mm / none
 
@@ -68,14 +78,13 @@ else:
       include "system/cellsets"
     when not leakDetector and not useCellIds and not defined(nimV2):
       sysAssert(sizeof(Cell) == sizeof(FreeCell), "sizeof FreeCell")
-  when compileOption("gc", "v2"):
-    include "system/gc2"
-  elif defined(gcRegions):
+  when defined(gcRegions):
     # XXX due to bootstrapping reasons, we cannot use  compileOption("gc", "stack") here
     include "system/gc_regions"
   elif defined(nimV2) or usesDestructors:
-    var allocator {.rtlThreadVar.}: MemRegion
-    instantiateForRegion(allocator)
+    when not defined(useNimRtl):
+      var allocator {.rtlThreadVar.}: MemRegion
+      instantiateForRegion(allocator)
     when defined(gcHooks):
       include "system/gc_hooks"
   elif defined(gcMarkAndSweep):
diff --git a/lib/system/nimscript.nim b/lib/system/nimscript.nim
index bf298e0d4..cf81f6d86 100644
--- a/lib/system/nimscript.nim
+++ b/lib/system/nimscript.nim
@@ -14,11 +14,11 @@
 
 const
   buildOS* {.magic: "BuildOS".}: string = ""
-    ## The OS this build is running on. Can be different from ``system.hostOS``
+    ## The OS this build is running on. Can be different from `system.hostOS`
     ## for cross compilations.
 
   buildCPU* {.magic: "BuildCPU".}: string = ""
-    ## The CPU this build is running on. Can be different from ``system.hostCPU``
+    ## The CPU this build is running on. Can be different from `system.hostCPU`
     ## for cross compilations.
 
 template builtin = discard
@@ -57,7 +57,7 @@ proc warningImpl(arg, orig: string) = discard
 proc hintImpl(arg, orig: string) = discard
 
 proc paramStr*(i: int): string =
-  ## Retrieves the ``i``'th command line parameter.
+  ## Retrieves the `i`'th command line parameter.
   builtin
 
 proc paramCount*(): int =
@@ -66,7 +66,7 @@ proc paramCount*(): int =
 
 proc switch*(key: string, val="") =
   ## Sets a Nim compiler command line switch, for
-  ## example ``switch("checks", "on")``.
+  ## example `switch("checks", "on")`.
   builtin
 
 proc warning*(name: string; val: bool) =
@@ -82,16 +82,16 @@ proc hint*(name: string; val: bool) =
 proc patchFile*(package, filename, replacement: string) =
   ## Overrides the location of a given file belonging to the
   ## passed package.
-  ## If the ``replacement`` is not an absolute path, the path
+  ## If the `replacement` is not an absolute path, the path
   ## is interpreted to be local to the Nimscript file that contains
-  ## the call to ``patchFile``, Nim's ``--path`` is not used at all
+  ## the call to `patchFile`, Nim's `--path` is not used at all
   ## to resolve the filename!
+  ## The compiler also performs `path substitution <nimc.html#compiler-usage-commandminusline-switches>`_ on `replacement`.
   ##
   ## Example:
-  ##
-  ## .. code-block:: nim
-  ##
+  ##   ```nim
   ##   patchFile("stdlib", "asyncdispatch", "patches/replacement")
+  ##   ```
   discard
 
 proc getCommand*(): string =
@@ -112,19 +112,19 @@ proc cmpic*(a, b: string): int =
   cmpIgnoreCase(a, b)
 
 proc getEnv*(key: string; default = ""): string {.tags: [ReadIOEffect].} =
-  ## Retrieves the environment variable of name ``key``.
+  ## Retrieves the environment variable of name `key`.
   builtin
 
 proc existsEnv*(key: string): bool {.tags: [ReadIOEffect].} =
-  ## Checks for the existence of an environment variable named ``key``.
+  ## Checks for the existence of an environment variable named `key`.
   builtin
 
 proc putEnv*(key, val: string) {.tags: [WriteIOEffect].} =
-  ## Sets the value of the environment variable named ``key`` to ``val``.
+  ## Sets the value of the environment variable named `key` to `val`.
   builtin
 
 proc delEnv*(key: string) {.tags: [WriteIOEffect].} =
-  ## Deletes the environment variable named ``key``.
+  ## Deletes the environment variable named `key`.
   builtin
 
 proc fileExists*(filename: string): bool {.tags: [ReadIOEffect].} =
@@ -136,17 +136,8 @@ proc dirExists*(dir: string): bool {.
   ## Checks if the directory `dir` exists.
   builtin
 
-template existsFile*(args: varargs[untyped]): untyped {.deprecated: "use fileExists".} =
-  # xxx: warning won't be shown for nimsscript because of current logic handling
-  # `foreignPackageNotes`
-  fileExists(args)
-
-template existsDir*(args: varargs[untyped]): untyped {.deprecated: "use dirExists".} =
-  dirExists(dir)
-
-proc selfExe*(): string =
+proc selfExe*(): string {.deprecated: "Deprecated since v1.7; Use getCurrentCompilerExe".} =
   ## Returns the currently running nim or nimble executable.
-  # TODO: consider making this as deprecated alias of `getCurrentCompilerExe`
   builtin
 
 proc toExe*(filename: string): string =
@@ -159,16 +150,28 @@ proc toDll*(filename: string): string =
 
 proc strip(s: string): string =
   var i = 0
-  while s[i] in {' ', '\c', '\L'}: inc i
+  while s[i] in {' ', '\c', '\n'}: inc i
   result = s.substr(i)
+  if result[0] == '"' and result[^1] == '"':
+    result = result[1..^2]
 
 template `--`*(key, val: untyped) =
-  ## A shortcut for ``switch(astToStr(key), astToStr(val))``.
-  switch(astToStr(key), strip astToStr(val))
+  ## A shortcut for `switch <#switch,string,string>`_
+  ## Example:
+  ##   ```nim
+  ##   --path:somePath # same as switch("path", "somePath")
+  ##   --path:"someOtherPath" # same as switch("path", "someOtherPath")
+  ##   --hint:"[Conf]:off" # same as switch("hint", "[Conf]:off")
+  ##   ```
+  switch(strip(astToStr(key)), strip(astToStr(val)))
 
 template `--`*(key: untyped) =
-  ## A shortcut for ``switch(astToStr(key)``.
-  switch(astToStr(key), "")
+  ## A shortcut for `switch <#switch,string,string>`_
+  ## Example:
+  ##   ```nim
+  ##   --listCmd # same as switch("listCmd")
+  ##   ```
+  switch(strip(astToStr(key)))
 
 type
   ScriptMode* {.pure.} = enum ## Controls the behaviour of the script.
@@ -191,7 +194,7 @@ template checkOsError =
 template log(msg: string, body: untyped) =
   if mode in {ScriptMode.Verbose, ScriptMode.Whatif}:
     echo "[NimScript] ", msg
-  if mode != ScriptMode.WhatIf:
+  if mode != ScriptMode.Whatif:
     body
 
 proc listDirs*(dir: string): seq[string] =
@@ -248,32 +251,39 @@ proc cpDir*(`from`, to: string) {.raises: [OSError].} =
     checkOsError()
 
 proc exec*(command: string) {.
-  raises: [OSError], tags: [ExecIOEffect].} =
+  raises: [OSError], tags: [ExecIOEffect, WriteIOEffect].} =
   ## Executes an external process. If the external process terminates with
-  ## a non-zero exit code, an OSError exception is raised.
+  ## a non-zero exit code, an OSError exception is raised. The command is
+  ## executed relative to the current source path.
   ##
-  ## **Note:** If you need a version of ``exec`` that returns the exit code
-  ## and text output of the command, you can use `system.gorgeEx
-  ## <system.html#gorgeEx,string,string,string>`_.
+  ## .. note:: If you need a version of `exec` that returns the exit code
+  ##   and text output of the command, you can use `system.gorgeEx
+  ##   <system.html#gorgeEx,string,string,string>`_.
   log "exec: " & command:
     if rawExec(command) != 0:
       raise newException(OSError, "FAILED: " & command)
     checkOsError()
 
 proc exec*(command: string, input: string, cache = "") {.
-  raises: [OSError], tags: [ExecIOEffect].} =
+  raises: [OSError], tags: [ExecIOEffect, WriteIOEffect].} =
   ## Executes an external process. If the external process terminates with
   ## a non-zero exit code, an OSError exception is raised.
+  ##
+  ## .. warning:: This version of `exec` is executed relative to the nimscript
+  ##   module path, which affects how the command resolves relative paths. Thus
+  ##   it is generally better to use `gorgeEx` directly when you need more
+  ##   control over the execution environment or when working with commands
+  ##   that deal with relative paths.
   log "exec: " & command:
     let (output, exitCode) = gorgeEx(command, input, cache)
+    echo output
     if exitCode != 0:
       raise newException(OSError, "FAILED: " & command)
-    echo output
 
 proc selfExec*(command: string) {.
-  raises: [OSError], tags: [ExecIOEffect].} =
+  raises: [OSError], tags: [ExecIOEffect, WriteIOEffect].} =
   ## Executes an external command with the current nim/nimble executable.
-  ## ``Command`` must not contain the "nim " part.
+  ## `Command` must not contain the "nim " part.
   let c = selfExe() & " " & command
   log "exec: " & c:
     if rawExec(c) != 0:
@@ -310,9 +320,9 @@ proc projectPath*(): string =
   builtin
 
 proc thisDir*(): string =
-  ## Retrieves the directory of the current ``nims`` script file. Its path is
-  ## obtained via ``currentSourcePath`` (although, currently,
-  ## ``currentSourcePath`` resolves symlinks, unlike ``thisDir``).
+  ## Retrieves the directory of the current `nims` script file. Its path is
+  ## obtained via `currentSourcePath` (although, currently,
+  ## `currentSourcePath` resolves symlinks, unlike `thisDir`).
   builtin
 
 proc cd*(dir: string) {.raises: [OSError].} =
@@ -336,19 +346,19 @@ template withDir*(dir: string; body: untyped): untyped =
   ##
   ## If you need a permanent change, use the `cd() <#cd,string>`_ proc.
   ## Usage example:
-  ##
-  ## .. code-block:: nim
+  ##   ```nim
+  ##   # inside /some/path/
   ##   withDir "foo":
-  ##     # inside foo
-  ##   #back to last dir
-  var curDir = getCurrentDir()
+  ##     # move to /some/path/foo/
+  ##   # back in /some/path/
+  ##   ```
+  let curDir = getCurrentDir()
   try:
     cd(dir)
     body
   finally:
     cd(curDir)
 
-
 proc writeTask(name, desc: string) =
   if desc.len > 0:
     var spaces = " "
@@ -356,25 +366,25 @@ proc writeTask(name, desc: string) =
     echo name, spaces, desc
 
 proc cppDefine*(define: string) =
-  ## tell Nim that ``define`` is a C preprocessor ``#define`` and so always
+  ## tell Nim that `define` is a C preprocessor `#define` and so always
   ## needs to be mangled.
   builtin
 
-proc stdinReadLine(): TaintedString {.
+proc stdinReadLine(): string {.
   tags: [ReadIOEffect], raises: [IOError].} =
   builtin
 
-proc stdinReadAll(): TaintedString {.
+proc stdinReadAll(): string {.
   tags: [ReadIOEffect], raises: [IOError].} =
   builtin
 
-proc readLineFromStdin*(): TaintedString {.raises: [IOError].} =
+proc readLineFromStdin*(): string {.raises: [IOError].} =
   ## Reads a line of data from stdin - blocks until \n or EOF which happens when stdin is closed
   log "readLineFromStdin":
     result = stdinReadLine()
     checkError(EOFError)
 
-proc readAllFromStdin*(): TaintedString {.raises: [IOError].} =
+proc readAllFromStdin*(): string {.raises: [IOError].} =
   ## Reads all data from stdin - blocks until EOF which happens when stdin is closed
   log "readAllFromStdin":
     result = stdinReadAll()
@@ -386,24 +396,25 @@ when not defined(nimble):
     ## Defines a task. Hidden tasks are supported via an empty description.
     ##
     ## Example:
+    ##   ```nim
+    ##   task build, "default build is via the C backend":
+    ##     setCommand "c"
+    ##   ```
     ##
-    ## .. code-block:: nim
-    ##  task build, "default build is via the C backend":
-    ##    setCommand "c"
-    ##
-    ## For a task named ``foo``, this template generates a ``proc`` named
-    ## ``fooTask``.  This is useful if you need to call one task in
+    ## For a task named `foo`, this template generates a `proc` named
+    ## `fooTask`.  This is useful if you need to call one task in
     ## another in your Nimscript.
     ##
     ## Example:
     ##
-    ## .. code-block:: nim
-    ##  task foo, "foo":        # > nim foo
-    ##    echo "Running foo"    # Running foo
+    ##   ```nim
+    ##   task foo, "foo":        # > nim foo
+    ##     echo "Running foo"    # Running foo
     ##
-    ##  task bar, "bar":        # > nim bar
-    ##    echo "Running bar"    # Running bar
-    ##    fooTask()             # Running foo
+    ##   task bar, "bar":        # > nim bar
+    ##     echo "Running bar"    # Running bar
+    ##     fooTask()             # Running foo
+    ##   ```
     proc `name Task`*() =
       setCommand "nop"
       body
diff --git a/lib/system/orc.nim b/lib/system/orc.nim
new file mode 100644
index 000000000..c02a24989
--- /dev/null
+++ b/lib/system/orc.nim
@@ -0,0 +1,543 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2020 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Cycle collector based on
+# https://www.cs.purdue.edu/homes/hosking/690M/Bacon01Concurrent.pdf
+# And ideas from Lins' in 2008 by the notion of "critical links", see
+# "Cyclic reference counting" by Rafael Dueire Lins
+# R.D. Lins / Information Processing Letters 109 (2008) 71–78
+#
+
+include cellseqs_v2
+
+const
+  colBlack = 0b000
+  colGray = 0b001
+  colWhite = 0b010
+  maybeCycle = 0b100 # possibly part of a cycle; this has to be a "sticky" bit
+  jumpStackFlag = 0b1000
+  colorMask = 0b011
+
+  logOrc = defined(nimArcIds)
+
+type
+  TraceProc = proc (p, env: pointer) {.nimcall, benign.}
+  DisposeProc = proc (p: pointer) {.nimcall, benign.}
+
+template color(c): untyped = c.rc and colorMask
+template setColor(c, col) =
+  when col == colBlack:
+    c.rc = c.rc and not colorMask
+  else:
+    c.rc = c.rc and not colorMask or col
+
+const
+  optimizedOrc = false # not defined(nimOldOrc)
+# XXX Still incorrect, see tests/arc/tdestroy_in_loopcond
+
+proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
+  let h = head(p)
+  inc h.rc, rcIncrement
+  when optimizedOrc:
+    if cyclic:
+      h.rc = h.rc or maybeCycle
+
+proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} =
+  when optimizedOrc:
+    if p != nil:
+      let h = head(p)
+      h.rc = h.rc or maybeCycle
+
+proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
+  # This is only used by the old RTTI mechanism and we know
+  # that 'dest[]' is nil and needs no destruction. Which is really handy
+  # as we cannot destroy the object reliably if it's an object of unknown
+  # compile-time type.
+  dest[] = src
+  if src != nil: nimIncRefCyclic(src, true)
+
+const
+  useJumpStack = false # for thavlak the jump stack doesn't improve the performance at all
+
+type
+  GcEnv = object
+    traceStack: CellSeq[ptr pointer]
+    when useJumpStack:
+      jumpStack: CellSeq[ptr pointer]   # Lins' jump stack in order to speed up traversals
+    toFree: CellSeq[Cell]
+    freed, touched, edges, rcSum: int
+    keepThreshold: bool
+
+proc trace(s: Cell; desc: PNimTypeV2; j: var GcEnv) {.inline.} =
+  if desc.traceImpl != nil:
+    var p = s +! sizeof(RefHeader)
+    cast[TraceProc](desc.traceImpl)(p, addr(j))
+
+include threadids
+
+when logOrc or orcLeakDetector:
+  proc writeCell(msg: cstring; s: Cell; desc: PNimTypeV2) =
+    when orcLeakDetector:
+      cfprintf(cstderr, "%s %s file: %s:%ld; color: %ld; thread: %ld\n",
+        msg, desc.name, s.filename, s.line, s.color, getThreadId())
+    else:
+      cfprintf(cstderr, "%s %s %ld root index: %ld; RC: %ld; color: %ld; thread: %ld\n",
+        msg, desc.name, s.refId, s.rootIdx, s.rc shr rcShift, s.color, getThreadId())
+
+proc free(s: Cell; desc: PNimTypeV2) {.inline.} =
+  when traceCollector:
+    cprintf("[From ] %p rc %ld color %ld\n", s, s.rc shr rcShift, s.color)
+  let p = s +! sizeof(RefHeader)
+
+  when logOrc: writeCell("free", s, desc)
+
+  if desc.destructor != nil:
+    cast[DestructorProc](desc.destructor)(p)
+
+  when false:
+    cstderr.rawWrite desc.name
+    cstderr.rawWrite " "
+    if desc.destructor == nil:
+      cstderr.rawWrite "lacks dispose"
+      if desc.traceImpl != nil:
+        cstderr.rawWrite ", but has trace\n"
+      else:
+        cstderr.rawWrite ", and lacks trace\n"
+    else:
+      cstderr.rawWrite "has dispose!\n"
+
+  nimRawDispose(p, desc.align)
+
+template orcAssert(cond, msg) =
+  when logOrc:
+    if not cond:
+      cfprintf(cstderr, "[Bug!] %s\n", msg)
+      rawQuit 1
+
+when logOrc:
+  proc strstr(s, sub: cstring): cstring {.header: "<string.h>", importc.}
+
+proc nimTraceRef(q: pointer; desc: PNimTypeV2; env: pointer) {.compilerRtl, inl.} =
+  let p = cast[ptr pointer](q)
+  if p[] != nil:
+
+    orcAssert strstr(desc.name, "TType") == nil, "following a TType but it's acyclic!"
+
+    var j = cast[ptr GcEnv](env)
+    j.traceStack.add(p, desc)
+
+proc nimTraceRefDyn(q: pointer; env: pointer) {.compilerRtl, inl.} =
+  let p = cast[ptr pointer](q)
+  if p[] != nil:
+    var j = cast[ptr GcEnv](env)
+    j.traceStack.add(p, cast[ptr PNimTypeV2](p[])[])
+
+var
+  roots {.threadvar.}: CellSeq[Cell]
+
+proc unregisterCycle(s: Cell) =
+  # swap with the last element. O(1)
+  let idx = s.rootIdx-1
+  when false:
+    if idx >= roots.len or idx < 0:
+      cprintf("[Bug!] %ld %ld\n", idx, roots.len)
+      rawQuit 1
+  roots.d[idx] = roots.d[roots.len-1]
+  roots.d[idx][0].rootIdx = idx+1
+  dec roots.len
+  s.rootIdx = 0
+
+proc scanBlack(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
+  #[
+  proc scanBlack(s: Cell) =
+    setColor(s, colBlack)
+    for t in sons(s):
+      t.rc = t.rc + rcIncrement
+      if t.color != colBlack:
+        scanBlack(t)
+  ]#
+  s.setColor colBlack
+  let until = j.traceStack.len
+  trace(s, desc, j)
+  when logOrc: writeCell("root still alive", s, desc)
+  while j.traceStack.len > until:
+    let (entry, desc) = j.traceStack.pop()
+    let t = head entry[]
+    inc t.rc, rcIncrement
+    if t.color != colBlack:
+      t.setColor colBlack
+      trace(t, desc, j)
+      when logOrc: writeCell("child still alive", t, desc)
+
+proc markGray(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
+  #[
+  proc markGray(s: Cell) =
+    if s.color != colGray:
+      setColor(s, colGray)
+      for t in sons(s):
+        t.rc = t.rc - rcIncrement
+        if t.color != colGray:
+          markGray(t)
+  ]#
+  if s.color != colGray:
+    s.setColor colGray
+    inc j.touched
+    # keep in mind that refcounts are zero based so add 1 here:
+    inc j.rcSum, (s.rc shr rcShift) + 1
+    orcAssert(j.traceStack.len == 0, "markGray: trace stack not empty")
+    trace(s, desc, j)
+    while j.traceStack.len > 0:
+      let (entry, desc) = j.traceStack.pop()
+      let t = head entry[]
+      dec t.rc, rcIncrement
+      inc j.edges
+      when useJumpStack:
+        if (t.rc shr rcShift) >= 0 and (t.rc and jumpStackFlag) == 0:
+          t.rc = t.rc or jumpStackFlag
+          when traceCollector:
+            cprintf("[Now in jumpstack] %p %ld color %ld in jumpstack %ld\n", t, t.rc shr rcShift, t.color, t.rc and jumpStackFlag)
+          j.jumpStack.add(entry, desc)
+      if t.color != colGray:
+        t.setColor colGray
+        inc j.touched
+        # we already decremented its refcount so account for that:
+        inc j.rcSum, (t.rc shr rcShift) + 2
+        trace(t, desc, j)
+
+proc scan(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
+  #[
+  proc scan(s: Cell) =
+    if s.color == colGray:
+      if s.rc > 0:
+        scanBlack(s)
+      else:
+        s.setColor(colWhite)
+        for t in sons(s): scan(t)
+  ]#
+  if s.color == colGray:
+    if (s.rc shr rcShift) >= 0:
+      scanBlack(s, desc, j)
+      # XXX this should be done according to Lins' paper but currently breaks
+      #when useJumpStack:
+      #  s.setColor colPurple
+    else:
+      when useJumpStack:
+        # first we have to repair all the nodes we have seen
+        # that are still alive; we also need to mark what they
+        # refer to as alive:
+        while j.jumpStack.len > 0:
+          let (entry, desc) = j.jumpStack.pop
+          let t = head entry[]
+          # not in jump stack anymore!
+          t.rc = t.rc and not jumpStackFlag
+          if t.color == colGray and (t.rc shr rcShift) >= 0:
+            scanBlack(t, desc, j)
+            # XXX this should be done according to Lins' paper but currently breaks
+            #t.setColor colPurple
+            when traceCollector:
+              cprintf("[jump stack] %p %ld\n", t, t.rc shr rcShift)
+
+      orcAssert(j.traceStack.len == 0, "scan: trace stack not empty")
+      s.setColor(colWhite)
+      trace(s, desc, j)
+      while j.traceStack.len > 0:
+        let (entry, desc) = j.traceStack.pop()
+        let t = head entry[]
+        if t.color == colGray:
+          if (t.rc shr rcShift) >= 0:
+            scanBlack(t, desc, j)
+          else:
+            when useJumpStack:
+              # first we have to repair all the nodes we have seen
+              # that are still alive; we also need to mark what they
+              # refer to as alive:
+              while j.jumpStack.len > 0:
+                let (entry, desc) = j.jumpStack.pop
+                let t = head entry[]
+                # not in jump stack anymore!
+                t.rc = t.rc and not jumpStackFlag
+                if t.color == colGray and (t.rc shr rcShift) >= 0:
+                  scanBlack(t, desc, j)
+                  # XXX this should be done according to Lins' paper but currently breaks
+                  #t.setColor colPurple
+                  when traceCollector:
+                    cprintf("[jump stack] %p %ld\n", t, t.rc shr rcShift)
+
+            t.setColor(colWhite)
+            trace(t, desc, j)
+
+when false:
+  proc writeCell(msg: cstring; s: Cell) =
+    cfprintf(cstderr, "%s %p root index: %ld; RC: %ld; color: %ld\n",
+      msg, s, s.rootIdx, s.rc shr rcShift, s.color)
+
+proc collectColor(s: Cell; desc: PNimTypeV2; col: int; j: var GcEnv) =
+  #[
+    was: 'collectWhite'.
+
+  proc collectWhite(s: Cell) =
+    if s.color == colWhite and not buffered(s):
+      s.setColor(colBlack)
+      for t in sons(s):
+        collectWhite(t)
+      free(s) # watch out, a bug here!
+  ]#
+  if s.color == col and s.rootIdx == 0:
+    orcAssert(j.traceStack.len == 0, "collectWhite: trace stack not empty")
+
+    s.setColor(colBlack)
+    j.toFree.add(s, desc)
+    trace(s, desc, j)
+    while j.traceStack.len > 0:
+      let (entry, desc) = j.traceStack.pop()
+      let t = head entry[]
+      entry[] = nil # ensure that the destructor does touch moribund objects!
+      if t.color == col and t.rootIdx == 0:
+        j.toFree.add(t, desc)
+        t.setColor(colBlack)
+        trace(t, desc, j)
+
+const
+  defaultThreshold = when defined(nimFixedOrc): 10_000 else: 128
+
+when defined(nimStressOrc):
+  const rootsThreshold = 10 # broken with -d:nimStressOrc: 10 and for havlak iterations 1..8
+else:
+  var rootsThreshold {.threadvar.}: int
+
+proc collectCyclesBacon(j: var GcEnv; lowMark: int) =
+  # pretty direct translation from
+  # https://researcher.watson.ibm.com/researcher/files/us-bacon/Bacon01Concurrent.pdf
+  # Fig. 2. Synchronous Cycle Collection
+  #[
+    for s in roots:
+      markGray(s)
+    for s in roots:
+      scan(s)
+    for s in roots:
+      remove s from roots
+      s.buffered = false
+      collectWhite(s)
+  ]#
+  let last = roots.len - 1
+  when logOrc:
+    for i in countdown(last, lowMark):
+      writeCell("root", roots.d[i][0], roots.d[i][1])
+
+  for i in countdown(last, lowMark):
+    markGray(roots.d[i][0], roots.d[i][1], j)
+
+  var colToCollect = colWhite
+  if j.rcSum == j.edges:
+    # short-cut: we know everything is garbage:
+    colToCollect = colGray
+    # remember the fact that we got so lucky:
+    j.keepThreshold = true
+  else:
+    for i in countdown(last, lowMark):
+      scan(roots.d[i][0], roots.d[i][1], j)
+
+  init j.toFree
+  for i in 0 ..< roots.len:
+    let s = roots.d[i][0]
+    s.rootIdx = 0
+    collectColor(s, roots.d[i][1], colToCollect, j)
+
+  # Bug #22927: `free` calls destructors which can append to `roots`.
+  # We protect against this here by setting `roots.len` to 0 and also
+  # setting the threshold so high that no cycle collection can be triggered
+  # until we are out of this critical section:
+  when not defined(nimStressOrc):
+    let oldThreshold = rootsThreshold
+    rootsThreshold = high(int)
+  roots.len = 0
+
+  for i in 0 ..< j.toFree.len:
+    when orcLeakDetector:
+      writeCell("CYCLIC OBJECT FREED", j.toFree.d[i][0], j.toFree.d[i][1])
+    free(j.toFree.d[i][0], j.toFree.d[i][1])
+
+  when not defined(nimStressOrc):
+    rootsThreshold = oldThreshold
+
+  inc j.freed, j.toFree.len
+  deinit j.toFree
+
+when defined(nimOrcStats):
+  var freedCyclicObjects {.threadvar.}: int
+
+proc partialCollect(lowMark: int) =
+  when false:
+    if roots.len < 10 + lowMark: return
+  when logOrc:
+    cfprintf(cstderr, "[partialCollect] begin\n")
+  var j: GcEnv
+  init j.traceStack
+  collectCyclesBacon(j, lowMark)
+  when logOrc:
+    cfprintf(cstderr, "[partialCollect] end; freed %ld touched: %ld work: %ld\n", j.freed, j.touched,
+      roots.len - lowMark)
+  roots.len = lowMark
+  deinit j.traceStack
+  when defined(nimOrcStats):
+    inc freedCyclicObjects, j.freed
+
+proc collectCycles() =
+  ## Collect cycles.
+  when logOrc:
+    cfprintf(cstderr, "[collectCycles] begin\n")
+
+  var j: GcEnv
+  init j.traceStack
+  when useJumpStack:
+    init j.jumpStack
+    collectCyclesBacon(j, 0)
+    while j.jumpStack.len > 0:
+      let (t, desc) = j.jumpStack.pop
+      # not in jump stack anymore!
+      t.rc = t.rc and not jumpStackFlag
+    deinit j.jumpStack
+  else:
+    collectCyclesBacon(j, 0)
+
+  deinit j.traceStack
+  if roots.len == 0:
+    deinit roots
+
+  when not defined(nimStressOrc):
+    # compute the threshold based on the previous history
+    # of the cycle collector's effectiveness:
+    # we're effective when we collected 50% or more of the nodes
+    # we touched. If we're effective, we can reset the threshold:
+    if j.keepThreshold:
+      discard
+    elif j.freed * 2 >= j.touched:
+      when not defined(nimFixedOrc):
+        rootsThreshold = max(rootsThreshold div 3 * 2, 16)
+      else:
+        rootsThreshold = 0
+      #cfprintf(cstderr, "[collectCycles] freed %ld, touched %ld new threshold %ld\n", j.freed, j.touched, rootsThreshold)
+    elif rootsThreshold < high(int) div 4:
+      rootsThreshold = (if rootsThreshold <= 0: defaultThreshold else: rootsThreshold) * 3 div 2
+  when logOrc:
+    cfprintf(cstderr, "[collectCycles] end; freed %ld new threshold %ld touched: %ld mem: %ld rcSum: %ld edges: %ld\n", j.freed, rootsThreshold, j.touched,
+      getOccupiedMem(), j.rcSum, j.edges)
+  when defined(nimOrcStats):
+    inc freedCyclicObjects, j.freed
+
+when defined(nimOrcStats):
+  type
+    OrcStats* = object ## Statistics of the cycle collector subsystem.
+      freedCyclicObjects*: int ## Number of freed cyclic objects.
+  proc GC_orcStats*(): OrcStats =
+    ## Returns the statistics of the cycle collector subsystem.
+    result = OrcStats(freedCyclicObjects: freedCyclicObjects)
+
+proc registerCycle(s: Cell; desc: PNimTypeV2) =
+  s.rootIdx = roots.len+1
+  if roots.d == nil: init(roots)
+  add(roots, s, desc)
+
+  if roots.len - defaultThreshold >= rootsThreshold:
+    collectCycles()
+  when logOrc:
+    writeCell("[added root]", s, desc)
+
+  orcAssert strstr(desc.name, "TType") == nil, "added a TType as a root!"
+
+proc GC_runOrc* =
+  ## Forces a cycle collection pass.
+  collectCycles()
+  orcAssert roots.len == 0, "roots not empty!"
+
+proc GC_enableOrc*() =
+  ## Enables the cycle collector subsystem of `--mm:orc`. This is a `--mm:orc`
+  ## specific API. Check with `when defined(gcOrc)` for its existence.
+  when not defined(nimStressOrc):
+    rootsThreshold = 0
+
+proc GC_disableOrc*() =
+  ## Disables the cycle collector subsystem of `--mm:orc`. This is a `--mm:orc`
+  ## specific API. Check with `when defined(gcOrc)` for its existence.
+  when not defined(nimStressOrc):
+    rootsThreshold = high(int)
+
+proc GC_prepareOrc*(): int {.inline.} = roots.len
+
+proc GC_partialCollect*(limit: int) =
+  partialCollect(limit)
+
+proc GC_fullCollect* =
+  ## Forces a full garbage collection pass. With `--mm:orc` triggers the cycle
+  ## collector. This is an alias for `GC_runOrc`.
+  collectCycles()
+
+proc GC_enableMarkAndSweep*() =
+  ## For `--mm:orc` an alias for `GC_enableOrc`.
+  GC_enableOrc()
+
+proc GC_disableMarkAndSweep*() =
+  ## For `--mm:orc` an alias for `GC_disableOrc`.
+  GC_disableOrc()
+
+const
+  acyclicFlag = 1 # see also cggtypes.nim, proc genTypeInfoV2Impl
+
+when optimizedOrc:
+  template markedAsCyclic(s: Cell; desc: PNimTypeV2): bool =
+    (desc.flags and acyclicFlag) == 0 and (s.rc and maybeCycle) != 0
+else:
+  template markedAsCyclic(s: Cell; desc: PNimTypeV2): bool =
+    (desc.flags and acyclicFlag) == 0
+
+proc rememberCycle(isDestroyAction: bool; s: Cell; desc: PNimTypeV2) {.noinline.} =
+  if isDestroyAction:
+    if s.rootIdx > 0:
+      unregisterCycle(s)
+  else:
+    # do not call 'rememberCycle' again unless this cell
+    # got an 'incRef' event:
+    if s.rootIdx == 0 and markedAsCyclic(s, desc):
+      s.setColor colBlack
+      registerCycle(s, desc)
+
+proc nimDecRefIsLastCyclicDyn(p: pointer): bool {.compilerRtl, inl.} =
+  if p != nil:
+    var cell = head(p)
+    if (cell.rc and not rcMask) == 0:
+      result = true
+      #cprintf("[DESTROY] %p\n", p)
+    else:
+      dec cell.rc, rcIncrement
+    #if cell.color == colPurple:
+    rememberCycle(result, cell, cast[ptr PNimTypeV2](p)[])
+
+proc nimDecRefIsLastDyn(p: pointer): bool {.compilerRtl, inl.} =
+  if p != nil:
+    var cell = head(p)
+    if (cell.rc and not rcMask) == 0:
+      result = true
+      #cprintf("[DESTROY] %p\n", p)
+    else:
+      dec cell.rc, rcIncrement
+    #if cell.color == colPurple:
+    if result:
+      if cell.rootIdx > 0:
+        unregisterCycle(cell)
+
+proc nimDecRefIsLastCyclicStatic(p: pointer; desc: PNimTypeV2): bool {.compilerRtl, inl.} =
+  if p != nil:
+    var cell = head(p)
+    if (cell.rc and not rcMask) == 0:
+      result = true
+      #cprintf("[DESTROY] %p %s\n", p, desc.name)
+    else:
+      dec cell.rc, rcIncrement
+    #if cell.color == colPurple:
+    rememberCycle(result, cell, desc)
diff --git a/lib/system/osalloc.nim b/lib/system/osalloc.nim
index 32d3b166d..5509d0070 100644
--- a/lib/system/osalloc.nim
+++ b/lib/system/osalloc.nim
@@ -28,7 +28,30 @@ const doNotUnmap = not (defined(amd64) or defined(i386)) or
                    defined(windows) or defined(nimAllocNoUnmap)
 
 
-when defined(emscripten) and not defined(StandaloneHeapSize):
+when defined(nimAllocPagesViaMalloc):
+  when not defined(gcArc) and not defined(gcOrc) and not defined(gcAtomicArc):
+    {.error: "-d:nimAllocPagesViaMalloc is only supported with --mm:arc or --mm:atomicArc or --mm:orc".}
+
+  proc osTryAllocPages(size: int): pointer {.inline.} =
+    let base = c_malloc(csize_t size + PageSize - 1 + sizeof(uint32))
+    if base == nil: raiseOutOfMem()
+    # memory layout: padding + offset (4 bytes) + user_data
+    # in order to deallocate: read offset at user_data - 4 bytes,
+    # then deallocate user_data - offset
+    let offset = PageSize - (cast[int](base) and (PageSize - 1))
+    cast[ptr uint32](base +! (offset - sizeof(uint32)))[] = uint32(offset)
+    result = base +! offset
+
+  proc osAllocPages(size: int): pointer {.inline.} =
+    result = osTryAllocPages(size)
+    if result == nil: raiseOutOfMem()
+
+  proc osDeallocPages(p: pointer, size: int) {.inline.} =
+    # read offset at p - 4 bytes, then deallocate (p - offset) pointer
+    let offset = cast[ptr uint32](p -! sizeof(uint32))[]
+    c_free(p -! offset)
+
+elif defined(emscripten) and not defined(StandaloneHeapSize):
   const
     PROT_READ  = 1             # page can be read
     PROT_WRITE = 2             # page can be written
@@ -57,12 +80,12 @@ when defined(emscripten) and not defined(StandaloneHeapSize):
     let pos = cast[int](result)
 
     # Convert pointer to PageSize correct one.
-    var new_pos = cast[ByteAddress](pos) +% (PageSize - (pos %% PageSize))
+    var new_pos = cast[int](pos) +% (PageSize - (pos %% PageSize))
     if (new_pos-pos) < sizeof(EmscriptenMMapBlock):
       new_pos = new_pos +% PageSize
     result = cast[pointer](new_pos)
 
-    var mmapDescrPos = cast[ByteAddress](result) -% sizeof(EmscriptenMMapBlock)
+    var mmapDescrPos = cast[int](result) -% sizeof(EmscriptenMMapBlock)
 
     var mmapDescr = cast[EmscriptenMMapBlock](mmapDescrPos)
     mmapDescr.realSize = realSize
@@ -73,135 +96,24 @@ when defined(emscripten) and not defined(StandaloneHeapSize):
   proc osTryAllocPages(size: int): pointer = osAllocPages(size)
 
   proc osDeallocPages(p: pointer, size: int) {.inline.} =
-    var mmapDescrPos = cast[ByteAddress](p) -% sizeof(EmscriptenMMapBlock)
+    var mmapDescrPos = cast[int](p) -% sizeof(EmscriptenMMapBlock)
     var mmapDescr = cast[EmscriptenMMapBlock](mmapDescrPos)
     munmap(mmapDescr.realPointer, mmapDescr.realSize)
 
 elif defined(genode) and not defined(StandaloneHeapSize):
   include genode/alloc # osAllocPages, osTryAllocPages, osDeallocPages
 
-elif defined(nintendoswitch) and not defined(StandaloneHeapSize):
-
-  import nintendoswitch/switch_memory
-
-  type
-    PSwitchBlock = ptr NSwitchBlock
-    ## This will hold the heap pointer data in a separate
-    ## block of memory that is PageSize bytes above
-    ## the requested memory. It's the only good way
-    ## to pass around data with heap allocations
-    NSwitchBlock {.pure, inheritable.} = object
-      realSize: int
-      heap: pointer           # pointer to main heap alloc
-      heapMirror: pointer     # pointer to virtmem mapped heap
-
-  proc alignSize(size: int): int {.inline.} =
-    ## Align a size integer to be in multiples of PageSize
-    ## The nintendo switch will not allocate memory that is not
-    ## aligned to 0x1000 bytes and will just crash.
-    (size + (PageSize - 1)) and not (PageSize - 1)
-
-  proc deallocate(heapMirror: pointer, heap: pointer, size: int) =
-    # Unmap the allocated memory
-    discard svcUnmapMemory(heapMirror, heap, size.uint64)
-    # These should be called (theoretically), but referencing them crashes the switch.
-    # The above call seems to free all heap memory, so these are not needed.
-    # virtmemFreeMap(nswitchBlock.heapMirror, nswitchBlock.realSize.csize)
-    # free(nswitchBlock.heap)
-
-  proc freeMem(p: pointer) =
-    # Retrieve the switch block data from the pointer we set before
-    # The data is located just sizeof(NSwitchBlock) bytes below
-    # the top of the pointer to the heap
-    let
-      nswitchDescrPos = cast[ByteAddress](p) -% sizeof(NSwitchBlock)
-      nswitchBlock = cast[PSwitchBlock](nswitchDescrPos)
-
-    deallocate(
-      nswitchBlock.heapMirror, nswitchBlock.heap, nswitchBlock.realSize
-    )
-
-  proc storeHeapData(address, heapMirror, heap: pointer, size: int) {.inline.} =
-    ## Store data in the heap for deallocation purposes later
-
-    # the position of our heap pointer data. Since we allocated PageSize extra
-    # bytes, we should have a buffer on top of the requested size of at least
-    # PageSize bytes, which is much larger than sizeof(NSwitchBlock). So we
-    # decrement the address by sizeof(NSwitchBlock) and use that address
-    # to store our pointer data
-    let nswitchBlockPos = cast[ByteAddress](address) -% sizeof(NSwitchBlock)
-
-    # We need to store this in a pointer obj (PSwitchBlock) so that the data sticks
-    # at the address we've chosen. If NSwitchBlock is used here, the data will
-    # be all 0 when we try to retrieve it later.
-    var nswitchBlock = cast[PSwitchBlock](nswitchBlockPos)
-    nswitchBlock.realSize = size
-    nswitchBlock.heap = heap
-    nswitchBlock.heapMirror = heapMirror
-
-  proc getOriginalHeapPosition(address: pointer, difference: int): pointer {.inline.} =
-    ## This function sets the heap back to the originally requested
-    ## size
-    let
-      pos = cast[int](address)
-      newPos = cast[ByteAddress](pos) +% difference
-
-    return cast[pointer](newPos)
-
-  template allocPages(size: int, outOfMemoryStmt: untyped): untyped =
-    # This is to ensure we get a block of memory the requested
-    # size, as well as space to store our structure
-    let realSize = alignSize(size + sizeof(NSwitchBlock))
-
-    let heap = memalign(PageSize, realSize)
-
-    if heap.isNil:
-      outOfMemoryStmt
-
-    let heapMirror = virtmemReserveMap(realSize.csize)
-    result = heapMirror
-
-    let rc = svcMapMemory(heapMirror, heap, realSize.uint64)
-    # Any return code not equal 0 means an error in libnx
-    if rc.uint32 != 0:
-      deallocate(heapMirror, heap, realSize)
-      outOfMemoryStmt
-
-    # set result to be the original size requirement
-    result = getOriginalHeapPosition(result, realSize - size)
-
-    storeHeapData(result, heapMirror, heap, realSize)
-
-  proc osAllocPages(size: int): pointer {.inline.} =
-    allocPages(size):
-      raiseOutOfMem()
-
-  proc osTryAllocPages(size: int): pointer =
-    allocPages(size):
-      return nil
-
-  proc osDeallocPages(p: pointer, size: int) =
-    # Note that in order for the Switch not to crash, a call to
-    # deallocHeap(runFinalizers = true, allowGcAfterwards = false)
-    # must be run before gfxExit(). The Switch requires all memory
-    # to be deallocated before the graphics application has exited.
-    #
-    # gfxExit() can be found in <switch/gfx/gfx.h> in the github
-    # repo https://github.com/switchbrew/libnx
-    when reallyOsDealloc:
-      freeMem(p)
-
 elif defined(posix) and not defined(StandaloneHeapSize):
   const
     PROT_READ  = 1             # page can be read
     PROT_WRITE = 2             # page can be written
 
   when defined(netbsd) or defined(openbsd):
-      # OpenBSD security for setjmp/longjmp coroutines
-      var MAP_STACK {.importc: "MAP_STACK", header: "<sys/mman.h>".}: cint
+    # OpenBSD security for setjmp/longjmp coroutines
+    var MAP_STACK {.importc: "MAP_STACK", header: "<sys/mman.h>".}: cint
   else:
     const MAP_STACK = 0             # avoid sideeffects
-    
+
   when defined(macosx) or defined(freebsd):
     const MAP_ANONYMOUS = 0x1000
     const MAP_PRIVATE = 0x02        # Changes are private
@@ -277,7 +189,7 @@ elif defined(windows) and not defined(StandaloneHeapSize):
     when reallyOsDealloc:
       if virtualFree(p, 0, MEM_RELEASE) == 0:
         cprintf "virtualFree failing!"
-        quit 1
+        rawQuit 1
     #VirtualFree(p, size, MEM_DECOMMIT)
 
 elif hostOS == "standalone" or defined(StandaloneHeapSize):
diff --git a/lib/system/platforms.nim b/lib/system/platforms.nim
index 6e39dc7f2..0619f3fca 100644
--- a/lib/system/platforms.nim
+++ b/lib/system/platforms.nim
@@ -10,6 +10,8 @@
 ## Platform detection for NimScript. This module is included by the system module!
 ## Do not import it directly!
 
+# CPU architectures have alias names mapped in tools/niminst/makefile.nimf
+
 type
   CpuPlatform* {.pure.} = enum ## the CPU this program will run on.
     none,                      ## unknown CPU
@@ -33,14 +35,17 @@ type
     vm,                        ## Some Virtual machine: Nim's VM or JavaScript
     avr,                       ## AVR based processor
     msp430,                    ## TI MSP430 microcontroller
-    riscv64                    ## RISC-V 64-bit processor
-    wasm32                     ## WASM, 32-bit
+    riscv32,                   ## RISC-V 32-bit processor
+    riscv64,                   ## RISC-V 64-bit processor
+    wasm32,                    ## WASM, 32-bit
+    e2k,                       ## MCST Elbrus 2000
+    loongarch64,               ## LoongArch 64-bit processor
+    s390x                      ## IBM Z
 
   OsPlatform* {.pure.} = enum ## the OS this program will run on.
     none, dos, windows, os2, linux, morphos, skyos, solaris,
     irix, netbsd, freebsd, openbsd, aix, palmos, qnx, amiga,
-    atari, netware, macos, macosx, haiku, android, js, nimVM,
-    standalone, nintendoswitch
+    atari, netware, macos, macosx, haiku, android, js, standalone, nintendoswitch
 
 const
   targetOS* = when defined(windows): OsPlatform.windows
@@ -65,7 +70,6 @@ const
               elif defined(haiku): OsPlatform.haiku
               elif defined(android): OsPlatform.android
               elif defined(js): OsPlatform.js
-              elif defined(nimVM): OsPlatform.nimVM
               elif defined(standalone): OsPlatform.standalone
               elif defined(nintendoswitch): OsPlatform.nintendoswitch
               else: OsPlatform.none
@@ -91,7 +95,11 @@ const
                elif defined(vm): CpuPlatform.vm
                elif defined(avr): CpuPlatform.avr
                elif defined(msp430): CpuPlatform.msp430
+               elif defined(riscv32): CpuPlatform.riscv32
                elif defined(riscv64): CpuPlatform.riscv64
                elif defined(wasm32): CpuPlatform.wasm32
+               elif defined(e2k): CpuPlatform.e2k
+               elif defined(loongarch64): CpuPlatform.loongarch64
+               elif defined(s390x): CpuPlatform.s390x
                else: CpuPlatform.none
     ## the CPU this program will run on.
diff --git a/lib/system/profiler.nim b/lib/system/profiler.nim
index 0649f1176..e7eb6ac82 100644
--- a/lib/system/profiler.nim
+++ b/lib/system/profiler.nim
@@ -60,13 +60,13 @@ proc captureStackTrace(f: PFrame, st: var StackTrace) =
     b = b.prev
 
 var
-  profilingRequestedHook*: proc (): bool {.nimcall, locks: 0, gcsafe.}
+  profilingRequestedHook*: proc (): bool {.nimcall, gcsafe.}
     ## set this variable to provide a procedure that implements a profiler in
     ## user space. See the `nimprof` module for a reference implementation.
 
 when defined(memProfiler):
   type
-    MemProfilerHook* = proc (st: StackTrace, requestedSize: int) {.nimcall, locks: 0, gcsafe.}
+    MemProfilerHook* = proc (st: StackTrace, requestedSize: int) {.nimcall, gcsafe.}
 
   var
     profilerHook*: MemProfilerHook
diff --git a/lib/system/rawquits.nim b/lib/system/rawquits.nim
new file mode 100644
index 000000000..f0ead10c6
--- /dev/null
+++ b/lib/system/rawquits.nim
@@ -0,0 +1,27 @@
+import system/ctypes
+
+when defined(nimNoQuit):
+  proc rawQuit(errorcode: int = QuitSuccess) = discard "ignoring quit"
+
+elif defined(genode):
+  import genode/env
+
+  var systemEnv {.exportc: runtimeEnvSym.}: GenodeEnvPtr
+
+  type GenodeEnv = GenodeEnvPtr
+    ## Opaque type representing Genode environment.
+
+  proc rawQuit(env: GenodeEnv; errorcode: int) {.magic: "Exit", noreturn,
+    importcpp: "#->parent().exit(@); Genode::sleep_forever()", header: "<base/sleep.h>".}
+
+  proc rawQuit(errorcode: int = QuitSuccess) {.inline, noreturn.} =
+    systemEnv.rawQuit(errorcode)
+
+
+elif defined(js) and defined(nodejs) and not defined(nimscript):
+  proc rawQuit(errorcode: int = QuitSuccess) {.magic: "Exit",
+    importc: "process.exit", noreturn.}
+
+else:
+  proc rawQuit(errorcode: cint) {.
+    magic: "Exit", importc: "exit", header: "<stdlib.h>", noreturn.}
\ No newline at end of file
diff --git a/lib/system/refs_v2.nim b/lib/system/refs_v2.nim
deleted file mode 100644
index df1248586..000000000
--- a/lib/system/refs_v2.nim
+++ /dev/null
@@ -1,237 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2019 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-#[
-In this new runtime we simplify the object layouts a bit: The runtime type
-information is only accessed for the objects that have it and it's always
-at offset 0 then. The ``ref`` object header is independent from the
-runtime type and only contains a reference count.
-
-Object subtyping is checked via the generated 'name'. This should have
-comparable overhead to the old pointer chasing approach but has the benefit
-that it works across DLL boundaries.
-
-The generated name is a concatenation of the object names in the hierarchy
-so that a subtype check becomes a substring check. For example::
-
-  type
-    ObjectA = object of RootObj
-    ObjectB = object of ObjectA
-
-ObjectA's ``name`` is "|ObjectA|RootObj|".
-ObjectB's ``name`` is "|ObjectB|ObjectA|RootObj|".
-
-Now to check for ``x of ObjectB`` we need to check
-for ``x.typ.name.hasSubstring("|ObjectB|")``. In the actual implementation,
-however, we could also use a
-hash of ``package & "." & module & "." & name`` to save space.
-
-]#
-
-when defined(gcOrc):
-  const
-    rcIncrement = 0b10000 # so that lowest 4 bits are not touched
-    rcMask = 0b1111
-    rcShift = 4      # shift by rcShift to get the reference counter
-
-else:
-  const
-    rcIncrement = 0b1000 # so that lowest 3 bits are not touched
-    rcMask = 0b111
-    rcShift = 3      # shift by rcShift to get the reference counter
-
-type
-  RefHeader = object
-    rc: int # the object header is now a single RC field.
-            # we could remove it in non-debug builds for the 'owned ref'
-            # design but this seems unwise.
-    when defined(gcOrc):
-      rootIdx: int # thanks to this we can delete potential cycle roots
-                   # in O(1) without doubly linked lists
-    when defined(nimArcDebug):
-      refId: int
-
-  Cell = ptr RefHeader
-
-template `+!`(p: pointer, s: int): pointer =
-  cast[pointer](cast[int](p) +% s)
-
-template `-!`(p: pointer, s: int): pointer =
-  cast[pointer](cast[int](p) -% s)
-
-template head(p: pointer): Cell =
-  cast[Cell](cast[int](p) -% sizeof(RefHeader))
-
-const
-  traceCollector = defined(traceArc)
-
-when defined(nimArcDebug):
-  include cellsets
-
-  const traceId = 7739 # 1037
-
-  var gRefId: int
-  var freedCells: CellSet
-
-proc nimNewObj(size: int): pointer {.compilerRtl.} =
-  let s = size + sizeof(RefHeader)
-  when defined(nimscript):
-    discard
-  elif defined(useMalloc):
-    var orig = c_malloc(cuint s)
-    nimZeroMem(orig, s)
-    result = orig +! sizeof(RefHeader)
-  elif compileOption("threads"):
-    result = allocShared0(s) +! sizeof(RefHeader)
-  else:
-    result = alloc0(s) +! sizeof(RefHeader)
-  when defined(nimArcDebug):
-    head(result).refId = gRefId
-    atomicInc gRefId
-  when traceCollector:
-    cprintf("[Allocated] %p result: %p\n", result -! sizeof(RefHeader), result)
-
-proc nimNewObjUninit(size: int): pointer {.compilerRtl.} =
-  # Same as 'newNewObj' but do not initialize the memory to zero.
-  # The codegen proved for us that this is not necessary.
-  let s = size + sizeof(RefHeader)
-  when defined(nimscript):
-    discard
-  elif defined(useMalloc):
-    var orig = cast[ptr RefHeader](c_malloc(cuint s))
-  elif compileOption("threads"):
-    var orig = cast[ptr RefHeader](allocShared(s))
-  else:
-    var orig = cast[ptr RefHeader](alloc(s))
-  orig.rc = 0
-  when defined(gcOrc):
-    orig.rootIdx = 0
-  result = orig +! sizeof(RefHeader)
-  when defined(nimArcDebug):
-    head(result).refId = gRefId
-    atomicInc gRefId
-  when traceCollector:
-    cprintf("[Allocated] %p result: %p\n", result -! sizeof(RefHeader), result)
-
-proc nimDecWeakRef(p: pointer) {.compilerRtl, inl.} =
-  dec head(p).rc, rcIncrement
-
-proc nimIncRef(p: pointer) {.compilerRtl, inl.} =
-  when defined(nimArcDebug):
-    if head(p).refId == traceId:
-      writeStackTrace()
-      cfprintf(cstderr, "[IncRef] %p %ld\n", p, head(p).rc shr rcShift)
-
-  inc head(p).rc, rcIncrement
-  when traceCollector:
-    cprintf("[INCREF] %p\n", head(p))
-
-when not defined(nimscript) and defined(nimArcDebug):
-  proc deallocatedRefId*(p: pointer): int =
-    ## Returns the ref's ID if the ref was already deallocated. This
-    ## is a memory corruption check. Returns 0 if there is no error.
-    let c = head(p)
-    if freedCells.data != nil and freedCells.contains(c):
-      result = c.refId
-    else:
-      result = 0
-
-proc nimRawDispose(p: pointer) {.compilerRtl.} =
-  when not defined(nimscript):
-    when traceCollector:
-      cprintf("[Freed] %p\n", p -! sizeof(RefHeader))
-    when defined(nimOwnedEnabled):
-      if head(p).rc >= rcIncrement:
-        cstderr.rawWrite "[FATAL] dangling references exist\n"
-        quit 1
-    when defined(nimArcDebug):
-      # we do NOT really free the memory here in order to reliably detect use-after-frees
-      if freedCells.data == nil: init(freedCells)
-      freedCells.incl head(p)
-    elif defined(useMalloc):
-      c_free(p -! sizeof(RefHeader))
-    elif compileOption("threads"):
-      deallocShared(p -! sizeof(RefHeader))
-    else:
-      dealloc(p -! sizeof(RefHeader))
-
-template dispose*[T](x: owned(ref T)) = nimRawDispose(cast[pointer](x))
-#proc dispose*(x: pointer) = nimRawDispose(x)
-
-proc nimDestroyAndDispose(p: pointer) {.compilerRtl, raises: [].} =
-  let d = cast[ptr PNimType](p)[].destructor
-  if d != nil: cast[DestructorProc](d)(p)
-  when false:
-    cstderr.rawWrite cast[ptr PNimType](p)[].name
-    cstderr.rawWrite "\n"
-    if d == nil:
-      cstderr.rawWrite "bah, nil\n"
-    else:
-      cstderr.rawWrite "has destructor!\n"
-  nimRawDispose(p)
-
-when defined(gcOrc):
-  when defined(nimThinout):
-    include cyclebreaker
-  else:
-    include cyclicrefs_bacon
-    #include cyclecollector
-    #include cyclicrefs_v2
-
-proc nimDecRefIsLast(p: pointer): bool {.compilerRtl, inl.} =
-  if p != nil:
-    var cell = head(p)
-
-    when defined(nimArcDebug):
-      if cell.refId == traceId:
-        writeStackTrace()
-        cfprintf(cstderr, "[DecRef] %p %ld\n", p, cell.rc shr rcShift)
-
-    if (cell.rc and not rcMask) == 0:
-      result = true
-      when traceCollector:
-        cprintf("[ABOUT TO DESTROY] %p\n", cell)
-    else:
-      dec cell.rc, rcIncrement
-      # According to Lins it's correct to do nothing else here.
-      when traceCollector:
-        cprintf("[DeCREF] %p\n", cell)
-
-proc GC_unref*[T](x: ref T) =
-  ## New runtime only supports this operation for 'ref T'.
-  if nimDecRefIsLast(cast[pointer](x)):
-    # XXX this does NOT work for virtual destructors!
-    `=destroy`(x[])
-    nimRawDispose(cast[pointer](x))
-
-proc GC_ref*[T](x: ref T) =
-  ## New runtime only supports this operation for 'ref T'.
-  if x != nil: nimIncRef(cast[pointer](x))
-
-when not defined(gcOrc):
-  template GC_fullCollect* =
-    ## Forces a full garbage collection pass. With ``--gc:arc`` a nop.
-    discard
-
-template setupForeignThreadGc* =
-  ## With ``--gc:arc`` a nop.
-  discard
-
-template tearDownForeignThreadGc* =
-  ## With ``--gc:arc`` a nop.
-  discard
-
-proc isObj(obj: PNimType, subclass: cstring): bool {.compilerRtl, inl.} =
-  proc strstr(s, sub: cstring): cstring {.header: "<string.h>", importc.}
-
-  result = strstr(obj.name, subclass) != nil
-
-proc chckObj(obj: PNimType, subclass: cstring) {.compilerRtl.} =
-  # checks if obj is of type subclass:
-  if not isObj(obj, subclass): sysFatal(ObjectConversionDefect, "invalid object conversion")
diff --git a/lib/system/repr.nim b/lib/system/repr.nim
index 318e95ebb..13118e40b 100644
--- a/lib/system/repr.nim
+++ b/lib/system/repr.nim
@@ -16,14 +16,9 @@ proc reprInt(x: int64): string {.compilerproc.} = return $x
 proc reprFloat(x: float): string {.compilerproc.} = return $x
 
 proc reprPointer(x: pointer): string {.compilerproc.} =
-  when defined(nimNoArrayToCstringConversion):
-    result = newString(60)
-    let n = c_sprintf(addr result[0], "%p", x)
-    setLen(result, n)
-  else:
-    var buf: array[0..59, char]
-    discard c_sprintf(buf, "%p", x)
-    return $buf
+  result = newString(60)
+  let n = c_snprintf(cast[cstring](addr result[0]), csize_t(60), "%p", x)
+  setLen(result, n)
 
 proc reprStrAux(result: var string, s: cstring; len: int) =
   if cast[pointer](s) == nil:
@@ -77,6 +72,8 @@ proc reprEnum(e: int, typ: PNimType): string {.compilerRtl.} =
 
   result = $e & " (invalid data!)"
 
+include system/repr_impl
+
 type
   PByteArray = ptr UncheckedArray[byte] # array[0xffff, byte]
 
@@ -158,7 +155,7 @@ when not defined(useNimRtl):
     var bs = typ.base.size
     for i in 0..typ.size div bs - 1:
       if i > 0: add result, ", "
-      reprAux(result, cast[pointer](cast[ByteAddress](p) + i*bs), typ.base, cl)
+      reprAux(result, cast[pointer](cast[int](p) + i*bs), typ.base, cl)
     add result, "]"
 
   when defined(nimSeqsV2):
@@ -186,7 +183,7 @@ when not defined(useNimRtl):
     var bs = typ.base.size
     for i in 0..cast[PGenericSeq](p).len-1:
       if i > 0: add result, ", "
-      reprAux(result, cast[pointer](cast[ByteAddress](payloadPtr(p)) + align(payloadOffset, typ.align) + i*bs),
+      reprAux(result, cast[pointer](cast[int](payloadPtr(p)) + align(payloadOffset, typ.align) + i*bs),
               typ.base, cl)
     add result, "]"
 
@@ -197,14 +194,14 @@ when not defined(useNimRtl):
     of nkSlot:
       add result, $n.name
       add result, " = "
-      reprAux(result, cast[pointer](cast[ByteAddress](p) + n.offset), n.typ, cl)
+      reprAux(result, cast[pointer](cast[int](p) + n.offset), n.typ, cl)
     of nkList:
       for i in 0..n.len-1:
         if i > 0: add result, ",\n"
         reprRecordAux(result, p, n.sons[i], cl)
     of nkCase:
       var m = selectBranch(p, n)
-      reprAux(result, cast[pointer](cast[ByteAddress](p) + n.offset), n.typ, cl)
+      reprAux(result, cast[pointer](cast[int](p) + n.offset), n.typ, cl)
       if m != nil: reprRecordAux(result, p, m, cl)
 
   proc reprRecord(result: var string, p: pointer, typ: PNimType,
@@ -286,7 +283,7 @@ when not defined(useNimRtl):
     of tyString:
       let sp = cast[ptr string](p)
       reprStrAux(result, sp[].cstring, sp[].len)
-    of tyCString:
+    of tyCstring:
       let cs = cast[ptr cstring](p)[]
       if cs.isNil: add result, "nil"
       else: reprStrAux(result, cs, cs.len)
@@ -310,7 +307,7 @@ when not defined(useNimRtl):
     var bs = elemtyp.size
     for i in 0..length - 1:
       if i > 0: add result, ", "
-      reprAux(result, cast[pointer](cast[ByteAddress](p) + i*bs), elemtyp, cl)
+      reprAux(result, cast[pointer](cast[int](p) + i*bs), elemtyp, cl)
     add result, "]"
     deinitReprClosure(cl)
 
@@ -325,5 +322,6 @@ when not defined(useNimRtl):
     else:
       var p = p
       reprAux(result, addr(p), typ, cl)
-    add result, "\n"
+    when defined(nimLegacyReprWithNewline): # see PR #16034
+      add result, "\n"
     deinitReprClosure(cl)
diff --git a/lib/system/repr_impl.nim b/lib/system/repr_impl.nim
new file mode 100644
index 000000000..b9ec1890f
--- /dev/null
+++ b/lib/system/repr_impl.nim
@@ -0,0 +1,15 @@
+#[
+other APIs common to system/repr and system/reprjs could be refactored here, eg:
+* reprChar
+* reprBool
+* reprStr
+
+Another possibility in future work would be to have a single include file instead
+of system/repr and system/reprjs, and use `when defined(js)` inside it.
+]#
+
+proc reprDiscriminant*(e: int, typ: PNimType): string {.compilerRtl.} =
+  case typ.kind
+  of tyEnum: reprEnum(e, typ)
+  of tyBool: $(e != 0)
+  else: $e
diff --git a/lib/system/repr_v2.nim b/lib/system/repr_v2.nim
index fcc187a42..d2aef536c 100644
--- a/lib/system/repr_v2.nim
+++ b/lib/system/repr_v2.nim
@@ -1,57 +1,84 @@
+include system/inclrtl
+
+when defined(nimPreviewSlimSystem):
+  import std/formatfloat
+
 proc isNamedTuple(T: typedesc): bool {.magic: "TypeTrait".}
   ## imported from typetraits
 
-proc distinctBase(T: typedesc): typedesc {.magic: "TypeTrait".}
+proc distinctBase(T: typedesc, recursive: static bool = true): typedesc {.magic: "TypeTrait".}
   ## imported from typetraits
 
+proc rangeBase(T: typedesc): typedesc {.magic: "TypeTrait".}
+  # skip one level of range; return the base type of a range type
+
 proc repr*(x: NimNode): string {.magic: "Repr", noSideEffect.}
 
-proc repr*(x: int): string {.magic: "IntToStr", noSideEffect.}
-  ## repr for an integer argument. Returns `x`
-  ## converted to a decimal string.
+proc repr*(x: int): string =
+  ## Same as $x
+  $x
 
-proc repr*(x: int64): string {.magic: "Int64ToStr", noSideEffect.}
-  ## repr for an integer argument. Returns `x`
-  ## converted to a decimal string.
+proc repr*(x: int64): string =
+  ## Same as $x
+  $x
 
 proc repr*(x: uint64): string {.noSideEffect.} =
-  ## repr for an unsigned integer argument. Returns `x`
-  ## converted to a decimal string.
-  $x #Calls `$` from system/strmantle.nim
+  ## Same as $x
+  $x
 
-proc repr*(x: float): string {.magic: "FloatToStr", noSideEffect.}
-  ## repr for a float argument. Returns `x`
-  ## converted to a decimal string.
+proc repr*(x: float): string =
+  ## Same as $x
+  $x
 
 proc repr*(x: bool): string {.magic: "BoolToStr", noSideEffect.}
   ## repr for a boolean argument. Returns `x`
   ## converted to the string "false" or "true".
 
-proc repr*(x: char): string {.noSideEffect.} =
+proc repr*(x: char): string {.noSideEffect, raises: [].} =
   ## repr for a character argument. Returns `x`
-  ## converted to a string.
+  ## converted to an escaped string.
   ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   assert repr('c') == "'c'"
-  '\'' & $x & '\''
-
-proc repr*(x: cstring): string {.noSideEffect.} =
-  ## repr for a CString argument. Returns `x`
-  ## converted to a quoted string.
-  '"' & $x & '"'
+  ##   ```
+  result = "'"
+  # Elides string creations if not needed
+  if x in {'\\', '\0'..'\31', '\127'..'\255'}:
+    result.add '\\'
+  if x in {'\0'..'\31', '\127'..'\255'}:
+    result.add $x.uint8
+  else:
+    result.add x
+  result.add '\''
 
-proc repr*(x: string): string {.noSideEffect.} =
+proc repr*(x: string | cstring): string {.noSideEffect, raises: [].} =
   ## repr for a string argument. Returns `x`
-  ## but quoted.
-  '"' & x & '"'
+  ## converted to a quoted and escaped string.
+  result = "\""
+  for i in 0..<x.len:
+    if x[i] in {'"', '\\', '\0'..'\31', '\127'..'\255'}:
+      result.add '\\'
+    case x[i]:
+    of '\n':
+      result.add "n\n"
+    of '\0'..'\9', '\11'..'\31', '\127'..'\255':
+      result.add $x[i].uint8
+    else:
+      result.add x[i]
+  result.add '\"'
 
-proc repr*[Enum: enum](x: Enum): string {.magic: "EnumToStr", noSideEffect.}
+proc repr*[Enum: enum](x: Enum): string {.magic: "EnumToStr", noSideEffect, raises: [].}
   ## repr for an enumeration argument. This works for
   ## any enumeration type thanks to compiler magic.
   ##
   ## If a `repr` operator for a concrete enumeration is provided, this is
   ## used instead. (In other words: *Overwriting* is possible.)
 
+proc reprDiscriminant*(e: int): string {.compilerproc.} =
+  # repr and reprjs can use `PNimType` to symbolize `e`; making this work here
+  # would require a way to pass the set of enum stringified values to cgen.
+  $e
+
 proc repr*(p: pointer): string =
   ## repr of pointer as its hexadecimal value
   if p == nil:
@@ -68,12 +95,21 @@ proc repr*(p: pointer): string =
         result[j] = HexChars[n and 0xF]
         n = n shr 4
 
-template repr*(x: distinct): string =
-  repr(distinctBase(typeof(x))(x))
+proc repr*(p: proc | iterator {.closure.}): string =
+  ## repr of a proc as its address
+  repr(cast[ptr pointer](unsafeAddr p)[])
+
+template repr*[T: distinct|(range and not enum)](x: T): string =
+  when T is range: # add a branch to handle range
+    repr(rangeBase(typeof(x))(x))
+  elif T is distinct:
+    repr(distinctBase(typeof(x))(x))
+  else:
+    {.error: "cannot happen".}
 
 template repr*(t: typedesc): string = $t
 
-proc reprObject[T: tuple|object](res: var string, x: T) =
+proc reprObject[T: tuple|object](res: var string, x: T) {.noSideEffect, raises: [].} =
   res.add '('
   var firstElement = true
   const isNamed = T is object or isNamedTuple(T)
@@ -94,19 +130,19 @@ proc reprObject[T: tuple|object](res: var string, x: T) =
   res.add(')')
 
 
-proc repr*[T: tuple|object](x: T): string =
+proc repr*[T: tuple|object](x: T): string {.noSideEffect, raises: [].} =
   ## Generic `repr` operator for tuples that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   $(23, 45) == "(23, 45)"
   ##   $(a: 23, b: 45) == "(a: 23, b: 45)"
   ##   $() == "()"
+  ##   ```
   when T is object:
     result = $typeof(x)
   reprObject(result, x)
 
-proc repr*[T](x: ref T | ptr T): string =
+proc repr*[T](x: ref T | ptr T): string {.noSideEffect, raises: [].} =
   if isNil(x): return "nil"
   when T is object:
     result = $typeof(x)
@@ -115,7 +151,7 @@ proc repr*[T](x: ref T | ptr T): string =
     result = when typeof(x) is ref: "ref " else: "ptr "
     result.add repr(x[])
 
-proc collectionToRepr[T](x: T, prefix, separator, suffix: string): string =
+proc collectionToRepr[T](x: T, prefix, separator, suffix: string): string {.noSideEffect, raises: [].} =
   result = prefix
   var firstElement = true
   for value in items(x):
@@ -129,29 +165,19 @@ proc collectionToRepr[T](x: T, prefix, separator, suffix: string): string =
 proc repr*[T](x: set[T]): string =
   ## Generic `repr` operator for sets that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   ${23, 45} == "{23, 45}"
+  ##   ```
   collectionToRepr(x, "{", ", ", "}")
 
 proc repr*[T](x: seq[T]): string =
   ## Generic `repr` operator for seqs that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   $(@[23, 45]) == "@[23, 45]"
+  ##   ```
   collectionToRepr(x, "@[", ", ", "]")
 
-proc repr*[T, U](x: HSlice[T, U]): string =
-  ## Generic `repr` operator for slices that is lifted from the components
-  ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
-  ##  $(1 .. 5) == "1 .. 5"
-  result = repr(x.a)
-  result.add(" .. ")
-  result.add(repr(x.b))
-
 proc repr*[T, IDX](x: array[IDX, T]): string =
   ## Generic `repr` operator for arrays that is lifted from the components.
   collectionToRepr(x, "[", ", ", "]")
@@ -159,7 +185,10 @@ proc repr*[T, IDX](x: array[IDX, T]): string =
 proc repr*[T](x: openArray[T]): string =
   ## Generic `repr` operator for openarrays that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   $(@[23, 45].toOpenArray(0, 1)) == "[23, 45]"
+  ##   ```
   collectionToRepr(x, "[", ", ", "]")
+
+proc repr*[T](x: UncheckedArray[T]): string =
+  "[...]"
diff --git a/lib/system/reprjs.nim b/lib/system/reprjs.nim
index 9c27a4721..761d66aec 100644
--- a/lib/system/reprjs.nim
+++ b/lib/system/reprjs.nim
@@ -8,7 +8,12 @@
 #
 # The generic ``repr`` procedure for the javascript backend.
 
+when defined(nimPreviewSlimSystem):
+  import std/formatfloat
+
 proc reprInt(x: int64): string {.compilerproc.} = $x
+proc reprInt(x: uint64): string {.compilerproc.} = $x
+proc reprInt(x: int): string {.compilerproc.} = $x
 proc reprFloat(x: float): string {.compilerproc.} = $x
 
 proc reprPointer(p: pointer): string {.compilerproc.} =
@@ -24,12 +29,14 @@ proc reprBool(x: bool): string {.compilerRtl.} =
 proc reprEnum(e: int, typ: PNimType): string {.compilerRtl.} =
   var tmp: bool
   let item = typ.node.sons[e]
-  {.emit: "`tmp` = `item` !== undefined".}
+  {.emit: "`tmp` = `item` !== undefined;".}
   if tmp:
     result = makeNimstrLit(item.name)
   else:
     result = $e & " (invalid data!)"
 
+include system/repr_impl
+
 proc reprChar(x: char): string {.compilerRtl.} =
   result = "\'"
   case x
@@ -113,7 +120,6 @@ proc reprArray(a: pointer, typ: PNimType,
   # We prepend @ to seq, the C backend prepends the pointer to the seq.
   result = if typ.kind == tySequence: "@[" else: "["
   var len: int = 0
-  var i: int = 0
 
   {. emit: "`len` = `a`.length;\n" .}
   var dereffed: pointer = a
@@ -130,7 +136,7 @@ proc reprArray(a: pointer, typ: PNimType,
   add(result, "]")
 
 proc isPointedToNil(p: pointer): bool =
-  {. emit: "if (`p` === null) {`result` = true};\n" .}
+  {. emit: "if (`p` === null) {`result` = true;}\n" .}
 
 proc reprRef(result: var string, p: pointer, typ: PNimType,
           cl: var ReprClosure) =
@@ -188,8 +194,12 @@ proc reprAux(result: var string, p: pointer, typ: PNimType,
     return
   dec(cl.recDepth)
   case typ.kind
-  of tyInt..tyInt64, tyUInt..tyUInt64:
+  of tyInt..tyInt32, tyUInt..tyUInt32:
     add(result, reprInt(cast[int](p)))
+  of tyInt64:
+    add(result, reprInt(cast[int64](p)))
+  of tyUInt64:
+    add(result, reprInt(cast[uint64](p)))
   of tyChar:
     add(result, reprChar(cast[char](p)))
   of tyBool:
@@ -200,7 +210,7 @@ proc reprAux(result: var string, p: pointer, typ: PNimType,
     var fp: int
     {. emit: "`fp` = `p`;\n" .}
     add(result, reprStr(cast[string](p)))
-  of tyCString:
+  of tyCstring:
     var fp: cstring
     {. emit: "`fp` = `p`;\n" .}
     if fp.isNil:
@@ -237,4 +247,5 @@ proc reprAny(p: pointer, typ: PNimType): string {.compilerRtl.} =
   var cl: ReprClosure
   initReprClosure(cl)
   reprAux(result, p, typ, cl)
-  add(result, "\n")
+  when defined(nimLegacyReprWithNewline): # see PR #16034
+    add result, "\n"
diff --git a/lib/system/seqs_v2.nim b/lib/system/seqs_v2.nim
index 1b40c00ab..572e77408 100644
--- a/lib/system/seqs_v2.nim
+++ b/lib/system/seqs_v2.nim
@@ -8,10 +8,13 @@
 #
 
 
-# import typetraits
+# import std/typetraits
 # strs already imported allocateds for us.
 
-proc supportsCopyMem(t: typedesc): bool {.magic: "TypeTrait".}
+
+# Some optimizations here may be not to empty-seq-initialize some symbols, then StrictNotNil complains.
+{.push warning[StrictNotNil]: off.}  # See https://github.com/nim-lang/Nim/issues/21401
+
 
 ## Default seq implementation used by Nim's core.
 type
@@ -22,10 +25,15 @@ type
     cap: int
     data: UncheckedArray[T]
 
-  NimSeqV2*[T] = object
+  NimSeqV2*[T] = object # \
+    # if you change this implementation, also change seqs_v2_reimpl.nim!
     len: int
     p: ptr NimSeqPayload[T]
 
+  NimRawSeq = object
+    len: int
+    p: pointer
+
 const nimSeqVersion {.core.} = 2
 
 # XXX make code memory safe for overflows in '*'
@@ -34,18 +42,30 @@ proc newSeqPayload(cap, elemSize, elemAlign: int): pointer {.compilerRtl, raises
   # we have to use type erasure here as Nim does not support generic
   # compilerProcs. Oh well, this will all be inlined anyway.
   if cap > 0:
-    var p = cast[ptr NimSeqPayloadBase](allocShared0(align(sizeof(NimSeqPayloadBase), elemAlign) + cap * elemSize))
+    var p = cast[ptr NimSeqPayloadBase](alignedAlloc0(align(sizeof(NimSeqPayloadBase), elemAlign) + cap * elemSize, elemAlign))
+    p.cap = cap
+    result = p
+  else:
+    result = nil
+
+proc newSeqPayloadUninit(cap, elemSize, elemAlign: int): pointer {.compilerRtl, raises: [].} =
+  # Used in `newSeqOfCap()`.
+  if cap > 0:
+    var p = cast[ptr NimSeqPayloadBase](alignedAlloc(align(sizeof(NimSeqPayloadBase), elemAlign) + cap * elemSize, elemAlign))
     p.cap = cap
     result = p
   else:
     result = nil
 
+template `+!`(p: pointer, s: int): pointer =
+  cast[pointer](cast[int](p) +% s)
+
+template `-!`(p: pointer, s: int): pointer =
+  cast[pointer](cast[int](p) -% s)
+
 proc prepareSeqAdd(len: int; p: pointer; addlen, elemSize, elemAlign: int): pointer {.
-    noSideEffect, raises: [].} =
+    noSideEffect, tags: [], raises: [], compilerRtl.} =
   {.noSideEffect.}:
-    template `+!`(p: pointer, s: int): pointer =
-      cast[pointer](cast[int](p) +% s)
-
     let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
     if addlen <= 0:
       result = p
@@ -57,59 +77,142 @@ proc prepareSeqAdd(len: int; p: pointer; addlen, elemSize, elemAlign: int): poin
       var p = cast[ptr NimSeqPayloadBase](p)
       let oldCap = p.cap and not strlitFlag
       let newCap = max(resize(oldCap), len+addlen)
+      var q: ptr NimSeqPayloadBase
       if (p.cap and strlitFlag) == strlitFlag:
-        var q = cast[ptr NimSeqPayloadBase](allocShared0(headerSize + elemSize * newCap))
+        q = cast[ptr NimSeqPayloadBase](alignedAlloc(headerSize + elemSize * newCap, elemAlign))
+        copyMem(q +! headerSize, p +! headerSize, len * elemSize)
+      else:
+        let oldSize = headerSize + elemSize * oldCap
+        let newSize = headerSize + elemSize * newCap
+        q = cast[ptr NimSeqPayloadBase](alignedRealloc(p, oldSize, newSize, elemAlign))
+
+      zeroMem(q +! headerSize +! len * elemSize, addlen * elemSize)
+      q.cap = newCap
+      result = q
+
+proc zeroNewElements(len: int; q: pointer; addlen, elemSize, elemAlign: int) {.
+    noSideEffect, tags: [], raises: [], compilerRtl.} =
+  {.noSideEffect.}:
+    let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
+    zeroMem(q +! headerSize +! len * elemSize, addlen * elemSize)
+
+proc prepareSeqAddUninit(len: int; p: pointer; addlen, elemSize, elemAlign: int): pointer {.
+    noSideEffect, tags: [], raises: [], compilerRtl.} =
+  {.noSideEffect.}:
+    let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
+    if addlen <= 0:
+      result = p
+    elif p == nil:
+      result = newSeqPayloadUninit(len+addlen, elemSize, elemAlign)
+    else:
+      # Note: this means we cannot support things that have internal pointers as
+      # they get reallocated here. This needs to be documented clearly.
+      var p = cast[ptr NimSeqPayloadBase](p)
+      let oldCap = p.cap and not strlitFlag
+      let newCap = max(resize(oldCap), len+addlen)
+      if (p.cap and strlitFlag) == strlitFlag:
+        var q = cast[ptr NimSeqPayloadBase](alignedAlloc(headerSize + elemSize * newCap, elemAlign))
         copyMem(q +! headerSize, p +! headerSize, len * elemSize)
         q.cap = newCap
         result = q
       else:
         let oldSize = headerSize + elemSize * oldCap
         let newSize = headerSize + elemSize * newCap
-        var q = cast[ptr NimSeqPayloadBase](reallocShared0(p, oldSize, newSize))
+        var q = cast[ptr NimSeqPayloadBase](alignedRealloc(p, oldSize, newSize, elemAlign))
         q.cap = newCap
         result = q
 
-proc shrink*[T](x: var seq[T]; newLen: Natural) =
+proc shrink*[T](x: var seq[T]; newLen: Natural) {.tags: [], raises: [].} =
   when nimvm:
-    setLen(x, newLen)
+    {.cast(tags: []).}:
+      setLen(x, newLen)
   else:
-    mixin `=destroy`
     #sysAssert newLen <= x.len, "invalid newLen parameter for 'shrink'"
     when not supportsCopyMem(T):
       for i in countdown(x.len - 1, newLen):
-        `=destroy`(x[i])
+        reset x[i]
     # XXX This is wrong for const seqs that were moved into 'x'!
-    cast[ptr NimSeqV2[T]](addr x).len = newLen
+    {.noSideEffect.}:
+      cast[ptr NimSeqV2[T]](addr x).len = newLen
 
-proc grow*[T](x: var seq[T]; newLen: Natural; value: T) =
+proc grow*[T](x: var seq[T]; newLen: Natural; value: T) {.nodestroy.} =
   let oldLen = x.len
+  #sysAssert newLen >= x.len, "invalid newLen parameter for 'grow'"
   if newLen <= oldLen: return
   var xu = cast[ptr NimSeqV2[T]](addr x)
-  if xu.p == nil or xu.p.cap < newLen:
-    xu.p = cast[typeof(xu.p)](prepareSeqAdd(oldLen, xu.p, newLen - oldLen, sizeof(T), alignof(T)))
+  if xu.p == nil or (xu.p.cap and not strlitFlag) < newLen:
+    xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newLen - oldLen, sizeof(T), alignof(T)))
   xu.len = newLen
   for i in oldLen .. newLen-1:
-    xu.p.data[i] = value
+    wasMoved(xu.p.data[i])
+    `=copy`(xu.p.data[i], value)
 
-proc add*[T](x: var seq[T]; value: sink T) {.magic: "AppendSeqElem", noSideEffect, nodestroy.} =
+proc add*[T](x: var seq[T]; y: sink T) {.magic: "AppendSeqElem", noSideEffect, nodestroy.} =
   ## Generic proc for adding a data item `y` to a container `x`.
   ##
   ## For containers that have an order, `add` means *append*. New generic
   ## containers should also call their adding proc `add` for consistency.
   ## Generic code becomes much easier to write if the Nim naming scheme is
   ## respected.
-  let oldLen = x.len
-  var xu = cast[ptr NimSeqV2[T]](addr x)
-  if xu.p == nil or xu.p.cap < oldLen+1:
-    xu.p = cast[typeof(xu.p)](prepareSeqAdd(oldLen, xu.p, 1, sizeof(T), alignof(T)))
-  xu.len = oldLen+1
-  # .nodestroy means `xu.p.data[oldLen] = value` is compiled into a
-  # copyMem(). This is fine as know by construction that
-  # in `xu.p.data[oldLen]` there is nothing to destroy.
-  # We also save the `wasMoved + destroy` pair for the sink parameter.
-  xu.p.data[oldLen] = value
-
-proc setLen[T](s: var seq[T], newlen: Natural) =
+  {.cast(noSideEffect).}:
+    let oldLen = x.len
+    var xu = cast[ptr NimSeqV2[T]](addr x)
+    if xu.p == nil or (xu.p.cap and not strlitFlag) < oldLen+1:
+      xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, 1, sizeof(T), alignof(T)))
+    xu.len = oldLen+1
+    # .nodestroy means `xu.p.data[oldLen] = value` is compiled into a
+    # copyMem(). This is fine as know by construction that
+    # in `xu.p.data[oldLen]` there is nothing to destroy.
+    # We also save the `wasMoved + destroy` pair for the sink parameter.
+    xu.p.data[oldLen] = y
+
+proc setLen[T](s: var seq[T], newlen: Natural) {.nodestroy.} =
+  {.noSideEffect.}:
+    if newlen < s.len:
+      shrink(s, newlen)
+    else:
+      let oldLen = s.len
+      if newlen <= oldLen: return
+      var xu = cast[ptr NimSeqV2[T]](addr s)
+      if xu.p == nil or (xu.p.cap and not strlitFlag) < newlen:
+        xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newlen - oldLen, sizeof(T), alignof(T)))
+      xu.len = newlen
+      for i in oldLen..<newlen:
+        xu.p.data[i] = default(T)
+
+proc newSeq[T](s: var seq[T], len: Natural) =
+  shrink(s, 0)
+  setLen(s, len)
+
+proc sameSeqPayload(x: pointer, y: pointer): bool {.compilerRtl, inl.} =
+  result = cast[ptr NimRawSeq](x)[].p == cast[ptr NimRawSeq](y)[].p
+
+
+func capacity*[T](self: seq[T]): int {.inline.} =
+  ## Returns the current capacity of the seq.
+  # See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var lst = newSeqOfCap[string](cap = 42)
+    lst.add "Nim"
+    assert lst.capacity == 42
+
+  let sek = cast[ptr NimSeqV2[T]](unsafeAddr self)
+  result = if sek.p != nil: sek.p.cap and not strlitFlag else: 0
+
+func setLenUninit*[T](s: var seq[T], newlen: Natural) {.nodestroy.} =
+  ## Sets the length of seq `s` to `newlen`. `T` may be any sequence type.
+  ## New slots will not be initialized.
+  ##
+  ## If the current length is greater than the new length,
+  ## `s` will be truncated.
+  ##   ```nim
+  ##   var x = @[10, 20]
+  ##   x.setLenUninit(5)
+  ##   x[4] = 50
+  ##   assert x[4] == 50
+  ##   x.setLenUninit(1)
+  ##   assert x == @[10]
+  ##   ```
   {.noSideEffect.}:
     if newlen < s.len:
       shrink(s, newlen)
@@ -117,6 +220,8 @@ proc setLen[T](s: var seq[T], newlen: Natural) =
       let oldLen = s.len
       if newlen <= oldLen: return
       var xu = cast[ptr NimSeqV2[T]](addr s)
-      if xu.p == nil or xu.p.cap < newlen:
-        xu.p = cast[typeof(xu.p)](prepareSeqAdd(oldLen, xu.p, newlen - oldLen, sizeof(T), alignof(T)))
+      if xu.p == nil or (xu.p.cap and not strlitFlag) < newlen:
+        xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newlen - oldLen, sizeof(T), alignof(T)))
       xu.len = newlen
+
+{.pop.}  # See https://github.com/nim-lang/Nim/issues/21401
diff --git a/lib/system/seqs_v2_reimpl.nim b/lib/system/seqs_v2_reimpl.nim
new file mode 100644
index 000000000..09b7e7ac4
--- /dev/null
+++ b/lib/system/seqs_v2_reimpl.nim
@@ -0,0 +1,24 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2020 Nim contributors
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+type
+  NimSeqPayloadReimpl = object
+    cap: int
+    data: pointer
+
+  NimSeqV2Reimpl = object
+    len: int
+    p: ptr NimSeqPayloadReimpl
+
+template frees(s: NimSeqV2Reimpl) =
+  if s.p != nil and (s.p.cap and strlitFlag) != strlitFlag:
+    when compileOption("threads"):
+      deallocShared(s.p)
+    else:
+      dealloc(s.p)
\ No newline at end of file
diff --git a/lib/system/setops.nim b/lib/system/setops.nim
index 97f042d93..67aa3097a 100644
--- a/lib/system/setops.nim
+++ b/lib/system/setops.nim
@@ -1,94 +1,89 @@
-proc incl*[T](x: var set[T], y: T) {.magic: "Incl", noSideEffect.}
-  ## Includes element ``y`` in the set ``x``.
+func incl*[T](x: var set[T], y: T) {.magic: "Incl".} =
+  ## Includes element `y` in the set `x`.
   ##
-  ## This is the same as ``x = x + {y}``, but it might be more efficient.
-  ##
-  ## .. code-block:: Nim
-  ##   var a = {1, 3, 5}
-  ##   a.incl(2) # a <- {1, 2, 3, 5}
-  ##   a.incl(4) # a <- {1, 2, 3, 4, 5}
+  ## This is the same as `x = x + {y}`, but it might be more efficient.
+  runnableExamples:
+    var a = {1, 3, 5}
+    a.incl(2)
+    assert a == {1, 2, 3, 5}
+    a.incl(4)
+    assert a == {1, 2, 3, 4, 5}
 
-template incl*[T](x: var set[T], y: set[T]) =
-  ## Includes the set ``y`` in the set ``x``.
-  ##
-  ## .. code-block:: Nim
-  ##   var a = {1, 3, 5, 7}
-  ##   var b = {4, 5, 6}
-  ##   a.incl(b)  # a <- {1, 3, 4, 5, 6, 7}
+when not defined(nimHasCallsitePragma):
+  {.pragma: callsite.}
+
+template incl*[T](x: var set[T], y: set[T]) {.callsite.} =
+  ## Includes the set `y` in the set `x`.
+  runnableExamples:
+    var a = {1, 3, 5, 7}
+    var b = {4, 5, 6}
+    a.incl(b)
+    assert a == {1, 3, 4, 5, 6, 7}
   x = x + y
 
-proc excl*[T](x: var set[T], y: T) {.magic: "Excl", noSideEffect.}
-  ## Excludes element ``y`` from the set ``x``.
-  ##
-  ## This is the same as ``x = x - {y}``, but it might be more efficient.
+func excl*[T](x: var set[T], y: T) {.magic: "Excl".} =
+  ## Excludes element `y` from the set `x`.
   ##
-  ## .. code-block:: Nim
-  ##   var b = {2, 3, 5, 6, 12, 545}
-  ##   b.excl(5)  # b <- {2, 3, 6, 12, 545}
+  ## This is the same as `x = x - {y}`, but it might be more efficient.
+  runnableExamples:
+    var b = {2, 3, 5, 6, 12, 54}
+    b.excl(5)
+    assert b == {2, 3, 6, 12, 54}
 
-template excl*[T](x: var set[T], y: set[T]) =
-  ## Excludes the set ``y`` from the set ``x``.
-  ##
-  ## .. code-block:: Nim
-  ##   var a = {1, 3, 5, 7}
-  ##   var b = {3, 4, 5}
-  ##   a.excl(b)  # a <- {1, 7}
+template excl*[T](x: var set[T], y: set[T]) {.callsite.} =
+  ## Excludes the set `y` from the set `x`.
+  runnableExamples:
+    var a = {1, 3, 5, 7}
+    var b = {3, 4, 5}
+    a.excl(b) 
+    assert a == {1, 7}
   x = x - y
 
-proc card*[T](x: set[T]): int {.magic: "Card", noSideEffect.}
-  ## Returns the cardinality of the set ``x``, i.e. the number of elements
+func card*[T](x: set[T]): int {.magic: "Card".} =
+  ## Returns the cardinality of the set `x`, i.e. the number of elements
   ## in the set.
-  ##
-  ## .. code-block:: Nim
-  ##   var a = {1, 3, 5, 7}
-  ##   echo card(a) # => 4
+  runnableExamples:
+    var a = {1, 3, 5, 7}
+    assert card(a) == 4
+    var b = {1, 3, 5, 7, 5}
+    assert card(b) == 4 # repeated 5 doesn't count
 
-proc len*[T](x: set[T]): int {.magic: "Card", noSideEffect.}
+func len*[T](x: set[T]): int {.magic: "Card".}
   ## An alias for `card(x)`.
 
 
-proc `*`*[T](x, y: set[T]): set[T] {.magic: "MulSet", noSideEffect.}
+func `*`*[T](x, y: set[T]): set[T] {.magic: "MulSet".} =
   ## This operator computes the intersection of two sets.
-  ##
-  ## .. code-block:: Nim
-  ##   let
-  ##     a = {1, 2, 3}
-  ##     b = {2, 3, 4}
-  ##   echo a * b # => {2, 3}
-proc `+`*[T](x, y: set[T]): set[T] {.magic: "PlusSet", noSideEffect.}
+  runnableExamples:
+    assert {1, 2, 3} * {2, 3, 4} == {2, 3}
+
+func `+`*[T](x, y: set[T]): set[T] {.magic: "PlusSet".} =
   ## This operator computes the union of two sets.
-  ##
-  ## .. code-block:: Nim
-  ##   let
-  ##     a = {1, 2, 3}
-  ##     b = {2, 3, 4}
-  ##   echo a + b # => {1, 2, 3, 4}
-proc `-`*[T](x, y: set[T]): set[T] {.magic: "MinusSet", noSideEffect.}
+  runnableExamples:
+    assert {1, 2, 3} + {2, 3, 4} == {1, 2, 3, 4}
+
+func `-`*[T](x, y: set[T]): set[T] {.magic: "MinusSet".} =
   ## This operator computes the difference of two sets.
-  ##
-  ## .. code-block:: Nim
-  ##   let
-  ##     a = {1, 2, 3}
-  ##     b = {2, 3, 4}
-  ##   echo a - b # => {1}
+  runnableExamples:
+    assert {1, 2, 3} - {2, 3, 4} == {1}
 
-proc contains*[T](x: set[T], y: T): bool {.magic: "InSet", noSideEffect.}
-  ## One should overload this proc if one wants to overload the ``in`` operator.
+func contains*[T](x: set[T], y: T): bool {.magic: "InSet".} =
+  ## One should overload this proc if one wants to overload the `in` operator.
   ##
-  ## The parameters are in reverse order! ``a in b`` is a template for
-  ## ``contains(b, a)``.
+  ## The parameters are in reverse order! `a in b` is a template for
+  ## `contains(b, a)`.
   ## This is because the unification algorithm that Nim uses for overload
   ## resolution works from left to right.
-  ## But for the ``in`` operator that would be the wrong direction for this
+  ## But for the `in` operator that would be the wrong direction for this
   ## piece of code:
-  ##
-  ## .. code-block:: Nim
-  ##   var s: set[range['a'..'z']] = {'a'..'c'}
-  ##   assert s.contains('c')
-  ##   assert 'b' in s
-  ##
-  ## If ``in`` had been declared as ``[T](elem: T, s: set[T])`` then ``T`` would
-  ## have been bound to ``char``. But ``s`` is not compatible to type
-  ## ``set[char]``! The solution is to bind ``T`` to ``range['a'..'z']``. This
-  ## is achieved by reversing the parameters for ``contains``; ``in`` then
+  runnableExamples:
+    var s: set[range['a'..'z']] = {'a'..'c'}
+    assert s.contains('c')
+    assert 'b' in s
+    assert 'd' notin s
+    assert set['a'..'z'] is set[range['a'..'z']]
+  ## If `in` had been declared as `[T](elem: T, s: set[T])` then `T` would
+  ## have been bound to `char`. But `s` is not compatible to type
+  ## `set[char]`! The solution is to bind `T` to `range['a'..'z']`. This
+  ## is achieved by reversing the parameters for `contains`; `in` then
   ## passes its arguments in reverse order.
diff --git a/lib/system/sets.nim b/lib/system/sets.nim
index 42c448848..97431c296 100644
--- a/lib/system/sets.nim
+++ b/lib/system/sets.nim
@@ -9,34 +9,20 @@
 
 # set handling
 
-type
-  NimSet = array[0..4*2048-1, uint8]
 
-# bitops can't be imported here, therefore the code duplication.
-
-proc countBits32(n: uint32): int {.compilerproc.} =
-  # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-  var v = uint32(n)
-  v = v - ((v shr 1'u32) and 0x55555555'u32)
-  v = (v and 0x33333333'u32) + ((v shr 2'u32) and 0x33333333'u32)
-  result = (((v + (v shr 4'u32) and 0xF0F0F0F'u32) * 0x1010101'u32) shr 24'u32).int
-
-proc countBits64(n: uint64): int {.compilerproc, inline.} =
-  # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-  var v = uint64(n)
-  v = v - ((v shr 1'u64) and 0x5555555555555555'u64)
-  v = (v and 0x3333333333333333'u64) + ((v shr 2'u64) and 0x3333333333333333'u64)
-  v = (v + (v shr 4'u64) and 0x0F0F0F0F0F0F0F0F'u64)
-  result = ((v * 0x0101010101010101'u64) shr 56'u64).int
-
-proc cardSet(s: NimSet, len: int): int {.compilerproc, inline.} =
+proc cardSetImpl(s: ptr UncheckedArray[uint8], len: int): int {.inline.} =
   var i = 0
   result = 0
+  var num = 0'u64
   when defined(x86) or defined(amd64):
     while i < len - 8:
-      inc(result, countBits64((cast[ptr uint64](s[i].unsafeAddr))[]))
+      copyMem(addr num, addr s[i], 8)
+      inc(result, countBits64(num))
       inc(i, 8)
 
   while i < len:
     inc(result, countBits32(uint32(s[i])))
     inc(i, 1)
+
+proc cardSet(s: ptr UncheckedArray[uint8], len: int): int {.compilerproc, inline.} =
+  result = cardSetImpl(s, len)
diff --git a/lib/system/stacktraces.nim b/lib/system/stacktraces.nim
new file mode 100644
index 000000000..42be9d94f
--- /dev/null
+++ b/lib/system/stacktraces.nim
@@ -0,0 +1,83 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Additional code for customizable stack traces. Unstable API, for internal
+# usage only.
+
+const
+  reraisedFromBegin* = -10
+  reraisedFromEnd* = -100
+  maxStackTraceLines* = 128
+
+when defined(nimStackTraceOverride):
+  ## Procedure types for overriding the default stack trace.
+  type
+    cuintptr_t* {.importc: "uintptr_t", nodecl.} = uint
+      ## This is the same as the type `uintptr_t` in C.
+
+    StackTraceOverrideGetTracebackProc* = proc (): string {.
+      nimcall, gcsafe, raises: [], tags: [], noinline.}
+    StackTraceOverrideGetProgramCountersProc* = proc (maxLength: cint): seq[cuintptr_t] {.
+      nimcall, gcsafe, raises: [], tags: [], noinline.}
+    StackTraceOverrideGetDebuggingInfoProc* =
+      proc (programCounters: seq[cuintptr_t], maxLength: cint): seq[StackTraceEntry] {.
+        nimcall, gcsafe, raises: [], tags: [], noinline.}
+
+  # Default procedures (not normally used, because people opting in on this
+  # override are supposed to register their own versions).
+  var
+    stackTraceOverrideGetTraceback: StackTraceOverrideGetTracebackProc =
+      proc (): string {.nimcall, gcsafe, raises: [], tags: [], noinline.} =
+        discard
+        #result = "Stack trace override procedure not registered.\n"
+    stackTraceOverrideGetProgramCounters: StackTraceOverrideGetProgramCountersProc =
+      proc (maxLength: cint): seq[cuintptr_t] {.nimcall, gcsafe, raises: [], tags: [], noinline.} =
+        discard
+    stackTraceOverrideGetDebuggingInfo: StackTraceOverrideGetDebuggingInfoProc =
+      proc (programCounters: seq[cuintptr_t], maxLength: cint): seq[StackTraceEntry] {.
+        nimcall, gcsafe, raises: [], tags: [], noinline.} =
+          discard
+
+  # Custom procedure registration.
+  proc registerStackTraceOverride*(overrideProc: StackTraceOverrideGetTracebackProc) =
+    ## Override the default stack trace inside rawWriteStackTrace() with your
+    ## own procedure.
+    stackTraceOverrideGetTraceback = overrideProc
+  proc registerStackTraceOverrideGetProgramCounters*(overrideProc: StackTraceOverrideGetProgramCountersProc) =
+    stackTraceOverrideGetProgramCounters = overrideProc
+  proc registerStackTraceOverrideGetDebuggingInfo*(overrideProc: StackTraceOverrideGetDebuggingInfoProc) =
+    stackTraceOverrideGetDebuggingInfo = overrideProc
+
+  # Custom stack trace manipulation.
+  proc auxWriteStackTraceWithOverride*(s: var string) =
+    add(s, stackTraceOverrideGetTraceback())
+
+  proc auxWriteStackTraceWithOverride*(s: var seq[StackTraceEntry]) =
+    let programCounters = stackTraceOverrideGetProgramCounters(maxStackTraceLines)
+    if s.len == 0:
+      s = newSeqOfCap[StackTraceEntry](programCounters.len)
+    for programCounter in programCounters:
+      s.add(StackTraceEntry(programCounter: cast[uint](programCounter)))
+
+  # We may have more stack trace lines in the output, due to inlined procedures.
+  proc addDebuggingInfo*(s: seq[StackTraceEntry]): seq[StackTraceEntry] =
+    var programCounters: seq[cuintptr_t]
+    # We process program counters in groups from complete stack traces, because
+    # we have logic that keeps track of certain functions being inlined or not.
+    for entry in s:
+      if entry.procname.isNil and entry.programCounter != 0:
+        programCounters.add(cast[cuintptr_t](entry.programCounter))
+      elif entry.procname.isNil and (entry.line == reraisedFromBegin or entry.line == reraisedFromEnd):
+        result.add(stackTraceOverrideGetDebuggingInfo(programCounters, maxStackTraceLines))
+        programCounters = @[]
+        result.add(entry)
+      else:
+        result.add(entry)
+    if programCounters.len > 0:
+      result.add(stackTraceOverrideGetDebuggingInfo(programCounters, maxStackTraceLines))
diff --git a/lib/system/strmantle.nim b/lib/system/strmantle.nim
index 43a769b5f..89046253b 100644
--- a/lib/system/strmantle.nim
+++ b/lib/system/strmantle.nim
@@ -9,17 +9,28 @@
 
 # Compilerprocs for strings that do not depend on the string implementation.
 
+import std/private/digitsutils
+
+
 proc cmpStrings(a, b: string): int {.inline, compilerproc.} =
   let alen = a.len
   let blen = b.len
   let minlen = min(alen, blen)
   if minlen > 0:
-    result = c_memcmp(unsafeAddr a[0], unsafeAddr b[0], cast[csize_t](minlen))
+    result = c_memcmp(unsafeAddr a[0], unsafeAddr b[0], cast[csize_t](minlen)).int
     if result == 0:
       result = alen - blen
   else:
     result = alen - blen
 
+proc leStrings(a, b: string): bool {.inline, compilerproc.} =
+  # required by upcoming backends (NIR).
+  cmpStrings(a, b) <= 0
+
+proc ltStrings(a, b: string): bool {.inline, compilerproc.} =
+  # required by upcoming backends (NIR).
+  cmpStrings(a, b) < 0
+
 proc eqStrings(a, b: string): bool {.inline, compilerproc.} =
   let alen = a.len
   let blen = b.len
@@ -30,7 +41,7 @@ proc eqStrings(a, b: string): bool {.inline, compilerproc.} =
 proc hashString(s: string): int {.compilerproc.} =
   # the compiler needs exactly the same hash function!
   # this used to be used for efficient generation of string case statements
-  var h : uint = 0
+  var h = 0'u
   for i in 0..len(s)-1:
     h = h + uint(s[i])
     h = h + h shl 10
@@ -40,63 +51,28 @@ proc hashString(s: string): int {.compilerproc.} =
   h = h + h shl 15
   result = cast[int](h)
 
-proc addInt*(result: var string; x: int64) =
-  ## Converts integer to its string representation and appends it to `result`.
-  ##
-  ## .. code-block:: Nim
-  ##   var
-  ##     a = "123"
-  ##     b = 45
-  ##   a.addInt(b) # a <- "12345"
-  let base = result.len
-  setLen(result, base + sizeof(x)*4)
+proc eqCstrings(a, b: cstring): bool {.inline, compilerproc.} =
+  if pointer(a) == pointer(b): result = true
+  elif a.isNil or b.isNil: result = false
+  else: result = c_strcmp(a, b) == 0
+
+proc hashCstring(s: cstring): int {.compilerproc.} =
+  # the compiler needs exactly the same hash function!
+  # this used to be used for efficient generation of cstring case statements
+  if s.isNil: return 0
+  var h : uint = 0
   var i = 0
-  var y = x
   while true:
-    var d = y div 10
-    result[base+i] = chr(abs(int(y - d*10)) + ord('0'))
-    inc(i)
-    y = d
-    if y == 0: break
-  if x < 0:
-    result[base+i] = '-'
-    inc(i)
-  setLen(result, base+i)
-  # mirror the string:
-  for j in 0..i div 2 - 1:
-    swap(result[base+j], result[base+i-j-1])
-
-proc nimIntToStr(x: int): string {.compilerRtl.} =
-  result = newStringOfCap(sizeof(x)*4)
-  result.addInt x
-
-proc addCstringN(result: var string, buf: cstring; buflen: int) =
-  # no nimvm support needed, so it doesn't need to be fast here either
-  let oldLen = result.len
-  let newLen = oldLen + buflen
-  result.setLen newLen
-  copyMem(result[oldLen].addr, buf, buflen)
-
-import formatfloat
-
-proc addFloat*(result: var string; x: float) =
-  ## Converts float to its string representation and appends it to `result`.
-  ##
-  ## .. code-block:: Nim
-  ##   var
-  ##     a = "123"
-  ##     b = 45.67
-  ##   a.addFloat(b) # a <- "12345.67"
-  when nimvm:
-    result.add $x
-  else:
-    var buffer {.noinit.}: array[65, char]
-    let n = writeFloatToBuffer(buffer, x)
-    result.addCstringN(cstring(buffer[0].addr), n)
-
-proc nimFloatToStr(f: float): string {.compilerproc.} =
-  result = newStringOfCap(8)
-  result.addFloat f
+    let c = s[i]
+    if c == '\0': break
+    h = h + uint(c)
+    h = h + h shl 10
+    h = h xor (h shr 6)
+    inc i
+  h = h + h shl 3
+  h = h xor (h shr 11)
+  h = h + h shl 15
+  result = cast[int](h)
 
 proc c_strtod(buf: cstring, endptr: ptr cstring): float64 {.
   importc: "strtod", header: "<stdlib.h>", noSideEffect.}
@@ -107,20 +83,20 @@ const
               1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
               1e20, 1e21, 1e22]
 
-when defined(nimHasInvariant):
-  {.push staticBoundChecks: off.}
 
-proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
-                          start = 0): int {.compilerproc.} =
+{.push staticBoundChecks: off.}
+
+proc nimParseBiggestFloat(s: openArray[char], number: var BiggestFloat,
+                         ): int {.compilerproc.} =
   # This routine attempt to parse float that can parsed quickly.
-  # ie whose integer part can fit inside a 53bits integer.
+  # i.e. whose integer part can fit inside a 53bits integer.
   # their real exponent must also be <= 22. If the float doesn't follow
   # these restrictions, transform the float into this form:
   #  INTEGER * 10 ^ exponent and leave the work to standard `strtod()`.
   # This avoid the problems of decimal character portability.
   # see: http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
   var
-    i = start
+    i = 0
     sign = 1.0
     kdigits, fdigits = 0
     exponent = 0
@@ -143,7 +119,7 @@ proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
       if s[i+2] == 'N' or s[i+2] == 'n':
         if i+3 >= s.len or s[i+3] notin IdentChars:
           number = NaN
-          return i+3 - start
+          return i+3
     return 0
 
   # Inf?
@@ -152,7 +128,7 @@ proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
       if s[i+2] == 'F' or s[i+2] == 'f':
         if i+3 >= s.len or s[i+3] notin IdentChars:
           number = Inf*sign
-          return i+3 - start
+          return i+3
     return 0
 
   if i < s.len and s[i] in {'0'..'9'}:
@@ -186,8 +162,8 @@ proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
 
   # if has no digits: return error
   if kdigits + fdigits <= 0 and
-     (i == start or # no char consumed (empty string).
-     (i == start + 1 and hasSign)): # or only '+' or '-
+     (i == 0 or # no char consumed (empty string).
+     (i == 1 and hasSign)): # or only '+' or '-
     return 0
 
   if i+1 < s.len and s[i] in {'e', 'E'}:
@@ -210,11 +186,13 @@ proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
 
   # if exponent greater than can be represented: +/- zero or infinity
   if absExponent > 999:
-    if expNegative:
+    if integer == 0:
+      number = 0.0
+    elif expNegative:
       number = 0.0*sign
     else:
       number = Inf*sign
-    return i - start
+    return i
 
   # if integer is representable in 53 bits:  fast path
   # max fast path integer is  1<<53 - 1 or  8999999999999999 (16 digits)
@@ -226,29 +204,30 @@ proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
         number = sign * integer.float / powtens[absExponent]
       else:
         number = sign * integer.float * powtens[absExponent]
-      return i - start
+      return i
 
     # if exponent is greater try to fit extra exponent above 22 by multiplying
     # integer part is there is space left.
     let slop = 15 - kdigits - fdigits
     if absExponent <= 22 + slop and not expNegative:
       number = sign * integer.float * powtens[slop] * powtens[absExponent-slop]
-      return i - start
+      return i
 
   # if failed: slow path with strtod.
   var t: array[500, char] # flaviu says: 325 is the longest reasonable literal
   var ti = 0
   let maxlen = t.high - "e+000".len # reserve enough space for exponent
 
-  result = i - start
-  i = start
+  let endPos = i
+  result = endPos
+  i = 0
   # re-parse without error checking, any error should be handled by the code above.
-  if i < s.len and s[i] == '.': i.inc
-  while i < s.len and s[i] in {'0'..'9','+','-'}:
+  if i < endPos and s[i] == '.': i.inc
+  while i < endPos and s[i] in {'0'..'9','+','-'}:
     if ti < maxlen:
       t[ti] = s[i]; inc(ti)
     inc(i)
-    while i < s.len and s[i] in {'.', '_'}: # skip underscore and decimal point
+    while i < endPos and s[i] in {'.', '_'}: # skip underscore and decimal point
       inc(i)
 
   # insert exponent
@@ -263,18 +242,9 @@ proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
   t[ti-2] = ('0'.ord + absExponent mod 10).char
   absExponent = absExponent div 10
   t[ti-3] = ('0'.ord + absExponent mod 10).char
+  number = c_strtod(cast[cstring](addr t), nil)
 
-  when defined(nimNoArrayToCstringConversion):
-    number = c_strtod(addr t, nil)
-  else:
-    number = c_strtod(t, nil)
-
-when defined(nimHasInvariant):
-  {.pop.} # staticBoundChecks
-
-proc nimInt64ToStr(x: int64): string {.compilerRtl.} =
-  result = newStringOfCap(sizeof(x)*4)
-  result.addInt x
+{.pop.} # staticBoundChecks
 
 proc nimBoolToStr(x: bool): string {.compilerRtl.} =
   return if x: "true" else: "false"
@@ -283,26 +253,6 @@ proc nimCharToStr(x: char): string {.compilerRtl.} =
   result = newString(1)
   result[0] = x
 
-proc `$`*(x: uint64): string {.noSideEffect, raises: [].} =
-  ## The stringify operator for an unsigned integer argument. Returns `x`
-  ## converted to a decimal string.
-  if x == 0:
-    result = "0"
-  else:
-    result = newString(60)
-    var i = 0
-    var n = x
-    while n != 0:
-      let nn = n div 10'u64
-      result[i] = char(n - 10'u64 * nn + ord('0'))
-      inc i
-      n = nn
-    result.setLen i
-
-    let half = i div 2
-    # Reverse
-    for t in 0 .. half-1: swap(result[t], result[i-t-1])
-
 when defined(gcDestructors):
   proc GC_getStatistics*(): string =
     result = "[GC] total memory: "
diff --git a/lib/system/strs_v2.nim b/lib/system/strs_v2.nim
index aa644522f..404b4f78d 100644
--- a/lib/system/strs_v2.nim
+++ b/lib/system/strs_v2.nim
@@ -29,44 +29,77 @@ template contentSize(cap): int = cap + 1 + sizeof(NimStrPayloadBase)
 
 template frees(s) =
   if not isLiteral(s):
-    deallocShared(s.p)
+    when compileOption("threads"):
+      deallocShared(s.p)
+    else:
+      dealloc(s.p)
+
+template allocPayload(newLen: int): ptr NimStrPayload =
+  when compileOption("threads"):
+    cast[ptr NimStrPayload](allocShared(contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](alloc(contentSize(newLen)))
+
+template allocPayload0(newLen: int): ptr NimStrPayload =
+  when compileOption("threads"):
+    cast[ptr NimStrPayload](allocShared0(contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](alloc0(contentSize(newLen)))
+
+template reallocPayload(p: pointer, newLen: int): ptr NimStrPayload =
+  when compileOption("threads"):
+    cast[ptr NimStrPayload](reallocShared(p, contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](realloc(p, contentSize(newLen)))
+
+template reallocPayload0(p: pointer; oldLen, newLen: int): ptr NimStrPayload =
+  when compileOption("threads"):
+    cast[ptr NimStrPayload](reallocShared0(p, contentSize(oldLen), contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](realloc0(p, contentSize(oldLen), contentSize(newLen)))
 
 proc resize(old: int): int {.inline.} =
   if old <= 0: result = 4
-  elif old < 65536: result = old * 2
+  elif old <= high(int16): result = old * 2
   else: result = old * 3 div 2 # for large arrays * 3/2 is better
 
-proc prepareAdd(s: var NimStringV2; addlen: int) {.compilerRtl.} =
+proc prepareAdd(s: var NimStringV2; addLen: int) {.compilerRtl.} =
+  let newLen = s.len + addLen
   if isLiteral(s):
     let oldP = s.p
     # can't mutate a literal, so we need a fresh copy here:
-    s.p = cast[ptr NimStrPayload](allocShared0(contentSize(s.len + addlen)))
-    s.p.cap = s.len + addlen
+    s.p = allocPayload(newLen)
+    s.p.cap = newLen
     if s.len > 0:
       # we are about to append, so there is no need to copy the \0 terminator:
-      copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], s.len)
+      copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], min(s.len, newLen))
+    elif oldP == nil:
+      # In the case of `newString(0) & ""`, since `src.len == 0`, `appendString`
+      # will not set the `\0` terminator, so we set it here.
+      s.p.data[0] = '\0'
   else:
     let oldCap = s.p.cap and not strlitFlag
-    if s.len + addlen > oldCap:
-      let newCap = max(s.len + addlen, resize(oldCap))
-      s.p = cast[ptr NimStrPayload](reallocShared0(s.p, contentSize(oldCap), contentSize(newCap)))
+    if newLen > oldCap:
+      let newCap = max(newLen, resize(oldCap))
+      s.p = reallocPayload(s.p, newCap)
       s.p.cap = newCap
+      if newLen < newCap:
+        zeroMem(cast[pointer](addr s.p.data[newLen+1]), newCap - newLen)
 
-proc nimAddCharV1(s: var NimStringV2; c: char) {.compilerRtl.} =
+proc nimAddCharV1(s: var NimStringV2; c: char) {.compilerRtl, inl.} =
+  #if (s.p == nil) or (s.len+1 > s.p.cap and not strlitFlag):
   prepareAdd(s, 1)
   s.p.data[s.len] = c
-  s.p.data[s.len+1] = '\0'
   inc s.len
+  s.p.data[s.len] = '\0'
 
 proc toNimStr(str: cstring, len: int): NimStringV2 {.compilerproc.} =
   if len <= 0:
     result = NimStringV2(len: 0, p: nil)
   else:
-    var p = cast[ptr NimStrPayload](allocShared0(contentSize(len)))
+    var p = allocPayload(len)
     p.cap = len
-    if len > 0:
-      # we are about to append, so there is no need to copy the \0 terminator:
-      copyMem(unsafeAddr p.data[0], str, len)
+    copyMem(unsafeAddr p.data[0], str, len+1)
     result = NimStringV2(len: len, p: p)
 
 proc cstrToNimstr(str: cstring): NimStringV2 {.compilerRtl.} =
@@ -75,7 +108,7 @@ proc cstrToNimstr(str: cstring): NimStringV2 {.compilerRtl.} =
 
 proc nimToCStringConv(s: NimStringV2): cstring {.compilerproc, nonReloadable, inline.} =
   if s.len == 0: result = cstring""
-  else: result = cstring(unsafeAddr s.p.data)
+  else: result = cast[cstring](unsafeAddr s.p.data)
 
 proc appendString(dest: var NimStringV2; src: NimStringV2) {.compilerproc, inline.} =
   if src.len > 0:
@@ -85,23 +118,24 @@ proc appendString(dest: var NimStringV2; src: NimStringV2) {.compilerproc, inlin
 
 proc appendChar(dest: var NimStringV2; c: char) {.compilerproc, inline.} =
   dest.p.data[dest.len] = c
-  dest.p.data[dest.len+1] = '\0'
   inc dest.len
+  dest.p.data[dest.len] = '\0'
 
 proc rawNewString(space: int): NimStringV2 {.compilerproc.} =
   # this is also 'system.newStringOfCap'.
   if space <= 0:
     result = NimStringV2(len: 0, p: nil)
   else:
-    var p = cast[ptr NimStrPayload](allocShared0(contentSize(space)))
+    var p = allocPayload(space)
     p.cap = space
+    p.data[0] = '\0'
     result = NimStringV2(len: 0, p: p)
 
 proc mnewString(len: int): NimStringV2 {.compilerproc.} =
   if len <= 0:
     result = NimStringV2(len: 0, p: nil)
   else:
-    var p = cast[ptr NimStrPayload](allocShared0(contentSize(len)))
+    var p = allocPayload0(len)
     p.cap = len
     result = NimStringV2(len: len, p: p)
 
@@ -109,13 +143,30 @@ proc setLengthStrV2(s: var NimStringV2, newLen: int) {.compilerRtl.} =
   if newLen == 0:
     discard "do not free the buffer here, pattern 's.setLen 0' is common for avoiding allocations"
   else:
-    if newLen > s.len or isLiteral(s):
-      prepareAdd(s, newLen - s.len)
+    if isLiteral(s):
+      let oldP = s.p
+      s.p = allocPayload(newLen)
+      s.p.cap = newLen
+      if s.len > 0:
+        copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], min(s.len, newLen))
+        if newLen > s.len:
+          zeroMem(cast[pointer](addr s.p.data[s.len]), newLen - s.len + 1)
+        else:
+          s.p.data[newLen] = '\0'
+      else:
+        zeroMem(cast[pointer](addr s.p.data[0]), newLen + 1)
+    elif newLen > s.len:
+      let oldCap = s.p.cap and not strlitFlag
+      if newLen > oldCap:
+        let newCap = max(newLen, resize(oldCap))
+        s.p = reallocPayload0(s.p, oldCap, newCap)
+        s.p.cap = newCap
+
     s.p.data[newLen] = '\0'
   s.len = newLen
 
 proc nimAsgnStrV2(a: var NimStringV2, b: NimStringV2) {.compilerRtl.} =
-  if a.p == b.p: return
+  if a.p == b.p and a.len == b.len: return
   if isLiteral(b):
     # we can shallow copy literals:
     frees(a)
@@ -127,15 +178,47 @@ proc nimAsgnStrV2(a: var NimStringV2, b: NimStringV2) {.compilerRtl.} =
       # 'let y = newStringOfCap(); var x = y'
       # on the other hand... These get turned into moves now.
       frees(a)
-      a.p = cast[ptr NimStrPayload](allocShared0(contentSize(b.len)))
+      a.p = allocPayload(b.len)
       a.p.cap = b.len
     a.len = b.len
     copyMem(unsafeAddr a.p.data[0], unsafeAddr b.p.data[0], b.len+1)
 
-proc nimPrepareStrMutationV2(s: var NimStringV2) {.compilerRtl.} =
+proc nimPrepareStrMutationImpl(s: var NimStringV2) =
+  let oldP = s.p
+  # can't mutate a literal, so we need a fresh copy here:
+  s.p = allocPayload(s.len)
+  s.p.cap = s.len
+  copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], s.len+1)
+
+proc nimPrepareStrMutationV2(s: var NimStringV2) {.compilerRtl, inl.} =
   if s.p != nil and (s.p.cap and strlitFlag) == strlitFlag:
-    let oldP = s.p
-    # can't mutate a literal, so we need a fresh copy here:
-    s.p = cast[ptr NimStrPayload](allocShared0(contentSize(s.len)))
-    s.p.cap = s.len
-    copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], s.len+1)
+    nimPrepareStrMutationImpl(s)
+
+proc prepareMutation*(s: var string) {.inline.} =
+  # string literals are "copy on write", so you need to call
+  # `prepareMutation` before modifying the strings via `addr`.
+  {.cast(noSideEffect).}:
+    let s = unsafeAddr s
+    nimPrepareStrMutationV2(cast[ptr NimStringV2](s)[])
+
+proc nimAddStrV1(s: var NimStringV2; src: NimStringV2) {.compilerRtl, inl.} =
+  #if (s.p == nil) or (s.len+1 > s.p.cap and not strlitFlag):
+  prepareAdd(s, src.len)
+  appendString s, src
+
+proc nimDestroyStrV1(s: NimStringV2) {.compilerRtl, inl.} =
+  frees(s)
+
+proc nimStrAtLe(s: string; idx: int; ch: char): bool {.compilerRtl, inl.} =
+  result = idx < s.len and s[idx] <= ch
+
+func capacity*(self: string): int {.inline.} =
+  ## Returns the current capacity of the string.
+  # See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var str = newStringOfCap(cap = 42)
+    str.add "Nim"
+    assert str.capacity == 42
+
+  let str = cast[ptr NimStringV2](unsafeAddr self)
+  result = if str.p != nil: str.p.cap and not strlitFlag else: 0
diff --git a/lib/system/syslocks.nim b/lib/system/syslocks.nim
deleted file mode 100644
index fa4164b95..000000000
--- a/lib/system/syslocks.nim
+++ /dev/null
@@ -1,226 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-# Low level system locks and condition vars.
-
-{.push stackTrace: off.}
-
-when defined(Windows):
-  type
-    Handle = int
-
-    SysLock {.importc: "CRITICAL_SECTION",
-              header: "<windows.h>", final, pure.} = object # CRITICAL_SECTION in WinApi
-      DebugInfo: pointer
-      LockCount: int32
-      RecursionCount: int32
-      OwningThread: int
-      LockSemaphore: int
-      SpinCount: int
-
-    SysCond = Handle
-
-  proc initSysLock(L: var SysLock) {.importc: "InitializeCriticalSection",
-                                     header: "<windows.h>".}
-    ## Initializes the lock `L`.
-
-  proc tryAcquireSysAux(L: var SysLock): int32 {.importc: "TryEnterCriticalSection",
-                                                 header: "<windows.h>".}
-    ## Tries to acquire the lock `L`.
-
-  proc tryAcquireSys(L: var SysLock): bool {.inline.} =
-    result = tryAcquireSysAux(L) != 0'i32
-
-  proc acquireSys(L: var SysLock) {.importc: "EnterCriticalSection",
-                                    header: "<windows.h>".}
-    ## Acquires the lock `L`.
-
-  proc releaseSys(L: var SysLock) {.importc: "LeaveCriticalSection",
-                                    header: "<windows.h>".}
-    ## Releases the lock `L`.
-
-  proc deinitSys(L: var SysLock) {.importc: "DeleteCriticalSection",
-                                   header: "<windows.h>".}
-
-  proc createEvent(lpEventAttributes: pointer,
-                   bManualReset, bInitialState: int32,
-                   lpName: cstring): SysCond {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "CreateEventA".}
-
-  proc closeHandle(hObject: Handle) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "CloseHandle".}
-  proc waitForSingleObject(hHandle: Handle, dwMilliseconds: int32): int32 {.
-    stdcall, dynlib: "kernel32", importc: "WaitForSingleObject", noSideEffect.}
-
-  proc signalSysCond(hEvent: SysCond) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "SetEvent".}
-
-  proc initSysCond(cond: var SysCond) {.inline.} =
-    cond = createEvent(nil, 0'i32, 0'i32, nil)
-  proc deinitSysCond(cond: var SysCond) {.inline.} =
-    closeHandle(cond)
-  proc waitSysCond(cond: var SysCond, lock: var SysLock) =
-    releaseSys(lock)
-    discard waitForSingleObject(cond, -1'i32)
-    acquireSys(lock)
-
-  proc waitSysCondWindows(cond: var SysCond) =
-    discard waitForSingleObject(cond, -1'i32)
-
-elif defined(genode):
-  const
-    Header = "genode_cpp/syslocks.h"
-  type
-    SysLock {.importcpp: "Nim::SysLock", pure, final,
-              header: Header.} = object
-    SysCond {.importcpp: "Nim::SysCond", pure, final,
-              header: Header.} = object
-
-  proc initSysLock(L: var SysLock) = discard
-  proc deinitSys(L: var SysLock) = discard
-  proc acquireSys(L: var SysLock) {.noSideEffect, importcpp.}
-  proc tryAcquireSys(L: var SysLock): bool {.noSideEffect, importcpp.}
-  proc releaseSys(L: var SysLock) {.noSideEffect, importcpp.}
-
-  proc initSysCond(L: var SysCond) = discard
-  proc deinitSysCond(L: var SysCond) = discard
-  proc waitSysCond(cond: var SysCond, lock: var SysLock) {.
-    noSideEffect, importcpp.}
-  proc signalSysCond(cond: var SysCond) {.
-    noSideEffect, importcpp.}
-
-else:
-  type
-    SysLockObj {.importc: "pthread_mutex_t", pure, final,
-               header: """#include <sys/types.h>
-                          #include <pthread.h>""".} = object
-      when defined(linux) and defined(amd64):
-        abi: array[40 div sizeof(clong), clong]
-
-    SysLockAttr {.importc: "pthread_mutexattr_t", pure, final
-               header: """#include <sys/types.h>
-                          #include <pthread.h>""".} = object
-      when defined(linux) and defined(amd64):
-        abi: array[4 div sizeof(cint), cint]  # actually a cint
-
-    SysCondObj {.importc: "pthread_cond_t", pure, final,
-               header: """#include <sys/types.h>
-                          #include <pthread.h>""".} = object
-      when defined(linux) and defined(amd64):
-        abi: array[48 div sizeof(clonglong), clonglong]
-
-    SysCondAttr {.importc: "pthread_condattr_t", pure, final
-               header: """#include <sys/types.h>
-                          #include <pthread.h>""".} = object
-      when defined(linux) and defined(amd64):
-        abi: array[4 div sizeof(cint), cint]  # actually a cint
-
-    SysLockType = distinct cint
-
-  proc initSysLockAux(L: var SysLockObj, attr: ptr SysLockAttr) {.
-    importc: "pthread_mutex_init", header: "<pthread.h>", noSideEffect.}
-  proc deinitSysAux(L: var SysLockObj) {.noSideEffect,
-    importc: "pthread_mutex_destroy", header: "<pthread.h>".}
-
-  proc acquireSysAux(L: var SysLockObj) {.noSideEffect,
-    importc: "pthread_mutex_lock", header: "<pthread.h>".}
-  proc tryAcquireSysAux(L: var SysLockObj): cint {.noSideEffect,
-    importc: "pthread_mutex_trylock", header: "<pthread.h>".}
-
-  proc releaseSysAux(L: var SysLockObj) {.noSideEffect,
-    importc: "pthread_mutex_unlock", header: "<pthread.h>".}
-
-  when defined(ios):
-    # iOS will behave badly if sync primitives are moved in memory. In order
-    # to prevent this once and for all, we're doing an extra malloc when
-    # initializing the primitive.
-    type
-      SysLock = ptr SysLockObj
-      SysCond = ptr SysCondObj
-
-    when not declared(c_malloc):
-      proc c_malloc(size: csize): pointer {.
-        importc: "malloc", header: "<stdlib.h>".}
-      proc c_free(p: pointer) {.
-        importc: "free", header: "<stdlib.h>".}
-
-    proc initSysLock(L: var SysLock, attr: ptr SysLockAttr = nil) =
-      L = cast[SysLock](c_malloc(sizeof(SysLockObj)))
-      initSysLockAux(L[], attr)
-
-    proc deinitSys(L: var SysLock) =
-      deinitSysAux(L[])
-      c_free(L)
-
-    template acquireSys(L: var SysLock) =
-      acquireSysAux(L[])
-    template tryAcquireSys(L: var SysLock): bool =
-      tryAcquireSysAux(L[]) == 0'i32
-    template releaseSys(L: var SysLock) =
-      releaseSysAux(L[])
-  else:
-    type
-      SysLock = SysLockObj
-      SysCond = SysCondObj
-
-    template initSysLock(L: var SysLock, attr: ptr SysLockAttr = nil) =
-      initSysLockAux(L, attr)
-    template deinitSys(L: var SysLock) =
-      deinitSysAux(L)
-    template acquireSys(L: var SysLock) =
-      acquireSysAux(L)
-    template tryAcquireSys(L: var SysLock): bool =
-      tryAcquireSysAux(L) == 0'i32
-    template releaseSys(L: var SysLock) =
-      releaseSysAux(L)
-
-  when insideRLocksModule:
-    proc SysLockType_Reentrant: SysLockType =
-      {.emit: "`result` = PTHREAD_MUTEX_RECURSIVE;".}
-    proc initSysLockAttr(a: var SysLockAttr) {.
-      importc: "pthread_mutexattr_init", header: "<pthread.h>", noSideEffect.}
-    proc setSysLockType(a: var SysLockAttr, t: SysLockType) {.
-      importc: "pthread_mutexattr_settype", header: "<pthread.h>", noSideEffect.}
-
-  else:
-    proc initSysCondAux(cond: var SysCondObj, cond_attr: ptr SysCondAttr = nil) {.
-      importc: "pthread_cond_init", header: "<pthread.h>", noSideEffect.}
-    proc deinitSysCondAux(cond: var SysCondObj) {.noSideEffect,
-      importc: "pthread_cond_destroy", header: "<pthread.h>".}
-
-    proc waitSysCondAux(cond: var SysCondObj, lock: var SysLockObj) {.
-      importc: "pthread_cond_wait", header: "<pthread.h>", noSideEffect.}
-    proc signalSysCondAux(cond: var SysCondObj) {.
-      importc: "pthread_cond_signal", header: "<pthread.h>", noSideEffect.}
-
-    when defined(ios):
-      proc initSysCond(cond: var SysCond, cond_attr: ptr SysCondAttr = nil) =
-        cond = cast[SysCond](c_malloc(sizeof(SysCondObj)))
-        initSysCondAux(cond[], cond_attr)
-
-      proc deinitSysCond(cond: var SysCond) =
-        deinitSysCondAux(cond[])
-        c_free(cond)
-
-      template waitSysCond(cond: var SysCond, lock: var SysLock) =
-        waitSysCondAux(cond[], lock[])
-      template signalSysCond(cond: var SysCond) =
-        signalSysCondAux(cond[])
-    else:
-      template initSysCond(cond: var SysCond, cond_attr: ptr SysCondAttr = nil) =
-        initSysCondAux(cond, cond_attr)
-      template deinitSysCond(cond: var SysCond) =
-        deinitSysCondAux(cond)
-
-      template waitSysCond(cond: var SysCond, lock: var SysLock) =
-        waitSysCondAux(cond, lock)
-      template signalSysCond(cond: var SysCond) =
-        signalSysCondAux(cond)
-
-{.pop.}
diff --git a/lib/system/sysspawn.nim b/lib/system/sysspawn.nim
deleted file mode 100644
index dc2d13578..000000000
--- a/lib/system/sysspawn.nim
+++ /dev/null
@@ -1,194 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2015 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## Implements Nim's 'spawn'.
-
-when not declared(NimString):
-  {.error: "You must not import this module explicitly".}
-
-{.push stackTrace:off.}
-
-# We declare our own condition variables here to get rid of the dummy lock
-# on Windows:
-
-type
-  CondVar = object
-    c: SysCond
-    when defined(posix):
-      stupidLock: SysLock
-      counter: int
-
-proc createCondVar(): CondVar =
-  initSysCond(result.c)
-  when defined(posix):
-    initSysLock(result.stupidLock)
-    #acquireSys(result.stupidLock)
-
-proc destroyCondVar(c: var CondVar) {.inline.} =
-  deinitSysCond(c.c)
-
-proc await(cv: var CondVar) =
-  when defined(posix):
-    acquireSys(cv.stupidLock)
-    while cv.counter <= 0:
-      waitSysCond(cv.c, cv.stupidLock)
-    dec cv.counter
-    releaseSys(cv.stupidLock)
-  else:
-    waitSysCondWindows(cv.c)
-
-proc signal(cv: var CondVar) =
-  when defined(posix):
-    acquireSys(cv.stupidLock)
-    inc cv.counter
-    releaseSys(cv.stupidLock)
-  signalSysCond(cv.c)
-
-type
-  FastCondVar = object
-    event, slowPath: bool
-    slow: CondVar
-
-proc createFastCondVar(): FastCondVar =
-  initSysCond(result.slow.c)
-  when defined(posix):
-    initSysLock(result.slow.stupidLock)
-    #acquireSys(result.slow.stupidLock)
-  result.event = false
-  result.slowPath = false
-
-proc await(cv: var FastCondVar) =
-  #for i in 0 .. 50:
-  #  if cas(addr cv.event, true, false):
-  #    # this is a HIT: Triggers > 95% in my tests.
-  #    return
-  #  cpuRelax()
-  #cv.slowPath = true
-  # XXX For some reason this crashes some test programs
-  await(cv.slow)
-  cv.event = false
-
-proc signal(cv: var FastCondVar) =
-  cv.event = true
-  #if cas(addr cv.slowPath, true, false):
-  signal(cv.slow)
-
-type
-  Barrier* {.compilerProc.} = object
-    counter: int
-    cv: CondVar
-
-proc barrierEnter*(b: ptr Barrier) {.compilerProc.} =
-  atomicInc b.counter
-
-proc barrierLeave*(b: ptr Barrier) {.compilerProc.} =
-  atomicDec b.counter
-  if b.counter <= 0: signal(b.cv)
-
-proc openBarrier*(b: ptr Barrier) {.compilerProc.} =
-  b.counter = 0
-  b.cv = createCondVar()
-
-proc closeBarrier*(b: ptr Barrier) {.compilerProc.} =
-  await(b.cv)
-  destroyCondVar(b.cv)
-
-{.pop.}
-
-# ----------------------------------------------------------------------------
-
-type
-  WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.}
-  Worker = object
-    taskArrived: CondVar
-    taskStarted: FastCondVar #\
-    # task data:
-    f: WorkerProc
-    data: pointer
-    ready: bool # put it here for correct alignment!
-
-proc nimArgsPassingDone(p: pointer) {.compilerProc.} =
-  let w = cast[ptr Worker](p)
-  signal(w.taskStarted)
-
-var gSomeReady = createFastCondVar()
-
-proc slave(w: ptr Worker) {.thread.} =
-  while true:
-    w.ready = true # If we instead signal "workerReady" we need the scheduler
-                   # to notice this. The scheduler could then optimize the
-                   # layout of the worker threads (e.g. keep the list sorted)
-                   # so that no search for a "ready" thread is necessary.
-                   # This might be implemented later, but is more tricky than
-                   # it looks because 'spawn' itself can run concurrently.
-    signal(gSomeReady)
-    await(w.taskArrived)
-    assert(not w.ready)
-    # shield against spurious wakeups:
-    if w.data != nil:
-      w.f(w, w.data)
-      w.data = nil
-
-const NumThreads = 4
-
-var
-  workers: array[NumThreads, Thread[ptr Worker]]
-  workersData: array[NumThreads, Worker]
-
-proc setup() =
-  for i in 0 ..< NumThreads:
-    workersData[i].taskArrived = createCondVar()
-    workersData[i].taskStarted = createFastCondVar()
-    createThread(workers[i], slave, addr(workersData[i]))
-
-proc preferSpawn*(): bool =
-  ## Use this proc to determine quickly if a 'spawn' or a direct call is
-  ## preferable. If it returns 'true' a 'spawn' may make sense. In general
-  ## it is not necessary to call this directly; use 'spawnX' instead.
-  result = gSomeReady.event
-
-proc spawn*(call: typed) {.magic: "Spawn".}
-  ## always spawns a new task, so that the 'call' is never executed on
-  ## the calling thread. 'call' has to be proc call 'p(...)' where 'p'
-  ## is gcsafe and has 'void' as the return type.
-
-template spawnX*(call: typed) =
-  ## spawns a new task if a CPU core is ready, otherwise executes the
-  ## call in the calling thread. Usually it is advised to
-  ## use 'spawn' in order to not block the producer for an unknown
-  ## amount of time. 'call' has to be proc call 'p(...)' where 'p'
-  ## is gcsafe and has 'void' as the return type.
-  if preferSpawn(): spawn call
-  else: call
-
-proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} =
-  # implementation of 'spawn' that is used by the code generator.
-  while true:
-    for i in 0.. high(workers):
-      let w = addr(workersData[i])
-      if cas(addr w.ready, true, false):
-        w.data = data
-        w.f = fn
-        signal(w.taskArrived)
-        await(w.taskStarted)
-        return
-    await(gSomeReady)
-
-proc sync*() =
-  ## a simple barrier to wait for all spawn'ed tasks. If you need more elaborate
-  ## waiting, you have to use an explicit barrier.
-  while true:
-    var allReady = true
-    for i in 0 .. high(workers):
-      if not allReady: break
-      allReady = allReady and workersData[i].ready
-    if allReady: break
-    await(gSomeReady)
-
-setup()
diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim
index 06e605a7b..3621c4960 100644
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -17,10 +17,10 @@
 
 
 proc dataPointer(a: PGenericSeq, elemAlign: int): pointer =
-  cast[pointer](cast[ByteAddress](a) +% align(GenericSeqSize, elemAlign))
+  cast[pointer](cast[int](a) +% align(GenericSeqSize, elemAlign))
 
 proc dataPointer(a: PGenericSeq, elemAlign, elemSize, index: int): pointer =
-  cast[pointer](cast[ByteAddress](a) +% align(GenericSeqSize, elemAlign) +% (index*%elemSize))
+  cast[pointer](cast[int](a) +% align(GenericSeqSize, elemAlign) +% (index*%elemSize))
 
 proc resize(old: int): int {.inline.} =
   if old <= 0: result = 4
@@ -47,27 +47,22 @@ else:
   template allocStrNoInit(size: untyped): untyped =
     cast[NimString](newObjNoInit(addr(strDesc), size))
 
-proc rawNewStringNoInit(space: int): NimString {.compilerproc.} =
-  var s = space
-  if s < 7: s = 7
+proc rawNewStringNoInit(space: int): NimString =
+  let s = max(space, 7)
   result = allocStrNoInit(sizeof(TGenericSeq) + s + 1)
   result.reserved = s
-  result.len = 0
   when defined(gogc):
     result.elemSize = 1
 
 proc rawNewString(space: int): NimString {.compilerproc.} =
-  var s = space
-  if s < 7: s = 7
-  result = allocStr(sizeof(TGenericSeq) + s + 1)
-  result.reserved = s
+  result = rawNewStringNoInit(space)
   result.len = 0
-  when defined(gogc):
-    result.elemSize = 1
+  result.data[0] = '\0'
 
 proc mnewString(len: int): NimString {.compilerproc.} =
-  result = rawNewString(len)
+  result = rawNewStringNoInit(len)
   result.len = len
+  zeroMem(addr result.data[0], len + 1)
 
 proc copyStrLast(s: NimString, start, last: int): NimString {.compilerproc.} =
   # This is not used by most recent versions of the compiler anymore, but
@@ -75,13 +70,10 @@ proc copyStrLast(s: NimString, start, last: int): NimString {.compilerproc.} =
   let start = max(start, 0)
   if s == nil: return nil
   let len = min(last, s.len-1) - start + 1
-  if len > 0:
-    result = rawNewStringNoInit(len)
-    result.len = len
-    copyMem(addr(result.data), addr(s.data[start]), len)
-    result.data[len] = '\0'
-  else:
-    result = rawNewString(len)
+  result = rawNewStringNoInit(len)
+  result.len = len
+  copyMem(addr(result.data), addr(s.data[start]), len)
+  result.data[len] = '\0'
 
 proc copyStr(s: NimString, start: int): NimString {.compilerproc.} =
   # This is not used by most recent versions of the compiler anymore, but
@@ -91,12 +83,13 @@ proc copyStr(s: NimString, start: int): NimString {.compilerproc.} =
 
 proc nimToCStringConv(s: NimString): cstring {.compilerproc, nonReloadable, inline.} =
   if s == nil or s.len == 0: result = cstring""
-  else: result = cstring(addr s.data)
+  else: result = cast[cstring](addr s.data)
 
 proc toNimStr(str: cstring, len: int): NimString {.compilerproc.} =
   result = rawNewStringNoInit(len)
   result.len = len
-  copyMem(addr(result.data), str, len + 1)
+  copyMem(addr(result.data), str, len)
+  result.data[len] = '\0'
 
 proc cstrToNimstr(str: cstring): NimString {.compilerRtl.} =
   if str == nil: NimString(nil)
@@ -160,13 +153,9 @@ proc addChar(s: NimString, c: char): NimString =
     result = s
     if result.len >= result.space:
       let r = resize(result.space)
-      when defined(nimIncrSeqV3):
-        result = rawNewStringNoInit(r)
-        result.len = s.len
-        copyMem(addr result.data[0], unsafeAddr(s.data[0]), s.len+1)
-      else:
-        result = cast[NimString](growObj(result,
-          sizeof(TGenericSeq) + r + 1))
+      result = rawNewStringNoInit(r)
+      result.len = s.len
+      copyMem(addr result.data[0], unsafeAddr(s.data[0]), s.len+1)
       result.reserved = r
   result.data[result.len] = c
   result.data[result.len+1] = '\0'
@@ -205,17 +194,14 @@ proc addChar(s: NimString, c: char): NimString =
 
 proc resizeString(dest: NimString, addlen: int): NimString {.compilerRtl.} =
   if dest == nil:
-    result = rawNewStringNoInit(addlen)
+    result = rawNewString(addlen)
   elif dest.len + addlen <= dest.space:
     result = dest
   else: # slow path:
     let sp = max(resize(dest.space), dest.len + addlen)
-    when defined(nimIncrSeqV3):
-      result = rawNewStringNoInit(sp)
-      result.len = dest.len
-      copyMem(addr result.data[0], unsafeAddr(dest.data[0]), dest.len+1)
-    else:
-      result = cast[NimString](growObj(dest, sizeof(TGenericSeq) + sp + 1))
+    result = rawNewStringNoInit(sp)
+    result.len = dest.len
+    copyMem(addr result.data[0], unsafeAddr(dest.data[0]), dest.len+1)
     result.reserved = sp
     #result = rawNewString(sp)
     #copyMem(result, dest, dest.len + sizeof(TGenericSeq))
@@ -234,19 +220,19 @@ proc appendChar(dest: NimString, c: char) {.compilerproc, inline.} =
 proc setLengthStr(s: NimString, newLen: int): NimString {.compilerRtl.} =
   let n = max(newLen, 0)
   if s == nil:
-    result = mnewString(newLen)
+    if n == 0:
+      return s
+    else:
+      result = mnewString(n)
   elif n <= s.space:
     result = s
   else:
-    let sp = max(resize(s.space), newLen)
-    when defined(nimIncrSeqV3):
-      result = rawNewStringNoInit(sp)
-      result.len = s.len
-      copyMem(addr result.data[0], unsafeAddr(s.data[0]), s.len+1)
-      zeroMem(addr result.data[s.len], newLen - s.len)
-      result.reserved = sp
-    else:
-      result = resizeString(s, n)
+    let sp = max(resize(s.space), n)
+    result = rawNewStringNoInit(sp)
+    result.len = s.len
+    copyMem(addr result.data[0], unsafeAddr(s.data[0]), s.len)
+    zeroMem(addr result.data[s.len], n - s.len)
+    result.reserved = sp
   result.len = n
   result.data[n] = '\0'
 
@@ -282,15 +268,11 @@ proc incrSeqV3(s: PGenericSeq, typ: PNimType): PGenericSeq {.compilerproc.} =
     result = s
     if result.len >= result.space:
       let r = resize(result.space)
-      when defined(nimIncrSeqV3):
-        result = cast[PGenericSeq](newSeq(typ, r))
-        result.len = s.len
-        copyMem(dataPointer(result, typ.base.align), dataPointer(s, typ.base.align), s.len * typ.base.size)
-        # since we steal the content from 's', it's crucial to set s's len to 0.
-        s.len = 0
-      else:
-        result = cast[PGenericSeq](growObj(result, align(GenericSeqSize, typ.base.align) + typ.base.size * r))
-        result.reserved = r
+      result = cast[PGenericSeq](newSeq(typ, r))
+      result.len = s.len
+      copyMem(dataPointer(result, typ.base.align), dataPointer(s, typ.base.align), s.len * typ.base.size)
+      # since we steal the content from 's', it's crucial to set s's len to 0.
+      s.len = 0
 
 proc setLengthSeq(seq: PGenericSeq, elemSize, elemAlign, newLen: int): PGenericSeq {.
     compilerRtl, inl.} =
@@ -304,20 +286,10 @@ proc setLengthSeq(seq: PGenericSeq, elemSize, elemAlign, newLen: int): PGenericS
     when not defined(boehmGC) and not defined(nogc) and
          not defined(gcMarkAndSweep) and not defined(gogc) and
          not defined(gcRegions):
-      when false: # compileOption("gc", "v2"):
+      if ntfNoRefs notin extGetCellType(result).base.flags:
         for i in newLen..result.len-1:
-          let len0 = gch.tempStack.len
           forAllChildrenAux(dataPointer(result, elemAlign, elemSize, i),
-                            extGetCellType(result).base, waPush)
-          let len1 = gch.tempStack.len
-          for i in len0 ..< len1:
-            doDecRef(gch.tempStack.d[i], LocalHeap, MaybeCyclic)
-          gch.tempStack.len = len0
-      else:
-        if ntfNoRefs notin extGetCellType(result).base.flags:
-          for i in newLen..result.len-1:
-            forAllChildrenAux(dataPointer(result, elemAlign, elemSize, i),
-                              extGetCellType(result).base, waZctDecRef)
+                            extGetCellType(result).base, waZctDecRef)
 
     # XXX: zeroing out the memory can still result in crashes if a wiped-out
     # cell is aliased by another pointer (ie proc parameter or a let variable).
@@ -332,38 +304,60 @@ proc setLengthSeqV2(s: PGenericSeq, typ: PNimType, newLen: int): PGenericSeq {.
     compilerRtl.} =
   sysAssert typ.kind == tySequence, "setLengthSeqV2: type is not a seq"
   if s == nil:
-    result = cast[PGenericSeq](newSeq(typ, newLen))
+    if newLen == 0:
+      result = s
+    else:
+      result = cast[PGenericSeq](newSeq(typ, newLen))
   else:
-    when defined(nimIncrSeqV3):
-      let elemSize = typ.base.size
-      let elemAlign = typ.base.align
-      if s.space < newLen:
-        let r = max(resize(s.space), newLen)
-        result = cast[PGenericSeq](newSeq(typ, r))
-        copyMem(dataPointer(result, elemAlign), dataPointer(s, elemAlign), s.len * elemSize)
-        # since we steal the content from 's', it's crucial to set s's len to 0.
-        s.len = 0
-      elif newLen < s.len:
-        result = s
-        # we need to decref here, otherwise the GC leaks!
-        when not defined(boehmGC) and not defined(nogc) and
-            not defined(gcMarkAndSweep) and not defined(gogc) and
-            not defined(gcRegions):
-          if ntfNoRefs notin typ.base.flags:
-            for i in newLen..result.len-1:
-              forAllChildrenAux(dataPointer(result, elemAlign, elemSize, i),
-                                extGetCellType(result).base, waZctDecRef)
-
-        # XXX: zeroing out the memory can still result in crashes if a wiped-out
-        # cell is aliased by another pointer (ie proc parameter or a let variable).
-        # This is a tough problem, because even if we don't zeroMem here, in the
-        # presence of user defined destructors, the user will expect the cell to be
-        # "destroyed" thus creating the same problem. We can destroy the cell in the
-        # finalizer of the sequence, but this makes destruction non-deterministic.
-        zeroMem(dataPointer(result, elemAlign, elemSize, newLen), (result.len-%newLen) *% elemSize)
-      else:
-        result = s
-        zeroMem(dataPointer(result, elemAlign, elemSize, result.len), (newLen-%result.len) *% elemSize)
-      result.len = newLen
+    let elemSize = typ.base.size
+    let elemAlign = typ.base.align
+    if s.space < newLen:
+      let r = max(resize(s.space), newLen)
+      result = cast[PGenericSeq](newSeq(typ, r))
+      copyMem(dataPointer(result, elemAlign), dataPointer(s, elemAlign), s.len * elemSize)
+      # since we steal the content from 's', it's crucial to set s's len to 0.
+      s.len = 0
+    elif newLen < s.len:
+      result = s
+      # we need to decref here, otherwise the GC leaks!
+      when not defined(boehmGC) and not defined(nogc) and
+          not defined(gcMarkAndSweep) and not defined(gogc) and
+          not defined(gcRegions):
+        if ntfNoRefs notin typ.base.flags:
+          for i in newLen..result.len-1:
+            forAllChildrenAux(dataPointer(result, elemAlign, elemSize, i),
+                              extGetCellType(result).base, waZctDecRef)
+
+      # XXX: zeroing out the memory can still result in crashes if a wiped-out
+      # cell is aliased by another pointer (ie proc parameter or a let variable).
+      # This is a tough problem, because even if we don't zeroMem here, in the
+      # presence of user defined destructors, the user will expect the cell to be
+      # "destroyed" thus creating the same problem. We can destroy the cell in the
+      # finalizer of the sequence, but this makes destruction non-deterministic.
+      zeroMem(dataPointer(result, elemAlign, elemSize, newLen), (result.len-%newLen) *% elemSize)
     else:
-      result = setLengthSeq(s, typ.base.size, newLen)
+      result = s
+      zeroMem(dataPointer(result, elemAlign, elemSize, result.len), (newLen-%result.len) *% elemSize)
+    result.len = newLen
+
+func capacity*(self: string): int {.inline.} =
+  ## Returns the current capacity of the string.
+  # See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var str = newStringOfCap(cap = 42)
+    str.add "Nim"
+    assert str.capacity == 42
+
+  let str = cast[NimString](self)
+  result = if str != nil: str.space else: 0
+
+func capacity*[T](self: seq[T]): int {.inline.} =
+  ## Returns the current capacity of the seq.
+  # See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var lst = newSeqOfCap[string](cap = 42)
+    lst.add "Nim"
+    assert lst.capacity == 42
+
+  let sek = cast[PGenericSeq](self)
+  result = if sek != nil: sek.space else: 0
diff --git a/lib/system/threadids.nim b/lib/system/threadids.nim
new file mode 100644
index 000000000..3a6eadcbb
--- /dev/null
+++ b/lib/system/threadids.nim
@@ -0,0 +1,103 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2020 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# we need to cache current threadId to not perform syscall all the time
+var threadId {.threadvar.}: int
+
+when defined(windows):
+  proc getCurrentThreadId(): int32 {.
+    stdcall, dynlib: "kernel32", importc: "GetCurrentThreadId".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(getCurrentThreadId())
+    result = threadId
+
+elif defined(linux):
+  proc syscall(arg: clong): clong {.varargs, importc: "syscall", header: "<unistd.h>".}
+  when defined(amd64):
+    const NR_gettid = clong(186)
+  else:
+    var NR_gettid {.importc: "__NR_gettid", header: "<sys/syscall.h>".}: clong
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(syscall(NR_gettid))
+    result = threadId
+
+elif defined(dragonfly):
+  proc lwp_gettid(): int32 {.importc, header: "unistd.h".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(lwp_gettid())
+    result = threadId
+
+elif defined(openbsd):
+  proc getthrid(): int32 {.importc: "getthrid", header: "<unistd.h>".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(getthrid())
+    result = threadId
+
+elif defined(netbsd):
+  proc lwp_self(): int32 {.importc: "_lwp_self", header: "<lwp.h>".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(lwp_self())
+    result = threadId
+
+elif defined(freebsd):
+  proc syscall(arg: cint, arg0: ptr cint): cint {.varargs, importc: "syscall", header: "<unistd.h>".}
+  var SYS_thr_self {.importc:"SYS_thr_self", header:"<sys/syscall.h>".}: cint
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    var tid = 0.cint
+    if threadId == 0:
+      discard syscall(SYS_thr_self, addr tid)
+      threadId = tid
+    result = threadId
+
+elif defined(macosx):
+  proc syscall(arg: cint): cint {.varargs, importc: "syscall", header: "<unistd.h>".}
+  var SYS_thread_selfid {.importc:"SYS_thread_selfid", header:"<sys/syscall.h>".}: cint
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(syscall(SYS_thread_selfid))
+    result = threadId
+
+elif defined(solaris):
+  type thread_t {.importc: "thread_t", header: "<thread.h>".} = distinct int
+  proc thr_self(): thread_t {.importc, header: "<thread.h>".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(thr_self())
+    result = threadId
+
+elif defined(haiku):
+  type thr_id {.importc: "thread_id", header: "<OS.h>".} = distinct int32
+  proc find_thread(name: cstring): thr_id {.importc, header: "<OS.h>".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread.
+    if threadId == 0:
+      threadId = int(find_thread(nil))
+    result = threadId
diff --git a/lib/system/threadimpl.nim b/lib/system/threadimpl.nim
new file mode 100644
index 000000000..285b8f5e7
--- /dev/null
+++ b/lib/system/threadimpl.nim
@@ -0,0 +1,111 @@
+var
+  nimThreadDestructionHandlers* {.rtlThreadVar.}: seq[proc () {.closure, gcsafe, raises: [].}]
+when not defined(boehmgc) and not hasSharedHeap and not defined(gogc) and not defined(gcRegions):
+  proc deallocOsPages() {.rtl, raises: [].}
+proc threadTrouble() {.raises: [], gcsafe.}
+# create for the main thread. Note: do not insert this data into the list
+# of all threads; it's not to be stopped etc.
+when not defined(useNimRtl):
+  #when not defined(createNimRtl): initStackBottom()
+  when declared(initGC):
+    initGC()
+    when not emulatedThreadVars:
+      type ThreadType {.pure.} = enum
+        None = 0,
+        NimThread = 1,
+        ForeignThread = 2
+      var
+        threadType {.rtlThreadVar.}: ThreadType
+
+      threadType = ThreadType.NimThread
+
+when defined(gcDestructors):
+  proc deallocThreadStorage(p: pointer) = c_free(p)
+else:
+  template deallocThreadStorage(p: pointer) = deallocShared(p)
+
+template afterThreadRuns() =
+  for i in countdown(nimThreadDestructionHandlers.len-1, 0):
+    nimThreadDestructionHandlers[i]()
+
+proc onThreadDestruction*(handler: proc () {.closure, gcsafe, raises: [].}) =
+  ## Registers a *thread local* handler that is called at the thread's
+  ## destruction.
+  ##
+  ## A thread is destructed when the `.thread` proc returns
+  ## normally or when it raises an exception. Note that unhandled exceptions
+  ## in a thread nevertheless cause the whole process to die.
+  nimThreadDestructionHandlers.add handler
+
+when defined(boehmgc):
+  type GCStackBaseProc = proc(sb: pointer, t: pointer) {.noconv.}
+  proc boehmGC_call_with_stack_base(sbp: GCStackBaseProc, p: pointer)
+    {.importc: "GC_call_with_stack_base", boehmGC.}
+  proc boehmGC_register_my_thread(sb: pointer)
+    {.importc: "GC_register_my_thread", boehmGC.}
+  proc boehmGC_unregister_my_thread()
+    {.importc: "GC_unregister_my_thread", boehmGC.}
+
+  proc threadProcWrapDispatch[TArg](sb: pointer, thrd: pointer) {.noconv, raises: [].} =
+    boehmGC_register_my_thread(sb)
+    try:
+      let thrd = cast[ptr Thread[TArg]](thrd)
+      when TArg is void:
+        thrd.dataFn()
+      else:
+        thrd.dataFn(thrd.data)
+    except:
+      threadTrouble()
+    finally:
+      afterThreadRuns()
+    boehmGC_unregister_my_thread()
+else:
+  proc threadProcWrapDispatch[TArg](thrd: ptr Thread[TArg]) {.raises: [].} =
+    try:
+      when TArg is void:
+        thrd.dataFn()
+      else:
+        when defined(nimV2):
+          thrd.dataFn(thrd.data)
+        else:
+          var x: TArg
+          deepCopy(x, thrd.data)
+          thrd.dataFn(x)
+    except:
+      threadTrouble()
+    finally:
+      afterThreadRuns()
+      when hasAllocStack:
+        deallocThreadStorage(thrd.rawStack)
+
+proc threadProcWrapStackFrame[TArg](thrd: ptr Thread[TArg]) {.raises: [].} =
+  when defined(boehmgc):
+    boehmGC_call_with_stack_base(threadProcWrapDispatch[TArg], thrd)
+  elif not defined(nogc) and not defined(gogc) and not defined(gcRegions) and not usesDestructors:
+    var p {.volatile.}: pointer
+    # init the GC for refc/markandsweep
+    nimGC_setStackBottom(addr(p))
+    when declared(initGC):
+      initGC()
+    when declared(threadType):
+      threadType = ThreadType.NimThread
+    threadProcWrapDispatch[TArg](thrd)
+    when declared(deallocOsPages): deallocOsPages()
+  else:
+    threadProcWrapDispatch(thrd)
+
+template nimThreadProcWrapperBody*(closure: untyped): untyped =
+  var thrd = cast[ptr Thread[TArg]](closure)
+  var core = thrd.core
+  when declared(globalsSlot): threadVarSetValue(globalsSlot, thrd.core)
+  threadProcWrapStackFrame(thrd)
+  # Since an unhandled exception terminates the whole process (!), there is
+  # no need for a ``try finally`` here, nor would it be correct: The current
+  # exception is tried to be re-raised by the code-gen after the ``finally``!
+  # However this is doomed to fail, because we already unmapped every heap
+  # page!
+
+  # mark as not running anymore:
+  thrd.core = nil
+  thrd.dataFn = nil
+  deallocThreadStorage(cast[pointer](core))
diff --git a/lib/system/threadlocalstorage.nim b/lib/system/threadlocalstorage.nim
index 117af4c25..e6ad9dca5 100644
--- a/lib/system/threadlocalstorage.nim
+++ b/lib/system/threadlocalstorage.nim
@@ -1,83 +1,33 @@
+import std/private/threadtypes
 
 when defined(windows):
   type
-    SysThread* = Handle
-    WinThreadProc = proc (x: pointer): int32 {.stdcall.}
-
-  proc createThread(lpThreadAttributes: pointer, dwStackSize: int32,
-                     lpStartAddress: WinThreadProc,
-                     lpParameter: pointer,
-                     dwCreationFlags: int32,
-                     lpThreadId: var int32): SysThread {.
-    stdcall, dynlib: "kernel32", importc: "CreateThread".}
-
-  proc winSuspendThread(hThread: SysThread): int32 {.
-    stdcall, dynlib: "kernel32", importc: "SuspendThread".}
-
-  proc winResumeThread(hThread: SysThread): int32 {.
-    stdcall, dynlib: "kernel32", importc: "ResumeThread".}
-
-  proc waitForMultipleObjects(nCount: int32,
-                              lpHandles: ptr SysThread,
-                              bWaitAll: int32,
-                              dwMilliseconds: int32): int32 {.
-    stdcall, dynlib: "kernel32", importc: "WaitForMultipleObjects".}
-
-  proc terminateThread(hThread: SysThread, dwExitCode: int32): int32 {.
-    stdcall, dynlib: "kernel32", importc: "TerminateThread".}
-
-  proc getCurrentThreadId(): int32 {.
-    stdcall, dynlib: "kernel32", importc: "GetCurrentThreadId".}
-
-  type
     ThreadVarSlot = distinct int32
 
-  when true:
-    proc threadVarAlloc(): ThreadVarSlot {.
-      importc: "TlsAlloc", stdcall, header: "<windows.h>".}
-    proc threadVarSetValue(dwTlsIndex: ThreadVarSlot, lpTlsValue: pointer) {.
-      importc: "TlsSetValue", stdcall, header: "<windows.h>".}
-    proc tlsGetValue(dwTlsIndex: ThreadVarSlot): pointer {.
-      importc: "TlsGetValue", stdcall, header: "<windows.h>".}
-
-    proc getLastError(): uint32 {.
-      importc: "GetLastError", stdcall, header: "<windows.h>".}
-    proc setLastError(x: uint32) {.
-      importc: "SetLastError", stdcall, header: "<windows.h>".}
-
-    proc threadVarGetValue(dwTlsIndex: ThreadVarSlot): pointer =
-      let realLastError = getLastError()
-      result = tlsGetValue(dwTlsIndex)
-      setLastError(realLastError)
-  else:
-    proc threadVarAlloc(): ThreadVarSlot {.
-      importc: "TlsAlloc", stdcall, dynlib: "kernel32".}
-    proc threadVarSetValue(dwTlsIndex: ThreadVarSlot, lpTlsValue: pointer) {.
-      importc: "TlsSetValue", stdcall, dynlib: "kernel32".}
-    proc threadVarGetValue(dwTlsIndex: ThreadVarSlot): pointer {.
-      importc: "TlsGetValue", stdcall, dynlib: "kernel32".}
+  proc threadVarAlloc(): ThreadVarSlot {.
+    importc: "TlsAlloc", stdcall, header: "<windows.h>".}
+  proc threadVarSetValue(dwTlsIndex: ThreadVarSlot, lpTlsValue: pointer) {.
+    importc: "TlsSetValue", stdcall, header: "<windows.h>".}
+  proc tlsGetValue(dwTlsIndex: ThreadVarSlot): pointer {.
+    importc: "TlsGetValue", stdcall, header: "<windows.h>".}
 
-  proc setThreadAffinityMask(hThread: SysThread, dwThreadAffinityMask: uint) {.
-    importc: "SetThreadAffinityMask", stdcall, header: "<windows.h>".}
+  proc getLastError(): uint32 {.
+    importc: "GetLastError", stdcall, header: "<windows.h>".}
+  proc setLastError(x: uint32) {.
+    importc: "SetLastError", stdcall, header: "<windows.h>".}
+
+  proc threadVarGetValue(dwTlsIndex: ThreadVarSlot): pointer =
+    let realLastError = getLastError()
+    result = tlsGetValue(dwTlsIndex)
+    setLastError(realLastError)
 
 elif defined(genode):
-  import genode/env
   const
     GenodeHeader = "genode_cpp/threads.h"
+
   type
-    SysThread* {.importcpp: "Nim::SysThread",
-                 header: GenodeHeader, final, pure.} = object
-    GenodeThreadProc = proc (x: pointer) {.noconv.}
     ThreadVarSlot = int
 
-  proc initThread(s: var SysThread,
-                  env: GenodeEnv,
-                  stackSize: culonglong,
-                  entry: GenodeThreadProc,
-                  arg: pointer,
-                  affinity: cuint) {.
-    importcpp: "#.initThread(@)".}
-
   proc threadVarAlloc(): ThreadVarSlot = 0
 
   proc offMainThread(): bool {.
@@ -113,60 +63,18 @@ else:
   when not defined(haiku):
     {.passc: "-pthread".}
 
-  const
-    schedh = "#define _GNU_SOURCE\n#include <sched.h>"
-    pthreadh = "#define _GNU_SOURCE\n#include <pthread.h>"
-
-  when not declared(Time):
-    when defined(linux):
-      type Time = clong
-    else:
-      type Time = int
-
   when (defined(linux) or defined(nintendoswitch)) and defined(amd64):
     type
-      SysThread* {.importc: "pthread_t",
-                  header: "<sys/types.h>" .} = distinct culong
-      Pthread_attr {.importc: "pthread_attr_t",
-                    header: "<sys/types.h>".} = object
-        abi: array[56 div sizeof(clong), clong]
       ThreadVarSlot {.importc: "pthread_key_t",
                     header: "<sys/types.h>".} = distinct cuint
   elif defined(openbsd) and defined(amd64):
     type
-      SysThread* {.importc: "pthread_t", header: "<pthread.h>".} = object
-      Pthread_attr {.importc: "pthread_attr_t",
-                       header: "<pthread.h>".} = object
       ThreadVarSlot {.importc: "pthread_key_t",
                      header: "<pthread.h>".} = cint
   else:
     type
-      SysThread* {.importc: "pthread_t", header: "<sys/types.h>".} = object
-      Pthread_attr {.importc: "pthread_attr_t",
-                       header: "<sys/types.h>".} = object
       ThreadVarSlot {.importc: "pthread_key_t",
                      header: "<sys/types.h>".} = object
-  type
-    Timespec {.importc: "struct timespec", header: "<time.h>".} = object
-      tv_sec: Time
-      tv_nsec: clong
-
-  proc pthread_attr_init(a1: var Pthread_attr): cint {.
-    importc, header: pthreadh.}
-  proc pthread_attr_setstacksize(a1: var Pthread_attr, a2: int): cint {.
-    importc, header: pthreadh.}
-  proc pthread_attr_destroy(a1: var Pthread_attr): cint {.
-    importc, header: pthreadh.}
-
-  proc pthread_create(a1: var SysThread, a2: var Pthread_attr,
-            a3: proc (x: pointer): pointer {.noconv.},
-            a4: pointer): cint {.importc: "pthread_create",
-            header: pthreadh.}
-  proc pthread_join(a1: SysThread, a2: ptr pointer): cint {.
-    importc, header: pthreadh.}
-
-  proc pthread_cancel(a1: SysThread): cint {.
-    importc: "pthread_cancel", header: pthreadh.}
 
   proc pthread_getspecific(a1: ThreadVarSlot): pointer {.
     importc: "pthread_getspecific", header: pthreadh.}
@@ -186,44 +94,13 @@ else:
   proc threadVarGetValue(s: ThreadVarSlot): pointer {.inline.} =
     result = pthread_getspecific(s)
 
-  type CpuSet {.importc: "cpu_set_t", header: schedh.} = object
-     when defined(linux) and defined(amd64):
-       abi: array[1024 div (8 * sizeof(culong)), culong]
-
-  proc cpusetZero(s: var CpuSet) {.importc: "CPU_ZERO", header: schedh.}
-  proc cpusetIncl(cpu: cint; s: var CpuSet) {.
-    importc: "CPU_SET", header: schedh.}
-
-  proc setAffinity(thread: SysThread; setsize: csize_t; s: var CpuSet) {.
-    importc: "pthread_setaffinity_np", header: pthreadh.}
-
-
-const
-  emulatedThreadVars = compileOption("tlsEmulation")
 
 when emulatedThreadVars:
   # the compiler generates this proc for us, so that we can get the size of
   # the thread local var block; we use this only for sanity checking though
   proc nimThreadVarsSize(): int {.noconv, importc: "NimThreadVarsSize".}
 
-# we preallocate a fixed size for thread local storage, so that no heap
-# allocations are needed. Currently less than 16K are used on a 64bit machine.
-# We use ``float`` for proper alignment:
-const nimTlsSize {.intdefine.} = 16000
-type
-  ThreadLocalStorage = array[0..(nimTlsSize div sizeof(float)), float]
-  PGcThread = ptr GcThread
-  GcThread {.pure, inheritable.} = object
-    when emulatedThreadVars:
-      tls: ThreadLocalStorage
-    else:
-      nil
-    when hasSharedHeap:
-      next, prev: PGcThread
-      stackBottom, stackTop: pointer
-      stackSize: int
-    else:
-      nil
+
 
 when emulatedThreadVars:
   var globalsSlot: ThreadVarSlot
@@ -245,4 +122,4 @@ when not defined(useNimRtl):
     if nimThreadVarsSize() > sizeof(ThreadLocalStorage):
       c_fprintf(cstderr, """too large thread local storage size requested,
 use -d:\"nimTlsSize=X\" to setup even more or stop using unittest.nim""")
-      quit 1
+      rawQuit 1
diff --git a/lib/system/threads.nim b/lib/system/threads.nim
deleted file mode 100644
index 6b858c4bb..000000000
--- a/lib/system/threads.nim
+++ /dev/null
@@ -1,429 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## Thread support for Nim.
-##
-## **Note**: This is part of the system module. Do not import it directly.
-## To activate thread support you need to compile
-## with the ``--threads:on`` command line switch.
-##
-## Nim's memory model for threads is quite different from other common
-## programming languages (C, Pascal): Each thread has its own
-## (garbage collected) heap and sharing of memory is restricted. This helps
-## to prevent race conditions and improves efficiency. See `the manual for
-## details of this memory model <manual.html#threads>`_.
-##
-## Examples
-## ========
-##
-## .. code-block:: Nim
-##
-##  import locks
-##
-##  var
-##    thr: array[0..4, Thread[tuple[a,b: int]]]
-##    L: Lock
-##
-##  proc threadFunc(interval: tuple[a,b: int]) {.thread.} =
-##    for i in interval.a..interval.b:
-##      acquire(L) # lock stdout
-##      echo i
-##      release(L)
-##
-##  initLock(L)
-##
-##  for i in 0..high(thr):
-##    createThread(thr[i], threadFunc, (i*10, i*10+5))
-##  joinThreads(thr)
-
-when not declared(ThisIsSystem):
-  {.error: "You must not import this module explicitly".}
-
-const
-  StackGuardSize = 4096
-  ThreadStackMask =
-    when defined(genode):
-      1024*64*sizeof(int)-1
-    else:
-      1024*256*sizeof(int)-1
-  ThreadStackSize = ThreadStackMask+1 - StackGuardSize
-
-#const globalsSlot = ThreadVarSlot(0)
-#sysAssert checkSlot.int == globalsSlot.int
-
-# create for the main thread. Note: do not insert this data into the list
-# of all threads; it's not to be stopped etc.
-when not defined(useNimRtl):
-  #when not defined(createNimRtl): initStackBottom()
-  when declared(initGC):
-    initGC()
-    when not emulatedThreadVars:
-      type ThreadType {.pure.} = enum
-        None = 0,
-        NimThread = 1,
-        ForeignThread = 2
-      var
-        threadType {.rtlThreadVar.}: ThreadType
-
-      threadType = ThreadType.NimThread
-
-# We jump through some hops here to ensure that Nim thread procs can have
-# the Nim calling convention. This is needed because thread procs are
-# ``stdcall`` on Windows and ``noconv`` on UNIX. Alternative would be to just
-# use ``stdcall`` since it is mapped to ``noconv`` on UNIX anyway.
-
-type
-  Thread*[TArg] = object
-    core: PGcThread
-    sys: SysThread
-    when TArg is void:
-      dataFn: proc () {.nimcall, gcsafe.}
-    else:
-      dataFn: proc (m: TArg) {.nimcall, gcsafe.}
-      data: TArg
-
-var
-  threadDestructionHandlers {.rtlThreadVar.}: seq[proc () {.closure, gcsafe, raises: [].}]
-
-proc onThreadDestruction*(handler: proc () {.closure, gcsafe, raises: [].}) =
-  ## Registers a *thread local* handler that is called at the thread's
-  ## destruction.
-  ##
-  ## A thread is destructed when the ``.thread`` proc returns
-  ## normally or when it raises an exception. Note that unhandled exceptions
-  ## in a thread nevertheless cause the whole process to die.
-  threadDestructionHandlers.add handler
-
-template afterThreadRuns() =
-  for i in countdown(threadDestructionHandlers.len-1, 0):
-    threadDestructionHandlers[i]()
-
-when not defined(boehmgc) and not hasSharedHeap and not defined(gogc) and not defined(gcRegions):
-  proc deallocOsPages() {.rtl, raises: [].}
-
-proc threadTrouble() {.raises: [], gcsafe.}
-  ## defined in system/excpt.nim
-
-when defined(boehmgc):
-  type GCStackBaseProc = proc(sb: pointer, t: pointer) {.noconv.}
-  proc boehmGC_call_with_stack_base(sbp: GCStackBaseProc, p: pointer)
-    {.importc: "GC_call_with_stack_base", boehmGC.}
-  proc boehmGC_register_my_thread(sb: pointer)
-    {.importc: "GC_register_my_thread", boehmGC.}
-  proc boehmGC_unregister_my_thread()
-    {.importc: "GC_unregister_my_thread", boehmGC.}
-
-  proc threadProcWrapDispatch[TArg](sb: pointer, thrd: pointer) {.noconv, raises: [].} =
-    boehmGC_register_my_thread(sb)
-    try:
-      let thrd = cast[ptr Thread[TArg]](thrd)
-      when TArg is void:
-        thrd.dataFn()
-      else:
-        thrd.dataFn(thrd.data)
-    except:
-      threadTrouble()
-    finally:
-      afterThreadRuns()
-    boehmGC_unregister_my_thread()
-else:
-  proc threadProcWrapDispatch[TArg](thrd: ptr Thread[TArg]) {.raises: [].} =
-    try:
-      when TArg is void:
-        thrd.dataFn()
-      else:
-        when defined(nimV2):
-          thrd.dataFn(thrd.data)
-        else:
-          var x: TArg
-          deepCopy(x, thrd.data)
-          thrd.dataFn(x)
-    except:
-      threadTrouble()
-    finally:
-      afterThreadRuns()
-
-proc threadProcWrapStackFrame[TArg](thrd: ptr Thread[TArg]) {.raises: [].} =
-  when defined(boehmgc):
-    boehmGC_call_with_stack_base(threadProcWrapDispatch[TArg], thrd)
-  elif not defined(nogc) and not defined(gogc) and not defined(gcRegions) and not usesDestructors:
-    var p {.volatile.}: pointer
-    # init the GC for refc/markandsweep
-    nimGC_setStackBottom(addr(p))
-    initGC()
-    when declared(threadType):
-      threadType = ThreadType.NimThread
-    threadProcWrapDispatch[TArg](thrd)
-    when declared(deallocOsPages): deallocOsPages()
-  else:
-    threadProcWrapDispatch(thrd)
-
-template threadProcWrapperBody(closure: untyped): untyped =
-  var thrd = cast[ptr Thread[TArg]](closure)
-  var core = thrd.core
-  when declared(globalsSlot): threadVarSetValue(globalsSlot, thrd.core)
-  threadProcWrapStackFrame(thrd)
-  # Since an unhandled exception terminates the whole process (!), there is
-  # no need for a ``try finally`` here, nor would it be correct: The current
-  # exception is tried to be re-raised by the code-gen after the ``finally``!
-  # However this is doomed to fail, because we already unmapped every heap
-  # page!
-
-  # mark as not running anymore:
-  thrd.core = nil
-  thrd.dataFn = nil
-  deallocShared(cast[pointer](core))
-
-{.push stack_trace:off.}
-when defined(windows):
-  proc threadProcWrapper[TArg](closure: pointer): int32 {.stdcall.} =
-    threadProcWrapperBody(closure)
-    # implicitly return 0
-elif defined(genode):
-   proc threadProcWrapper[TArg](closure: pointer) {.noconv.} =
-    threadProcWrapperBody(closure)
-else:
-  proc threadProcWrapper[TArg](closure: pointer): pointer {.noconv.} =
-    threadProcWrapperBody(closure)
-{.pop.}
-
-proc running*[TArg](t: Thread[TArg]): bool {.inline.} =
-  ## Returns true if `t` is running.
-  result = t.dataFn != nil
-
-proc handle*[TArg](t: Thread[TArg]): SysThread {.inline.} =
-  ## Returns the thread handle of `t`.
-  result = t.sys
-
-when hostOS == "windows":
-  const MAXIMUM_WAIT_OBJECTS = 64
-
-  proc joinThread*[TArg](t: Thread[TArg]) {.inline.} =
-    ## Waits for the thread `t` to finish.
-    discard waitForSingleObject(t.sys, -1'i32)
-
-  proc joinThreads*[TArg](t: varargs[Thread[TArg]]) =
-    ## Waits for every thread in `t` to finish.
-    var a: array[MAXIMUM_WAIT_OBJECTS, SysThread]
-    var k = 0
-    while k < len(t):
-      var count = min(len(t) - k, MAXIMUM_WAIT_OBJECTS)
-      for i in 0..(count - 1): a[i] = t[i + k].sys
-      discard waitForMultipleObjects(int32(count),
-                                     cast[ptr SysThread](addr(a)), 1, -1)
-      inc(k, MAXIMUM_WAIT_OBJECTS)
-
-elif defined(genode):
-  proc joinThread*[TArg](t: Thread[TArg]) {.importcpp.}
-    ## Waits for the thread `t` to finish.
-
-  proc joinThreads*[TArg](t: varargs[Thread[TArg]]) =
-    ## Waits for every thread in `t` to finish.
-    for i in 0..t.high: joinThread(t[i])
-
-else:
-  proc joinThread*[TArg](t: Thread[TArg]) {.inline.} =
-    ## Waits for the thread `t` to finish.
-    discard pthread_join(t.sys, nil)
-
-  proc joinThreads*[TArg](t: varargs[Thread[TArg]]) =
-    ## Waits for every thread in `t` to finish.
-    for i in 0..t.high: joinThread(t[i])
-
-when false:
-  # XXX a thread should really release its heap here somehow:
-  proc destroyThread*[TArg](t: var Thread[TArg]) =
-    ## Forces the thread `t` to terminate. This is potentially dangerous if
-    ## you don't have full control over `t` and its acquired resources.
-    when hostOS == "windows":
-      discard TerminateThread(t.sys, 1'i32)
-    else:
-      discard pthread_cancel(t.sys)
-    when declared(registerThread): unregisterThread(addr(t))
-    t.dataFn = nil
-    ## if thread `t` already exited, `t.core` will be `null`.
-    if not isNil(t.core):
-      deallocShared(t.core)
-      t.core = nil
-
-when hostOS == "windows":
-  proc createThread*[TArg](t: var Thread[TArg],
-                           tp: proc (arg: TArg) {.thread, nimcall.},
-                           param: TArg) =
-    ## Creates a new thread `t` and starts its execution.
-    ##
-    ## Entry point is the proc `tp`.
-    ## `param` is passed to `tp`. `TArg` can be ``void`` if you
-    ## don't need to pass any data to the thread.
-    t.core = cast[PGcThread](allocShared0(sizeof(GcThread)))
-
-    when TArg isnot void: t.data = param
-    t.dataFn = tp
-    when hasSharedHeap: t.core.stackSize = ThreadStackSize
-    var dummyThreadId: int32
-    t.sys = createThread(nil, ThreadStackSize, threadProcWrapper[TArg],
-                         addr(t), 0'i32, dummyThreadId)
-    if t.sys <= 0:
-      raise newException(ResourceExhaustedError, "cannot create thread")
-
-  proc pinToCpu*[Arg](t: var Thread[Arg]; cpu: Natural) =
-    ## Pins a thread to a `CPU`:idx:.
-    ##
-    ## In other words sets a thread's `affinity`:idx:.
-    ## If you don't know what this means, you shouldn't use this proc.
-    setThreadAffinityMask(t.sys, uint(1 shl cpu))
-
-elif defined(genode):
-  var affinityOffset: cuint = 1
-    ## CPU affinity offset for next thread, safe to roll-over.
-
-  proc createThread*[TArg](t: var Thread[TArg],
-                           tp: proc (arg: TArg) {.thread, nimcall.},
-                           param: TArg) =
-    t.core = cast[PGcThread](allocShared0(sizeof(GcThread)))
-
-    when TArg isnot void: t.data = param
-    t.dataFn = tp
-    when hasSharedHeap: t.stackSize = ThreadStackSize
-    t.sys.initThread(
-      runtimeEnv,
-      ThreadStackSize.culonglong,
-      threadProcWrapper[TArg], addr(t), affinityOffset)
-    inc affinityOffset
-
-  proc pinToCpu*[Arg](t: var Thread[Arg]; cpu: Natural) =
-    {.hint: "cannot change Genode thread CPU affinity after initialization".}
-    discard
-
-else:
-  proc createThread*[TArg](t: var Thread[TArg],
-                           tp: proc (arg: TArg) {.thread, nimcall.},
-                           param: TArg) =
-    ## Creates a new thread `t` and starts its execution.
-    ##
-    ## Entry point is the proc `tp`. `param` is passed to `tp`.
-    ## `TArg` can be ``void`` if you
-    ## don't need to pass any data to the thread.
-    t.core = cast[PGcThread](allocShared0(sizeof(GcThread)))
-
-    when TArg isnot void: t.data = param
-    t.dataFn = tp
-    when hasSharedHeap: t.core.stackSize = ThreadStackSize
-    var a {.noinit.}: Pthread_attr
-    doAssert pthread_attr_init(a) == 0
-    doAssert pthread_attr_setstacksize(a, ThreadStackSize) == 0
-    if pthread_create(t.sys, a, threadProcWrapper[TArg], addr(t)) != 0:
-      raise newException(ResourceExhaustedError, "cannot create thread")
-    doAssert pthread_attr_destroy(a) == 0
-
-  proc pinToCpu*[Arg](t: var Thread[Arg]; cpu: Natural) =
-    ## Pins a thread to a `CPU`:idx:.
-    ##
-    ## In other words sets a thread's `affinity`:idx:.
-    ## If you don't know what this means, you shouldn't use this proc.
-    when not defined(macosx):
-      var s {.noinit.}: CpuSet
-      cpusetZero(s)
-      cpusetIncl(cpu.cint, s)
-      setAffinity(t.sys, csize_t(sizeof(s)), s)
-
-proc createThread*(t: var Thread[void], tp: proc () {.thread, nimcall.}) =
-  createThread[void](t, tp)
-
-# we need to cache current threadId to not perform syscall all the time
-var threadId {.threadvar.}: int
-
-when defined(windows):
-  proc getThreadId*(): int =
-    ## Gets the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(getCurrentThreadId())
-    result = threadId
-
-elif defined(linux):
-  proc syscall(arg: clong): clong {.varargs, importc: "syscall", header: "<unistd.h>".}
-  when defined(amd64):
-    const NR_gettid = clong(186)
-  else:
-    var NR_gettid {.importc: "__NR_gettid", header: "<sys/syscall.h>".}: clong
-
-  proc getThreadId*(): int =
-    ## Gets the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(syscall(NR_gettid))
-    result = threadId
-
-elif defined(dragonfly):
-  proc lwp_gettid(): int32 {.importc, header: "unistd.h".}
-
-  proc getThreadId*(): int =
-    ## Gets the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(lwp_gettid())
-    result = threadId
-
-elif defined(openbsd):
-  proc getthrid(): int32 {.importc: "getthrid", header: "<unistd.h>".}
-
-  proc getThreadId*(): int =
-    ## get the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(getthrid())
-    result = threadId
-
-elif defined(netbsd):
-  proc lwp_self(): int32 {.importc: "_lwp_self", header: "<lwp.h>".}
-
-  proc getThreadId*(): int =
-    ## Gets the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(lwp_self())
-    result = threadId
-
-elif defined(freebsd):
-  proc syscall(arg: cint, arg0: ptr cint): cint {.varargs, importc: "syscall", header: "<unistd.h>".}
-  var SYS_thr_self {.importc:"SYS_thr_self", header:"<sys/syscall.h>"}: cint
-
-  proc getThreadId*(): int =
-    ## Gets the ID of the currently running thread.
-    var tid = 0.cint
-    if threadId == 0:
-      discard syscall(SYS_thr_self, addr tid)
-      threadId = tid
-    result = threadId
-
-elif defined(macosx):
-  proc syscall(arg: cint): cint {.varargs, importc: "syscall", header: "<unistd.h>".}
-  var SYS_thread_selfid {.importc:"SYS_thread_selfid", header:"<sys/syscall.h>".}: cint
-
-  proc getThreadId*(): int =
-    ## Gets the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(syscall(SYS_thread_selfid))
-    result = threadId
-
-elif defined(solaris):
-  type thread_t {.importc: "thread_t", header: "<thread.h>".} = distinct int
-  proc thr_self(): thread_t {.importc, header: "<thread.h>".}
-
-  proc getThreadId*(): int =
-    ## Gets the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(thr_self())
-    result = threadId
-
-elif defined(haiku):
-  type thr_id {.importc: "thread_id", header: "<OS.h>".} = distinct int32
-  proc find_thread(name: cstring): thr_id {.importc, header: "<OS.h>".}
-
-  proc getThreadId*(): int =
-    ## Gets the ID of the currently running thread.
-    if threadId == 0:
-      threadId = int(find_thread(nil))
-    result = threadId
diff --git a/lib/system/widestrs.nim b/lib/system/widestrs.nim
deleted file mode 100644
index aabcbdc90..000000000
--- a/lib/system/widestrs.nim
+++ /dev/null
@@ -1,211 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-# Nim support for C/C++'s `wide strings`:idx:. This is part of the system
-# module! Do not import it directly!
-
-#when not declared(ThisIsSystem):
-#  {.error: "You must not import this module explicitly".}
-
-type
-  Utf16Char* = distinct int16
-
-when defined(nimv2):
-
-  type
-    WideCString* = ptr UncheckedArray[Utf16Char]
-
-    WideCStringObj* = object
-      bytes: int
-      data: WideCString
-
-  proc `=destroy`(a: var WideCStringObj) =
-    if a.data != nil:
-      deallocShared(a.data)
-      a.data = nil
-
-  proc `=`(a: var WideCStringObj; b: WideCStringObj) {.error.}
-
-  proc `=sink`(a: var WideCStringObj; b: WideCStringObj) =
-    a.bytes = b.bytes
-    a.data = b.data
-
-  proc createWide(a: var WideCStringObj; bytes: int) =
-    a.bytes = bytes
-    a.data = cast[typeof(a.data)](allocShared0(bytes))
-
-  template `[]`*(a: WideCStringObj; idx: int): Utf16Char = a.data[idx]
-  template `[]=`*(a: WideCStringObj; idx: int; val: Utf16Char) = a.data[idx] = val
-
-  template nullWide(): untyped = WideCStringObj(bytes: 0, data: nil)
-
-  converter toWideCString*(x: WideCStringObj): WideCString {.inline.} =
-    result = x.data
-
-else:
-  template nullWide(): untyped = nil
-
-  type
-    WideCString* = ref UncheckedArray[Utf16Char]
-    WideCStringObj* = WideCString
-
-  template createWide(a; L) =
-    unsafeNew(a, L)
-
-proc ord(arg: Utf16Char): int = int(cast[uint16](arg))
-
-proc len*(w: WideCString): int =
-  ## returns the length of a widestring. This traverses the whole string to
-  ## find the binary zero end marker!
-  result = 0
-  while int16(w[result]) != 0'i16: inc result
-
-const
-  UNI_REPLACEMENT_CHAR = Utf16Char(0xFFFD'i16)
-  UNI_MAX_BMP = 0x0000FFFF
-  UNI_MAX_UTF16 = 0x0010FFFF
-  UNI_MAX_UTF32 = 0x7FFFFFFF
-  UNI_MAX_LEGAL_UTF32 = 0x0010FFFF
-
-  halfShift = 10
-  halfBase = 0x0010000
-  halfMask = 0x3FF
-
-  UNI_SUR_HIGH_START = 0xD800
-  UNI_SUR_HIGH_END = 0xDBFF
-  UNI_SUR_LOW_START = 0xDC00
-  UNI_SUR_LOW_END = 0xDFFF
-  UNI_REPL = 0xFFFD
-
-template ones(n: untyped): untyped = ((1 shl n)-1)
-
-template fastRuneAt(s: cstring, i, L: int, result: untyped, doInc = true) =
-  ## Returns the unicode character ``s[i]`` in `result`. If ``doInc == true``
-  ## `i` is incremented by the number of bytes that have been processed.
-  bind ones
-
-  if ord(s[i]) <= 127:
-    result = ord(s[i])
-    when doInc: inc(i)
-  elif ord(s[i]) shr 5 == 0b110:
-    #assert(ord(s[i+1]) shr 6 == 0b10)
-    if i <= L - 2:
-      result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6))
-      when doInc: inc(i, 2)
-    else:
-      result = UNI_REPL
-      when doInc: inc(i)
-  elif ord(s[i]) shr 4 == 0b1110:
-    if i <= L - 3:
-      #assert(ord(s[i+1]) shr 6 == 0b10)
-      #assert(ord(s[i+2]) shr 6 == 0b10)
-      result = (ord(s[i]) and ones(4)) shl 12 or
-               (ord(s[i+1]) and ones(6)) shl 6 or
-               (ord(s[i+2]) and ones(6))
-      when doInc: inc(i, 3)
-    else:
-      result = UNI_REPL
-      when doInc: inc(i)
-  elif ord(s[i]) shr 3 == 0b11110:
-    if i <= L - 4:
-      #assert(ord(s[i+1]) shr 6 == 0b10)
-      #assert(ord(s[i+2]) shr 6 == 0b10)
-      #assert(ord(s[i+3]) shr 6 == 0b10)
-      result = (ord(s[i]) and ones(3)) shl 18 or
-               (ord(s[i+1]) and ones(6)) shl 12 or
-               (ord(s[i+2]) and ones(6)) shl 6 or
-               (ord(s[i+3]) and ones(6))
-      when doInc: inc(i, 4)
-    else:
-      result = UNI_REPL
-      when doInc: inc(i)
-  else:
-    result = 0xFFFD
-    when doInc: inc(i)
-
-iterator runes(s: cstring, L: int): int =
-  var
-    i = 0
-    result: int
-  while i < L:
-    fastRuneAt(s, i, L, result, true)
-    yield result
-
-proc newWideCString*(source: cstring, L: int): WideCStringObj =
-  createWide(result, L * 2 + 2)
-  var d = 0
-  for ch in runes(source, L):
-
-    if ch <= UNI_MAX_BMP:
-      if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_LOW_END:
-        result[d] = UNI_REPLACEMENT_CHAR
-      else:
-        result[d] = cast[Utf16Char](uint16(ch))
-    elif ch > UNI_MAX_UTF16:
-      result[d] = UNI_REPLACEMENT_CHAR
-    else:
-      let ch = ch - halfBase
-      result[d] = cast[Utf16Char](uint16((ch shr halfShift) + UNI_SUR_HIGH_START))
-      inc d
-      result[d] = cast[Utf16Char](uint16((ch and halfMask) + UNI_SUR_LOW_START))
-    inc d
-  result[d] = Utf16Char(0)
-
-proc newWideCString*(s: cstring): WideCStringObj =
-  if s.isNil: return nullWide
-
-  result = newWideCString(s, s.len)
-
-proc newWideCString*(s: string): WideCStringObj =
-  result = newWideCString(s, s.len)
-
-proc `$`*(w: WideCString, estimate: int, replacement: int = 0xFFFD): string =
-  result = newStringOfCap(estimate + estimate shr 2)
-
-  var i = 0
-  while w[i].int16 != 0'i16:
-    var ch = ord(w[i])
-    inc i
-    if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_HIGH_END:
-      # If the 16 bits following the high surrogate are in the source buffer...
-      let ch2 = ord(w[i])
-
-      # If it's a low surrogate, convert to UTF32:
-      if ch2 >= UNI_SUR_LOW_START and ch2 <= UNI_SUR_LOW_END:
-        ch = (((ch and halfMask) shl halfShift) + (ch2 and halfMask)) + halfBase
-        inc i
-      else:
-        #invalid UTF-16
-        ch = replacement
-    elif ch >= UNI_SUR_LOW_START and ch <= UNI_SUR_LOW_END:
-      #invalid UTF-16
-      ch = replacement
-
-    if ch < 0x80:
-      result.add chr(ch)
-    elif ch < 0x800:
-      result.add chr((ch shr 6) or 0xc0)
-      result.add chr((ch and 0x3f) or 0x80)
-    elif ch < 0x10000:
-      result.add chr((ch shr 12) or 0xe0)
-      result.add chr(((ch shr 6) and 0x3f) or 0x80)
-      result.add chr((ch and 0x3f) or 0x80)
-    elif ch <= 0x10FFFF:
-      result.add chr((ch shr 18) or 0xf0)
-      result.add chr(((ch shr 12) and 0x3f) or 0x80)
-      result.add chr(((ch shr 6) and 0x3f) or 0x80)
-      result.add chr((ch and 0x3f) or 0x80)
-    else:
-      # replacement char(in case user give very large number):
-      result.add chr(0xFFFD shr 12 or 0b1110_0000)
-      result.add chr(0xFFFD shr 6 and ones(6) or 0b10_0000_00)
-      result.add chr(0xFFFD and ones(6) or 0b10_0000_00)
-
-proc `$`*(s: WideCString): string =
-  result = s $ 80