Diffstat (limited to 'lib/system')
-rw-r--r--  lib/system/alloc.nim | 347
-rw-r--r--  lib/system/ansi_c.nim | 9
-rw-r--r--  lib/system/arc.nim | 94
-rw-r--r--  lib/system/arithm.nim | 425
-rw-r--r--  lib/system/arithmetics.nim | 106
-rw-r--r--  lib/system/assign.nim | 36
-rw-r--r--  lib/system/cellseqs_v1.nim | 19
-rw-r--r--  lib/system/cellseqs_v2.nim | 23
-rw-r--r--  lib/system/cellsets.nim | 4
-rw-r--r--  lib/system/cgprocs.nim | 10
-rw-r--r--  lib/system/channels_builtin.nim | 20
-rw-r--r--  lib/system/chcks.nim | 4
-rw-r--r--  lib/system/comparisons.nim | 4
-rw-r--r--  lib/system/compilation.nim | 69
-rw-r--r--  lib/system/ctypes.nim | 14
-rw-r--r--  lib/system/deepcopy.nim | 16
-rw-r--r--  lib/system/dollars.nim | 27
-rw-r--r--  lib/system/dyncalls.nim | 2
-rw-r--r--  lib/system/embedded.nim | 5
-rw-r--r--  lib/system/excpt.nim | 91
-rw-r--r--  lib/system/fatal.nim | 21
-rw-r--r--  lib/system/gc.nim | 32
-rw-r--r--  lib/system/gc2.nim | 749
-rw-r--r--  lib/system/gc_common.nim | 25
-rw-r--r--  lib/system/gc_ms.nim | 20
-rw-r--r--  lib/system/hti.nim | 2
-rw-r--r--  lib/system/inclrtl.nim | 3
-rw-r--r--  lib/system/indices.nim | 18
-rw-r--r--  lib/system/iterators.nim | 64
-rw-r--r--  lib/system/iterators_1.nim | 17
-rw-r--r--  lib/system/jssys.nim | 247
-rw-r--r--  lib/system/memory.nim | 1
-rw-r--r--  lib/system/mm/go.nim | 2
-rw-r--r--  lib/system/mm/malloc.nim | 4
-rw-r--r--  lib/system/nimscript.nim | 59
-rw-r--r--  lib/system/orc.nim | 92
-rw-r--r--  lib/system/osalloc.nim | 10
-rw-r--r--  lib/system/rawquits.nim | 27
-rw-r--r--  lib/system/repr.nim | 12
-rw-r--r--  lib/system/repr_v2.nim | 47
-rw-r--r--  lib/system/reprjs.nim | 12
-rw-r--r--  lib/system/seqs_v2.nim | 138
-rw-r--r--  lib/system/setops.nim | 4
-rw-r--r--  lib/system/sets.nim | 12
-rw-r--r--  lib/system/strmantle.nim | 21
-rw-r--r--  lib/system/strs_v2.nim | 121
-rw-r--r--  lib/system/sysstr.nim | 75
-rw-r--r--  lib/system/threadimpl.nim | 5
48 files changed, 1132 insertions, 2033 deletions
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index 9c8f70c11..3de6d8713 100644
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -20,6 +20,37 @@ template track(op, address, size) =
 
 # We manage *chunks* of memory. Each chunk is a multiple of the page size.
 # Each chunk starts at an address that is divisible by the page size.
+# Small chunks may be divided into smaller cells of reusable pointers to reduce the number of page allocations.
+
+# An allocation of a small pointer looks approximately like this
+#[
+
+  alloc -> rawAlloc -> No free chunk available > Request a new page from tlsf -> result = chunk.data -------------+
+              |                                                                                                   |
+              v                                                                                                   |
+    Free chunk available                                                                                          |
+              |                                                                                                   |
+              v                                                                                                   v
+      Fetch shared cells -> No free cells available -> Advance acc -> result = chunk.data + chunk.acc -------> return
+    (may not add new cells)                                                                                       ^
+              |                                                                                                   |
+              v                                                                                                   |
+     Free cells available -> result = chunk.freeList -> Advance chunk.freeList -----------------------------------+
+]#
+# so it is split into 3 paths, where the last path is preferred to prevent unnecessary allocations.
+#
+#
+# A deallocation of a small pointer then looks like this
+#[
+  dealloc -> rawDealloc -> chunk.owner == addr(a) --------------> This thread owns the chunk ------> The current chunk is active    -> Chunk is completely unused -----> Chunk references no foreign cells
+                                      |                                       |                   (Add cell into the current chunk)                 |                  Return the current chunk back to tlsf
+                                      |                                       |                                   |                                 |
+                                      v                                       v                                   v                                 v
+                      A different thread owns this chunk.     The current chunk is not active.          chunk.free was < size      Chunk references foreign cells, noop
+                      Add the cell to a.sharedFreeLists      Add the cell into the active chunk          Activate the chunk                       (end)
+                                    (end)                                    (end)                              (end)
+]#
+# So "true" deallocation is delayed for as long as possible in favor of reusing cells.
 
 const
   nimMinHeapPages {.intdefine.} = 128 # 0.5 MB
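The three allocation paths in the diagram above can be condensed into a small, self-contained toy model. Everything below (`ToyCell`, `ToyChunk`, `toyAlloc`, the sizes) is invented for illustration and is not the allocator's API; it only shows the preference order: reuse a free cell, otherwise advance the accumulator, otherwise request a fresh chunk.

type
  ToyCell = ref object
    next: ToyCell
  ToyChunk = object
    freeList: ToyCell      # preferred path: cells returned by earlier deallocs
    acc, capacity: int     # fallback path: bump allocation inside the chunk

proc toyAlloc(c: var ToyChunk; size: int): string =
  if c.freeList != nil:
    c.freeList = c.freeList.next          # pop a free cell and hand it out
    result = "reused a free cell"
  elif c.acc + size <= c.capacity:
    c.acc += size                         # advance the accumulator
    result = "advanced the accumulator"
  else:
    result = "no space left: request a new chunk from the page allocator"

var chunk = ToyChunk(capacity: 64)
echo toyAlloc(chunk, 32)      # advanced the accumulator
echo toyAlloc(chunk, 32)      # advanced the accumulator
chunk.freeList = ToyCell()    # pretend a dealloc returned a cell
echo toyAlloc(chunk, 32)      # reused a free cell
echo toyAlloc(chunk, 32)      # no space left: ...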
@@ -71,6 +102,8 @@ const
 
 type
   FreeCell {.final, pure.} = object
+    # A free cell is a pointer that has been freed, meaning it became available for reuse.
+    # It may become foreign if it is lent to a chunk that did not create it; doing so reduces the number of pages needed.
     next: ptr FreeCell  # next free cell in chunk (overlaid with refcount)
     when not defined(gcDestructors):
       zeroField: int       # 0 means cell is not used (overlaid with typ field)
@@ -90,22 +123,23 @@ type
 
   SmallChunk = object of BaseChunk
     next, prev: PSmallChunk  # chunks of the same size
-    freeList: ptr FreeCell
-    free: int            # how many bytes remain
-    acc: int             # accumulator for small object allocation
-    when defined(gcDestructors):
-      sharedFreeList: ptr FreeCell # make no attempt at avoiding false sharing for now for this object field
-    when defined(nimAlignPragma):
-      data {.align: MemAlign.}: UncheckedArray[byte]      # start of usable memory
-    else:
-      data: UncheckedArray[byte]
+    freeList: ptr FreeCell   # Singly linked list of cells. They may be from foreign chunks or from the current chunk.
+                             #  Should be `nil` when the chunk isn't active in `a.freeSmallChunks`.
+    free: int32              # Bytes this chunk is able to provide using both the accumulator and free cells.
+                             # When a cell is considered foreign, its source chunk's free field is NOT adjusted until it
+                             #  reaches dealloc while the source chunk is active.
+                             # Instead, the receiving chunk gains the capacity and thus reserves space in the foreign chunk.
+    acc: uint32              # Offset from data, used when there are no free cells available but the chunk is considered free.
+    foreignCells: int        # When a free cell is given to a chunk that is not its origin,
+                             #  both the cell and the source chunk are considered foreign.
+                             # Receiving a foreign cell can happen both when deallocating from another thread or when
+                             #  the active chunk in `a.freeSmallChunks` is not the current chunk.
+                             # Freeing a chunk while `foreignCells > 0` leaks memory as all references to it become lost.
+    data {.align: MemAlign.}: UncheckedArray[byte]      # start of usable memory
 
   BigChunk = object of BaseChunk # not necessarily > PageSize!
     next, prev: PBigChunk    # chunks of the same (or bigger) size
-    when defined(nimAlignPragma):
-      data {.align: MemAlign.}: UncheckedArray[byte]      # start of usable memory
-    else:
-      data: UncheckedArray[byte]
+    data {.align: MemAlign.}: UncheckedArray[byte]      # start of usable memory
 
   HeapLinks = object
     len: int
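A hedged sketch of the intrusive free-list idea behind `FreeCell`: a freed block's own memory is reinterpreted as the list node, so tracking reusable cells costs no extra storage. `Cell`, `push`, `pop` and the stand-in blocks are illustrative names, not the allocator's.

type
  Cell = object
    next: ptr Cell

proc push(head: var ptr Cell; p: pointer) =
  let c = cast[ptr Cell](p)   # overlay the freed block with a Cell
  c.next = head
  head = c

proc pop(head: var ptr Cell): pointer =
  result = head               # hand the block back out for reuse
  if head != nil: head = head.next

var freeList: ptr Cell = nil
var blocks: array[2, array[4, int]]              # stand-ins for freed blocks
push(freeList, addr blocks[0])
push(freeList, addr blocks[1])
echo pop(freeList) == cast[pointer](addr blocks[1])   # true: LIFO reuse
echo pop(freeList) == cast[pointer](addr blocks[0])   # true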
@@ -115,7 +149,12 @@ type
   MemRegion = object
     when not defined(gcDestructors):
       minLargeObj, maxLargeObj: int
-    freeSmallChunks: array[0..max(1,SmallChunkSize div MemAlign-1), PSmallChunk]
+    freeSmallChunks: array[0..max(1, SmallChunkSize div MemAlign-1), PSmallChunk]
+      # List of available chunks per size class. Only one is expected to be active per class.
+    when defined(gcDestructors):
+      sharedFreeLists: array[0..max(1, SmallChunkSize div MemAlign-1), ptr FreeCell]
+        # When a thread frees a pointer it did not create, it must not adjust the counters.
+        # Instead, the cell is placed here and deferred until the next allocation.
     flBitmap: uint32
     slBitmap: array[RealFli, uint32]
     matrix: array[RealFli, array[MaxSli, PBigChunk]]
@@ -274,6 +313,20 @@ proc getMaxMem(a: var MemRegion): int =
   # maximum of these both values here:
   result = max(a.currMem, a.maxMem)
 
+const nimMaxHeap {.intdefine.} = 0
+
+proc allocPages(a: var MemRegion, size: int): pointer =
+  when nimMaxHeap != 0:
+    if a.occ + size > nimMaxHeap * 1024 * 1024:
+      raiseOutOfMem()
+  osAllocPages(size)
+
+proc tryAllocPages(a: var MemRegion, size: int): pointer =
+  when nimMaxHeap != 0:
+    if a.occ + size > nimMaxHeap * 1024 * 1024:
+      raiseOutOfMem()
+  osTryAllocPages(size)
+
 proc llAlloc(a: var MemRegion, size: int): pointer =
   # *low-level* alloc for the memory managers data structures. Deallocation
   # is done at the end of the allocator's life time.
@@ -283,14 +336,14 @@ proc llAlloc(a: var MemRegion, size: int): pointer =
     # is one page:
     sysAssert roundup(size+sizeof(LLChunk), PageSize) == PageSize, "roundup 6"
     var old = a.llmem # can be nil and is correct with nil
-    a.llmem = cast[PLLChunk](osAllocPages(PageSize))
+    a.llmem = cast[PLLChunk](allocPages(a, PageSize))
     when defined(nimAvlcorruption):
       trackLocation(a.llmem, PageSize)
     incCurrMem(a, PageSize)
     a.llmem.size = PageSize - sizeof(LLChunk)
     a.llmem.acc = sizeof(LLChunk)
     a.llmem.next = old
-  result = cast[pointer](cast[ByteAddress](a.llmem) + a.llmem.acc)
+  result = cast[pointer](cast[int](a.llmem) + a.llmem.acc)
   dec(a.llmem.size, size)
   inc(a.llmem.acc, size)
   zeroMem(result, size)
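The `nimMaxHeap` check factored into `allocPages`/`tryAllocPages` above follows a simple pattern: an `{.intdefine.}` constant whose check compiles away when it is left at its default. A minimal sketch, using an invented `toyMaxHeap` define in place of `nimMaxHeap` and a bool result in place of raising an out-of-memory error:

const toyMaxHeap {.intdefine.} = 0   # megabytes; 0 means "no limit"

var occupied = 0                     # plays the role of a.occ

proc toyAllocPages(size: int): bool =
  when toyMaxHeap != 0:
    if occupied + size > toyMaxHeap * 1024 * 1024:
      return false                   # the real allocator raises out-of-memory here
  occupied += size
  true

echo toyAllocPages(4096)             # true unless built with a tiny -d:toyMaxHeap=...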
@@ -326,7 +379,7 @@ when not defined(gcDestructors):
     n.link[0] = a.freeAvlNodes
     a.freeAvlNodes = n
 
-proc addHeapLink(a: var MemRegion; p: PBigChunk, size: int) =
+proc addHeapLink(a: var MemRegion; p: PBigChunk, size: int): ptr HeapLinks =
   var it = addr(a.heapLinks)
   while it != nil and it.len >= it.chunks.len: it = it.next
   if it == nil:
@@ -335,10 +388,12 @@ proc addHeapLink(a: var MemRegion; p: PBigChunk, size: int) =
     a.heapLinks.next = n
     n.chunks[0] = (p, size)
     n.len = 1
+    result = n
   else:
     let L = it.len
     it.chunks[L] = (p, size)
     inc it.len
+    result = it
 
 when not defined(gcDestructors):
   include "system/avltree"
@@ -422,8 +477,8 @@ iterator allObjects(m: var MemRegion): pointer {.inline.} =
           var c = cast[PSmallChunk](c)
 
           let size = c.size
-          var a = cast[ByteAddress](addr(c.data))
-          let limit = a + c.acc
+          var a = cast[int](addr(c.data))
+          let limit = a + c.acc.int
           while a <% limit:
             yield cast[pointer](a)
             a = a +% size
@@ -441,13 +496,13 @@ when not defined(gcDestructors):
 
 # ------------- chunk management ----------------------------------------------
 proc pageIndex(c: PChunk): int {.inline.} =
-  result = cast[ByteAddress](c) shr PageShift
+  result = cast[int](c) shr PageShift
 
 proc pageIndex(p: pointer): int {.inline.} =
-  result = cast[ByteAddress](p) shr PageShift
+  result = cast[int](p) shr PageShift
 
 proc pageAddr(p: pointer): PChunk {.inline.} =
-  result = cast[PChunk](cast[ByteAddress](p) and not PageMask)
+  result = cast[PChunk](cast[int](p) and not PageMask)
   #sysAssert(Contains(allocator.chunkStarts, pageIndex(result)))
 
 when false:
@@ -459,15 +514,10 @@ when false:
                 it, it.next, it.prev, it.size)
       it = it.next
 
-const nimMaxHeap {.intdefine.} = 0
-
 proc requestOsChunks(a: var MemRegion, size: int): PBigChunk =
   when not defined(emscripten):
     if not a.blockChunkSizeIncrease:
       let usedMem = a.occ #a.currMem # - a.freeMem
-      when nimMaxHeap != 0:
-        if usedMem > nimMaxHeap * 1024 * 1024:
-          raiseOutOfMem()
       if usedMem < 64 * 1024:
         a.nextChunkSize = PageSize*4
       else:
@@ -476,32 +526,32 @@ proc requestOsChunks(a: var MemRegion, size: int): PBigChunk =
 
   var size = size
   if size > a.nextChunkSize:
-    result = cast[PBigChunk](osAllocPages(size))
+    result = cast[PBigChunk](allocPages(a, size))
   else:
-    result = cast[PBigChunk](osTryAllocPages(a.nextChunkSize))
+    result = cast[PBigChunk](tryAllocPages(a, a.nextChunkSize))
     if result == nil:
-      result = cast[PBigChunk](osAllocPages(size))
+      result = cast[PBigChunk](allocPages(a, size))
       a.blockChunkSizeIncrease = true
     else:
       size = a.nextChunkSize
 
   incCurrMem(a, size)
   inc(a.freeMem, size)
-  a.addHeapLink(result, size)
+  let heapLink = a.addHeapLink(result, size)
   when defined(debugHeapLinks):
     cprintf("owner: %p; result: %p; next pointer %p; size: %ld\n", addr(a),
-      result, result.heapLink, result.size)
+      result, heapLink, size)
 
   when defined(memtracker):
     trackLocation(addr result.size, sizeof(int))
 
-  sysAssert((cast[ByteAddress](result) and PageMask) == 0, "requestOsChunks 1")
+  sysAssert((cast[int](result) and PageMask) == 0, "requestOsChunks 1")
   #zeroMem(result, size)
   result.next = nil
   result.prev = nil
   result.size = size
   # update next.prevSize:
-  var nxt = cast[ByteAddress](result) +% size
+  var nxt = cast[int](result) +% size
   sysAssert((nxt and PageMask) == 0, "requestOsChunks 2")
   var next = cast[PChunk](nxt)
   if pageIndex(next) in a.chunkStarts:
@@ -509,7 +559,7 @@ proc requestOsChunks(a: var MemRegion, size: int): PBigChunk =
     next.prevSize = size or (next.prevSize and 1)
   # set result.prevSize:
   var lastSize = if a.lastSize != 0: a.lastSize else: PageSize
-  var prv = cast[ByteAddress](result) -% lastSize
+  var prv = cast[int](result) -% lastSize
   sysAssert((nxt and PageMask) == 0, "requestOsChunks 3")
   var prev = cast[PChunk](prv)
   if pageIndex(prev) in a.chunkStarts and prev.size == lastSize:
@@ -555,13 +605,13 @@ proc listRemove[T](head: var T, c: T) {.inline.} =
 
 proc updatePrevSize(a: var MemRegion, c: PBigChunk,
                     prevSize: int) {.inline.} =
-  var ri = cast[PChunk](cast[ByteAddress](c) +% c.size)
-  sysAssert((cast[ByteAddress](ri) and PageMask) == 0, "updatePrevSize")
+  var ri = cast[PChunk](cast[int](c) +% c.size)
+  sysAssert((cast[int](ri) and PageMask) == 0, "updatePrevSize")
   if isAccessible(a, ri):
     ri.prevSize = prevSize or (ri.prevSize and 1)
 
 proc splitChunk2(a: var MemRegion, c: PBigChunk, size: int): PBigChunk =
-  result = cast[PBigChunk](cast[ByteAddress](c) +% size)
+  result = cast[PBigChunk](cast[int](c) +% size)
   result.size = c.size - size
   track("result.size", addr result.size, sizeof(int))
   when not defined(nimOptimizedSplitChunk):
@@ -590,8 +640,8 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
   when coalescLeft:
     let prevSize = c.prevSize
     if prevSize != 0:
-      var le = cast[PChunk](cast[ByteAddress](c) -% prevSize)
-      sysAssert((cast[ByteAddress](le) and PageMask) == 0, "freeBigChunk 4")
+      var le = cast[PChunk](cast[int](c) -% prevSize)
+      sysAssert((cast[int](le) and PageMask) == 0, "freeBigChunk 4")
       if isAccessible(a, le) and chunkUnused(le):
         sysAssert(not isSmallChunk(le), "freeBigChunk 5")
         if not isSmallChunk(le) and le.size < MaxBigChunkSize:
@@ -607,8 +657,8 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
             addChunkToMatrix(a, c)
             c = rest
   when coalescRight:
-    var ri = cast[PChunk](cast[ByteAddress](c) +% c.size)
-    sysAssert((cast[ByteAddress](ri) and PageMask) == 0, "freeBigChunk 2")
+    var ri = cast[PChunk](cast[int](c) +% c.size)
+    sysAssert((cast[int](ri) and PageMask) == 0, "freeBigChunk 2")
     if isAccessible(a, ri) and chunkUnused(ri):
       sysAssert(not isSmallChunk(ri), "freeBigChunk 3")
       if not isSmallChunk(ri) and c.size < MaxBigChunkSize:
@@ -660,7 +710,7 @@ proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
     releaseSys a.lock
 
 proc getHugeChunk(a: var MemRegion; size: int): PBigChunk =
-  result = cast[PBigChunk](osAllocPages(size))
+  result = cast[PBigChunk](allocPages(a, size))
   when RegionHasLock:
     if not a.lockActive:
       a.lockActive = true
@@ -669,7 +719,7 @@ proc getHugeChunk(a: var MemRegion; size: int): PBigChunk =
   incCurrMem(a, size)
   # XXX add this to the heap links. But also remove it from it later.
   when false: a.addHeapLink(result, size)
-  sysAssert((cast[ByteAddress](result) and PageMask) == 0, "getHugeChunk")
+  sysAssert((cast[int](result) and PageMask) == 0, "getHugeChunk")
   result.next = nil
   result.prev = nil
   result.size = size
@@ -772,40 +822,42 @@ when defined(gcDestructors):
     sysAssert c.next == nil, "c.next pointer must be nil"
     atomicPrepend a.sharedFreeListBigChunks, c
 
-  proc addToSharedFreeList(c: PSmallChunk; f: ptr FreeCell) {.inline.} =
-    atomicPrepend c.sharedFreeList, f
+  proc addToSharedFreeList(c: PSmallChunk; f: ptr FreeCell; size: int) {.inline.} =
+    atomicPrepend c.owner.sharedFreeLists[size], f
+
+  const MaxSteps = 20
 
   proc compensateCounters(a: var MemRegion; c: PSmallChunk; size: int) =
     # rawDealloc did NOT do the usual:
     # `inc(c.free, size); dec(a.occ, size)` because it wasn't the owner of these
     # memory locations. We have to compensate here for these for the entire list.
-    # Well, not for the entire list, but for `max` elements of the list because
-    # we split the list in order to achieve bounded response times.
     var it = c.freeList
-    var x = 0
-    var maxIters = 20 # make it time-bounded
+    var total = 0
     while it != nil:
-      if maxIters == 0:
-        let rest = it.next.loada
-        it.next.storea nil
-        addToSharedFreeList(c, rest)
-        break
-      inc x, size
-      it = it.next.loada
-      dec maxIters
-    inc(c.free, x)
-    dec(a.occ, x)
+      inc total, size
+      let chunk = cast[PSmallChunk](pageAddr(it))
+      if c != chunk:
+        # The cell is foreign, potentially even from a foreign thread.
+        # It must block the current chunk from being freed, as doing so would leak memory.
+        inc c.foreignCells
+      it = it.next
+    # By not adjusting the foreign chunk we reserve space in it to prevent deallocation
+    inc(c.free, total)
+    dec(a.occ, total)
 
   proc freeDeferredObjects(a: var MemRegion; root: PBigChunk) =
     var it = root
-    var maxIters = 20 # make it time-bounded
+    var maxIters = MaxSteps # make it time-bounded
     while true:
+      let rest = it.next.loada
+      it.next.storea nil
+      deallocBigChunk(a, cast[PBigChunk](it))
       if maxIters == 0:
-        let rest = it.next.loada
-        it.next.storea nil
-        addToSharedFreeListBigChunks(a, rest)
+        if rest != nil:
+          addToSharedFreeListBigChunks(a, rest)
+          sysAssert a.sharedFreeListBigChunks != nil, "re-enqueing failed"
         break
-      it = it.next.loada
+      it = rest
       dec maxIters
       if it == nil: break
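The shared free lists filled by `addToSharedFreeList` above (and drained by `fetchSharedCells` further down) follow a common lock-free pattern: non-owning threads prepend cells with a CAS loop, and the owner later steals the whole list in a single atomic exchange. A minimal sketch using `std/atomics` and an invented `Node` type; this is not the allocator's own code:

import std/atomics

type
  Node = object
    next: ptr Node

var shared: Atomic[ptr Node]     # one slot per size class in the real code

proc foreignFree(n: ptr Node) =
  ## Called by a thread that does not own the chunk: prepend the cell.
  var head = shared.load(moRelaxed)
  while true:
    n.next = head
    if shared.compareExchangeWeak(head, n, moRelease, moRelaxed):
      break                      # on failure `head` was refreshed, retry

proc ownerFetch(): ptr Node =
  ## Called by the owner on its next allocation: steal the entire list at once.
  shared.exchange(nil, moAcquire)

var a, b: Node
foreignFree(addr a)
foreignFree(addr b)
var it = ownerFetch()
while it != nil:
  echo "reclaimed one deferred cell"
  it = it.next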
 
@@ -816,61 +868,90 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
   sysAssert(roundup(65, 8) == 72, "rawAlloc: roundup broken")
   var size = roundup(requestedSize, MemAlign)
   sysAssert(size >= sizeof(FreeCell), "rawAlloc: requested size too small")
-
   sysAssert(size >= requestedSize, "insufficient allocated size!")
   #c_fprintf(stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
+
   if size <= SmallChunkSize-smallChunkOverhead():
+    template fetchSharedCells(tc: PSmallChunk) =
+      # Consumes cells that (potentially) foreign threads placed on `a.sharedFreeLists[s]`
+      when defined(gcDestructors):
+        if tc.freeList == nil:
+          when hasThreadSupport:
+            # Steal the entire list from `sharedFreeList`:
+            tc.freeList = atomicExchangeN(addr a.sharedFreeLists[s], nil, ATOMIC_RELAXED)
+          else:
+            tc.freeList = a.sharedFreeLists[s]
+            a.sharedFreeLists[s] = nil
+          # if `tc.freeList` isn't nil, `tc` will gain capacity.
+          # We must calculate how much it gained and how many foreign cells are included.
+          compensateCounters(a, tc, size)
+
     # allocate a small block: for small chunks, we use only its next pointer
     let s = size div MemAlign
     var c = a.freeSmallChunks[s]
     if c == nil:
+      # There is no free chunk of the requested size available; we need a new one.
       c = getSmallChunk(a)
+      # init all fields in case memory didn't get zeroed
       c.freeList = nil
+      c.foreignCells = 0
       sysAssert c.size == PageSize, "rawAlloc 3"
       c.size = size
-      c.acc = size
-      when defined(gcDestructors):
-        c.sharedFreeList = nil
-      c.free = SmallChunkSize - smallChunkOverhead() - size
+      c.acc = size.uint32
+      c.free = SmallChunkSize - smallChunkOverhead() - size.int32
       sysAssert c.owner == addr(a), "rawAlloc: No owner set!"
       c.next = nil
       c.prev = nil
-      listAdd(a.freeSmallChunks[s], c)
+      # Shared cells are fetched here in case `c.size * 2 >= SmallChunkSize - smallChunkOverhead()`.
+      # For those single cell chunks, we would otherwise have to allocate a new one almost every time.
+      fetchSharedCells(c)
+      if c.free >= size:
+        # Because removals from `a.freeSmallChunks[s]` only happen in the other alloc branch and during dealloc,
+        #  we must not add it to the list if it cannot be used the next time a pointer of `size` bytes is needed.
+        listAdd(a.freeSmallChunks[s], c)
       result = addr(c.data)
-      sysAssert((cast[ByteAddress](result) and (MemAlign-1)) == 0, "rawAlloc 4")
+      sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 4")
     else:
+      # There is a free chunk of the requested size available, use it.
       sysAssert(allocInv(a), "rawAlloc: begin c != nil")
       sysAssert c.next != c, "rawAlloc 5"
       #if c.size != size:
       #  c_fprintf(stdout, "csize: %lld; size %lld\n", c.size, size)
       sysAssert c.size == size, "rawAlloc 6"
-      when defined(gcDestructors):
-        if c.freeList == nil:
-          when hasThreadSupport:
-            c.freeList = atomicExchangeN(addr c.sharedFreeList, nil, ATOMIC_RELAXED)
-          else:
-            c.freeList = c.sharedFreeList
-            c.sharedFreeList = nil
-          compensateCounters(a, c, size)
       if c.freeList == nil:
-        sysAssert(c.acc + smallChunkOverhead() + size <= SmallChunkSize,
+        sysAssert(c.acc.int + smallChunkOverhead() + size <= SmallChunkSize,
                   "rawAlloc 7")
-        result = cast[pointer](cast[ByteAddress](addr(c.data)) +% c.acc)
+        result = cast[pointer](cast[int](addr(c.data)) +% c.acc.int)
         inc(c.acc, size)
       else:
+        # There are free cells available, prefer them over the accumulator
         result = c.freeList
         when not defined(gcDestructors):
           sysAssert(c.freeList.zeroField == 0, "rawAlloc 8")
         c.freeList = c.freeList.next
+        if cast[PSmallChunk](pageAddr(result)) != c:
+          # This cell isn't a blocker for the current chunk's deallocation anymore
+          dec(c.foreignCells)
+        else:
+          sysAssert(c == cast[PSmallChunk](pageAddr(result)), "rawAlloc: Bad cell")
+      # Even if the cell we return is foreign, the local chunk's capacity decreases.
+      # The capacity was previously reserved in the source chunk (when it first got allocated),
+      #  then added into the current chunk during dealloc,
+      #  so the source chunk will not be freed or leak memory because of this.
       dec(c.free, size)
-      sysAssert((cast[ByteAddress](result) and (MemAlign-1)) == 0, "rawAlloc 9")
+      sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 9")
       sysAssert(allocInv(a), "rawAlloc: end c != nil")
-    sysAssert(allocInv(a), "rawAlloc: before c.free < size")
-    if c.free < size:
-      sysAssert(allocInv(a), "rawAlloc: before listRemove test")
-      listRemove(a.freeSmallChunks[s], c)
-      sysAssert(allocInv(a), "rawAlloc: end listRemove test")
-    sysAssert(((cast[ByteAddress](result) and PageMask) - smallChunkOverhead()) %%
+      # We fetch deferred cells *after* advancing `c.freeList`/`acc` to adjust `c.free`.
+      # If after the adjustment it turns out there's free cells available,
+      #  the chunk stays in `a.freeSmallChunks[s]` and the need for a new chunk is delayed.
+      fetchSharedCells(c)
+      sysAssert(allocInv(a), "rawAlloc: before c.free < size")
+      if c.free < size:
+        # Even after fetching shared cells, the chunk has no usable memory left. It is no longer the active chunk.
+        sysAssert(allocInv(a), "rawAlloc: before listRemove test")
+        listRemove(a.freeSmallChunks[s], c)
+        sysAssert(allocInv(a), "rawAlloc: end listRemove test")
+    sysAssert(((cast[int](result) and PageMask) - smallChunkOverhead()) %%
                size == 0, "rawAlloc 21")
     sysAssert(allocInv(a), "rawAlloc: end small size")
     inc a.occ, size
@@ -892,16 +973,16 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
     sysAssert c.prev == nil, "rawAlloc 10"
     sysAssert c.next == nil, "rawAlloc 11"
     result = addr(c.data)
-    sysAssert((cast[ByteAddress](c) and (MemAlign-1)) == 0, "rawAlloc 13")
-    sysAssert((cast[ByteAddress](c) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary")
+    sysAssert((cast[int](c) and (MemAlign-1)) == 0, "rawAlloc 13")
+    sysAssert((cast[int](c) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary")
     when not defined(gcDestructors):
       if a.root == nil: a.root = getBottom(a)
-      add(a, a.root, cast[ByteAddress](result), cast[ByteAddress](result)+%size)
+      add(a, a.root, cast[int](result), cast[int](result)+%size)
     inc a.occ, c.size
     trackSize(c.size)
   sysAssert(isAccessible(a, result), "rawAlloc 14")
   sysAssert(allocInv(a), "rawAlloc: end")
-  when logAlloc: cprintf("var pointer_%p = alloc(%ld)\n", result, requestedSize)
+  when logAlloc: cprintf("var pointer_%p = alloc(%ld) # %p\n", result, requestedSize, addr a)
 
 proc rawAlloc0(a: var MemRegion, requestedSize: int): pointer =
   result = rawAlloc(a, requestedSize)
@@ -917,7 +998,7 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
   if isSmallChunk(c):
     # `p` is within a small chunk:
     var c = cast[PSmallChunk](c)
-    var s = c.size
+    let s = c.size
     #       ^ We might access thread foreign storage here.
     # The other thread cannot possibly free this block as it's still alive.
     var f = cast[ptr FreeCell](p)
@@ -926,37 +1007,54 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
       dec a.occ, s
       untrackSize(s)
       sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case A)"
-      sysAssert(((cast[ByteAddress](p) and PageMask) - smallChunkOverhead()) %%
+      sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
                 s == 0, "rawDealloc 3")
       when not defined(gcDestructors):
-        #echo("setting to nil: ", $cast[ByteAddress](addr(f.zeroField)))
+        #echo("setting to nil: ", $cast[int](addr(f.zeroField)))
         sysAssert(f.zeroField != 0, "rawDealloc 1")
         f.zeroField = 0
-      f.next = c.freeList
-      c.freeList = f
       when overwriteFree:
         # set to 0xff to check for usage after free bugs:
         nimSetMem(cast[pointer](cast[int](p) +% sizeof(FreeCell)), -1'i32,
                 s -% sizeof(FreeCell))
-      # check if it is not in the freeSmallChunks[s] list:
-      if c.free < s:
-        # add it to the freeSmallChunks[s] array:
-        listAdd(a.freeSmallChunks[s div MemAlign], c)
-        inc(c.free, s)
+      let activeChunk = a.freeSmallChunks[s div MemAlign]
+      if activeChunk != nil and c != activeChunk:
+        # This pointer is not part of the active chunk, lend it out
+        #  and do not adjust the current chunk (same logic as compensateCounters.)
+        # Put the cell into the active chunk,
+        #  may prevent a queue of available chunks from forming in a.freeSmallChunks[s div MemAlign].
+        #  This queue would otherwise waste memory in the form of free cells until we return to those chunks.
+        f.next = activeChunk.freeList
+        activeChunk.freeList = f # lend the cell
+        inc(activeChunk.free, s) # By not adjusting the current chunk's capacity it is prevented from being freed
+        inc(activeChunk.foreignCells) # The cell is now considered foreign from the perspective of the active chunk
       else:
-        inc(c.free, s)
-        if c.free == SmallChunkSize-smallChunkOverhead():
-          listRemove(a.freeSmallChunks[s div MemAlign], c)
-          c.size = SmallChunkSize
-          freeBigChunk(a, cast[PBigChunk](c))
+        f.next = c.freeList
+        c.freeList = f
+        if c.free < s:
+          # The chunk could not have been active as it didn't have enough space to give
+          listAdd(a.freeSmallChunks[s div MemAlign], c)
+          inc(c.free, s)
+        else:
+          inc(c.free, s)
+          # Free only if the entire chunk is unused and there are no borrowed cells.
+          # If the chunk were to be freed while it references foreign cells,
+          #  the foreign chunks will leak memory and can never be freed.
+          if c.free == SmallChunkSize-smallChunkOverhead() and c.foreignCells == 0:
+            listRemove(a.freeSmallChunks[s div MemAlign], c)
+            c.size = SmallChunkSize
+            freeBigChunk(a, cast[PBigChunk](c))
     else:
+      when logAlloc: cprintf("dealloc(pointer_%p) # SMALL FROM %p CALLER %p\n", p, c.owner, addr(a))
+
       when defined(gcDestructors):
-        addToSharedFreeList(c, f)
-    sysAssert(((cast[ByteAddress](p) and PageMask) - smallChunkOverhead()) %%
+        addToSharedFreeList(c, f, s div MemAlign)
+    sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
                s == 0, "rawDealloc 2")
   else:
     # set to 0xff to check for usage after free bugs:
     when overwriteFree: nimSetMem(p, -1'i32, c.size -% bigChunkOverhead())
+    when logAlloc: cprintf("dealloc(pointer_%p) # BIG %p\n", p, c.owner)
     when defined(gcDestructors):
       if c.owner == addr(a):
         deallocBigChunk(a, cast[PBigChunk](c))
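The release rule applied in the owner branch above can be stated as a tiny predicate: a small chunk may go back to the page allocator only when all of its bytes are free again and it holds no cells lent from other chunks, since freeing it earlier would lose those foreign cells. A sketch with invented names, mirroring `c.free == SmallChunkSize-smallChunkOverhead() and c.foreignCells == 0`:

type ToySmall = object
  free, capacity, foreignCells: int

proc canReleaseChunk(c: ToySmall): bool =
  # completely unused and not referencing any borrowed (foreign) cells
  c.free == c.capacity and c.foreignCells == 0

echo canReleaseChunk(ToySmall(free: 4096, capacity: 4096, foreignCells: 0))  # true
echo canReleaseChunk(ToySmall(free: 4096, capacity: 4096, foreignCells: 2))  # false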
@@ -964,8 +1062,9 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
         addToSharedFreeListBigChunks(c.owner[], cast[PBigChunk](c))
     else:
       deallocBigChunk(a, cast[PBigChunk](c))
+
   sysAssert(allocInv(a), "rawDealloc: end")
-  when logAlloc: cprintf("dealloc(pointer_%p)\n", p)
+  #when logAlloc: cprintf("dealloc(pointer_%p)\n", p)
 
 when not defined(gcDestructors):
   proc isAllocatedPtr(a: MemRegion, p: pointer): bool =
@@ -974,9 +1073,9 @@ when not defined(gcDestructors):
       if not chunkUnused(c):
         if isSmallChunk(c):
           var c = cast[PSmallChunk](c)
-          var offset = (cast[ByteAddress](p) and (PageSize-1)) -%
+          var offset = (cast[int](p) and (PageSize-1)) -%
                       smallChunkOverhead()
-          result = (c.acc >% offset) and (offset %% c.size == 0) and
+          result = (c.acc.int >% offset) and (offset %% c.size == 0) and
             (cast[ptr FreeCell](p).zeroField >% 1)
         else:
           var c = cast[PBigChunk](c)
@@ -992,12 +1091,12 @@ when not defined(gcDestructors):
       if not chunkUnused(c):
         if isSmallChunk(c):
           var c = cast[PSmallChunk](c)
-          var offset = (cast[ByteAddress](p) and (PageSize-1)) -%
+          var offset = (cast[int](p) and (PageSize-1)) -%
                       smallChunkOverhead()
-          if c.acc >% offset:
-            sysAssert(cast[ByteAddress](addr(c.data)) +% offset ==
-                      cast[ByteAddress](p), "offset is not what you think it is")
-            var d = cast[ptr FreeCell](cast[ByteAddress](addr(c.data)) +%
+          if c.acc.int >% offset:
+            sysAssert(cast[int](addr(c.data)) +% offset ==
+                      cast[int](p), "offset is not what you think it is")
+            var d = cast[ptr FreeCell](cast[int](addr(c.data)) +%
                       offset -% (offset %% c.size))
             if d.zeroField >% 1:
               result = d
@@ -1024,7 +1123,7 @@ when not defined(gcDestructors):
 
 proc ptrSize(p: pointer): int =
   when not defined(gcDestructors):
-    var x = cast[pointer](cast[ByteAddress](p) -% sizeof(FreeCell))
+    var x = cast[pointer](cast[int](p) -% sizeof(FreeCell))
     var c = pageAddr(p)
     sysAssert(not chunkUnused(c), "ptrSize")
     result = c.size -% sizeof(FreeCell)
@@ -1042,7 +1141,7 @@ proc alloc(allocator: var MemRegion, size: Natural): pointer {.gcsafe.} =
     result = rawAlloc(allocator, size+sizeof(FreeCell))
     cast[ptr FreeCell](result).zeroField = 1 # mark it as used
     sysAssert(not isAllocatedPtr(allocator, result), "alloc")
-    result = cast[pointer](cast[ByteAddress](result) +% sizeof(FreeCell))
+    result = cast[pointer](cast[int](result) +% sizeof(FreeCell))
     track("alloc", result, size)
   else:
     result = rawAlloc(allocator, size)
@@ -1054,7 +1153,7 @@ proc alloc0(allocator: var MemRegion, size: Natural): pointer =
 proc dealloc(allocator: var MemRegion, p: pointer) =
   when not defined(gcDestructors):
     sysAssert(p != nil, "dealloc: p is nil")
-    var x = cast[pointer](cast[ByteAddress](p) -% sizeof(FreeCell))
+    var x = cast[pointer](cast[int](p) -% sizeof(FreeCell))
     sysAssert(x != nil, "dealloc: x is nil")
     sysAssert(isAccessible(allocator, x), "is not accessible")
     sysAssert(cast[ptr FreeCell](x).zeroField == 1, "dealloc: object header corrupted")
@@ -1087,7 +1186,7 @@ proc deallocOsPages(a: var MemRegion) =
       let (p, size) = it.chunks[i]
       when defined(debugHeapLinks):
         cprintf("owner %p; dealloc A: %p size: %ld; next: %p\n", addr(a),
-          it, it.size, next)
+          it, size, next)
       sysAssert size >= PageSize, "origSize too small"
       osDeallocPages(p, size)
     it = next
@@ -1115,7 +1214,7 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
       result = interiorAllocatedPtr(allocator, p)
 
     proc isAllocatedPtr*(p: pointer): bool =
-      let p = cast[pointer](cast[ByteAddress](p)-%ByteAddress(sizeof(Cell)))
+      let p = cast[pointer](cast[int](p)-%ByteAddress(sizeof(Cell)))
       result = isAllocatedPtr(allocator, p)
 
   proc deallocOsPages = deallocOsPages(allocator)
@@ -1135,7 +1234,7 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
   proc realloc0Impl(p: pointer, oldSize, newSize: Natural): pointer =
     result = realloc(allocator, p, newSize)
     if newSize > oldSize:
-      zeroMem(cast[pointer](cast[int](result) + oldSize), newSize - oldSize)
+      zeroMem(cast[pointer](cast[uint](result) + uint(oldSize)), newSize - oldSize)
 
   when false:
     proc countFreeMem(): int =
diff --git a/lib/system/ansi_c.nim b/lib/system/ansi_c.nim
index a8a4bd767..3098e17d6 100644
--- a/lib/system/ansi_c.nim
+++ b/lib/system/ansi_c.nim
@@ -65,7 +65,7 @@ elif defined(macosx) or defined(linux) or defined(freebsd) or
     SIGSEGV* = cint(11)
     SIGTERM* = cint(15)
     SIGPIPE* = cint(13)
-    SIG_DFL* = cast[CSighandlerT](0)
+    SIG_DFL* = CSighandlerT(nil)
 elif defined(haiku):
   const
     SIGABRT* = cint(6)
@@ -75,7 +75,7 @@ elif defined(haiku):
     SIGSEGV* = cint(11)
     SIGTERM* = cint(15)
     SIGPIPE* = cint(7)
-    SIG_DFL* = cast[CSighandlerT](0)
+    SIG_DFL* = CSighandlerT(nil)
 else:
   when defined(nimscript):
     {.error: "SIGABRT not ported to your platform".}
@@ -187,6 +187,9 @@ proc c_sprintf*(buf, frmt: cstring): cint {.
   importc: "sprintf", header: "<stdio.h>", varargs, noSideEffect.}
   # we use it only in a way that cannot lead to security issues
 
+proc c_snprintf*(buf: cstring, n: csize_t, frmt: cstring): cint {.
+  importc: "snprintf", header: "<stdio.h>", varargs, noSideEffect.}
+
 when defined(zephyr) and not defined(zephyrUseLibcMalloc):
   proc c_malloc*(size: csize_t): pointer {.
     importc: "k_malloc", header: "<kernel.h>".}
@@ -224,7 +227,7 @@ proc rawWriteString*(f: CFilePtr, s: cstring, length: int) {.compilerproc, nonRe
 
 proc rawWrite*(f: CFilePtr, s: cstring) {.compilerproc, nonReloadable, inline.} =
   # we cannot throw an exception here!
-  discard c_fwrite(s, 1, cast[csize_t](s.len), f)
+  discard c_fwrite(s, 1, c_strlen(s), f)
   discard c_fflush(f)
 
 {.pop.}
diff --git a/lib/system/arc.nim b/lib/system/arc.nim
index d8527e1e4..d001fcaa5 100644
--- a/lib/system/arc.nim
+++ b/lib/system/arc.nim
@@ -26,6 +26,9 @@ else:
     rcMask = 0b111
     rcShift = 3      # shift by rcShift to get the reference counter
 
+const
+  orcLeakDetector = defined(nimOrcLeakDetector)
+
 type
   RefHeader = object
     rc: int # the object header is now a single RC field.
@@ -36,9 +39,21 @@ type
                    # in O(1) without doubly linked lists
     when defined(nimArcDebug) or defined(nimArcIds):
       refId: int
+    when defined(gcOrc) and orcLeakDetector:
+      filename: cstring
+      line: int
 
   Cell = ptr RefHeader
 
+template setFrameInfo(c: Cell) =
+  when orcLeakDetector:
+    if framePtr != nil and framePtr.prev != nil:
+      c.filename = framePtr.prev.filename
+      c.line = framePtr.prev.line
+    else:
+      c.filename = nil
+      c.line = 0
+
 template head(p: pointer): Cell =
   cast[Cell](cast[int](p) -% sizeof(RefHeader))
 
@@ -57,6 +72,21 @@ elif defined(nimArcIds):
 
   const traceId = -1
 
+when defined(gcAtomicArc) and hasThreadSupport:
+  template decrement(cell: Cell): untyped =
+    discard atomicDec(cell.rc, rcIncrement)
+  template increment(cell: Cell): untyped =
+    discard atomicInc(cell.rc, rcIncrement)
+  template count(x: Cell): untyped =
+    atomicLoadN(x.rc.addr, ATOMIC_ACQUIRE) shr rcShift
+else:
+  template decrement(cell: Cell): untyped =
+    dec(cell.rc, rcIncrement)
+  template increment(cell: Cell): untyped =
+    inc(cell.rc, rcIncrement)
+  template count(x: Cell): untyped =
+    x.rc shr rcShift
+
 proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} =
   let hdrSize = align(sizeof(RefHeader), alignment)
   let s = size + hdrSize
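The `decrement`/`increment`/`count` templates above operate on a counter whose low `rcShift` bits are reserved as flag bits, so every update moves in steps of `rcIncrement` and the actual count is read back with a shift. A minimal non-atomic sketch of that representation (a plain module-level counter, not the real object header; a fresh object starts at a stored count of 0, meaning one live reference):

const
  rcShift = 3
  rcIncrement = 1 shl rcShift

var rc = 0                       # stored count 0 = one live reference

proc incRef() = inc rc, rcIncrement

proc decRefIsLast(): bool =
  if rc shr rcShift == 0:        # count() == 0: this was the last reference
    result = true
  else:
    dec rc, rcIncrement          # drop one reference, keep the low flag bits intact
    result = false

incRef()
echo decRefIsLast()   # false: one extra reference existed
echo decRefIsLast()   # true: the initial reference is gone, destroy the object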
@@ -69,9 +99,10 @@ proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} =
     atomicInc gRefId
     if head(result).refId == traceId:
       writeStackTrace()
-      cfprintf(cstderr, "[nimNewObj] %p %ld\n", result, head(result).rc shr rcShift)
+      cfprintf(cstderr, "[nimNewObj] %p %ld\n", result, head(result).count)
   when traceCollector:
     cprintf("[Allocated] %p result: %p\n", result -! sizeof(RefHeader), result)
+  setFrameInfo head(result)
 
 proc nimNewObjUninit(size, alignment: int): pointer {.compilerRtl.} =
   # Same as 'newNewObj' but do not initialize the memory to zero.
@@ -90,21 +121,34 @@ proc nimNewObjUninit(size, alignment: int): pointer {.compilerRtl.} =
     atomicInc gRefId
     if head(result).refId == traceId:
       writeStackTrace()
-      cfprintf(cstderr, "[nimNewObjUninit] %p %ld\n", result, head(result).rc shr rcShift)
+      cfprintf(cstderr, "[nimNewObjUninit] %p %ld\n", result, head(result).count)
 
   when traceCollector:
     cprintf("[Allocated] %p result: %p\n", result -! sizeof(RefHeader), result)
+  setFrameInfo head(result)
 
 proc nimDecWeakRef(p: pointer) {.compilerRtl, inl.} =
-  dec head(p).rc, rcIncrement
+  decrement head(p)
+
+proc isUniqueRef*[T](x: ref T): bool {.inline.} =
+  ## Returns true if the object `x` points to is uniquely referenced. Such
+  ## an object can potentially be passed over to a different thread safely,
+  ## if great care is taken. This queries the internal reference count of
+  ## the object, which is subject to lots of optimizations! In other words
+  ## the value of `isUniqueRef` can depend on the compiler version and
+  ## optimizer settings used.
+  ## Nevertheless it can be a very valuable debugging tool and can be used
+  ## to specify the constraints of a threading-related API
+  ## via `assert isUniqueRef(x)`.
+  head(cast[pointer](x)).rc == 0
 
 proc nimIncRef(p: pointer) {.compilerRtl, inl.} =
   when defined(nimArcDebug):
     if head(p).refId == traceId:
       writeStackTrace()
-      cfprintf(cstderr, "[IncRef] %p %ld\n", p, head(p).rc shr rcShift)
+      cfprintf(cstderr, "[IncRef] %p %ld\n", p, head(p).count)
 
-  inc head(p).rc, rcIncrement
+  increment head(p)
   when traceCollector:
     cprintf("[INCREF] %p\n", head(p))
 
@@ -173,17 +217,24 @@ proc nimDecRefIsLast(p: pointer): bool {.compilerRtl, inl.} =
     when defined(nimArcDebug):
       if cell.refId == traceId:
         writeStackTrace()
-        cfprintf(cstderr, "[DecRef] %p %ld\n", p, cell.rc shr rcShift)
-
-    if (cell.rc and not rcMask) == 0:
-      result = true
-      when traceCollector:
-        cprintf("[ABOUT TO DESTROY] %p\n", cell)
+        cfprintf(cstderr, "[DecRef] %p %ld\n", p, cell.count)
+
+    when defined(gcAtomicArc) and hasThreadSupport:
+      # `atomicDec` returns the new value
+      if atomicDec(cell.rc, rcIncrement) == -rcIncrement:
+        result = true
+        when traceCollector:
+          cprintf("[ABOUT TO DESTROY] %p\n", cell)
     else:
-      dec cell.rc, rcIncrement
-      # According to Lins it's correct to do nothing else here.
-      when traceCollector:
-        cprintf("[DeCREF] %p\n", cell)
+      if cell.count == 0:
+        result = true
+        when traceCollector:
+          cprintf("[ABOUT TO DESTROY] %p\n", cell)
+      else:
+        decrement cell
+        # According to Lins it's correct to do nothing else here.
+        when traceCollector:
+          cprintf("[DECREF] %p\n", cell)
 
 proc GC_unref*[T](x: ref T) =
   ## New runtime only supports this operation for 'ref T'.
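For the atomic branch of `nimDecRefIsLast` above: `atomicDec` returns the new counter value, so the thread whose decrement started from a stored count of 0 is the unique last owner. The same test sketched with `std/atomics`, where `fetchSub` returns the previous value instead; illustrative only:

import std/atomics

var rc: Atomic[int]
rc.store(1)                      # stored count 1 = two live references

proc decRefIsLast(): bool =
  # previous value 0 means we just dropped the last reference
  rc.fetchSub(1, moAcquireRelease) == 0

echo decRefIsLast()   # false: another reference was still alive
echo decRefIsLast()   # true: destroy the object now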
@@ -196,16 +247,21 @@ proc GC_ref*[T](x: ref T) =
 
 when not defined(gcOrc):
   template GC_fullCollect* =
-    ## Forces a full garbage collection pass. With `--gc:arc` a nop.
+    ## Forces a full garbage collection pass. With `--mm:arc` a nop.
     discard
 
 template setupForeignThreadGc* =
-  ## With `--gc:arc` a nop.
+  ## With `--mm:arc` a nop.
   discard
 
 template tearDownForeignThreadGc* =
-  ## With `--gc:arc` a nop.
+  ## With `--mm:arc` a nop.
   discard
 
-proc isObjDisplayCheck(source: PNimTypeV2, targetDepth: int16, token: uint32): bool {.compilerRtl, inline.} =
+proc isObjDisplayCheck(source: PNimTypeV2, targetDepth: int16, token: uint32): bool {.compilerRtl, inl.} =
   result = targetDepth <= source.depth and source.display[targetDepth] == token
+
+when defined(gcDestructors):
+  proc nimGetVTable(p: pointer, index: int): pointer
+        {.compilerRtl, inline, raises: [].} =
+    result = cast[ptr PNimTypeV2](p).vTable[index]
diff --git a/lib/system/arithm.nim b/lib/system/arithm.nim
deleted file mode 100644
index 158f40177..000000000
--- a/lib/system/arithm.nim
+++ /dev/null
@@ -1,425 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-
-# simple integer arithmetic with overflow checking
-
-proc raiseOverflow {.compilerproc, noinline.} =
-  # a single proc to reduce code size to a minimum
-  sysFatal(OverflowDefect, "over- or underflow")
-
-proc raiseDivByZero {.compilerproc, noinline.} =
-  sysFatal(DivByZeroDefect, "division by zero")
-
-when defined(builtinOverflow):
-  # Builtin compiler functions for improved performance
-  when sizeof(clong) == 8:
-    proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_saddl_overflow", nodecl, nosideeffect.}
-
-    proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_ssubl_overflow", nodecl, nosideeffect.}
-
-    proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_smull_overflow", nodecl, nosideeffect.}
-
-  elif sizeof(clonglong) == 8:
-    proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_saddll_overflow", nodecl, nosideeffect.}
-
-    proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_ssubll_overflow", nodecl, nosideeffect.}
-
-    proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-      importc: "__builtin_smulll_overflow", nodecl, nosideeffect.}
-
-  when sizeof(int) == 8:
-    proc addIntOverflow(a, b: int, c: var int): bool {.inline.} =
-      addInt64Overflow(a, b, c)
-
-    proc subIntOverflow(a, b: int, c: var int): bool {.inline.} =
-      subInt64Overflow(a, b, c)
-
-    proc mulIntOverflow(a, b: int, c: var int): bool {.inline.} =
-      mulInt64Overflow(a, b, c)
-
-  elif sizeof(int) == 4 and sizeof(cint) == 4:
-    proc addIntOverflow(a, b: int, c: var int): bool {.
-      importc: "__builtin_sadd_overflow", nodecl, nosideeffect.}
-
-    proc subIntOverflow(a, b: int, c: var int): bool {.
-      importc: "__builtin_ssub_overflow", nodecl, nosideeffect.}
-
-    proc mulIntOverflow(a, b: int, c: var int): bool {.
-      importc: "__builtin_smul_overflow", nodecl, nosideeffect.}
-
-  proc addInt64(a, b: int64): int64 {.compilerproc, inline.} =
-    if addInt64Overflow(a, b, result):
-      raiseOverflow()
-
-  proc subInt64(a, b: int64): int64 {.compilerproc, inline.} =
-    if subInt64Overflow(a, b, result):
-      raiseOverflow()
-
-  proc mulInt64(a, b: int64): int64 {.compilerproc, inline.} =
-    if mulInt64Overflow(a, b, result):
-      raiseOverflow()
-else:
-  proc addInt64(a, b: int64): int64 {.compilerproc, inline.} =
-    result = a +% b
-    if (result xor a) >= int64(0) or (result xor b) >= int64(0):
-      return result
-    raiseOverflow()
-
-  proc subInt64(a, b: int64): int64 {.compilerproc, inline.} =
-    result = a -% b
-    if (result xor a) >= int64(0) or (result xor not b) >= int64(0):
-      return result
-    raiseOverflow()
-
-  #
-  # This code has been inspired by Python's source code.
-  # The native int product x*y is either exactly right or *way* off, being
-  # just the last n bits of the true product, where n is the number of bits
-  # in an int (the delivered product is the true product plus i*2**n for
-  # some integer i).
-  #
-  # The native float64 product x*y is subject to three
-  # rounding errors: on a sizeof(int)==8 box, each cast to double can lose
-  # info, and even on a sizeof(int)==4 box, the multiplication can lose info.
-  # But, unlike the native int product, it's not in *range* trouble:  even
-  # if sizeof(int)==32 (256-bit ints), the product easily fits in the
-  # dynamic range of a float64. So the leading 50 (or so) bits of the float64
-  # product are correct.
-  #
-  # We check these two ways against each other, and declare victory if they're
-  # approximately the same. Else, because the native int product is the only
-  # one that can lose catastrophic amounts of information, it's the native int
-  # product that must have overflowed.
-  #
-  proc mulInt64(a, b: int64): int64 {.compilerproc.} =
-    var
-      resAsFloat, floatProd: float64
-    result = a *% b
-    floatProd = toBiggestFloat(a) # conversion
-    floatProd = floatProd * toBiggestFloat(b)
-    resAsFloat = toBiggestFloat(result)
-
-    # Fast path for normal case: small multiplicands, and no info
-    # is lost in either method.
-    if resAsFloat == floatProd: return result
-
-    # Somebody somewhere lost info. Close enough, or way off? Note
-    # that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
-    # The difference either is or isn't significant compared to the
-    # true value (of which floatProd is a good approximation).
-
-    # abs(diff)/abs(prod) <= 1/32 iff
-    #   32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
-    if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
-      return result
-    raiseOverflow()
-
-proc negInt64(a: int64): int64 {.compilerproc, inline.} =
-  if a != low(int64): return -a
-  raiseOverflow()
-
-proc absInt64(a: int64): int64 {.compilerproc, inline.} =
-  if a != low(int64):
-    if a >= 0: return a
-    else: return -a
-  raiseOverflow()
-
-proc divInt64(a, b: int64): int64 {.compilerproc, inline.} =
-  if b == int64(0):
-    raiseDivByZero()
-  if a == low(int64) and b == int64(-1):
-    raiseOverflow()
-  return a div b
-
-proc modInt64(a, b: int64): int64 {.compilerproc, inline.} =
-  if b == int64(0):
-    raiseDivByZero()
-  return a mod b
-
-proc absInt(a: int): int {.compilerproc, inline.} =
-  if a != low(int):
-    if a >= 0: return a
-    else: return -a
-  raiseOverflow()
-
-const
-  asmVersion = defined(i386) and (defined(vcc) or defined(wcc) or
-               defined(dmc) or defined(gcc) or defined(llvm_gcc))
-    # my Version of Borland C++Builder does not have
-    # tasm32, which is needed for assembler blocks
-    # this is why Borland is not included in the 'when'
-
-when asmVersion and not defined(gcc) and not defined(llvm_gcc):
-  # assembler optimized versions for compilers that
-  # have an intel syntax assembler:
-  proc addInt(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-    # a in eax, and b in edx
-    asm """
-        mov eax, ecx
-        add eax, edx
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-        ret
-    """
-
-  proc subInt(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-    asm """
-        mov eax, ecx
-        sub eax, edx
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-        ret
-    """
-
-  proc negInt(a: int): int {.compilerproc, asmNoStackFrame.} =
-    asm """
-        mov eax, ecx
-        neg eax
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-        ret
-    """
-
-  proc divInt(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-    asm """
-        test  edx, edx
-        jne   L_NOT_ZERO
-        call  `raiseDivByZero`
-      L_NOT_ZERO:
-        cmp   ecx, 0x80000000
-        jne   L_DO_DIV
-        cmp   edx, -1
-        jne   L_DO_DIV
-        call  `raiseOverflow`
-      L_DO_DIV:
-        mov   eax, ecx
-        mov   ecx, edx
-        cdq
-        idiv  ecx
-        ret
-    """
-
-  proc modInt(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-    asm """
-        test  edx, edx
-        jne   L_NOT_ZERO
-        call  `raiseDivByZero`
-      L_NOT_ZERO:
-        cmp   ecx, 0x80000000
-        jne   L_DO_DIV
-        cmp   edx, -1
-        jne   L_DO_DIV
-        call  `raiseOverflow`
-      L_DO_DIV:
-        mov   eax, ecx
-        mov   ecx, edx
-        cdq
-        idiv  ecx
-        mov   eax, edx
-        ret
-    """
-
-  proc mulInt(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-    asm """
-        mov eax, ecx
-        mov ecx, edx
-        xor edx, edx
-        imul ecx
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-        ret
-    """
-
-elif false: # asmVersion and (defined(gcc) or defined(llvm_gcc)):
-  proc addInt(a, b: int): int {.compilerproc, inline.} =
-    # don't use a pure proc here!
-    asm """
-      "addl %%ecx, %%eax\n"
-      "jno 1\n"
-      "call _raiseOverflow\n"
-      "1: \n"
-      :"=a"(`result`)
-      :"a"(`a`), "c"(`b`)
-    """
-    #".intel_syntax noprefix"
-    #/* Intel syntax here */
-    #".att_syntax"
-
-  proc subInt(a, b: int): int {.compilerproc, inline.} =
-    asm """ "subl %%ecx,%%eax\n"
-            "jno 1\n"
-            "call _raiseOverflow\n"
-            "1: \n"
-           :"=a"(`result`)
-           :"a"(`a`), "c"(`b`)
-    """
-
-  proc mulInt(a, b: int): int {.compilerproc, inline.} =
-    asm """  "xorl %%edx, %%edx\n"
-             "imull %%ecx\n"
-             "jno 1\n"
-             "call _raiseOverflow\n"
-             "1: \n"
-            :"=a"(`result`)
-            :"a"(`a`), "c"(`b`)
-            :"%edx"
-    """
-
-  proc negInt(a: int): int {.compilerproc, inline.} =
-    asm """ "negl %%eax\n"
-            "jno 1\n"
-            "call _raiseOverflow\n"
-            "1: \n"
-           :"=a"(`result`)
-           :"a"(`a`)
-    """
-
-  proc divInt(a, b: int): int {.compilerproc, inline.} =
-    asm """  "xorl %%edx, %%edx\n"
-             "idivl %%ecx\n"
-             "jno 1\n"
-             "call _raiseOverflow\n"
-             "1: \n"
-            :"=a"(`result`)
-            :"a"(`a`), "c"(`b`)
-            :"%edx"
-    """
-
-  proc modInt(a, b: int): int {.compilerproc, inline.} =
-    asm """  "xorl %%edx, %%edx\n"
-             "idivl %%ecx\n"
-             "jno 1\n"
-             "call _raiseOverflow\n"
-             "1: \n"
-             "movl %%edx, %%eax"
-            :"=a"(`result`)
-            :"a"(`a`), "c"(`b`)
-            :"%edx"
-    """
-
-when not declared(addInt) and defined(builtinOverflow):
-  proc addInt(a, b: int): int {.compilerproc, inline.} =
-    if addIntOverflow(a, b, result):
-      raiseOverflow()
-
-when not declared(subInt) and defined(builtinOverflow):
-  proc subInt(a, b: int): int {.compilerproc, inline.} =
-    if subIntOverflow(a, b, result):
-      raiseOverflow()
-
-when not declared(mulInt) and defined(builtinOverflow):
-  proc mulInt(a, b: int): int {.compilerproc, inline.} =
-    if mulIntOverflow(a, b, result):
-      raiseOverflow()
-
-# Platform independent versions of the above (slower!)
-when not declared(addInt):
-  proc addInt(a, b: int): int {.compilerproc, inline.} =
-    result = a +% b
-    if (result xor a) >= 0 or (result xor b) >= 0:
-      return result
-    raiseOverflow()
-
-when not declared(subInt):
-  proc subInt(a, b: int): int {.compilerproc, inline.} =
-    result = a -% b
-    if (result xor a) >= 0 or (result xor not b) >= 0:
-      return result
-    raiseOverflow()
-
-when not declared(negInt):
-  proc negInt(a: int): int {.compilerproc, inline.} =
-    if a != low(int): return -a
-    raiseOverflow()
-
-when not declared(divInt):
-  proc divInt(a, b: int): int {.compilerproc, inline.} =
-    if b == 0:
-      raiseDivByZero()
-    if a == low(int) and b == -1:
-      raiseOverflow()
-    return a div b
-
-when not declared(modInt):
-  proc modInt(a, b: int): int {.compilerproc, inline.} =
-    if b == 0:
-      raiseDivByZero()
-    return a mod b
-
-when not declared(mulInt):
-  #
-  # This code has been inspired by Python's source code.
-  # The native int product x*y is either exactly right or *way* off, being
-  # just the last n bits of the true product, where n is the number of bits
-  # in an int (the delivered product is the true product plus i*2**n for
-  # some integer i).
-  #
-  # The native float64 product x*y is subject to three
-  # rounding errors: on a sizeof(int)==8 box, each cast to double can lose
-  # info, and even on a sizeof(int)==4 box, the multiplication can lose info.
-  # But, unlike the native int product, it's not in *range* trouble:  even
-  # if sizeof(int)==32 (256-bit ints), the product easily fits in the
-  # dynamic range of a float64. So the leading 50 (or so) bits of the float64
-  # product are correct.
-  #
-  # We check these two ways against each other, and declare victory if
-  # they're approximately the same. Else, because the native int product is
-  # the only one that can lose catastrophic amounts of information, it's the
-  # native int product that must have overflowed.
-  #
-  proc mulInt(a, b: int): int {.compilerproc.} =
-    var
-      resAsFloat, floatProd: float
-
-    result = a *% b
-    floatProd = toFloat(a) * toFloat(b)
-    resAsFloat = toFloat(result)
-
-    # Fast path for normal case: small multiplicands, and no info
-    # is lost in either method.
-    if resAsFloat == floatProd: return result
-
-    # Somebody somewhere lost info. Close enough, or way off? Note
-    # that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
-    # The difference either is or isn't significant compared to the
-    # true value (of which floatProd is a good approximation).
-
-    # abs(diff)/abs(prod) <= 1/32 iff
-    #   32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
-    if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
-      return result
-    raiseOverflow()
-
-# We avoid setting the FPU control word here for compatibility with libraries
-# written in other languages.
-
-proc raiseFloatInvalidOp {.compilerproc, noinline.} =
-  sysFatal(FloatInvalidOpDefect, "FPU operation caused a NaN result")
-
-proc nanCheck(x: float64) {.compilerproc, inline.} =
-  if x != x: raiseFloatInvalidOp()
-
-proc raiseFloatOverflow(x: float64) {.compilerproc, noinline.} =
-  if x > 0.0:
-    sysFatal(FloatOverflowDefect, "FPU operation caused an overflow")
-  else:
-    sysFatal(FloatUnderflowDefect, "FPU operation caused an underflow")
-
-proc infCheck(x: float64) {.compilerproc, inline.} =
-  if x != 0.0 and x*0.5 == x: raiseFloatOverflow(x)
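The deleted mulInt above detects overflow by comparing the wrapped integer product against the float64 product, whose leading ~50 bits are always exact; the removed nanCheck/infCheck rely on the identities `x != x` (true only for NaN) and `x != 0.0 and x*0.5 == x` (true only for ±Inf under default rounding). A standalone sketch of the same heuristic, illustrative only and not part of this patch:

```nim
# Mirrors the float-product heuristic of the removed mulInt.
proc mulWouldOverflow(a, b: int): bool =
  let wrapped = a *% b                 # wrapping (unsigned) product
  let floatProd = float(a) * float(b)  # leading ~50 bits are exact
  if float(wrapped) == floatProd:
    return false                       # fast path: nothing was lost
  # Accept up to 1/32 relative error, i.e. at least 5 correct leading bits.
  result = 32.0 * abs(float(wrapped) - floatProd) > abs(floatProd)

doAssert not mulWouldOverflow(1_000_000, 1_000)
doAssert mulWouldOverflow(high(int), 2)
```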
diff --git a/lib/system/arithmetics.nim b/lib/system/arithmetics.nim
index d05aaaa5b..e229a0f4b 100644
--- a/lib/system/arithmetics.nim
+++ b/lib/system/arithmetics.nim
@@ -93,7 +93,7 @@ proc `*`*(x, y: int16): int16 {.magic: "MulI", noSideEffect.}
 proc `*`*(x, y: int32): int32 {.magic: "MulI", noSideEffect.}
 proc `*`*(x, y: int64): int64 {.magic: "MulI", noSideEffect.}
 
-proc `div`*(x, y: int): int {.magic: "DivI", noSideEffect.} = 
+proc `div`*(x, y: int): int {.magic: "DivI", noSideEffect.} =
   ## Computes the integer division.
   ##
   ## This is roughly the same as `math.trunc(x/y).int`.
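Worth remembering when reading callers: `div` truncates toward zero rather than flooring, which only matters once an operand is negative. A quick check, illustrative only and not part of the patch:

```nim
doAssert 7 div 2 == 3
doAssert -7 div 2 == -3   # truncates toward zero; floor division would give -4
doAssert 7 div -2 == -3
```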
@@ -403,107 +403,3 @@ proc `%%`*(x, y: int8): int8 {.inline.}   = cast[int8](cast[uint8](x) mod cast[u
 proc `%%`*(x, y: int16): int16 {.inline.} = cast[int16](cast[uint16](x) mod cast[uint16](y))
 proc `%%`*(x, y: int32): int32 {.inline.} = cast[int32](cast[uint32](x) mod cast[uint32](y))
 proc `%%`*(x, y: int64): int64 {.inline.} = cast[int64](cast[uint64](x) mod cast[uint64](y))
-
-when not defined(nimPreviewSlimSystem):
-  when defined(nimNoZeroExtendMagic):
-    proc ze*(x: int8): int {.deprecated.} =
-      ## zero extends a smaller integer type to `int`. This treats `x` as
-      ## unsigned.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-      cast[int](uint(cast[uint8](x)))
-
-    proc ze*(x: int16): int {.deprecated.} =
-      ## zero extends a smaller integer type to `int`. This treats `x` as
-      ## unsigned.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-      cast[int](uint(cast[uint16](x)))
-
-    proc ze64*(x: int8): int64 {.deprecated.} =
-      ## zero extends a smaller integer type to `int64`. This treats `x` as
-      ## unsigned.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-      cast[int64](uint64(cast[uint8](x)))
-
-    proc ze64*(x: int16): int64 {.deprecated.} =
-      ## zero extends a smaller integer type to `int64`. This treats `x` as
-      ## unsigned.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-      cast[int64](uint64(cast[uint16](x)))
-
-    proc ze64*(x: int32): int64 {.deprecated.} =
-      ## zero extends a smaller integer type to `int64`. This treats `x` as
-      ## unsigned.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-      cast[int64](uint64(cast[uint32](x)))
-
-    proc ze64*(x: int): int64 {.deprecated.} =
-      ## zero extends a smaller integer type to `int64`. This treats `x` as
-      ## unsigned. Does nothing if the size of an `int` is the same as `int64`.
-      ## (This is the case on 64 bit processors.)
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-      cast[int64](uint64(cast[uint](x)))
-
-    proc toU8*(x: int): int8 {.deprecated.} =
-      ## treats `x` as unsigned and converts it to a byte by taking the last 8 bits
-      ## from `x`.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-      cast[int8](x)
-
-    proc toU16*(x: int): int16 {.deprecated.} =
-      ## treats `x` as unsigned and converts it to an `int16` by taking the last
-      ## 16 bits from `x`.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-      cast[int16](x)
-
-    proc toU32*(x: int64): int32 {.deprecated.} =
-      ## treats `x` as unsigned and converts it to an `int32` by taking the
-      ## last 32 bits from `x`.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-      cast[int32](x)
-
-  elif not defined(js):
-    proc ze*(x: int8): int {.magic: "Ze8ToI", noSideEffect, deprecated.}
-      ## zero extends a smaller integer type to `int`. This treats `x` as
-      ## unsigned.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-    proc ze*(x: int16): int {.magic: "Ze16ToI", noSideEffect, deprecated.}
-      ## zero extends a smaller integer type to `int`. This treats `x` as
-      ## unsigned.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-    proc ze64*(x: int8): int64 {.magic: "Ze8ToI64", noSideEffect, deprecated.}
-      ## zero extends a smaller integer type to `int64`. This treats `x` as
-      ## unsigned.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-    proc ze64*(x: int16): int64 {.magic: "Ze16ToI64", noSideEffect, deprecated.}
-      ## zero extends a smaller integer type to `int64`. This treats `x` as
-      ## unsigned.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-    proc ze64*(x: int32): int64 {.magic: "Ze32ToI64", noSideEffect, deprecated.}
-      ## zero extends a smaller integer type to `int64`. This treats `x` as
-      ## unsigned.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-    proc ze64*(x: int): int64 {.magic: "ZeIToI64", noSideEffect, deprecated.}
-      ## zero extends a smaller integer type to `int64`. This treats `x` as
-      ## unsigned. Does nothing if the size of an `int` is the same as `int64`.
-      ## (This is the case on 64 bit processors.)
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-    proc toU8*(x: int): int8 {.magic: "ToU8", noSideEffect, deprecated.}
-      ## treats `x` as unsigned and converts it to a byte by taking the last 8 bits
-      ## from `x`.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-    proc toU16*(x: int): int16 {.magic: "ToU16", noSideEffect, deprecated.}
-      ## treats `x` as unsigned and converts it to an `int16` by taking the last
-      ## 16 bits from `x`.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
-
-    proc toU32*(x: int64): int32 {.magic: "ToU32", noSideEffect, deprecated.}
-      ## treats `x` as unsigned and converts it to an `int32` by taking the
-      ## last 32 bits from `x`.
-      ## **Deprecated since version 0.19.9**: Use unsigned integers instead.
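The removed bodies double as a migration guide: each deprecated helper is a thin wrapper around a plain cast, so call sites can switch to unsigned integers directly. A sketch of the replacements, with hypothetical values not taken from this patch:

```nim
let x = int8(-1)
doAssert int(cast[uint8](x)) == 255      # was ze(x)
doAssert int64(cast[uint8](x)) == 255    # was ze64(x)
doAssert cast[int8](300) == 44           # was toU8(300)
doAssert cast[int16](70000) == 4464      # was toU16(70000)
```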
diff --git a/lib/system/assign.nim b/lib/system/assign.nim
index 20b854107..9f4cbc0fe 100644
--- a/lib/system/assign.nim
+++ b/lib/system/assign.nim
@@ -15,8 +15,8 @@ proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) {.benign.
 proc genericAssignAux(dest, src: pointer, n: ptr TNimNode,
                       shallow: bool) {.benign.} =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   case n.kind
   of nkSlot:
     genericAssignAux(cast[pointer](d +% n.offset),
@@ -56,8 +56,8 @@ template deepSeqAssignImpl(operation, additionalArg) {.dirty.} =
 
 proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   sysAssert(mt != nil, "genericAssignAux 2")
   case mt.kind
   of tyString:
@@ -89,17 +89,17 @@ proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) =
         var ss = nimNewSeqOfCap(mt, seq.len)
         cast[PGenericSeq](ss).len = seq.len
         unsureAsgnRef(x, ss)
-        var dst = cast[ByteAddress](cast[PPointer](dest)[])
+        var dst = cast[int](cast[PPointer](dest)[])
         copyMem(cast[pointer](dst +% align(GenericSeqSize, mt.base.align)),
-                cast[pointer](cast[ByteAddress](s2) +% align(GenericSeqSize, mt.base.align)),
+                cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align)),
                 seq.len *% mt.base.size)
       else:
         unsureAsgnRef(x, newSeq(mt, seq.len))
-        var dst = cast[ByteAddress](cast[PPointer](dest)[])
+        var dst = cast[int](cast[PPointer](dest)[])
         for i in 0..seq.len-1:
           genericAssignAux(
             cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size ),
-            cast[pointer](cast[ByteAddress](s2) +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size ),
+            cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size ),
             mt.base, shallow)
   of tyObject:
     var it = mt.base
@@ -181,15 +181,15 @@ proc genericSeqAssign(dest, src: pointer, mt: PNimType) {.compilerproc.} =
 proc genericAssignOpenArray(dest, src: pointer, len: int,
                             mt: PNimType) {.compilerproc.} =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   for i in 0..len-1:
     genericAssign(cast[pointer](d +% i *% mt.base.size),
                   cast[pointer](s +% i *% mt.base.size), mt.base)
 
 proc objectInit(dest: pointer, typ: PNimType) {.compilerproc, benign.}
 proc objectInitAux(dest: pointer, n: ptr TNimNode) {.benign.} =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case n.kind
   of nkNone: sysAssert(false, "objectInitAux")
   of nkSlot: objectInit(cast[pointer](d +% n.offset), n.typ)
@@ -203,7 +203,7 @@ proc objectInitAux(dest: pointer, n: ptr TNimNode) {.benign.} =
 proc objectInit(dest: pointer, typ: PNimType) =
   # the generic init proc that takes care of initialization of complex
   # objects on the stack or heap
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case typ.kind
   of tyObject:
     # iterate over any structural type
@@ -226,7 +226,7 @@ proc objectInit(dest: pointer, typ: PNimType) =
 
 proc genericReset(dest: pointer, mt: PNimType) {.compilerproc, benign.}
 proc genericResetAux(dest: pointer, n: ptr TNimNode) =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case n.kind
   of nkNone: sysAssert(false, "genericResetAux")
   of nkSlot: genericReset(cast[pointer](d +% n.offset), n.typ)
@@ -238,7 +238,7 @@ proc genericResetAux(dest: pointer, n: ptr TNimNode) =
     zeroMem(cast[pointer](d +% n.offset), n.typ.size)
 
 proc genericReset(dest: pointer, mt: PNimType) =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   sysAssert(mt != nil, "genericReset 2")
   case mt.kind
   of tyRef:
@@ -275,10 +275,12 @@ proc genericReset(dest: pointer, mt: PNimType) =
 
 proc selectBranch(discVal, L: int,
                   a: ptr array[0x7fff, ptr TNimNode]): ptr TNimNode =
-  result = a[L] # a[L] contains the ``else`` part (but may be nil)
   if discVal <% L:
-    let x = a[discVal]
-    if x != nil: result = x
+    result = a[discVal]
+    if result == nil:
+      result = a[L]
+  else:
+    result = a[L] # a[L] contains the ``else`` part (but may be nil)
 
 proc FieldDiscriminantCheck(oldDiscVal, newDiscVal: int,
                             a: ptr array[0x7fff, ptr TNimNode],
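The selectBranch rewrite a few lines up keeps the old semantics but reads more directly: look up the branch registered for the discriminant value and fall back to the `else` slot stored at index L; out-of-range values go straight to the `else` slot. A simplified, self-contained model of that lookup (illustrative only; the real table holds ptr TNimNode entries and compares unsigned via `<%`):

```nim
proc selectBranchModel(discVal, L: int; a: openArray[int]): int =
  if discVal < L:
    result = a[discVal]
    if result == 0:    # no branch registered for this value
      result = a[L]    # fall back to the else part (may itself be 0)
  else:
    result = a[L]

let table = [10, 0, 30, 99]                     # branches for 0..2, else at index 3
doAssert selectBranchModel(0, 3, table) == 10
doAssert selectBranchModel(1, 3, table) == 99   # unregistered value -> else
doAssert selectBranchModel(7, 3, table) == 99   # out of range -> else
```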
diff --git a/lib/system/cellseqs_v1.nim b/lib/system/cellseqs_v1.nim
index 1952491b3..1a305aa42 100644
--- a/lib/system/cellseqs_v1.nim
+++ b/lib/system/cellseqs_v1.nim
@@ -16,18 +16,21 @@ type
     d: PCellArray
 
 proc contains(s: CellSeq, c: PCell): bool {.inline.} =
-  for i in 0 .. s.len-1:
-    if s.d[i] == c: return true
+  for i in 0 ..< s.len:
+    if s.d[i] == c:
+      return true
   return false
 
+proc resize(s: var CellSeq) =
+  s.cap = s.cap * 3 div 2
+  let d = cast[PCellArray](alloc(s.cap * sizeof(PCell)))
+  copyMem(d, s.d, s.len * sizeof(PCell))
+  dealloc(s.d)
+  s.d = d
+
 proc add(s: var CellSeq, c: PCell) {.inline.} =
   if s.len >= s.cap:
-    s.cap = s.cap * 3 div 2
-    var d = cast[PCellArray](alloc(s.cap * sizeof(PCell)))
-    copyMem(d, s.d, s.len * sizeof(PCell))
-    dealloc(s.d)
-    s.d = d
-    # XXX: realloc?
+    resize(s)
   s.d[s.len] = c
   inc(s.len)
 
diff --git a/lib/system/cellseqs_v2.nim b/lib/system/cellseqs_v2.nim
index 27be48d78..c6c7b1a8e 100644
--- a/lib/system/cellseqs_v2.nim
+++ b/lib/system/cellseqs_v2.nim
@@ -16,20 +16,17 @@ type
     len, cap: int
     d: CellArray[T]
 
-proc add[T](s: var CellSeq[T], c: T; t: PNimTypeV2) {.inline.} =
+proc resize[T](s: var CellSeq[T]) =
+  s.cap = s.cap * 3 div 2
+  var newSize = s.cap * sizeof(CellTuple[T])
+  when compileOption("threads"):
+    s.d = cast[CellArray[T]](reallocShared(s.d, newSize))
+  else:
+    s.d = cast[CellArray[T]](realloc(s.d, newSize))
+
+proc add[T](s: var CellSeq[T], c: T, t: PNimTypeV2) {.inline.} =
   if s.len >= s.cap:
-    s.cap = s.cap * 3 div 2
-    when compileOption("threads"):
-      var d = cast[CellArray[T]](allocShared(uint(s.cap * sizeof(CellTuple[T]))))
-    else:
-      var d = cast[CellArray[T]](alloc(s.cap * sizeof(CellTuple[T])))
-    copyMem(d, s.d, s.len * sizeof(CellTuple[T]))
-    when compileOption("threads"):
-      deallocShared(s.d)
-    else:
-      dealloc(s.d)
-    s.d = d
-    # XXX: realloc?
+    s.resize()
   s.d[s.len] = (c, t)
   inc(s.len)
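The old add body allocated a fresh buffer, copied and freed by hand (with an "XXX: realloc?" reminder); the new resize answers that by letting realloc/reallocShared do the copy-and-free in one step. The general pattern, sketched outside the GC and illustrative only:

```nim
var cap = 4
var buf = cast[ptr UncheckedArray[int]](alloc(cap * sizeof(int)))
# ...buffer is full, grow by the same 3/2 factor:
cap = cap * 3 div 2
buf = cast[ptr UncheckedArray[int]](realloc(buf, cap * sizeof(int)))
dealloc(buf)
```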
 
diff --git a/lib/system/cellsets.nim b/lib/system/cellsets.nim
index a0f1fabf9..92036c226 100644
--- a/lib/system/cellsets.nim
+++ b/lib/system/cellsets.nim
@@ -42,7 +42,7 @@ Complete traversal is done in this way::
 
 ]#
 
-when defined(gcOrc) or defined(gcArc):
+when defined(gcOrc) or defined(gcArc) or defined(gcAtomicArc):
   type
     PCell = Cell
 
@@ -78,7 +78,7 @@ type
     head: PPageDesc
     data: PPageDescArray
 
-when defined(gcOrc) or defined(gcArc):
+when defined(gcOrc) or defined(gcArc) or defined(gcAtomicArc):
   discard
 else:
   include cellseqs_v1
diff --git a/lib/system/cgprocs.nim b/lib/system/cgprocs.nim
index 9d0d248c3..9a7645f9b 100644
--- a/lib/system/cgprocs.nim
+++ b/lib/system/cgprocs.nim
@@ -8,13 +8,3 @@
 #
 
 # Headers for procs that the code generator depends on ("compilerprocs")
-
-type
-  LibHandle = pointer       # private type
-  ProcAddr = pointer        # library loading and loading of procs:
-
-proc nimLoadLibrary(path: string): LibHandle {.compilerproc, hcrInline, nonReloadable.}
-proc nimUnloadLibrary(lib: LibHandle) {.compilerproc, hcrInline, nonReloadable.}
-proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr {.compilerproc, hcrInline, nonReloadable.}
-
-proc nimLoadLibraryError(path: string) {.compilerproc, hcrInline, nonReloadable.}
diff --git a/lib/system/channels_builtin.nim b/lib/system/channels_builtin.nim
index fbe3f0e98..02b4d8cbf 100644
--- a/lib/system/channels_builtin.nim
+++ b/lib/system/channels_builtin.nim
@@ -26,7 +26,7 @@
 ## The following is a simple example of two different ways to use channels:
 ## blocking and non-blocking.
 ##
-## .. code-block:: Nim
+##   ```Nim
 ##   # Be sure to compile with --threads:on.
 ##   # The channels and threads modules are part of system and should not be
 ##   # imported.
@@ -87,6 +87,7 @@
 ##
 ##   # Clean up the channel.
 ##   chan.close()
+##   ```
 ##
 ## Sample output
 ## -------------
@@ -113,7 +114,7 @@
 ## using e.g. `system.allocShared0` and pass these pointers through thread
 ## arguments:
 ##
-## .. code-block:: Nim
+##   ```Nim
 ##   proc worker(channel: ptr Channel[string]) =
 ##     let greeting = channel[].recv()
 ##     echo greeting
@@ -135,6 +136,7 @@
 ##     deallocShared(channel)
 ##
 ##   localChannelExample() # "Hello from the main thread!"
+##   ```
 
 when not declared(ThisIsSystem):
   {.error: "You must not import this module explicitly".}
@@ -185,8 +187,8 @@ when not usesDestructors:
   proc storeAux(dest, src: pointer, n: ptr TNimNode, t: PRawChannel,
                 mode: LoadStoreMode) {.benign.} =
     var
-      d = cast[ByteAddress](dest)
-      s = cast[ByteAddress](src)
+      d = cast[int](dest)
+      s = cast[int](src)
     case n.kind
     of nkSlot: storeAux(cast[pointer](d +% n.offset),
                         cast[pointer](s +% n.offset), n.typ, t, mode)
@@ -205,8 +207,8 @@ when not usesDestructors:
       cast[pointer](cast[int](p) +% x)
 
     var
-      d = cast[ByteAddress](dest)
-      s = cast[ByteAddress](src)
+      d = cast[int](dest)
+      s = cast[int](src)
     sysAssert(mt != nil, "mt == nil")
     case mt.kind
     of tyString:
@@ -245,14 +247,14 @@ when not usesDestructors:
           x[] = alloc0(t.region, align(GenericSeqSize, mt.base.align) +% seq.len *% mt.base.size)
         else:
           unsureAsgnRef(x, newSeq(mt, seq.len))
-        var dst = cast[ByteAddress](cast[PPointer](dest)[])
+        var dst = cast[int](cast[PPointer](dest)[])
         var dstseq = cast[PGenericSeq](dst)
         dstseq.len = seq.len
         dstseq.reserved = seq.len
         for i in 0..seq.len-1:
           storeAux(
             cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
-            cast[pointer](cast[ByteAddress](s2) +% align(GenericSeqSize, mt.base.align) +%
+            cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align) +%
                           i *% mt.base.size),
             mt.base, t, mode)
         if mode != mStore: dealloc(t.region, s2)
@@ -392,7 +394,7 @@ proc llRecv(q: PRawChannel, res: pointer, typ: PNimType) =
   q.ready = false
   if typ != q.elemType:
     releaseSys(q.lock)
-    sysFatal(ValueError, "cannot receive message of wrong type")
+    raise newException(ValueError, "cannot receive message of wrong type")
   rawRecv(q, res, typ)
   if q.maxItems > 0 and q.count == q.maxItems - 1:
     # Parent thread is awaiting in send. Wake it up.
diff --git a/lib/system/chcks.nim b/lib/system/chcks.nim
index dd26d140d..b48855964 100644
--- a/lib/system/chcks.nim
+++ b/lib/system/chcks.nim
@@ -38,6 +38,10 @@ when defined(nimV2):
   proc raiseFieldError2(f: string, discVal: int) {.compilerproc, noinline.} =
     ## raised when field is inaccessible given runtime value of discriminant
     sysFatal(FieldDefect, f & $discVal & "'")
+
+  proc raiseFieldErrorStr(f: string, discVal: string) {.compilerproc, noinline.} =
+    ## raised when field is inaccessible given runtime value of discriminant
+    sysFatal(FieldDefect, formatFieldDefect(f, discVal))
 else:
   proc raiseFieldError2(f: string, discVal: string) {.compilerproc, noinline.} =
     ## raised when field is inaccessible given runtime value of discriminant
diff --git a/lib/system/comparisons.nim b/lib/system/comparisons.nim
index 36d4d06a8..a8d78bb93 100644
--- a/lib/system/comparisons.nim
+++ b/lib/system/comparisons.nim
@@ -35,7 +35,7 @@ proc `==`*[T](x, y: ref T): bool {.magic: "EqRef", noSideEffect.}
   ## Checks that two `ref` variables refer to the same item.
 proc `==`*[T](x, y: ptr T): bool {.magic: "EqRef", noSideEffect.}
   ## Checks that two `ptr` variables refer to the same item.
-proc `==`*[T: proc](x, y: T): bool {.magic: "EqProc", noSideEffect.}
+proc `==`*[T: proc | iterator](x, y: T): bool {.magic: "EqProc", noSideEffect.}
   ## Checks that two `proc` variables refer to the same procedure.
 
 proc `<=`*[Enum: enum](x, y: Enum): bool {.magic: "LeEnum", noSideEffect.}
@@ -324,7 +324,7 @@ proc `==`*[T](x, y: seq[T]): bool {.noSideEffect.} =
         return true
     else:
       var sameObject = false
-      asm """`sameObject` = `x` === `y`"""
+      {.emit: """`sameObject` = `x` === `y`;""".}
       if sameObject: return true
 
   if x.len != y.len:
diff --git a/lib/system/compilation.nim b/lib/system/compilation.nim
index 6109e9874..cdb976ed5 100644
--- a/lib/system/compilation.nim
+++ b/lib/system/compilation.nim
@@ -1,16 +1,16 @@
 const
-  NimMajor* {.intdefine.}: int = 1
+  NimMajor* {.intdefine.}: int = 2
     ## is the major number of Nim's version. Example:
-    ##   ```
+    ##   ```nim
     ##   when (NimMajor, NimMinor, NimPatch) >= (1, 3, 1): discard
     ##   ```
     # see also std/private/since
 
-  NimMinor* {.intdefine.}: int = 7
+  NimMinor* {.intdefine.}: int = 2
     ## is the minor number of Nim's version.
     ## Odd for devel, even for releases.
 
-  NimPatch* {.intdefine.}: int = 3
+  NimPatch* {.intdefine.}: int = 1
     ## is the patch number of Nim's version.
     ## Odd for devel, even for releases.
 
@@ -40,7 +40,7 @@ proc defined*(x: untyped): bool {.magic: "Defined", noSideEffect, compileTime.}
   ## `x` is an external symbol introduced through the compiler's
   ## `-d:x switch <nimc.html#compiler-usage-compileminustime-symbols>`_ to enable
   ## build time conditionals:
-  ##   ```
+  ##   ```nim
   ##   when not defined(release):
   ##     # Do here programmer friendly expensive sanity checks.
   ##   # Put here the normal code
@@ -51,36 +51,30 @@ proc defined*(x: untyped): bool {.magic: "Defined", noSideEffect, compileTime.}
   ## * `compileOption <#compileOption,string,string>`_ for enum options
   ## * `define pragmas <manual.html#implementation-specific-pragmas-compileminustime-define-pragmas>`_
 
-when defined(nimHasDeclaredMagic):
-  proc declared*(x: untyped): bool {.magic: "Declared", noSideEffect, compileTime.}
-    ## Special compile-time procedure that checks whether `x` is
-    ## declared. `x` has to be an identifier or a qualified identifier.
-    ##
-    ## This can be used to check whether a library provides a certain
-    ## feature or not:
-    ##   ```
-    ##   when not declared(strutils.toUpper):
-    ##     # provide our own toUpper proc here, because strutils is
-    ##     # missing it.
-    ##   ```
-    ##
-    ## See also:
-    ## * `declaredInScope <#declaredInScope,untyped>`_
-else:
-  proc declared*(x: untyped): bool {.magic: "Defined", noSideEffect, compileTime.}
-
-when defined(nimHasDeclaredMagic):
-  proc declaredInScope*(x: untyped): bool {.magic: "DeclaredInScope", noSideEffect, compileTime.}
-    ## Special compile-time procedure that checks whether `x` is
-    ## declared in the current scope. `x` has to be an identifier.
-else:
-  proc declaredInScope*(x: untyped): bool {.magic: "DefinedInScope", noSideEffect, compileTime.}
+proc declared*(x: untyped): bool {.magic: "Declared", noSideEffect, compileTime.}
+  ## Special compile-time procedure that checks whether `x` is
+  ## declared. `x` has to be an identifier or a qualified identifier.
+  ##
+  ## This can be used to check whether a library provides a certain
+  ## feature or not:
+  ##   ```nim
+  ##   when not declared(strutils.toUpper):
+  ##     # provide our own toUpper proc here, because strutils is
+  ##     # missing it.
+  ##   ```
+  ##
+  ## See also:
+  ## * `declaredInScope <#declaredInScope,untyped>`_
+
+proc declaredInScope*(x: untyped): bool {.magic: "DeclaredInScope", noSideEffect, compileTime.}
+  ## Special compile-time procedure that checks whether `x` is
+  ## declared in the current scope. `x` has to be an identifier.
 
 proc compiles*(x: untyped): bool {.magic: "Compiles", noSideEffect, compileTime.} =
   ## Special compile-time procedure that checks whether `x` can be compiled
   ## without any semantic error.
   ## This can be used to check whether a type supports some operation:
-  ##   ```
+  ##   ```nim
   ##   when compiles(3 + 4):
   ##     echo "'+' for integers is available"
   ##   ```
@@ -150,16 +144,17 @@ template currentSourcePath*: string = instantiationInfo(-1, true).filename
   ## Returns the full file-system path of the current source.
   ##
   ## To get the directory containing the current source, use it with
-  ## `os.parentDir() <os.html#parentDir%2Cstring>`_ as `currentSourcePath.parentDir()`.
+  ## `ospaths2.parentDir() <ospaths2.html#parentDir%2Cstring>`_ as
+  ## `currentSourcePath.parentDir()`.
   ##
   ## The path returned by this template is set at compile time.
   ##
   ## See the docstring of `macros.getProjectPath() <macros.html#getProjectPath>`_
-  ## for an example to see the distinction between the `currentSourcePath`
-  ## and `getProjectPath`.
+  ## for an example to see the distinction between the `currentSourcePath()`
+  ## and `getProjectPath()`.
   ##
   ## See also:
-  ## * `getCurrentDir proc <os.html#getCurrentDir>`_
+  ## * `ospaths2.getCurrentDir() proc <ospaths2.html#getCurrentDir>`_
 
 proc slurp*(filename: string): string {.magic: "Slurp".}
   ## This is an alias for `staticRead <#staticRead,string>`_.
@@ -170,7 +165,7 @@ proc staticRead*(filename: string): string {.magic: "Slurp".}
   ##
   ## The maximum file size limit that `staticRead` and `slurp` can read is
   ## near or equal to the *free* memory of the device you are using to compile.
-  ##   ```
+  ##   ```nim
   ##   const myResource = staticRead"mydatafile.bin"
   ##   ```
   ##
@@ -187,7 +182,7 @@ proc staticExec*(command: string, input = "", cache = ""): string {.
   ##
   ## If `input` is not an empty string, it will be passed as a standard input
   ## to the executed program.
-  ##   ```
+  ##   ```nim
   ##   const buildInfo = "Revision " & staticExec("git rev-parse HEAD") &
   ##                     "\nCompiled on " & staticExec("uname -v")
   ##   ```
@@ -203,7 +198,7 @@ proc staticExec*(command: string, input = "", cache = ""): string {.
   ## behaviour then. `command & input & cache` (the concatenated string) is
   ## used to determine whether the entry in the cache is still valid. You can
   ## use versioning information for `cache`:
-  ##   ```
+  ##   ```nim
   ##   const stateMachine = staticExec("dfaoptimizer", "input", "0.8.0")
   ##   ```
 
diff --git a/lib/system/ctypes.nim b/lib/system/ctypes.nim
index 6ba28ed49..b788274bd 100644
--- a/lib/system/ctypes.nim
+++ b/lib/system/ctypes.nim
@@ -12,16 +12,10 @@ type
     ## compiler supports. Currently this is `float64`, but it is
     ## platform-dependent in general.
 
-when defined(js):
-  type BiggestUInt* = uint32
+  BiggestUInt* = uint64
     ## is an alias for the biggest unsigned integer type the Nim compiler
-    ## supports. Currently this is `uint32` for JS and `uint64` for other
-    ## targets.
-else:
-  type BiggestUInt* = uint64
-    ## is an alias for the biggest unsigned integer type the Nim compiler
-    ## supports. Currently this is `uint32` for JS and `uint64` for other
-    ## targets.
+    ## supports. Currently this is `uint64`, but it is platform-dependent
+    ## in general.
 
 when defined(windows):
   type
@@ -67,7 +61,7 @@ type # these work for most platforms:
     ## This is the same as the type `unsigned long long` in *C*.
 
 type
-  ByteAddress* = int
+  ByteAddress* {.deprecated: "use `uint`".} = int
     ## is the signed integer type that should be used for converting
     ## pointers to integer addresses for readability.
 
diff --git a/lib/system/deepcopy.nim b/lib/system/deepcopy.nim
index 1f30b8427..72d35f518 100644
--- a/lib/system/deepcopy.nim
+++ b/lib/system/deepcopy.nim
@@ -61,8 +61,8 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType;
 proc genericDeepCopyAux(dest, src: pointer, n: ptr TNimNode;
                         tab: var PtrTable) {.benign.} =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   case n.kind
   of nkSlot:
     genericDeepCopyAux(cast[pointer](d +% n.offset),
@@ -85,8 +85,8 @@ proc genericDeepCopyAux(dest, src: pointer, n: ptr TNimNode;
 
 proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   sysAssert(mt != nil, "genericDeepCopyAux 2")
   case mt.kind
   of tyString:
@@ -113,11 +113,11 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) =
         return
       sysAssert(dest != nil, "genericDeepCopyAux 3")
       unsureAsgnRef(x, newSeq(mt, seq.len))
-      var dst = cast[ByteAddress](cast[PPointer](dest)[])
+      var dst = cast[int](cast[PPointer](dest)[])
       for i in 0..seq.len-1:
         genericDeepCopyAux(
           cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
-          cast[pointer](cast[ByteAddress](s2) +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
+          cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
           mt.base, tab)
   of tyObject:
     # we need to copy m_type field for tyObject, as it could be empty for
@@ -199,8 +199,8 @@ proc genericSeqDeepCopy(dest, src: pointer, mt: PNimType) {.compilerproc.} =
 proc genericDeepCopyOpenArray(dest, src: pointer, len: int,
                             mt: PNimType) {.compilerproc.} =
   var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   for i in 0..len-1:
     genericDeepCopy(cast[pointer](d +% i *% mt.base.size),
                     cast[pointer](s +% i *% mt.base.size), mt.base)
diff --git a/lib/system/dollars.nim b/lib/system/dollars.nim
index 4ff3d0ae6..89a739d5a 100644
--- a/lib/system/dollars.nim
+++ b/lib/system/dollars.nim
@@ -41,9 +41,9 @@ proc `$`*(x: bool): string {.magic: "BoolToStr", noSideEffect.}
 proc `$`*(x: char): string {.magic: "CharToStr", noSideEffect.}
   ## The stringify operator for a character argument. Returns `x`
   ## converted to a string.
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   assert $'c' == "c"
+  ##   ```
 
 proc `$`*(x: cstring): string {.magic: "CStrToStr", noSideEffect.}
   ## The stringify operator for a CString argument. Returns `x`
@@ -67,19 +67,20 @@ proc `$`*(t: typedesc): string {.magic: "TypeTrait".}
   ## For more procedures dealing with `typedesc`, see
   ## `typetraits module <typetraits.html>`_.
   ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   doAssert $(typeof(42)) == "int"
   ##   doAssert $(typeof("Foo")) == "string"
   ##   static: doAssert $(typeof(@['A', 'B'])) == "seq[char]"
+  ##   ```
 
 proc `$`*[T: tuple](x: T): string =
   ## Generic `$` operator for tuples that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   $(23, 45) == "(23, 45)"
   ##   $(a: 23, b: 45) == "(a: 23, b: 45)"
   ##   $() == "()"
+  ##   ```
   tupleObjectDollar(result, x)
 
 when not defined(nimPreviewSlimSystem):
@@ -108,25 +109,25 @@ proc collectionToString[T](x: T, prefix, separator, suffix: string): string =
 proc `$`*[T](x: set[T]): string =
   ## Generic `$` operator for sets that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   ${23, 45} == "{23, 45}"
+  ##   ```
   collectionToString(x, "{", ", ", "}")
 
 proc `$`*[T](x: seq[T]): string =
   ## Generic `$` operator for seqs that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   $(@[23, 45]) == "@[23, 45]"
+  ##   ```
   collectionToString(x, "@[", ", ", "]")
 
 proc `$`*[T, U](x: HSlice[T, U]): string =
   ## Generic `$` operator for slices that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##  $(1 .. 5) == "1 .. 5"
+  ##  ```
   result = $x.a
   result.add(" .. ")
   result.add($x.b)
@@ -140,7 +141,7 @@ when not defined(nimNoArrayToString):
 proc `$`*[T](x: openArray[T]): string =
   ## Generic `$` operator for openarrays that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   $(@[23, 45].toOpenArray(0, 1)) == "[23, 45]"
+  ##   ```
   collectionToString(x, "[", ", ", "]")
diff --git a/lib/system/dyncalls.nim b/lib/system/dyncalls.nim
index de22f7cbb..2162b234f 100644
--- a/lib/system/dyncalls.nim
+++ b/lib/system/dyncalls.nim
@@ -176,7 +176,7 @@ elif defined(genode):
   proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
     raiseAssert("nimGetProcAddr not implemented")
 
-elif defined(nintendoswitch) or defined(freertos) or defined(zephyr):
+elif defined(nintendoswitch) or defined(freertos) or defined(zephyr) or defined(nuttx):
   proc nimUnloadLibrary(lib: LibHandle) =
     cstderr.rawWrite("nimUnLoadLibrary not implemented")
     cstderr.rawWrite("\n")
diff --git a/lib/system/embedded.nim b/lib/system/embedded.nim
index e0b053c7b..ea6776f58 100644
--- a/lib/system/embedded.nim
+++ b/lib/system/embedded.nim
@@ -19,8 +19,9 @@ proc nimFrame(s: PFrame) {.compilerRtl, inl, exportc: "nimFrame".} = discard
 proc popFrame {.compilerRtl, inl.} = discard
 
 proc setFrame(s: PFrame) {.compilerRtl, inl.} = discard
-proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} = discard
-proc popSafePoint {.compilerRtl, inl.} = discard
+when not gotoBasedExceptions:
+  proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} = discard
+  proc popSafePoint {.compilerRtl, inl.} = discard
 proc pushCurrentException(e: ref Exception) {.compilerRtl, inl.} = discard
 proc popCurrentException {.compilerRtl, inl.} = discard
 
diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim
index 86cfff9cd..dae5c4a4a 100644
--- a/lib/system/excpt.nim
+++ b/lib/system/excpt.nim
@@ -73,26 +73,45 @@ type
 
 when NimStackTraceMsgs:
   var frameMsgBuf* {.threadvar.}: string
+
+when not defined(nimV2):
+  var
+    framePtr {.threadvar.}: PFrame
+
 var
-  framePtr {.threadvar.}: PFrame
-  excHandler {.threadvar.}: PSafePoint
-    # list of exception handlers
-    # a global variable for the root of all try blocks
   currException {.threadvar.}: ref Exception
-  gcFramePtr {.threadvar.}: GcFrame
 
-type
-  FrameState = tuple[gcFramePtr: GcFrame, framePtr: PFrame,
-                     excHandler: PSafePoint, currException: ref Exception]
+when not gotoBasedExceptions:
+  var
+    excHandler {.threadvar.}: PSafePoint
+      # list of exception handlers
+      # a global variable for the root of all try blocks
+    gcFramePtr {.threadvar.}: GcFrame
+
+when gotoBasedExceptions:
+  type
+    FrameState = tuple[framePtr: PFrame,
+                      currException: ref Exception]
+else:
+  type
+    FrameState = tuple[gcFramePtr: GcFrame, framePtr: PFrame,
+                      excHandler: PSafePoint, currException: ref Exception]
 
 proc getFrameState*(): FrameState {.compilerRtl, inl.} =
-  return (gcFramePtr, framePtr, excHandler, currException)
+  when gotoBasedExceptions:
+    return (framePtr, currException)
+  else:
+    return (gcFramePtr, framePtr, excHandler, currException)
 
 proc setFrameState*(state: FrameState) {.compilerRtl, inl.} =
-  gcFramePtr = state.gcFramePtr
-  framePtr = state.framePtr
-  excHandler = state.excHandler
-  currException = state.currException
+  when gotoBasedExceptions:
+    framePtr = state.framePtr
+    currException = state.currException
+  else:
+    gcFramePtr = state.gcFramePtr
+    framePtr = state.framePtr
+    excHandler = state.excHandler
+    currException = state.currException
 
 proc getFrame*(): PFrame {.compilerRtl, inl.} = framePtr
 
@@ -114,20 +133,21 @@ when false:
 proc setFrame*(s: PFrame) {.compilerRtl, inl.} =
   framePtr = s
 
-proc getGcFrame*(): GcFrame {.compilerRtl, inl.} = gcFramePtr
-proc popGcFrame*() {.compilerRtl, inl.} = gcFramePtr = gcFramePtr.prev
-proc setGcFrame*(s: GcFrame) {.compilerRtl, inl.} = gcFramePtr = s
-proc pushGcFrame*(s: GcFrame) {.compilerRtl, inl.} =
-  s.prev = gcFramePtr
-  zeroMem(cast[pointer](cast[int](s)+%sizeof(GcFrameHeader)), s.len*sizeof(pointer))
-  gcFramePtr = s
+when not gotoBasedExceptions:
+  proc getGcFrame*(): GcFrame {.compilerRtl, inl.} = gcFramePtr
+  proc popGcFrame*() {.compilerRtl, inl.} = gcFramePtr = gcFramePtr.prev
+  proc setGcFrame*(s: GcFrame) {.compilerRtl, inl.} = gcFramePtr = s
+  proc pushGcFrame*(s: GcFrame) {.compilerRtl, inl.} =
+    s.prev = gcFramePtr
+    zeroMem(cast[pointer](cast[int](s)+%sizeof(GcFrameHeader)), s.len*sizeof(pointer))
+    gcFramePtr = s
 
-proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =
-  s.prev = excHandler
-  excHandler = s
+  proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =
+    s.prev = excHandler
+    excHandler = s
 
-proc popSafePoint {.compilerRtl, inl.} =
-  excHandler = excHandler.prev
+  proc popSafePoint {.compilerRtl, inl.} =
+    excHandler = excHandler.prev
 
 proc pushCurrentException(e: sink(ref Exception)) {.compilerRtl, inl.} =
   e.up = currException
@@ -407,15 +427,16 @@ proc reportUnhandledError(e: ref Exception) {.nodestroy, gcsafe.} =
   when hostOS != "any":
     reportUnhandledErrorAux(e)
 
-proc nimLeaveFinally() {.compilerRtl.} =
-  when defined(cpp) and not defined(noCppExceptions) and not gotoBasedExceptions:
-    {.emit: "throw;".}
-  else:
-    if excHandler != nil:
-      c_longjmp(excHandler.context, 1)
+when not gotoBasedExceptions:
+  proc nimLeaveFinally() {.compilerRtl.} =
+    when defined(cpp) and not defined(noCppExceptions) and not gotoBasedExceptions:
+      {.emit: "throw;".}
     else:
-      reportUnhandledError(currException)
-      rawQuit(1)
+      if excHandler != nil:
+        c_longjmp(excHandler.context, 1)
+      else:
+        reportUnhandledError(currException)
+        rawQuit(1)
 
 when gotoBasedExceptions:
   var nimInErrorMode {.threadvar.}: bool
@@ -448,7 +469,7 @@ proc raiseExceptionAux(e: sink(ref Exception)) {.nodestroy.} =
     else:
       pushCurrentException(e)
       {.emit: "throw `e`;".}
-  elif defined(nimQuirky) or gotoBasedExceptions:
+  elif quirkyExceptions or gotoBasedExceptions:
     pushCurrentException(e)
     when gotoBasedExceptions:
       inc nimInErrorMode
@@ -560,7 +581,7 @@ proc nimFrame(s: PFrame) {.compilerRtl, inl, raises: [].} =
 when defined(cpp) and appType != "lib" and not gotoBasedExceptions and
     not defined(js) and not defined(nimscript) and
     hostOS != "standalone" and hostOS != "any" and not defined(noCppExceptions) and
-    not defined(nimQuirky):
+    not quirkyExceptions:
 
   type
     StdException {.importcpp: "std::exception", header: "<exception>".} = object
diff --git a/lib/system/fatal.nim b/lib/system/fatal.nim
index a55af2dc3..25c05e52d 100644
--- a/lib/system/fatal.nim
+++ b/lib/system/fatal.nim
@@ -9,27 +9,26 @@
 
 {.push profiler: off.}
 
-when defined(nimHasExceptionsQuery):
-  const gotoBasedExceptions = compileOption("exceptions", "goto")
-else:
-  const gotoBasedExceptions = false
+const
+  gotoBasedExceptions = compileOption("exceptions", "goto")
+  quirkyExceptions = compileOption("exceptions", "quirky")
 
 when hostOS == "standalone":
   include "$projectpath/panicoverride"
 
-  func sysFatal(exceptn: typedesc, message: string) {.inline.} =
+  func sysFatal(exceptn: typedesc[Defect], message: string) {.inline.} =
     panic(message)
 
-  func sysFatal(exceptn: typedesc, message, arg: string) {.inline.} =
+  func sysFatal(exceptn: typedesc[Defect], message, arg: string) {.inline.} =
     rawoutput(message)
     panic(arg)
 
-elif (defined(nimQuirky) or defined(nimPanics)) and not defined(nimscript):
+elif quirkyExceptions and not defined(nimscript):
   import ansi_c
 
   func name(t: typedesc): string {.magic: "TypeTrait".}
 
-  func sysFatal(exceptn: typedesc, message, arg: string) {.inline, noreturn.} =
+  func sysFatal(exceptn: typedesc[Defect], message, arg: string) {.inline, noreturn.} =
     when nimvm:
       # TODO when doAssertRaises works in CT, add a test for it
       raise (ref exceptn)(msg: message & arg)
@@ -46,14 +45,14 @@ elif (defined(nimQuirky) or defined(nimPanics)) and not defined(nimscript):
         cstderr.rawWrite buf
       rawQuit 1
 
-  func sysFatal(exceptn: typedesc, message: string) {.inline, noreturn.} =
+  func sysFatal(exceptn: typedesc[Defect], message: string) {.inline, noreturn.} =
     sysFatal(exceptn, message, "")
 
 else:
-  func sysFatal(exceptn: typedesc, message: string) {.inline, noreturn.} =
+  func sysFatal(exceptn: typedesc[Defect], message: string) {.inline, noreturn.} =
     raise (ref exceptn)(msg: message)
 
-  func sysFatal(exceptn: typedesc, message, arg: string) {.inline, noreturn.} =
+  func sysFatal(exceptn: typedesc[Defect], message, arg: string) {.inline, noreturn.} =
     raise (ref exceptn)(msg: message & arg)
 
 {.pop.}
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index b36822aad..9289c7f55 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -31,7 +31,7 @@ In Nim the compiler cannot always know if a reference
 is stored on the stack or not. This is caused by var parameters.
 Consider this example:
 
-.. code-block:: Nim
+  ```Nim
   proc setRef(r: var ref TNode) =
     new(r)
 
@@ -41,11 +41,12 @@ Consider this example:
     setRef(r) # here we should not update the reference counts, because
               # r is on the stack
     setRef(r.left) # here we should update the refcounts!
+  ```
 
 We have to decide at runtime whether the reference is on the stack or not.
 The generated code looks roughly like this:
 
-.. code-block:: C
+  ```C
   void setref(TNode** ref) {
     unsureAsgnRef(ref, newObj(TNode_TI, sizeof(TNode)))
   }
@@ -53,6 +54,7 @@ The generated code looks roughly like this:
     setRef(&r)
     setRef(&r->left)
   }
+  ```
 
 Note that for systems with a continuous stack (which most systems have)
 the check whether the ref is on the stack is very cheap (only two
@@ -76,7 +78,7 @@ when defined(memProfiler):
   proc nimProfile(requestedSize: int) {.benign.}
 
 when hasThreadSupport:
-  import sharedlist
+  import std/sharedlist
 
 const
   rcIncrement = 0b1000 # so that lowest 3 bits are not touched
@@ -170,11 +172,11 @@ proc addZCT(s: var CellSeq, c: PCell) {.noinline.} =
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
+  result = cast[pointer](cast[int](cell)+%ByteAddress(sizeof(Cell)))
 
 proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
+  result = cast[PCell](cast[int](usr)-%ByteAddress(sizeof(Cell)))
 
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
   # used for code generation concerning debugging
@@ -336,7 +338,7 @@ proc cellsetReset(s: var CellSet) =
 {.push stacktrace:off.}
 
 proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
   of nkList:
@@ -356,7 +358,7 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
   of nkNone: sysAssert(false, "forAllSlotsAux")
 
 proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
     case mt.kind
@@ -382,7 +384,7 @@ proc forAllChildren(cell: PCell, op: WalkOp) =
     of tyRef: # common case
       forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
     of tySequence:
-      var d = cast[ByteAddress](cellToUsr(cell))
+      var d = cast[int](cellToUsr(cell))
       var s = cast[PGenericSeq](d)
       if s != nil:
         for i in 0..s.len-1:
@@ -457,7 +459,7 @@ proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   collectCT(gch)
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
   #gcAssert typ.kind in {tyString, tySequence} or size >= typ.base.size, "size too small"
-  gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  gcAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
   setFrameInfo(res)
@@ -507,7 +509,7 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl, noinline.} =
 
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
   sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  sysAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
   setFrameInfo(res)
@@ -549,9 +551,9 @@ proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
 
   var oldsize = align(GenericSeqSize, elemAlign) + cast[PGenericSeq](old).len * elemSize
   copyMem(res, ol, oldsize + sizeof(Cell))
-  zeroMem(cast[pointer](cast[ByteAddress](res) +% oldsize +% sizeof(Cell)),
+  zeroMem(cast[pointer](cast[int](res) +% oldsize +% sizeof(Cell)),
           newsize-oldsize)
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
+  sysAssert((cast[int](res) and (MemAlign-1)) == 0, "growObj: 3")
   # This can be wrong for intermediate temps that are nevertheless on the
   # heap because of lambda lifting:
   #gcAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
@@ -683,7 +685,7 @@ proc collectCycles(gch: var GcHeap) {.raises: [].} =
 proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
   sysAssert(allocInv(gch.region), "gcMark begin")
-  var c = cast[ByteAddress](p)
+  var c = cast[int](p)
   if c >% PageSize:
     # fast check: does it look like a cell?
     var objStart = cast[PCell](interiorAllocatedPtr(gch.region, p))
@@ -848,10 +850,10 @@ when withRealTime:
         stack.bottomSaved = stack.bottom
         when stackIncreases:
           stack.bottom = cast[pointer](
-            cast[ByteAddress](stack.pos) - sizeof(pointer) * 6 - stackSize)
+            cast[int](stack.pos) - sizeof(pointer) * 6 - stackSize)
         else:
           stack.bottom = cast[pointer](
-            cast[ByteAddress](stack.pos) + sizeof(pointer) * 6 + stackSize)
+            cast[int](stack.pos) + sizeof(pointer) * 6 + stackSize)
 
     GC_step(gch, us, strongAdvice)
 
diff --git a/lib/system/gc2.nim b/lib/system/gc2.nim
deleted file mode 100644
index 0593b396e..000000000
--- a/lib/system/gc2.nim
+++ /dev/null
@@ -1,749 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2017 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-# xxx deadcode, consider removing unless something could be reused.
-
-
-#            Garbage Collector
-#
-# The basic algorithm is an incremental mark
-# and sweep GC to free cycles. It is hard realtime in that if you play
-# according to its rules, no deadline will ever be missed.
-# Since this kind of collector is very bad at recycling dead objects
-# early, Nim's codegen emits ``nimEscape`` calls at strategic
-# places. For this to work even 'unsureAsgnRef' needs to mark things
-# so that only return values need to be considered in ``nimEscape``.
-
-{.push profiler:off.}
-
-const
-  CycleIncrease = 2 # is a multiplicative increase
-  InitialCycleThreshold = 512*1024 # start collecting after 500KB
-  ZctThreshold = 500  # we collect garbage if the ZCT's size
-                      # reaches this threshold
-                      # this seems to be a good value
-  withRealTime = defined(useRealtimeGC)
-
-when withRealTime and not declared(getTicks):
-  include "system/timers"
-when defined(memProfiler):
-  proc nimProfile(requestedSize: int) {.benign.}
-
-when hasThreadSupport:
-  include sharedlist
-
-type
-  ObjectSpaceIter = object
-    state: range[-1..0]
-
-iterToProc(allObjects, ptr ObjectSpaceIter, allObjectsAsProc)
-
-const
-  escapedBit = 0b1000 # so that lowest 3 bits are not touched
-  rcBlackOrig = 0b000
-  rcWhiteOrig = 0b001
-  rcGrey = 0b010   # traditional color for incremental mark&sweep
-  rcUnused = 0b011
-  colorMask = 0b011
-type
-  WalkOp = enum
-    waMarkGlobal,    # part of the backup mark&sweep
-    waMarkGrey,
-    waZctDecRef,
-    waDebug
-
-  Phase {.pure.} = enum
-    None, Marking, Sweeping
-  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
-    # A ref type can have a finalizer that is called before the object's
-    # storage is freed.
-
-  GcStat = object
-    stackScans: int          # number of performed stack scans (for statistics)
-    completedCollections: int    # number of performed full collections
-    maxThreshold: int        # max threshold that has been set
-    maxStackSize: int        # max stack size
-    maxStackCells: int       # max stack cells in ``decStack``
-    cycleTableSize: int      # max entries in cycle table
-    maxPause: int64          # max measured GC pause in nanoseconds
-
-  GcStack {.final, pure.} = object
-    when nimCoroutines:
-      prev: ptr GcStack
-      next: ptr GcStack
-      maxStackSize: int      # Used to track statistics because we can not use
-                             # GcStat.maxStackSize when multiple stacks exist.
-    bottom: pointer
-
-    when withRealTime or nimCoroutines:
-      pos: pointer           # Used with `withRealTime` only for code clarity, see GC_Step().
-    when withRealTime:
-      bottomSaved: pointer
-
-  GcHeap = object # this contains the zero count and
-                  # non-zero count table
-    black, red: int # either 0 or 1.
-    stack: GcStack
-    when nimCoroutines:
-      activeStack: ptr GcStack    # current executing coroutine stack.
-    phase: Phase
-    cycleThreshold: int
-    when useCellIds:
-      idGenerator: int
-    greyStack: CellSeq
-    recGcLock: int           # prevent recursion via finalizers; no thread lock
-    when withRealTime:
-      maxPause: Nanos        # max allowed pause in nanoseconds; active if > 0
-    region: MemRegion        # garbage collected region
-    stat: GcStat
-    additionalRoots: CellSeq # explicit roots for GC_ref/unref
-    spaceIter: ObjectSpaceIter
-    pDumpHeapFile: pointer # File that is used for GC_dumpHeap
-    when hasThreadSupport:
-      toDispose: SharedList[pointer]
-    gcThreadId: int
-
-var
-  gch {.rtlThreadVar.}: GcHeap
-
-when not defined(useNimRtl):
-  instantiateForRegion(gch.region)
-
-# Which color to use for new objects is tricky: When we're marking,
-# they have to be *white* so that everything is marked that is only
-# reachable from them. However, when we are sweeping, they have to
-# be black, so that we don't free them prematurely. In order to save
-# a comparison gch.phase == Phase.Marking, we use the pseudo-color
-# 'red' for new objects.
-template allocColor(): untyped = gch.red
-
-template gcAssert(cond: bool, msg: string) =
-  when defined(useGcAssert):
-    if not cond:
-      echo "[GCASSERT] ", msg
-      GC_disable()
-      writeStackTrace()
-      rawQuit 1
-
-proc cellToUsr(cell: PCell): pointer {.inline.} =
-  # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
-
-proc usrToCell(usr: pointer): PCell {.inline.} =
-  # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
-
-proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
-  # used for code generation concerning debugging
-  result = usrToCell(c).typ
-
-proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
-  result = 0
-
-# this has to equal zero, otherwise we have to round up UnitsPerPage:
-when BitsPerPage mod (sizeof(int)*8) != 0:
-  {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
-
-template color(c): untyped = c.refCount and colorMask
-template setColor(c, col) =
-  c.refcount = c.refcount and not colorMask or col
-
-template markAsEscaped(c: PCell) =
-  c.refcount = c.refcount or escapedBit
-
-template didEscape(c: PCell): bool =
-  (c.refCount and escapedBit) != 0
-
-proc writeCell(file: File; msg: cstring, c: PCell) =
-  var kind = -1
-  if c.typ != nil: kind = ord(c.typ.kind)
-  let col = if c.color == rcGrey: 'g'
-            elif c.color == gch.black: 'b'
-            else: 'w'
-  when useCellIds:
-    let id = c.id
-  else:
-    let id = c
-  when defined(nimTypeNames):
-    c_fprintf(file, "%s %p %d escaped=%ld color=%c of type %s\n",
-              msg, id, kind, didEscape(c), col, c.typ.name)
-  elif leakDetector:
-    c_fprintf(file, "%s %p %d escaped=%ld color=%c from %s(%ld)\n",
-              msg, id, kind, didEscape(c), col, c.filename, c.line)
-  else:
-    c_fprintf(file, "%s %p %d escaped=%ld color=%c\n",
-              msg, id, kind, didEscape(c), col)
-
-proc writeCell(msg: cstring, c: PCell) =
-  stdout.writeCell(msg, c)
-
-proc myastToStr[T](x: T): string {.magic: "AstToStr", noSideEffect.}
-
-template gcTrace(cell, state: untyped) =
-  when traceGC: writeCell(myastToStr(state), cell)
-
-# forward declarations:
-proc collectCT(gch: var GcHeap) {.benign.}
-proc isOnStack(p: pointer): bool {.noinline, benign.}
-proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
-proc doOperation(p: pointer, op: WalkOp) {.benign.}
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
-# we need the prototype here for debugging purposes
-
-proc nimGCref(p: pointer) {.compilerproc.} =
-  let cell = usrToCell(p)
-  markAsEscaped(cell)
-  add(gch.additionalRoots, cell)
-
-proc nimGCunref(p: pointer) {.compilerproc.} =
-  let cell = usrToCell(p)
-  var L = gch.additionalRoots.len-1
-  var i = L
-  let d = gch.additionalRoots.d
-  while i >= 0:
-    if d[i] == cell:
-      d[i] = d[L]
-      dec gch.additionalRoots.len
-      break
-    dec(i)
-
-proc nimGCunrefNoCycle(p: pointer) {.compilerproc, inline.} =
-  discard "can we do some freeing here?"
-
-proc nimGCunrefRC1(p: pointer) {.compilerproc, inline.} =
-  discard "can we do some freeing here?"
-
-template markGrey(x: PCell) =
-  if x.color != 1-gch.black and gch.phase == Phase.Marking:
-    if not isAllocatedPtr(gch.region, x):
-      c_fprintf(stdout, "[GC] markGrey proc: %p\n", x)
-      #GC_dumpHeap()
-      sysAssert(false, "wtf")
-    x.setColor(rcGrey)
-    add(gch.greyStack, x)
-
-proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
-  # the code generator calls this proc!
-  gcAssert(not isOnStack(dest), "asgnRef")
-  # BUGFIX: first incRef then decRef!
-  if src != nil:
-    let s = usrToCell(src)
-    markAsEscaped(s)
-    markGrey(s)
-  dest[] = src
-
-proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
-  deprecated: "old compiler compat".} = asgnRef(dest, src)
-
-proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc.} =
-  # unsureAsgnRef marks 'src' as grey only if dest is not on the
-  # stack. It is used by the code generator if it cannot decide whether a
-  # reference is in the stack or not (this can happen for var parameters).
-  if src != nil:
-    let s = usrToCell(src)
-    markAsEscaped(s)
-    if not isOnStack(dest): markGrey(s)
-  dest[] = src
-
-proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
-  var d = cast[ByteAddress](dest)
-  case n.kind
-  of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
-  of nkList:
-    for i in 0..n.len-1:
-      forAllSlotsAux(dest, n.sons[i], op)
-  of nkCase:
-    var m = selectBranch(dest, n)
-    if m != nil: forAllSlotsAux(dest, m, op)
-  of nkNone: sysAssert(false, "forAllSlotsAux")
-
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
-  var d = cast[ByteAddress](dest)
-  if dest == nil: return # nothing to do
-  if ntfNoRefs notin mt.flags:
-    case mt.kind
-    of tyRef, tyString, tySequence: # leaf:
-      doOperation(cast[PPointer](d)[], op)
-    of tyObject, tyTuple:
-      forAllSlotsAux(dest, mt.node, op)
-    of tyArray, tyArrayConstr, tyOpenArray:
-      for i in 0..(mt.size div mt.base.size)-1:
-        forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
-    else: discard
-
-proc forAllChildren(cell: PCell, op: WalkOp) =
-  gcAssert(cell != nil, "forAllChildren: 1")
-  gcAssert(isAllocatedPtr(gch.region, cell), "forAllChildren: 2")
-  gcAssert(cell.typ != nil, "forAllChildren: 3")
-  gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 4"
-  let marker = cell.typ.marker
-  if marker != nil:
-    marker(cellToUsr(cell), op.int)
-  else:
-    case cell.typ.kind
-    of tyRef: # common case
-      forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
-    of tySequence:
-      var d = cast[ByteAddress](cellToUsr(cell))
-      var s = cast[PGenericSeq](d)
-      if s != nil:
-        for i in 0..s.len-1:
-          forAllChildrenAux(cast[pointer](d +% align(GenericSeqSize, cell.typ.base.align) +% i *% cell.typ.base.size), cell.typ.base, op)
-    else: discard
-
-{.push stackTrace: off, profiler:off.}
-proc gcInvariant*() =
-  sysAssert(allocInv(gch.region), "injected")
-  when declared(markForDebug):
-    markForDebug(gch)
-{.pop.}
-
-include gc_common
-
-proc initGC() =
-  when not defined(useNimRtl):
-    gch.red = (1-gch.black)
-    gch.cycleThreshold = InitialCycleThreshold
-    gch.stat.stackScans = 0
-    gch.stat.completedCollections = 0
-    gch.stat.maxThreshold = 0
-    gch.stat.maxStackSize = 0
-    gch.stat.maxStackCells = 0
-    gch.stat.cycleTableSize = 0
-    # init the rt
-    init(gch.additionalRoots)
-    init(gch.greyStack)
-    when hasThreadSupport:
-      init(gch.toDispose)
-    gch.gcThreadId = atomicInc(gHeapidGenerator) - 1
-    gcAssert(gch.gcThreadId >= 0, "invalid computed thread ID")
-
-proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
-  # generates a new object and sets its reference counter to 0
-  sysAssert(allocInv(gch.region), "rawNewObj begin")
-  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
-  collectCT(gch)
-  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
-  gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
-  # now it is buffered in the ZCT
-  res.typ = typ
-  when leakDetector and not hasThreadSupport:
-    if framePtr != nil and framePtr.prev != nil:
-      res.filename = framePtr.prev.filename
-      res.line = framePtr.prev.line
-  # refcount is zero, color is black, but mark it to be in the ZCT
-  res.refcount = allocColor()
-  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-  when logGC: writeCell("new cell", res)
-  gcTrace(res, csAllocated)
-  when useCellIds:
-    inc gch.idGenerator
-    res.id = gch.idGenerator
-  result = cellToUsr(res)
-  sysAssert(allocInv(gch.region), "rawNewObj end")
-
-{.pop.}
-
-proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = rawNewObj(typ, size, gch)
-  when defined(memProfiler): nimProfile(size)
-
-proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = rawNewObj(typ, size, gch)
-  zeroMem(result, size)
-  when defined(memProfiler): nimProfile(size)
-
-proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  # `newObj` already uses locks, so no need for them here.
-  let size = addInt(align(GenericSeqSize, typ.base.align), mulInt(len, typ.base.size))
-  result = newObj(typ, size)
-  cast[PGenericSeq](result).len = len
-  cast[PGenericSeq](result).reserved = len
-  when defined(memProfiler): nimProfile(size)
-
-proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = newObj(typ, size)
-
-proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  result = newSeq(typ, len)
-
-proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
-  collectCT(gch)
-  var ol = usrToCell(old)
-  sysAssert(ol.typ != nil, "growObj: 1")
-  gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
-
-  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
-  var elemSize, elemAlign = 1
-  if ol.typ.kind != tyString:
-    elemSize = ol.typ.base.size
-    elemAlign = ol.typ.base.align
-  incTypeSize ol.typ, newsize
-
-  var oldsize = align(GenericSeqSize, elemAlign) + cast[PGenericSeq](old).len*elemSize
-  copyMem(res, ol, oldsize + sizeof(Cell))
-  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
-          newsize-oldsize)
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
-  when false:
-    # this is wrong since seqs can be shared via 'shallow':
-    when reallyDealloc: rawDealloc(gch.region, ol)
-    else:
-      zeroMem(ol, sizeof(Cell))
-  when useCellIds:
-    inc gch.idGenerator
-    res.id = gch.idGenerator
-  result = cellToUsr(res)
-  when defined(memProfiler): nimProfile(newsize-oldsize)
-
-proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
-  result = growObj(old, newsize, gch)
-
-{.push profiler:off.}
-
-
-template takeStartTime(workPackageSize) {.dirty.} =
-  const workPackage = workPackageSize
-  var debugticker = 1000
-  when withRealTime:
-    var steps = workPackage
-    var t0: Ticks
-    if gch.maxPause > 0: t0 = getticks()
-
-template takeTime {.dirty.} =
-  when withRealTime: dec steps
-  dec debugticker
-
-template checkTime {.dirty.} =
-  if debugticker <= 0:
-    #echo "in loop"
-    debugticker = 1000
-  when withRealTime:
-    if steps == 0:
-      steps = workPackage
-      if gch.maxPause > 0:
-        let duration = getticks() - t0
-        # the GC's measuring is not accurate and needs some cleanup actions
-        # (stack unmarking), so subtract some short amount of time in
-        # order to miss deadlines less often:
-        if duration >= gch.maxPause - 50_000:
-          return false
-
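
The three templates above (takeStartTime / takeTime / checkTime) implement the collector's soft real-time budget: work is counted in packages, the elapsed ticks are compared against `gch.maxPause`, and the enclosing proc returns `false` when the deadline is near. A minimal, self-contained sketch of the same pattern, using `std/monotimes` instead of the runtime's `Ticks` (all names here are illustrative, not part of the GC):

```nim
import std/[monotimes, times]

proc processIncrementally(work: var seq[int]; maxPauseUs: int64): bool =
  ## Returns true when all work is done, false when the time budget ran out.
  const workPackage = 100
  var steps = workPackage
  let t0 = getMonoTime()
  while work.len > 0:
    discard work.pop()              # stand-in for marking/sweeping one cell
    dec steps
    if steps == 0:
      steps = workPackage
      # leave a little slack, like the 50_000 ns headroom above
      if (getMonoTime() - t0).inMicroseconds >= maxPauseUs - 50:
        return false
  result = true
```
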
-# ---------------- dump heap ----------------
-
-template dumpHeapFile(gch: var GcHeap): File =
-  cast[File](gch.pDumpHeapFile)
-
-proc debugGraph(s: PCell) =
-  c_fprintf(gch.dumpHeapFile, "child %p\n", s)
-
-proc dumpRoot(gch: var GcHeap; s: PCell) =
-  if isAllocatedPtr(gch.region, s):
-    c_fprintf(gch.dumpHeapFile, "global_root %p\n", s)
-  else:
-    c_fprintf(gch.dumpHeapFile, "global_root_invalid %p\n", s)
-
-proc GC_dumpHeap*(file: File) =
-  ## Dumps the GCed heap's content to a file. Can be useful for
-  ## debugging. Produces an undocumented text file format that
-  ## can be translated into "dot" syntax via the "heapdump2dot" tool.
-  gch.pDumpHeapFile = file
-  var spaceIter: ObjectSpaceIter
-  when false:
-    var d = gch.decStack.d
-    for i in 0 .. gch.decStack.len-1:
-      if isAllocatedPtr(gch.region, d[i]):
-        c_fprintf(file, "onstack %p\n", d[i])
-      else:
-        c_fprintf(file, "onstack_invalid %p\n", d[i])
-  if gch.gcThreadId == 0:
-    for i in 0 .. globalMarkersLen-1: globalMarkers[i]()
-  for i in 0 .. threadLocalMarkersLen-1: threadLocalMarkers[i]()
-  while true:
-    let x = allObjectsAsProc(gch.region, addr spaceIter)
-    if spaceIter.state < 0: break
-    if isCell(x):
-      # cast to PCell is correct here:
-      var c = cast[PCell](x)
-      writeCell(file, "cell ", c)
-      forAllChildren(c, waDebug)
-      c_fprintf(file, "end\n")
-  gch.pDumpHeapFile = nil
-
-proc GC_dumpHeap() =
-  var f: File
-  if open(f, "heap.txt", fmWrite):
-    GC_dumpHeap(f)
-    f.close()
-  else:
-    c_fprintf(stdout, "cannot write heap.txt")
-
-# ---------------- cycle collector -------------------------------------------
-
-proc freeCyclicCell(gch: var GcHeap, c: PCell) =
-  gcAssert(isAllocatedPtr(gch.region, c), "freeCyclicCell: freed pointer?")
-  prepareDealloc(c)
-  gcTrace(c, csCycFreed)
-  when logGC: writeCell("cycle collector dealloc cell", c)
-  when reallyDealloc:
-    sysAssert(allocInv(gch.region), "free cyclic cell")
-    rawDealloc(gch.region, c)
-  else:
-    gcAssert(c.typ != nil, "freeCyclicCell")
-    zeroMem(c, sizeof(Cell))
-
-proc sweep(gch: var GcHeap): bool =
-  takeStartTime(100)
-  #echo "loop start"
-  let white = 1-gch.black
-  #c_fprintf(stdout, "black is %d\n", black)
-  while true:
-    let x = allObjectsAsProc(gch.region, addr gch.spaceIter)
-    if gch.spaceIter.state < 0: break
-    takeTime()
-    if isCell(x):
-      # cast to PCell is correct here:
-      var c = cast[PCell](x)
-      gcAssert c.color != rcGrey, "cell is still grey?"
-      if c.color == white: freeCyclicCell(gch, c)
-      # Since this is incremental, we MUST not set the object to 'white' here.
-      # We could set all the remaining objects to white after the 'sweep'
-      # completed but instead we flip the meaning of black/white to save one
-      # traversal over the heap!
-    checkTime()
-  # prepare for next iteration:
-  #echo "loop end"
-  gch.spaceIter = ObjectSpaceIter()
-  result = true
-
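
The sweep above never repaints surviving objects white; instead, once the sweep finishes, the meaning of the black/white values is swapped, which saves a full heap traversal per cycle. A toy model of that colour flip (not the collector's actual data structures):

```nim
var black = 0                 # which of the two colour values means "black"

proc whiteColor(): int = 1 - black

proc finishSweep() =
  ## Everything still coloured "black" survived; instead of repainting it
  ## white for the next cycle, just flip what black/white mean.
  black = 1 - black

finishSweep()
doAssert whiteColor() == 0    # the old "black" value is now "white"
```
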
-proc markRoot(gch: var GcHeap, c: PCell) {.inline.} =
-  if c.color == 1-gch.black:
-    c.setColor(rcGrey)
-    add(gch.greyStack, c)
-
-proc markIncremental(gch: var GcHeap): bool =
-  var L = addr(gch.greyStack.len)
-  takeStartTime(100)
-  while L[] > 0:
-    var c = gch.greyStack.d[0]
-    if not isAllocatedPtr(gch.region, c):
-      c_fprintf(stdout, "[GC] not allocated anymore: %p\n", c)
-      #GC_dumpHeap()
-      sysAssert(false, "wtf")
-
-    #sysAssert(isAllocatedPtr(gch.region, c), "markIncremental: isAllocatedPtr")
-    gch.greyStack.d[0] = gch.greyStack.d[L[] - 1]
-    dec(L[])
-    takeTime()
-    if c.color == rcGrey:
-      c.setColor(gch.black)
-      forAllChildren(c, waMarkGrey)
-    elif c.color == (1-gch.black):
-      gcAssert false, "wtf why are there white objects in the greystack?"
-    checkTime()
-  gcAssert gch.greyStack.len == 0, "markIncremental: greystack not empty "
-  result = true
-
-proc markGlobals(gch: var GcHeap) =
-  if gch.gcThreadId == 0:
-    for i in 0 .. globalMarkersLen-1: globalMarkers[i]()
-  for i in 0 .. threadLocalMarkersLen-1: threadLocalMarkers[i]()
-
-proc doOperation(p: pointer, op: WalkOp) =
-  if p == nil: return
-  var c: PCell = usrToCell(p)
-  gcAssert(c != nil, "doOperation: 1")
-  # the 'case' should be faster than function pointers because of easy
-  # prediction:
-  case op
-  of waZctDecRef:
-    #if not isAllocatedPtr(gch.region, c):
-    #  c_fprintf(stdout, "[GC] decref bug: %p", c)
-    gcAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
-    discard "use me for nimEscape?"
-  of waMarkGlobal:
-    template handleRoot =
-      if gch.dumpHeapFile.isNil:
-        markRoot(gch, c)
-      else:
-        dumpRoot(gch, c)
-    handleRoot()
-    discard allocInv(gch.region)
-  of waMarkGrey:
-    when false:
-      if not isAllocatedPtr(gch.region, c):
-        c_fprintf(stdout, "[GC] not allocated anymore: MarkGrey %p\n", c)
-        #GC_dumpHeap()
-        sysAssert(false, "wtf")
-    if c.color == 1-gch.black:
-      c.setColor(rcGrey)
-      add(gch.greyStack, c)
-  of waDebug: debugGraph(c)
-
-proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
-  doOperation(d, WalkOp(op))
-
-proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
-  # addresses on the stack are not necessarily cells, so convert them to cells:
-  sysAssert(allocInv(gch.region), "gcMark begin")
-  var cell = usrToCell(p)
-  var c = cast[ByteAddress](cell)
-  if c >% PageSize:
-    # fast check: does it look like a cell?
-    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
-    if objStart != nil:
-      # mark the cell:
-      markRoot(gch, objStart)
-  sysAssert(allocInv(gch.region), "gcMark end")
-
-proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-  forEachStackSlot(gch, gcMark)
-
-proc collectALittle(gch: var GcHeap): bool =
-  case gch.phase
-  of Phase.None:
-    if getOccupiedMem(gch.region) >= gch.cycleThreshold:
-      gch.phase = Phase.Marking
-      markGlobals(gch)
-      result = collectALittle(gch)
-      #when false: c_fprintf(stdout, "collectALittle: introduced bug E %ld\n", gch.phase)
-      #discard allocInv(gch.region)
-  of Phase.Marking:
-    when hasThreadSupport:
-      for c in gch.toDispose:
-        nimGCunref(c)
-    prepareForInteriorPointerChecking(gch.region)
-    markStackAndRegisters(gch)
-    inc(gch.stat.stackScans)
-    if markIncremental(gch):
-      gch.phase = Phase.Sweeping
-      gch.red = 1 - gch.red
-  of Phase.Sweeping:
-    gcAssert gch.greyStack.len == 0, "greystack not empty"
-    when hasThreadSupport:
-      for c in gch.toDispose:
-        nimGCunref(c)
-    if sweep(gch):
-      gch.phase = Phase.None
-      # flip black/white meanings:
-      gch.black = 1 - gch.black
-      gcAssert gch.red == 1 - gch.black, "red color is wrong"
-      inc(gch.stat.completedCollections)
-      result = true
-
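
collectALittle drives a three-state machine: None waits for heap pressure and seeds the mark by scanning globals, Marking scans the stack and drains the grey stack, and Sweeping reclaims white objects before flipping the colours. A compact sketch of that control flow with stand-in predicates (nothing here is the runtime's API):

```nim
type Phase = enum None, Marking, Sweeping

var phase = None

proc collectStep(heapPressure: bool;
                 markDone, sweepDone: proc (): bool): bool =
  ## Returns true when a full collection cycle completed in this step.
  case phase
  of None:
    if heapPressure:
      phase = Marking
      result = collectStep(heapPressure, markDone, sweepDone)
  of Marking:
    if markDone(): phase = Sweeping
  of Sweeping:
    if sweepDone():
      phase = None
      result = true
```
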
-proc collectCTBody(gch: var GcHeap) =
-  when withRealTime:
-    let t0 = getticks()
-  sysAssert(allocInv(gch.region), "collectCT: begin")
-
-  when not nimCoroutines:
-    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
-  #gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
-  if collectALittle(gch):
-    gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
-                              CycleIncrease)
-    gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
-  sysAssert(allocInv(gch.region), "collectCT: end")
-  when withRealTime:
-    let duration = getticks() - t0
-    gch.stat.maxPause = max(gch.stat.maxPause, duration)
-    when defined(reportMissedDeadlines):
-      if gch.maxPause > 0 and duration > gch.maxPause:
-        c_fprintf(stdout, "[GC] missed deadline: %ld\n", duration)
-
-when nimCoroutines:
-  proc currentStackSizes(): int =
-    for stack in items(gch.stack):
-      result = result + stack.stackSize()
-
-proc collectCT(gch: var GcHeap) =
-  # stackMarkCosts prevents some pathological behaviour: Stack marking
-  # becomes more expensive with large stacks and large stacks mean that
-  # cells with RC=0 are more likely to be kept alive by the stack.
-  when nimCoroutines:
-    let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold)
-  else:
-    let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
-  if (gch.greyStack.len >= stackMarkCosts or (cycleGC and
-      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
-      gch.recGcLock == 0:
-    collectCTBody(gch)
-
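
The trigger condition above scales with the stack: marking a large stack is expensive, so minor work is batched until the grey stack outgrows a stack-size-derived cost (or until occupied memory crosses `cycleThreshold`, or `alwaysGC` is set). A rough illustration of that cost computation; the constant is assumed for the example, not taken from the runtime:

```nim
const ZctThreshold = 500                  # assumed value for illustration
let stackBytes = 256 * 1024               # hypothetical current stack size
let stackMarkCosts = max(stackBytes div (16 * sizeof(int)), ZctThreshold)
echo stackMarkCosts                       # 2048 on a 64-bit target
```
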
-when withRealTime:
-  proc toNano(x: int): Nanos {.inline.} =
-    result = x * 1000
-
-  proc GC_setMaxPause*(MaxPauseInUs: int) =
-    gch.maxPause = MaxPauseInUs.toNano
-
-  proc GC_step(gch: var GcHeap, us: int, strongAdvice: bool) =
-    gch.maxPause = us.toNano
-    #if (getOccupiedMem(gch.region)>=gch.cycleThreshold) or
-    #    alwaysGC or strongAdvice:
-    collectCTBody(gch)
-
-  proc GC_step*(us: int, strongAdvice = false, stackSize = -1) {.noinline.} =
-    if stackSize >= 0:
-      var stackTop {.volatile.}: pointer
-      gch.getActiveStack().pos = addr(stackTop)
-
-      for stack in gch.stack.items():
-        stack.bottomSaved = stack.bottom
-        when stackIncreases:
-          stack.bottom = cast[pointer](
-            cast[ByteAddress](stack.pos) - sizeof(pointer) * 6 - stackSize)
-        else:
-          stack.bottom = cast[pointer](
-            cast[ByteAddress](stack.pos) + sizeof(pointer) * 6 + stackSize)
-
-    GC_step(gch, us, strongAdvice)
-
-    if stackSize >= 0:
-      for stack in gch.stack.items():
-        stack.bottom = stack.bottomSaved
-
-when not defined(useNimRtl):
-  proc GC_disable() =
-    inc(gch.recGcLock)
-  proc GC_enable() =
-    if gch.recGcLock > 0:
-      dec(gch.recGcLock)
-
-  proc GC_setStrategy(strategy: GC_Strategy) =
-    discard
-
-  proc GC_enableMarkAndSweep() = discard
-  proc GC_disableMarkAndSweep() = discard
-
-  proc GC_fullCollect() =
-    var oldThreshold = gch.cycleThreshold
-    gch.cycleThreshold = 0 # forces cycle collection
-    collectCT(gch)
-    gch.cycleThreshold = oldThreshold
-
-  proc GC_getStatistics(): string =
-    GC_disable()
-    result = "[GC] total memory: " & $(getTotalMem()) & "\n" &
-             "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" &
-             "[GC] stack scans: " & $gch.stat.stackScans & "\n" &
-             "[GC] stack cells: " & $gch.stat.maxStackCells & "\n" &
-             "[GC] completed collections: " & $gch.stat.completedCollections & "\n" &
-             "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
-             "[GC] grey stack capacity: " & $gch.greyStack.cap & "\n" &
-             "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) & "\n"
-    when nimCoroutines:
-      result.add "[GC] number of stacks: " & $gch.stack.len & "\n"
-      for stack in items(gch.stack):
-        result.add "[GC]   stack " & stack.bottom.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
-    else:
-      result.add "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
-    GC_enable()
-
-{.pop.}
diff --git a/lib/system/gc_common.nim b/lib/system/gc_common.nim
index f5f4f164f..eb0884560 100644
--- a/lib/system/gc_common.nim
+++ b/lib/system/gc_common.nim
@@ -222,9 +222,9 @@ proc stackSize(stack: ptr GcStack): int {.noinline.} =
 
   if pos != nil:
     when stackIncreases:
-      result = cast[ByteAddress](pos) -% cast[ByteAddress](stack.bottom)
+      result = cast[int](pos) -% cast[int](stack.bottom)
     else:
-      result = cast[ByteAddress](stack.bottom) -% cast[ByteAddress](pos)
+      result = cast[int](stack.bottom) -% cast[int](pos)
   else:
     result = 0
 
@@ -295,8 +295,8 @@ when not defined(useNimRtl):
       # the first init must be the one that defines the stack bottom:
       gch.stack.bottom = theStackBottom
     elif theStackBottom != gch.stack.bottom:
-      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[ByteAddress](gch.stack.bottom)
+      var a = cast[int](theStackBottom) # and not PageMask - PageSize*2
+      var b = cast[int](gch.stack.bottom)
       #c_fprintf(stdout, "old: %p new: %p;\n",gch.stack.bottom,theStackBottom)
       when stackIncreases:
         gch.stack.bottom = cast[pointer](min(a, b))
@@ -312,11 +312,11 @@ when not defined(useNimRtl):
 proc isOnStack(p: pointer): bool =
   var stackTop {.volatile, noinit.}: pointer
   stackTop = addr(stackTop)
-  var a = cast[ByteAddress](gch.getActiveStack().bottom)
-  var b = cast[ByteAddress](stackTop)
+  var a = cast[int](gch.getActiveStack().bottom)
+  var b = cast[int](stackTop)
   when not stackIncreases:
     swap(a, b)
-  var x = cast[ByteAddress](p)
+  var x = cast[int](p)
   result = a <=% x and x <=% b
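
The isOnStack change above only swaps ByteAddress for int; the test itself remains an unsigned range check (`<=%`) between the stack bottom and the address of a fresh local. The same idiom in isolation, with placeholder pointers (a sketch, not the runtime's code):

```nim
proc isInRange(p, rangeLo, rangeHi: pointer): bool =
  ## Unsigned comparison avoids surprises for addresses above int.high.
  var a = cast[int](rangeLo)
  var b = cast[int](rangeHi)
  if b <% a: swap(a, b)          # normalise for either stack growth direction
  let x = cast[int](p)
  result = a <=% x and x <=% b

var local: int
doAssert isInRange(addr local, addr local, addr local)
```
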
 
 when defined(sparc): # For SPARC architecture.
@@ -337,7 +337,7 @@ when defined(sparc): # For SPARC architecture.
     # Addresses decrease as the stack grows.
     while sp <= max:
       gcMark(gch, sp[])
-      sp = cast[PPointer](cast[ByteAddress](sp) +% sizeof(pointer))
+      sp = cast[PPointer](cast[int](sp) +% sizeof(pointer))
 
 elif defined(ELATE):
   {.error: "stack marking code is to be written for this architecture".}
@@ -354,8 +354,8 @@ elif stackIncreases:
 
   template forEachStackSlotAux(gch, gcMark: untyped) {.dirty.} =
     for stack in gch.stack.items():
-      var max = cast[ByteAddress](gch.stack.bottom)
-      var sp = cast[ByteAddress](addr(registers)) -% sizeof(pointer)
+      var max = cast[int](gch.stack.bottom)
+      var sp = cast[int](addr(registers)) -% sizeof(pointer)
       while sp >=% max:
         gcMark(gch, cast[PPointer](sp)[])
         sp = sp -% sizeof(pointer)
@@ -383,15 +383,14 @@ else:
     gch.getActiveStack().setPosition(addr(registers))
     if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
       for stack in gch.stack.items():
-        var max = cast[ByteAddress](stack.bottom)
-        var sp = cast[ByteAddress](addr(registers))
+        var max = cast[int](stack.bottom)
+        var sp = cast[int](addr(registers))
         when defined(amd64):
           if stack.isActiveStack():
             # words within the jmp_buf structure may not be properly aligned.
             let regEnd = sp +% sizeof(registers)
             while sp <% regEnd:
               gcMark(gch, cast[PPointer](sp)[])
-              gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
               sp = sp +% sizeof(pointer)
         # Make sure sp is word-aligned
         sp = sp and not (sizeof(pointer) - 1)
diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim
index f91b37b94..c885a6893 100644
--- a/lib/system/gc_ms.nim
+++ b/lib/system/gc_ms.nim
@@ -27,7 +27,7 @@ when defined(memProfiler):
   proc nimProfile(requestedSize: int)
 
 when hasThreadSupport:
-  import sharedlist
+  import std/sharedlist
 
 type
   WalkOp = enum
@@ -94,11 +94,11 @@ template gcAssert(cond: bool, msg: string) =
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
+  result = cast[pointer](cast[int](cell)+%ByteAddress(sizeof(Cell)))
 
 proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
+  result = cast[PCell](cast[int](usr)-%ByteAddress(sizeof(Cell)))
 
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
   # used for code generation concerning debugging
@@ -217,7 +217,7 @@ proc initGC() =
     gcAssert(gch.gcThreadId >= 0, "invalid computed thread ID")
 
 proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
   of nkList:
@@ -229,7 +229,7 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
   of nkNone: sysAssert(false, "forAllSlotsAux")
 
 proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
-  var d = cast[ByteAddress](dest)
+  var d = cast[int](dest)
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
     case mt.kind
@@ -255,7 +255,7 @@ proc forAllChildren(cell: PCell, op: WalkOp) =
       forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
     of tySequence:
       when not defined(nimSeqsV2):
-        var d = cast[ByteAddress](cellToUsr(cell))
+        var d = cast[int](cellToUsr(cell))
         var s = cast[PGenericSeq](d)
         if s != nil:
           for i in 0..s.len-1:
@@ -268,7 +268,7 @@ proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch, size + sizeof(Cell))
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
-  gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  gcAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
   when leakDetector and not hasThreadSupport:
@@ -336,9 +336,9 @@ when not defined(nimSeqsV2):
 
     var oldsize = align(GenericSeqSize, elemAlign) + cast[PGenericSeq](old).len*elemSize
     copyMem(res, ol, oldsize + sizeof(Cell))
-    zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
+    zeroMem(cast[pointer](cast[int](res)+% oldsize +% sizeof(Cell)),
             newsize-oldsize)
-    sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
+    sysAssert((cast[int](res) and (MemAlign-1)) == 0, "growObj: 3")
     when withBitvectors: incl(gch.allocated, res)
     when useCellIds:
       inc gch.idGenerator
@@ -446,7 +446,7 @@ proc markGlobals(gch: var GcHeap) =
 
 proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # addresses on the stack are not necessarily cells, so convert them to cells:
-  var c = cast[ByteAddress](p)
+  var c = cast[int](p)
   if c >% PageSize:
     # fast check: does it look like a cell?
     var objStart = cast[PCell](interiorAllocatedPtr(gch.region, p))
diff --git a/lib/system/hti.nim b/lib/system/hti.nim
index 9acaae88b..a26aff982 100644
--- a/lib/system/hti.nim
+++ b/lib/system/hti.nim
@@ -59,7 +59,7 @@ type
     tyOwned, tyUnused1, tyUnused2,
     tyVarargsHidden,
     tyUncheckedArray,
-    tyProxyHidden,
+    tyErrorHidden,
     tyBuiltInTypeClassHidden,
     tyUserTypeClassHidden,
     tyUserTypeClassInstHidden,
diff --git a/lib/system/inclrtl.nim b/lib/system/inclrtl.nim
index 42c85ad26..3bf0b9893 100644
--- a/lib/system/inclrtl.nim
+++ b/lib/system/inclrtl.nim
@@ -47,5 +47,4 @@ else:
 
 {.pragma: benign, gcsafe.}
 
-when defined(nimHasSinkInference):
-  {.push sinkInference: on.}
+{.push sinkInference: on.}
diff --git a/lib/system/indices.nim b/lib/system/indices.nim
index fd6770e23..f2bad2528 100644
--- a/lib/system/indices.nim
+++ b/lib/system/indices.nim
@@ -10,7 +10,7 @@ template `^`*(x: int): BackwardsIndex = BackwardsIndex(x)
   ## Builtin `roof`:idx: operator that can be used for convenient array access.
   ## `a[^x]` is a shortcut for `a[a.len-x]`.
   ##
-  ##   ```
+  ##   ```nim
   ##   let
   ##     a = [1, 3, 5, 7, 9]
   ##     b = "abcdefgh"
@@ -46,7 +46,7 @@ template `..^`*(a, b: untyped): untyped =
 
 template `..<`*(a, b: untyped): untyped =
   ## A shortcut for `a .. pred(b)`.
-  ##   ```
+  ##   ```nim
   ##   for i in 5 ..< 9:
   ##     echo i # => 5; 6; 7; 8
   ##   ```
@@ -76,7 +76,7 @@ template spliceImpl(s, a, L, b: typed): untyped =
 proc `[]`*[T, U: Ordinal](s: string, x: HSlice[T, U]): string {.inline, systemRaisesDefect.} =
   ## Slice operation for strings.
   ## Returns the inclusive range `[s[x.a], s[x.b]]`:
-  ##   ```
+  ##   ```nim
   ##   var s = "abcdef"
   ##   assert s[1..3] == "bcd"
   ##   ```
@@ -106,10 +106,13 @@ proc `[]=`*[T, U: Ordinal](s: var string, x: HSlice[T, U], b: string) {.systemRa
 proc `[]`*[Idx, T; U, V: Ordinal](a: array[Idx, T], x: HSlice[U, V]): seq[T] {.systemRaisesDefect.} =
   ## Slice operation for arrays.
   ## Returns the inclusive range `[a[x.a], a[x.b]]`:
-  ##   ```
+  ##   ```nim
   ##   var a = [1, 2, 3, 4]
   ##   assert a[0..2] == @[1, 2, 3]
   ##   ```
+  ##
+  ## See also:
+  ## * `toOpenArray(array[I, T];I,I) <#toOpenArray,array[I,T],I,I>`_
   let xa = a ^^ x.a
   let L = (a ^^ x.b) - xa + 1
   result = newSeq[T](L)
@@ -117,7 +120,7 @@ proc `[]`*[Idx, T; U, V: Ordinal](a: array[Idx, T], x: HSlice[U, V]): seq[T] {.s
 
 proc `[]=`*[Idx, T; U, V: Ordinal](a: var array[Idx, T], x: HSlice[U, V], b: openArray[T]) {.systemRaisesDefect.} =
   ## Slice assignment for arrays.
-  ##   ```
+  ##   ```nim
   ##   var a = [10, 20, 30, 40, 50]
   ##   a[1..2] = @[99, 88]
   ##   assert a == [10, 99, 88, 40, 50]
@@ -132,10 +135,13 @@ proc `[]=`*[Idx, T; U, V: Ordinal](a: var array[Idx, T], x: HSlice[U, V], b: ope
 proc `[]`*[T; U, V: Ordinal](s: openArray[T], x: HSlice[U, V]): seq[T] {.systemRaisesDefect.} =
   ## Slice operation for sequences.
   ## Returns the inclusive range `[s[x.a], s[x.b]]`:
-  ##   ```
+  ##   ```nim
   ##   var s = @[1, 2, 3, 4]
   ##   assert s[0..2] == @[1, 2, 3]
   ##   ```
+  ##
+  ## See also:
+  ## * `toOpenArray(openArray[T];int,int) <#toOpenArray,openArray[T],int,int>`_
   let a = s ^^ x.a
   let L = (s ^^ x.b) - a + 1
   newSeq(result, L)
diff --git a/lib/system/iterators.nim b/lib/system/iterators.nim
index b16a605b7..125bee98f 100644
--- a/lib/system/iterators.nim
+++ b/lib/system/iterators.nim
@@ -3,17 +3,22 @@
 when defined(nimPreviewSlimSystem):
   import std/assertions
 
-when defined(nimHasLentIterators) and not defined(nimNoLentIterators):
+when not defined(nimNoLentIterators):
   template lent2(T): untyped = lent T
 else:
   template lent2(T): untyped = T
 
+template unCheckedInc(x) =
+  {.push overflowChecks: off.}
+  inc(x)
+  {.pop.}
+
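
unCheckedInc simply wraps `inc` in an `overflowChecks: off` push/pop: every loop that uses it has already bounded the counter by the length, so the increment cannot overflow and the check is pure overhead. The same pattern applied to ordinary user code (names are illustrative):

```nim
template uncheckedInc(x) =
  {.push overflowChecks: off.}
  inc(x)
  {.pop.}

proc countNonZero(a: openArray[int]): int =
  var i = 0
  while i < a.len:
    if a[i] != 0: inc result
    uncheckedInc(i)              # `i < a.len` already rules out overflow

doAssert countNonZero([0, 3, 0, 7]) == 2
```
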
 iterator items*[T: not char](a: openArray[T]): lent2 T {.inline.} =
   ## Iterates over each item of `a`.
   var i = 0
   while i < len(a):
     yield a[i]
-    inc(i)
+    unCheckedInc(i)
 
 iterator items*[T: char](a: openArray[T]): T {.inline.} =
   ## Iterates over each item of `a`.
@@ -23,14 +28,14 @@ iterator items*[T: char](a: openArray[T]): T {.inline.} =
   var i = 0
   while i < len(a):
     yield a[i]
-    inc(i)
+    unCheckedInc(i)
 
 iterator mitems*[T](a: var openArray[T]): var T {.inline.} =
   ## Iterates over each item of `a` so that you can modify the yielded value.
   var i = 0
   while i < len(a):
     yield a[i]
-    inc(i)
+    unCheckedInc(i)
 
 iterator items*[IX, T](a: array[IX, T]): T {.inline.} =
   ## Iterates over each item of `a`.
@@ -39,7 +44,7 @@ iterator items*[IX, T](a: array[IX, T]): T {.inline.} =
     while true:
       yield a[i]
       if i >= high(IX): break
-      inc(i)
+      unCheckedInc(i)
 
 iterator mitems*[IX, T](a: var array[IX, T]): var T {.inline.} =
   ## Iterates over each item of `a` so that you can modify the yielded value.
@@ -48,7 +53,7 @@ iterator mitems*[IX, T](a: var array[IX, T]): var T {.inline.} =
     while true:
       yield a[i]
       if i >= high(IX): break
-      inc(i)
+      unCheckedInc(i)
 
 iterator items*[T](a: set[T]): T {.inline.} =
   ## Iterates over each element of `a`. `items` iterates only over the
@@ -56,8 +61,11 @@ iterator items*[T](a: set[T]): T {.inline.} =
   ## able to hold).
   var i = low(T).int
   while i <= high(T).int:
-    if T(i) in a: yield T(i)
-    inc(i)
+    when T is enum and not defined(js):
+      if cast[T](i) in a: yield cast[T](i)
+    else:
+      if T(i) in a: yield T(i)
+    unCheckedInc(i)
 
 iterator items*(a: cstring): char {.inline.} =
   ## Iterates over each item of `a`.
@@ -76,7 +84,7 @@ iterator items*(a: cstring): char {.inline.} =
     let n = len(a)
     while i < n:
       yield a[i]
-      inc(i)
+      unCheckedInc(i)
   when defined(js): impl()
   else:
     when nimvm:
@@ -86,7 +94,7 @@ iterator items*(a: cstring): char {.inline.} =
       var i = 0
       while a[i] != '\0':
         yield a[i]
-        inc(i)
+        unCheckedInc(i)
 
 iterator mitems*(a: var cstring): var char {.inline.} =
   ## Iterates over each item of `a` so that you can modify the yielded value.
@@ -109,7 +117,7 @@ iterator mitems*(a: var cstring): var char {.inline.} =
     let n = len(a)
     while i < n:
       yield a[i]
-      inc(i)
+      unCheckedInc(i)
   when defined(js): impl()
   else:
     when nimvm: impl()
@@ -117,7 +125,7 @@ iterator mitems*(a: var cstring): var char {.inline.} =
       var i = 0
       while a[i] != '\0':
         yield a[i]
-        inc(i)
+        unCheckedInc(i)
 
 iterator items*[T: enum and Ordinal](E: typedesc[T]): T =
   ## Iterates over the values of `E`.
@@ -140,7 +148,7 @@ iterator pairs*[T](a: openArray[T]): tuple[key: int, val: T] {.inline.} =
   var i = 0
   while i < len(a):
     yield (i, a[i])
-    inc(i)
+    unCheckedInc(i)
 
 iterator mpairs*[T](a: var openArray[T]): tuple[key: int, val: var T]{.inline.} =
   ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
@@ -148,7 +156,7 @@ iterator mpairs*[T](a: var openArray[T]): tuple[key: int, val: var T]{.inline.}
   var i = 0
   while i < len(a):
     yield (i, a[i])
-    inc(i)
+    unCheckedInc(i)
 
 iterator pairs*[IX, T](a: array[IX, T]): tuple[key: IX, val: T] {.inline.} =
   ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
@@ -157,7 +165,7 @@ iterator pairs*[IX, T](a: array[IX, T]): tuple[key: IX, val: T] {.inline.} =
     while true:
       yield (i, a[i])
       if i >= high(IX): break
-      inc(i)
+      unCheckedInc(i)
 
 iterator mpairs*[IX, T](a: var array[IX, T]): tuple[key: IX, val: var T] {.inline.} =
   ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
@@ -167,7 +175,7 @@ iterator mpairs*[IX, T](a: var array[IX, T]): tuple[key: IX, val: var T] {.inlin
     while true:
       yield (i, a[i])
       if i >= high(IX): break
-      inc(i)
+      unCheckedInc(i)
 
 iterator pairs*[T](a: seq[T]): tuple[key: int, val: T] {.inline.} =
   ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
@@ -175,7 +183,7 @@ iterator pairs*[T](a: seq[T]): tuple[key: int, val: T] {.inline.} =
   let L = len(a)
   while i < L:
     yield (i, a[i])
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the seq changed while iterating over it")
 
 iterator mpairs*[T](a: var seq[T]): tuple[key: int, val: var T] {.inline.} =
@@ -185,7 +193,7 @@ iterator mpairs*[T](a: var seq[T]): tuple[key: int, val: var T] {.inline.} =
   let L = len(a)
   while i < L:
     yield (i, a[i])
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the seq changed while iterating over it")
 
 iterator pairs*(a: string): tuple[key: int, val: char] {.inline.} =
@@ -194,7 +202,7 @@ iterator pairs*(a: string): tuple[key: int, val: char] {.inline.} =
   let L = len(a)
   while i < L:
     yield (i, a[i])
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the string changed while iterating over it")
 
 iterator mpairs*(a: var string): tuple[key: int, val: var char] {.inline.} =
@@ -204,7 +212,7 @@ iterator mpairs*(a: var string): tuple[key: int, val: var char] {.inline.} =
   let L = len(a)
   while i < L:
     yield (i, a[i])
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the string changed while iterating over it")
 
 iterator pairs*(a: cstring): tuple[key: int, val: char] {.inline.} =
@@ -214,12 +222,12 @@ iterator pairs*(a: cstring): tuple[key: int, val: char] {.inline.} =
     var L = len(a)
     while i < L:
       yield (i, a[i])
-      inc(i)
+      unCheckedInc(i)
   else:
     var i = 0
     while a[i] != '\0':
       yield (i, a[i])
-      inc(i)
+      unCheckedInc(i)
 
 iterator mpairs*(a: var cstring): tuple[key: int, val: var char] {.inline.} =
   ## Iterates over each item of `a`. Yields `(index, a[index])` pairs.
@@ -229,12 +237,12 @@ iterator mpairs*(a: var cstring): tuple[key: int, val: var char] {.inline.} =
     var L = len(a)
     while i < L:
       yield (i, a[i])
-      inc(i)
+      unCheckedInc(i)
   else:
     var i = 0
     while a[i] != '\0':
       yield (i, a[i])
-      inc(i)
+      unCheckedInc(i)
 
 iterator items*[T](a: seq[T]): lent2 T {.inline.} =
   ## Iterates over each item of `a`.
@@ -242,7 +250,7 @@ iterator items*[T](a: seq[T]): lent2 T {.inline.} =
   let L = len(a)
   while i < L:
     yield a[i]
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the seq changed while iterating over it")
 
 iterator mitems*[T](a: var seq[T]): var T {.inline.} =
@@ -251,7 +259,7 @@ iterator mitems*[T](a: var seq[T]): var T {.inline.} =
   let L = len(a)
   while i < L:
     yield a[i]
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the seq changed while iterating over it")
 
 iterator items*(a: string): char {.inline.} =
@@ -260,7 +268,7 @@ iterator items*(a: string): char {.inline.} =
   let L = len(a)
   while i < L:
     yield a[i]
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the string changed while iterating over it")
 
 iterator mitems*(a: var string): var char {.inline.} =
@@ -269,7 +277,7 @@ iterator mitems*(a: var string): var char {.inline.} =
   let L = len(a)
   while i < L:
     yield a[i]
-    inc(i)
+    unCheckedInc(i)
     assert(len(a) == L, "the length of the string changed while iterating over it")
 
 
diff --git a/lib/system/iterators_1.nim b/lib/system/iterators_1.nim
index be61fd62c..d00e3f823 100644
--- a/lib/system/iterators_1.nim
+++ b/lib/system/iterators_1.nim
@@ -16,7 +16,7 @@ iterator countdown*[T](a, b: T, step: Positive = 1): T {.inline.} =
     let x = collect(newSeq):
       for i in countdown(7, 3):
         i
-    
+
     assert x == @[7, 6, 5, 4, 3]
 
     let y = collect(newseq):
@@ -32,7 +32,10 @@ iterator countdown*[T](a, b: T, step: Positive = 1): T {.inline.} =
   elif T is IntLikeForCount and T is Ordinal:
     var res = int(a)
     while res >= int(b):
-      yield T(res)
+      when defined(nimHasCastExtendedVm):
+        yield cast[T](res)
+      else:
+        yield T(res)
       dec(res, step)
   else:
     var res = a
@@ -64,7 +67,10 @@ iterator countup*[T](a, b: T, step: Positive = 1): T {.inline.} =
   when T is IntLikeForCount and T is Ordinal:
     var res = int(a)
     while res <= int(b):
-      yield T(res)
+      when defined(nimHasCastExtendedVm):
+        yield cast[T](res)
+      else:
+        yield T(res)
       inc(res, step)
   else:
     var res = a
@@ -89,7 +95,10 @@ iterator `..`*[T](a, b: T): T {.inline.} =
   when T is IntLikeForCount and T is Ordinal:
     var res = int(a)
     while res <= int(b):
-      yield T(res)
+      when defined(nimHasCastExtendedVm):
+        yield cast[T](res)
+      else:
+        yield T(res)
       inc(res)
   else:
     var res = a
diff --git a/lib/system/jssys.nim b/lib/system/jssys.nim
index 9dfa80877..5599240fd 100644
--- a/lib/system/jssys.nim
+++ b/lib/system/jssys.nim
@@ -49,7 +49,7 @@ proc nimCharToStr(x: char): string {.compilerproc.} =
   result[0] = x
 
 proc isNimException(): bool {.asmNoStackFrame.} =
-  asm "return `lastJSError` && `lastJSError`.m_type;"
+  {.emit: "return `lastJSError` && `lastJSError`.m_type;".}
 
 proc getCurrentException*(): ref Exception {.compilerRtl, benign.} =
   if isNimException(): result = cast[ref Exception](lastJSError)
@@ -72,6 +72,10 @@ proc getCurrentExceptionMsg*(): string =
 proc setCurrentException*(exc: ref Exception) =
   lastJSError = cast[PJSError](exc)
 
+proc closureIterSetupExc(e: ref Exception) {.compilerproc, inline.} =
+  ## Used to set up exception handling for closure iterators
+  setCurrentException(e)
+
 proc auxWriteStackTrace(f: PCallFrame): string =
   type
     TempFrame = tuple[procname: cstring, line: int, filename: cstring]
@@ -148,7 +152,7 @@ proc raiseException(e: ref Exception, ename: cstring) {.
     unhandledException(e)
   when NimStackTrace:
     e.trace = rawWriteStackTrace()
-  asm "throw `e`;"
+  {.emit: "throw `e`;".}
 
 proc reraiseException() {.compilerproc, asmNoStackFrame.} =
   if lastJSError == nil:
@@ -158,7 +162,7 @@ proc reraiseException() {.compilerproc, asmNoStackFrame.} =
       if isNimException():
         unhandledException(cast[ref Exception](lastJSError))
 
-    asm "throw lastJSError;"
+    {.emit: "throw lastJSError;".}
 
 proc raiseOverflow {.exportc: "raiseOverflow", noreturn, compilerproc.} =
   raise newException(OverflowDefect, "over- or underflow")
@@ -176,7 +180,7 @@ proc raiseFieldError2(f: string, discVal: string) {.compilerproc, noreturn.} =
   raise newException(FieldDefect, formatFieldDefect(f, discVal))
 
 proc setConstr() {.varargs, asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     var result = {};
     for (var i = 0; i < arguments.length; ++i) {
       var x = arguments[i];
@@ -189,7 +193,7 @@ proc setConstr() {.varargs, asmNoStackFrame, compilerproc.} =
       }
     }
     return result;
-  """
+  """.}
 
 proc makeNimstrLit(c: cstring): string {.asmNoStackFrame, compilerproc.} =
   {.emit: """
@@ -277,62 +281,64 @@ proc toJSStr(s: string): cstring {.compilerproc.} =
   result = join(res)
 
 proc mnewString(len: int): string {.asmNoStackFrame, compilerproc.} =
-  asm """
-    return new Array(`len`);
-  """
+  {.emit: """
+    var result = new Array(`len`);
+    for (var i = 0; i < `len`; i++) {result[i] = 0;}
+    return result;
+  """.}
 
 proc SetCard(a: int): int {.compilerproc, asmNoStackFrame.} =
   # argument type is a fake
-  asm """
+  {.emit: """
     var result = 0;
     for (var elem in `a`) { ++result; }
     return result;
-  """
+  """.}
 
 proc SetEq(a, b: int): bool {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     for (var elem in `a`) { if (!`b`[elem]) return false; }
     for (var elem in `b`) { if (!`a`[elem]) return false; }
     return true;
-  """
+  """.}
 
 proc SetLe(a, b: int): bool {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     for (var elem in `a`) { if (!`b`[elem]) return false; }
     return true;
-  """
+  """.}
 
 proc SetLt(a, b: int): bool {.compilerproc.} =
   result = SetLe(a, b) and not SetEq(a, b)
 
 proc SetMul(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     var result = {};
     for (var elem in `a`) {
       if (`b`[elem]) { result[elem] = true; }
     }
     return result;
-  """
+  """.}
 
 proc SetPlus(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     var result = {};
     for (var elem in `a`) { result[elem] = true; }
     for (var elem in `b`) { result[elem] = true; }
     return result;
-  """
+  """.}
 
 proc SetMinus(a, b: int): int {.compilerproc, asmNoStackFrame.} =
-  asm """
+  {.emit: """
     var result = {};
     for (var elem in `a`) {
       if (!`b`[elem]) { result[elem] = true; }
     }
     return result;
-  """
+  """.}
 
 proc cmpStrings(a, b: string): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     if (`a` == `b`) return 0;
     if (!`a`) return -1;
     if (!`b`) return 1;
@@ -341,7 +347,7 @@ proc cmpStrings(a, b: string): int {.asmNoStackFrame, compilerproc.} =
       if (result != 0) return result;
     }
     return `a`.length - `b`.length;
-  """
+  """.}
 
 proc cmp(x, y: string): int =
   when nimvm:
@@ -352,7 +358,7 @@ proc cmp(x, y: string): int =
     result = cmpStrings(x, y)
 
 proc eqStrings(a, b: string): bool {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     if (`a` == `b`) return true;
     if (`a` === null && `b`.length == 0) return true;
     if (`b` === null && `a`.length == 0) return true;
@@ -362,29 +368,29 @@ proc eqStrings(a, b: string): bool {.asmNoStackFrame, compilerproc.} =
     for (var i = 0; i < alen; ++i)
       if (`a`[i] != `b`[i]) return false;
     return true;
-  """
+  """.}
 
 when defined(kwin):
   proc rawEcho {.compilerproc, asmNoStackFrame.} =
-    asm """
+    {.emit: """
       var buf = "";
       for (var i = 0; i < arguments.length; ++i) {
         buf += `toJSStr`(arguments[i]);
       }
       print(buf);
-    """
+    """.}
 
 elif not defined(nimOldEcho):
   proc ewriteln(x: cstring) = log(x)
 
   proc rawEcho {.compilerproc, asmNoStackFrame.} =
-    asm """
+    {.emit: """
       var buf = "";
       for (var i = 0; i < arguments.length; ++i) {
         buf += `toJSStr`(arguments[i]);
       }
       console.log(buf);
-    """
+    """.}
 
 else:
   proc ewriteln(x: cstring) =
@@ -412,84 +418,84 @@ else:
 
 # Arithmetic:
 proc checkOverflowInt(a: int) {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     if (`a` > 2147483647 || `a` < -2147483648) `raiseOverflow`();
-  """
+  """.}
 
 proc addInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     var result = `a` + `b`;
     `checkOverflowInt`(result);
     return result;
-  """
+  """.}
 
 proc subInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     var result = `a` - `b`;
     `checkOverflowInt`(result);
     return result;
-  """
+  """.}
 
 proc mulInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     var result = `a` * `b`;
     `checkOverflowInt`(result);
     return result;
-  """
+  """.}
 
 proc divInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     if (`b` == 0) `raiseDivByZero`();
     if (`b` == -1 && `a` == 2147483647) `raiseOverflow`();
     return Math.trunc(`a` / `b`);
-  """
+  """.}
 
 proc modInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+  {.emit: """
     if (`b` == 0) `raiseDivByZero`();
     if (`b` == -1 && `a` == 2147483647) `raiseOverflow`();
     return Math.trunc(`a` % `b`);
-  """
+  """.}
 
-proc checkOverflowInt64(a: int) {.asmNoStackFrame, compilerproc.} =
-  asm """
-    if (`a` > 9223372036854775807 || `a` < -9223372036854775808) `raiseOverflow`();
-  """
+proc checkOverflowInt64(a: int64) {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    if (`a` > 9223372036854775807n || `a` < -9223372036854775808n) `raiseOverflow`();
+  """.}
 
-proc addInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+proc addInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` + `b`;
     `checkOverflowInt64`(result);
     return result;
-  """
+  """.}
 
-proc subInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+proc subInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` - `b`;
     `checkOverflowInt64`(result);
     return result;
-  """
+  """.}
 
-proc mulInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
+proc mulInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` * `b`;
     `checkOverflowInt64`(result);
     return result;
-  """
+  """.}
 
-proc divInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
-    if (`b` == 0) `raiseDivByZero`();
-    if (`b` == -1 && `a` == 9223372036854775807) `raiseOverflow`();
-    return Math.trunc(`a` / `b`);
-  """
+proc divInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    if (`b` == 0n) `raiseDivByZero`();
+    if (`b` == -1n && `a` == 9223372036854775807n) `raiseOverflow`();
+    return `a` / `b`;
+  """.}
 
-proc modInt64(a, b: int): int {.asmNoStackFrame, compilerproc.} =
-  asm """
-    if (`b` == 0) `raiseDivByZero`();
-    if (`b` == -1 && `a` == 9223372036854775807) `raiseOverflow`();
-    return Math.trunc(`a` % `b`);
-  """
+proc modInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    if (`b` == 0n) `raiseDivByZero`();
+    if (`b` == -1n && `a` == 9223372036854775807n) `raiseOverflow`();
+    return `a` % `b`;
+  """.}
 
 proc negInt(a: int): int {.compilerproc.} =
   result = a*(-1)
@@ -503,32 +509,10 @@ proc absInt(a: int): int {.compilerproc.} =
 proc absInt64(a: int64): int64 {.compilerproc.} =
   result = if a < 0: a*(-1) else: a
 
-when not defined(nimNoZeroExtendMagic):
-  proc ze*(a: int): int {.compilerproc.} =
-    result = a
-
-  proc ze64*(a: int64): int64 {.compilerproc.} =
-    result = a
-
-  proc toU8*(a: int): int8 {.asmNoStackFrame, compilerproc.} =
-    asm """
-      return `a`;
-    """
-
-  proc toU16*(a: int): int16 {.asmNoStackFrame, compilerproc.} =
-    asm """
-      return `a`;
-    """
-
-  proc toU32*(a: int64): int32 {.asmNoStackFrame, compilerproc.} =
-    asm """
-      return `a`;
-    """
-
 proc nimMin(a, b: int): int {.compilerproc.} = return if a <= b: a else: b
 proc nimMax(a, b: int): int {.compilerproc.} = return if a >= b: a else: b
 
-proc chckNilDisp(p: pointer) {.compilerproc.} =
+proc chckNilDisp(p: JSRef) {.compilerproc.} =
   if p == nil:
     sysFatal(NilAccessDefect, "cannot dispatch; dispatcher is nil")
 
@@ -546,22 +530,22 @@ proc nimCopyAux(dest, src: JSRef, n: ptr TNimNode) {.compilerproc.} =
   case n.kind
   of nkNone: sysAssert(false, "nimCopyAux")
   of nkSlot:
-    asm """
+    {.emit: """
       `dest`[`n`.offset] = nimCopy(`dest`[`n`.offset], `src`[`n`.offset], `n`.typ);
-    """
+    """.}
   of nkList:
-    asm """
+    {.emit: """
     for (var i = 0; i < `n`.sons.length; i++) {
       nimCopyAux(`dest`, `src`, `n`.sons[i]);
     }
-    """
+    """.}
   of nkCase:
-    asm """
+    {.emit: """
       `dest`[`n`.offset] = nimCopy(`dest`[`n`.offset], `src`[`n`.offset], `n`.typ);
       for (var i = 0; i < `n`.sons.length; ++i) {
         nimCopyAux(`dest`, `src`, `n`.sons[i][1]);
       }
-    """
+    """.}
 
 proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
   case ti.kind
@@ -569,9 +553,9 @@ proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
     if not isFatPointer(ti):
       result = src
     else:
-      asm "`result` = [`src`[0], `src`[1]];"
+      {.emit: "`result` = [`src`[0], `src`[1]];".}
   of tySet:
-    asm """
+    {.emit: """
       if (`dest` === null || `dest` === undefined) {
         `dest` = {};
       }
@@ -580,18 +564,18 @@ proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
       }
       for (var key in `src`) { `dest`[key] = `src`[key]; }
       `result` = `dest`;
-    """
+    """.}
   of tyTuple, tyObject:
     if ti.base != nil: result = nimCopy(dest, src, ti.base)
     elif ti.kind == tyObject:
-      asm "`result` = (`dest` === null || `dest` === undefined) ? {m_type: `ti`} : `dest`;"
+      {.emit: "`result` = (`dest` === null || `dest` === undefined) ? {m_type: `ti`} : `dest`;".}
     else:
-      asm "`result` = (`dest` === null || `dest` === undefined) ? {} : `dest`;"
+      {.emit: "`result` = (`dest` === null || `dest` === undefined) ? {} : `dest`;".}
     nimCopyAux(result, src, ti.node)
   of tyArrayConstr, tyArray:
     # In order to prevent a type change (TypedArray -> Array) and to have better copying performance,
      # array constructors are considered separately
-    asm """
+    {.emit: """
       if(ArrayBuffer.isView(`src`)) { 
         if(`dest` === null || `dest` === undefined || `dest`.length != `src`.length) {
           `dest` = new `src`.constructor(`src`);
@@ -613,9 +597,9 @@ proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
           }
         }
       }
-    """
+    """.}
   of tySequence, tyOpenArray:
-    asm """
+    {.emit: """
       if (`src` === null) {
         `result` = null;
       }
@@ -628,55 +612,24 @@ proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
           `result`[i] = nimCopy(`result`[i], `src`[i], `ti`.base);
         }
       }
-    """
+    """.}
   of tyString:
-    asm """
+    {.emit: """
       if (`src` !== null) {
         `result` = `src`.slice(0);
       }
-    """
+    """.}
   else:
     result = src
 
-proc genericReset(x: JSRef, ti: PNimType): JSRef {.compilerproc.} =
-  asm "`result` = null;"
-  case ti.kind
-  of tyPtr, tyRef, tyVar, tyNil:
-    if isFatPointer(ti):
-      asm """
-        `result` = [null, 0];
-      """
-  of tySet:
-    asm """
-      `result` = {};
-    """
-  of tyTuple, tyObject:
-    if ti.kind == tyObject:
-      asm "`result` = {m_type: `ti`};"
-    else:
-      asm "`result` = {};"
-  of tySequence, tyOpenArray, tyString:
-    asm """
-      `result` = [];
-    """
-  of tyArrayConstr, tyArray:
-    asm """
-      `result` = new Array(`x`.length);
-      for (var i = 0; i < `x`.length; ++i) {
-        `result`[i] = genericReset(`x`[i], `ti`.base);
-      }
-    """
-  else:
-    discard
-
 proc arrayConstr(len: int, value: JSRef, typ: PNimType): JSRef {.
                 asmNoStackFrame, compilerproc.} =
   # types are fake
-  asm """
+  {.emit: """
     var result = new Array(`len`);
     for (var i = 0; i < `len`; ++i) result[i] = nimCopy(null, `value`, `typ`);
     return result;
-  """
+  """.}
 
 proc chckIndx(i, a, b: int): int {.compilerproc.} =
   if i >= a and i <= b: return i
@@ -705,7 +658,7 @@ proc isObj(obj, subclass: PNimType): bool {.compilerproc.} =
   return true
 
 proc addChar(x: string, c: char) {.compilerproc, asmNoStackFrame.} =
-  asm "`x`.push(`c`);"
+  {.emit: "`x`.push(`c`);".}
 
 {.pop.}
 
@@ -732,9 +685,9 @@ proc parseFloatNative(a: openarray[char]): float =
 
   let cstr = cstring str
 
-  asm """
+  {.emit: """
   `result` = Number(`cstr`);
-  """
+  """.}
 
 proc nimParseBiggestFloat(s: openarray[char], number: var BiggestFloat): int {.compilerproc.} =
   var sign: bool
@@ -801,3 +754,15 @@ if (!Math.trunc) {
   };
 }
 """.}
+
+proc cmpClosures(a, b: JSRef): bool {.compilerproc, asmNoStackFrame.} =
+  # Both `a` and `b` need to be closures
+  {.emit: """
+    if (`a` !== null && `a`.ClP_0 !== undefined &&
+        `b` !== null && `b`.ClP_0 !== undefined) {
+      return `a`.ClP_0 == `b`.ClP_0 && `a`.ClE_0 == `b`.ClE_0;
+    } else {
+      return `a` == `b`;
+    }
+  """
+  .}
diff --git a/lib/system/memory.nim b/lib/system/memory.nim
index ebda60d8d..156773c48 100644
--- a/lib/system/memory.nim
+++ b/lib/system/memory.nim
@@ -43,6 +43,7 @@ proc nimCmpMem*(a, b: pointer, size: Natural): cint {.compilerproc, nonReloadabl
       inc i
 
 proc nimCStrLen*(a: cstring): int {.compilerproc, nonReloadable, inline.} =
+  if a.isNil: return 0
   when useLibC:
     cast[int](c_strlen(a))
   else:
diff --git a/lib/system/mm/go.nim b/lib/system/mm/go.nim
index 9ec25fb65..8f3aeb964 100644
--- a/lib/system/mm/go.nim
+++ b/lib/system/mm/go.nim
@@ -109,7 +109,7 @@ proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
   writebarrierptr(addr(result), newSeq(typ, len))
 
 proc nimNewSeqOfCap(typ: PNimType, cap: int): pointer {.compilerproc.} =
-  result = newObj(typ, align(GenericSeqSize, typ.base.align) + cap * typ.base.size)
+  result = newObjNoInit(typ, align(GenericSeqSize, typ.base.align) + cap * typ.base.size)
   cast[PGenericSeq](result).len = 0
   cast[PGenericSeq](result).reserved = cap
   cast[PGenericSeq](result).elemSize = typ.base.size
diff --git a/lib/system/mm/malloc.nim b/lib/system/mm/malloc.nim
index d41dce705..47f1a95ae 100644
--- a/lib/system/mm/malloc.nim
+++ b/lib/system/mm/malloc.nim
@@ -22,7 +22,7 @@ proc reallocImpl(p: pointer, newSize: Natural): pointer =
 proc realloc0Impl(p: pointer, oldsize, newSize: Natural): pointer =
   result = realloc(p, newSize.csize_t)
   if newSize > oldSize:
-    zeroMem(cast[pointer](cast[int](result) + oldSize), newSize - oldSize)
+    zeroMem(cast[pointer](cast[uint](result) + uint(oldSize)), newSize - oldSize)
 
 proc deallocImpl(p: pointer) =
   c_free(p)
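
realloc0Impl now does its pointer arithmetic in uint, so the offset of the freshly grown region is computed without tripping signed-overflow checks on addresses beyond int.high. The same idiom in isolation (an illustrative helper, not part of the allocator):

```nim
proc offsetBy(p: pointer; bytes: Natural): pointer =
  ## Advance a raw pointer by `bytes`, doing the math in unsigned space.
  cast[pointer](cast[uint](p) + uint(bytes))

var buf = alloc0(64)
let tail = offsetBy(buf, 32)     # points at byte 32 of the allocation
doAssert cast[uint](tail) - cast[uint](buf) == 32
dealloc buf
```
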
@@ -88,7 +88,7 @@ type
 
 proc alloc(r: var MemRegion, size: int): pointer =
   result = alloc(size)
-proc alloc0Impl(r: var MemRegion, size: int): pointer =
+proc alloc0(r: var MemRegion, size: int): pointer =
   result = alloc0Impl(size)
 proc dealloc(r: var MemRegion, p: pointer) = dealloc(p)
 proc deallocOsPages(r: var MemRegion) = discard
diff --git a/lib/system/nimscript.nim b/lib/system/nimscript.nim
index 0b49ea2e7..cf81f6d86 100644
--- a/lib/system/nimscript.nim
+++ b/lib/system/nimscript.nim
@@ -86,12 +86,12 @@ proc patchFile*(package, filename, replacement: string) =
   ## is interpreted to be local to the Nimscript file that contains
   ## the call to `patchFile`, Nim's `--path` is not used at all
   ## to resolve the filename!
+  ## The compiler also performs `path substitution <nimc.html#compiler-usage-commandminusline-switches>`_ on `replacement`.
   ##
   ## Example:
-  ##
-  ## .. code-block:: nim
-  ##
+  ##   ```nim
   ##   patchFile("stdlib", "asyncdispatch", "patches/replacement")
+  ##   ```
   discard
 
 proc getCommand*(): string =
@@ -158,20 +158,19 @@ proc strip(s: string): string =
 template `--`*(key, val: untyped) =
   ## A shortcut for `switch <#switch,string,string>`_
   ## Example:
-  ##
-  ## .. code-block:: nim
-  ##
+  ##   ```nim
   ##   --path:somePath # same as switch("path", "somePath")
   ##   --path:"someOtherPath" # same as switch("path", "someOtherPath")
+  ##   --hint:"[Conf]:off" # same as switch("hint", "[Conf]:off")
+  ##   ```
   switch(strip(astToStr(key)), strip(astToStr(val)))
 
 template `--`*(key: untyped) =
   ## A shortcut for `switch <#switch,string,string>`_
   ## Example:
-  ##
-  ## .. code-block:: nim
-  ##
+  ##   ```nim
   ##   --listCmd # same as switch("listCmd")
+  ##   ```
   switch(strip(astToStr(key)))
 
 type
@@ -254,11 +253,12 @@ proc cpDir*(`from`, to: string) {.raises: [OSError].} =
 proc exec*(command: string) {.
   raises: [OSError], tags: [ExecIOEffect, WriteIOEffect].} =
   ## Executes an external process. If the external process terminates with
-  ## a non-zero exit code, an OSError exception is raised.
+  ## a non-zero exit code, an OSError exception is raised. The command is
+  ## executed relative to the current source path.
   ##
-  ## **Note:** If you need a version of `exec` that returns the exit code
-  ## and text output of the command, you can use `system.gorgeEx
-  ## <system.html#gorgeEx,string,string,string>`_.
+  ## .. note:: If you need a version of `exec` that returns the exit code
+  ##   and text output of the command, you can use `system.gorgeEx
+  ##   <system.html#gorgeEx,string,string,string>`_.
   log "exec: " & command:
     if rawExec(command) != 0:
       raise newException(OSError, "FAILED: " & command)
@@ -268,11 +268,17 @@ proc exec*(command: string, input: string, cache = "") {.
   raises: [OSError], tags: [ExecIOEffect, WriteIOEffect].} =
   ## Executes an external process. If the external process terminates with
   ## a non-zero exit code, an OSError exception is raised.
+  ##
+  ## .. warning:: This version of `exec` is executed relative to the nimscript
+  ##   module path, which affects how the command resolves relative paths. Thus
+  ##   it is generally better to use `gorgeEx` directly when you need more
+  ##   control over the execution environment or when working with commands
+  ##   that deal with relative paths.
   log "exec: " & command:
     let (output, exitCode) = gorgeEx(command, input, cache)
+    echo output
     if exitCode != 0:
       raise newException(OSError, "FAILED: " & command)
-    echo output
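
With this change the captured output of the child process is echoed even when the command fails, so the error behind a non-zero exit code is visible before the OSError is raised. A tiny NimScript usage sketch (the command is hypothetical):

```nim
# in some .nims file:
exec("nim --version", input = "", cache = "nimver")
# prints the captured output, then raises OSError on a non-zero exit code
```
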
 
 proc selfExec*(command: string) {.
   raises: [OSError], tags: [ExecIOEffect, WriteIOEffect].} =
@@ -340,12 +346,12 @@ template withDir*(dir: string; body: untyped): untyped =
   ##
   ## If you need a permanent change, use the `cd() <#cd,string>`_ proc.
   ## Usage example:
-  ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   # inside /some/path/
   ##   withDir "foo":
   ##     # move to /some/path/foo/
   ##   # back in /some/path/
+  ##   ```
   let curDir = getCurrentDir()
   try:
     cd(dir)
@@ -390,10 +396,10 @@ when not defined(nimble):
     ## Defines a task. Hidden tasks are supported via an empty description.
     ##
     ## Example:
-    ##
-    ## .. code-block:: nim
-    ##  task build, "default build is via the C backend":
-    ##    setCommand "c"
+    ##   ```nim
+    ##   task build, "default build is via the C backend":
+    ##     setCommand "c"
+    ##   ```
     ##
     ## For a task named `foo`, this template generates a `proc` named
     ## `fooTask`.  This is useful if you need to call one task in
@@ -401,13 +407,14 @@ when not defined(nimble):
     ##
     ## Example:
     ##
-    ## .. code-block:: nim
-    ##  task foo, "foo":        # > nim foo
-    ##    echo "Running foo"    # Running foo
+    ##   ```nim
+    ##   task foo, "foo":        # > nim foo
+    ##     echo "Running foo"    # Running foo
     ##
-    ##  task bar, "bar":        # > nim bar
-    ##    echo "Running bar"    # Running bar
-    ##    fooTask()             # Running foo
+    ##   task bar, "bar":        # > nim bar
+    ##     echo "Running bar"    # Running bar
+    ##     fooTask()             # Running foo
+    ##   ```
     proc `name Task`*() =
       setCommand "nop"
       body
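Building on the documentation above, a sketch of a hidden task (empty description) that reuses another task via its generated `proc` (task names are hypothetical):

```nim
task build, "default build is via the C backend":
  setCommand "c"

task ci, "":          # empty description: the task is hidden
  echo "running CI steps"
  buildTask()         # calls the proc generated for `task build`
```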
diff --git a/lib/system/orc.nim b/lib/system/orc.nim
index 83b983ee1..c02a24989 100644
--- a/lib/system/orc.nim
+++ b/lib/system/orc.nim
@@ -81,10 +81,14 @@ proc trace(s: Cell; desc: PNimTypeV2; j: var GcEnv) {.inline.} =
 
 include threadids
 
-when logOrc:
+when logOrc or orcLeakDetector:
   proc writeCell(msg: cstring; s: Cell; desc: PNimTypeV2) =
-    cfprintf(cstderr, "%s %s %ld root index: %ld; RC: %ld; color: %ld; thread: %ld\n",
-      msg, desc.name, s.refId, s.rootIdx, s.rc shr rcShift, s.color, getThreadId())
+    when orcLeakDetector:
+      cfprintf(cstderr, "%s %s file: %s:%ld; color: %ld; thread: %ld\n",
+        msg, desc.name, s.filename, s.line, s.color, getThreadId())
+    else:
+      cfprintf(cstderr, "%s %s %ld root index: %ld; RC: %ld; color: %ld; thread: %ld\n",
+        msg, desc.name, s.refId, s.rootIdx, s.rc shr rcShift, s.color, getThreadId())
 
 proc free(s: Cell; desc: PNimTypeV2) {.inline.} =
   when traceCollector:
@@ -142,7 +146,7 @@ proc unregisterCycle(s: Cell) =
   let idx = s.rootIdx-1
   when false:
     if idx >= roots.len or idx < 0:
-      cprintf("[Bug!] %ld\n", idx)
+      cprintf("[Bug!] %ld %ld\n", idx, roots.len)
       rawQuit 1
   roots.d[idx] = roots.d[roots.len-1]
   roots.d[idx][0].rootIdx = idx+1
@@ -299,6 +303,14 @@ proc collectColor(s: Cell; desc: PNimTypeV2; col: int; j: var GcEnv) =
         t.setColor(colBlack)
         trace(t, desc, j)
 
+const
+  defaultThreshold = when defined(nimFixedOrc): 10_000 else: 128
+
+when defined(nimStressOrc):
+  const rootsThreshold = 10 # broken with -d:nimStressOrc: 10 and for havlak iterations 1..8
+else:
+  var rootsThreshold {.threadvar.}: int
+
 proc collectCyclesBacon(j: var GcEnv; lowMark: int) =
   # pretty direct translation from
   # https://researcher.watson.ibm.com/researcher/files/us-bacon/Bacon01Concurrent.pdf
@@ -337,20 +349,28 @@ proc collectCyclesBacon(j: var GcEnv; lowMark: int) =
     s.rootIdx = 0
     collectColor(s, roots.d[i][1], colToCollect, j)
 
+  # Bug #22927: `free` calls destructors which can append to `roots`.
+  # We protect against this here by setting `roots.len` to 0 and also
+  # setting the threshold so high that no cycle collection can be triggered
+  # until we are out of this critical section:
+  when not defined(nimStressOrc):
+    let oldThreshold = rootsThreshold
+    rootsThreshold = high(int)
+  roots.len = 0
+
   for i in 0 ..< j.toFree.len:
+    when orcLeakDetector:
+      writeCell("CYCLIC OBJECT FREED", j.toFree.d[i][0], j.toFree.d[i][1])
     free(j.toFree.d[i][0], j.toFree.d[i][1])
 
+  when not defined(nimStressOrc):
+    rootsThreshold = oldThreshold
+
   inc j.freed, j.toFree.len
   deinit j.toFree
-  #roots.len = 0
-
-const
-  defaultThreshold = when defined(nimFixedOrc): 10_000 else: 128
 
-when defined(nimStressOrc):
-  const rootsThreshold = 10 # broken with -d:nimStressOrc: 10 and for havlak iterations 1..8
-else:
-  var rootsThreshold = defaultThreshold
+when defined(nimOrcStats):
+  var freedCyclicObjects {.threadvar.}: int
 
 proc partialCollect(lowMark: int) =
   when false:
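A hedged illustration of the situation the guard above addresses: a destructor that allocates a new cyclic-capable `ref`, so that `free` during cycle collection could append to `roots` mid-iteration (types are made up; compile with `--mm:orc`):

```nim
type
  Payload = object
    id: int
  Node = ref object
    next: Node          # cyclic-capable, so instances can become cycle roots
    payload: Payload

proc `=destroy`(p: var Payload) =
  # Runs from `free` while the collector walks its root list; allocating a
  # new cyclic ref here would append to `roots` (the bug #22927 scenario).
  var stray = Node()
  stray.next = stray

var a = Node(payload: Payload(id: 1))
a.next = a              # form a cycle so `a` becomes a cycle root
a = nil
GC_fullCollect()        # the destructor runs while the cycle is freed
```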
@@ -365,6 +385,8 @@ proc partialCollect(lowMark: int) =
       roots.len - lowMark)
   roots.len = lowMark
   deinit j.traceStack
+  when defined(nimOrcStats):
+    inc freedCyclicObjects, j.freed
 
 proc collectCycles() =
   ## Collect cycles.
@@ -385,33 +407,44 @@ proc collectCycles() =
     collectCyclesBacon(j, 0)
 
   deinit j.traceStack
-  deinit roots
+  if roots.len == 0:
+    deinit roots
 
   when not defined(nimStressOrc):
     # compute the threshold based on the previous history
     # of the cycle collector's effectiveness:
     # we're effective when we collected 50% or more of the nodes
     # we touched. If we're effective, we can reset the threshold:
-    if j.keepThreshold and rootsThreshold <= defaultThreshold:
+    if j.keepThreshold:
       discard
     elif j.freed * 2 >= j.touched:
       when not defined(nimFixedOrc):
         rootsThreshold = max(rootsThreshold div 3 * 2, 16)
       else:
-        rootsThreshold = defaultThreshold
+        rootsThreshold = 0
       #cfprintf(cstderr, "[collectCycles] freed %ld, touched %ld new threshold %ld\n", j.freed, j.touched, rootsThreshold)
     elif rootsThreshold < high(int) div 4:
-      rootsThreshold = rootsThreshold * 3 div 2
+      rootsThreshold = (if rootsThreshold <= 0: defaultThreshold else: rootsThreshold) * 3 div 2
   when logOrc:
     cfprintf(cstderr, "[collectCycles] end; freed %ld new threshold %ld touched: %ld mem: %ld rcSum: %ld edges: %ld\n", j.freed, rootsThreshold, j.touched,
       getOccupiedMem(), j.rcSum, j.edges)
+  when defined(nimOrcStats):
+    inc freedCyclicObjects, j.freed
+
+when defined(nimOrcStats):
+  type
+    OrcStats* = object ## Statistics of the cycle collector subsystem.
+      freedCyclicObjects*: int ## Number of freed cyclic objects.
+  proc GC_orcStats*(): OrcStats =
+    ## Returns the statistics of the cycle collector subsystem.
+    result = OrcStats(freedCyclicObjects: freedCyclicObjects)
 
 proc registerCycle(s: Cell; desc: PNimTypeV2) =
   s.rootIdx = roots.len+1
   if roots.d == nil: init(roots)
   add(roots, s, desc)
 
-  if roots.len >= rootsThreshold:
+  if roots.len - defaultThreshold >= rootsThreshold:
     collectCycles()
   when logOrc:
     writeCell("[added root]", s, desc)
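The `nimOrcStats` hook above can be queried after a collection; a minimal usage sketch (assumes compiling with `--mm:orc -d:nimOrcStats`):

```nim
type Node = ref object
  next: Node

for i in 0 ..< 1000:
  var n = Node()
  n.next = n            # each iteration drops a self-cycle

GC_fullCollect()
echo GC_orcStats().freedCyclicObjects   # number of cyclic objects freed so far
```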
@@ -424,13 +457,13 @@ proc GC_runOrc* =
   orcAssert roots.len == 0, "roots not empty!"
 
 proc GC_enableOrc*() =
-  ## Enables the cycle collector subsystem of `--gc:orc`. This is a `--gc:orc`
+  ## Enables the cycle collector subsystem of `--mm:orc`. This is a `--mm:orc`
   ## specific API. Check with `when defined(gcOrc)` for its existence.
   when not defined(nimStressOrc):
-    rootsThreshold = defaultThreshold
+    rootsThreshold = 0
 
 proc GC_disableOrc*() =
-  ## Disables the cycle collector subsystem of `--gc:orc`. This is a `--gc:orc`
+  ## Disables the cycle collector subsystem of `--mm:orc`. This is a `--mm:orc`
   ## specific API. Check with `when defined(gcOrc)` for its existence.
   when not defined(nimStressOrc):
     rootsThreshold = high(int)
@@ -441,16 +474,16 @@ proc GC_partialCollect*(limit: int) =
   partialCollect(limit)
 
 proc GC_fullCollect* =
-  ## Forces a full garbage collection pass. With `--gc:orc` triggers the cycle
+  ## Forces a full garbage collection pass. With `--mm:orc` triggers the cycle
   ## collector. This is an alias for `GC_runOrc`.
   collectCycles()
 
 proc GC_enableMarkAndSweep*() =
-  ## For `--gc:orc` an alias for `GC_enableOrc`.
+  ## For `--mm:orc` an alias for `GC_enableOrc`.
   GC_enableOrc()
 
 proc GC_disableMarkAndSweep*() =
-  ## For `--gc:orc` an alias for `GC_disableOrc`.
+  ## For `--mm:orc` an alias for `GC_disableOrc`.
   GC_disableOrc()
 
 const
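Putting the `--mm:orc` APIs above together, a short sketch that disables automatic cycle collection around a hot loop and then collects explicitly (illustrative only; compile with `--mm:orc`):

```nim
type Node = ref object
  next: Node

GC_disableOrc()           # no automatic cycle collection from here on
for i in 0 ..< 100_000:
  var n = Node()
  n.next = n              # cycles merely accumulate as roots
GC_enableOrc()
GC_fullCollect()          # alias for GC_runOrc: collect everything now
```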
@@ -485,6 +518,19 @@ proc nimDecRefIsLastCyclicDyn(p: pointer): bool {.compilerRtl, inl.} =
     #if cell.color == colPurple:
     rememberCycle(result, cell, cast[ptr PNimTypeV2](p)[])
 
+proc nimDecRefIsLastDyn(p: pointer): bool {.compilerRtl, inl.} =
+  if p != nil:
+    var cell = head(p)
+    if (cell.rc and not rcMask) == 0:
+      result = true
+      #cprintf("[DESTROY] %p\n", p)
+    else:
+      dec cell.rc, rcIncrement
+    #if cell.color == colPurple:
+    if result:
+      if cell.rootIdx > 0:
+        unregisterCycle(cell)
+
 proc nimDecRefIsLastCyclicStatic(p: pointer; desc: PNimTypeV2): bool {.compilerRtl, inl.} =
   if p != nil:
     var cell = head(p)
diff --git a/lib/system/osalloc.nim b/lib/system/osalloc.nim
index 4817059be..5509d0070 100644
--- a/lib/system/osalloc.nim
+++ b/lib/system/osalloc.nim
@@ -29,8 +29,8 @@ const doNotUnmap = not (defined(amd64) or defined(i386)) or
 
 
 when defined(nimAllocPagesViaMalloc):
-  when not defined(gcArc) and not defined(gcOrc):
-    {.error: "-d:nimAllocPagesViaMalloc is only supported with --gc:arc or --gc:orc".}
+  when not defined(gcArc) and not defined(gcOrc) and not defined(gcAtomicArc):
+    {.error: "-d:nimAllocPagesViaMalloc is only supported with --mm:arc or --mm:atomicArc or --mm:orc".}
 
   proc osTryAllocPages(size: int): pointer {.inline.} =
     let base = c_malloc(csize_t size + PageSize - 1 + sizeof(uint32))
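For context, a generic sketch of the align-over-malloc technique this code path relies on (not the file's exact implementation): over-allocate, round the pointer up to the next page boundary, and remember the adjustment so the raw block can be freed later.

```nim
const PageSize = 4096     # assumed page size for the sketch

proc alignedPageAlloc(size: int): tuple[p: pointer, offset: int] =
  ## `offset` must be subtracted from `p` again before freeing the raw block.
  let raw = alloc(size + PageSize - 1)
  let base = cast[uint](raw)
  let aligned = (base + uint(PageSize) - 1) and not (uint(PageSize) - 1)
  result = (cast[pointer](aligned), int(aligned - base))

let (page, offset) = alignedPageAlloc(8192)
assert cast[uint](page) mod uint(PageSize) == 0
```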
@@ -80,12 +80,12 @@ elif defined(emscripten) and not defined(StandaloneHeapSize):
     let pos = cast[int](result)
 
     # Convert pointer to PageSize correct one.
-    var new_pos = cast[ByteAddress](pos) +% (PageSize - (pos %% PageSize))
+    var new_pos = cast[int](pos) +% (PageSize - (pos %% PageSize))
     if (new_pos-pos) < sizeof(EmscriptenMMapBlock):
       new_pos = new_pos +% PageSize
     result = cast[pointer](new_pos)
 
-    var mmapDescrPos = cast[ByteAddress](result) -% sizeof(EmscriptenMMapBlock)
+    var mmapDescrPos = cast[int](result) -% sizeof(EmscriptenMMapBlock)
 
     var mmapDescr = cast[EmscriptenMMapBlock](mmapDescrPos)
     mmapDescr.realSize = realSize
@@ -96,7 +96,7 @@ elif defined(emscripten) and not defined(StandaloneHeapSize):
   proc osTryAllocPages(size: int): pointer = osAllocPages(size)
 
   proc osDeallocPages(p: pointer, size: int) {.inline.} =
-    var mmapDescrPos = cast[ByteAddress](p) -% sizeof(EmscriptenMMapBlock)
+    var mmapDescrPos = cast[int](p) -% sizeof(EmscriptenMMapBlock)
     var mmapDescr = cast[EmscriptenMMapBlock](mmapDescrPos)
     munmap(mmapDescr.realPointer, mmapDescr.realSize)
 
diff --git a/lib/system/rawquits.nim b/lib/system/rawquits.nim
new file mode 100644
index 000000000..f0ead10c6
--- /dev/null
+++ b/lib/system/rawquits.nim
@@ -0,0 +1,27 @@
+import system/ctypes
+
+when defined(nimNoQuit):
+  proc rawQuit(errorcode: int = QuitSuccess) = discard "ignoring quit"
+
+elif defined(genode):
+  import genode/env
+
+  var systemEnv {.exportc: runtimeEnvSym.}: GenodeEnvPtr
+
+  type GenodeEnv = GenodeEnvPtr
+    ## Opaque type representing Genode environment.
+
+  proc rawQuit(env: GenodeEnv; errorcode: int) {.magic: "Exit", noreturn,
+    importcpp: "#->parent().exit(@); Genode::sleep_forever()", header: "<base/sleep.h>".}
+
+  proc rawQuit(errorcode: int = QuitSuccess) {.inline, noreturn.} =
+    systemEnv.rawQuit(errorcode)
+
+
+elif defined(js) and defined(nodejs) and not defined(nimscript):
+  proc rawQuit(errorcode: int = QuitSuccess) {.magic: "Exit",
+    importc: "process.exit", noreturn.}
+
+else:
+  proc rawQuit(errorcode: cint) {.
+    magic: "Exit", importc: "exit", header: "<stdlib.h>", noreturn.}
\ No newline at end of file
diff --git a/lib/system/repr.nim b/lib/system/repr.nim
index e47220656..13118e40b 100644
--- a/lib/system/repr.nim
+++ b/lib/system/repr.nim
@@ -17,7 +17,7 @@ proc reprFloat(x: float): string {.compilerproc.} = return $x
 
 proc reprPointer(x: pointer): string {.compilerproc.} =
   result = newString(60)
-  let n = c_sprintf(cast[cstring](addr result[0]), "%p", x)
+  let n = c_snprintf(cast[cstring](addr result[0]), csize_t(60), "%p", x)
   setLen(result, n)
 
 proc reprStrAux(result: var string, s: cstring; len: int) =
@@ -155,7 +155,7 @@ when not defined(useNimRtl):
     var bs = typ.base.size
     for i in 0..typ.size div bs - 1:
       if i > 0: add result, ", "
-      reprAux(result, cast[pointer](cast[ByteAddress](p) + i*bs), typ.base, cl)
+      reprAux(result, cast[pointer](cast[int](p) + i*bs), typ.base, cl)
     add result, "]"
 
   when defined(nimSeqsV2):
@@ -183,7 +183,7 @@ when not defined(useNimRtl):
     var bs = typ.base.size
     for i in 0..cast[PGenericSeq](p).len-1:
       if i > 0: add result, ", "
-      reprAux(result, cast[pointer](cast[ByteAddress](payloadPtr(p)) + align(payloadOffset, typ.align) + i*bs),
+      reprAux(result, cast[pointer](cast[int](payloadPtr(p)) + align(payloadOffset, typ.align) + i*bs),
               typ.base, cl)
     add result, "]"
 
@@ -194,14 +194,14 @@ when not defined(useNimRtl):
     of nkSlot:
       add result, $n.name
       add result, " = "
-      reprAux(result, cast[pointer](cast[ByteAddress](p) + n.offset), n.typ, cl)
+      reprAux(result, cast[pointer](cast[int](p) + n.offset), n.typ, cl)
     of nkList:
       for i in 0..n.len-1:
         if i > 0: add result, ",\n"
         reprRecordAux(result, p, n.sons[i], cl)
     of nkCase:
       var m = selectBranch(p, n)
-      reprAux(result, cast[pointer](cast[ByteAddress](p) + n.offset), n.typ, cl)
+      reprAux(result, cast[pointer](cast[int](p) + n.offset), n.typ, cl)
       if m != nil: reprRecordAux(result, p, m, cl)
 
   proc reprRecord(result: var string, p: pointer, typ: PNimType,
@@ -307,7 +307,7 @@ when not defined(useNimRtl):
     var bs = elemtyp.size
     for i in 0..length - 1:
       if i > 0: add result, ", "
-      reprAux(result, cast[pointer](cast[ByteAddress](p) + i*bs), elemtyp, cl)
+      reprAux(result, cast[pointer](cast[int](p) + i*bs), elemtyp, cl)
     add result, "]"
     deinitReprClosure(cl)
 
diff --git a/lib/system/repr_v2.nim b/lib/system/repr_v2.nim
index f6c720e2c..d2aef536c 100644
--- a/lib/system/repr_v2.nim
+++ b/lib/system/repr_v2.nim
@@ -9,6 +9,9 @@ proc isNamedTuple(T: typedesc): bool {.magic: "TypeTrait".}
 proc distinctBase(T: typedesc, recursive: static bool = true): typedesc {.magic: "TypeTrait".}
   ## imported from typetraits
 
+proc rangeBase(T: typedesc): typedesc {.magic: "TypeTrait".}
+  # skip one level of range; return the base type of a range type
+
 proc repr*(x: NimNode): string {.magic: "Repr", noSideEffect.}
 
 proc repr*(x: int): string =
@@ -35,9 +38,10 @@ proc repr*(x: char): string {.noSideEffect, raises: [].} =
   ## repr for a character argument. Returns `x`
   ## converted to an escaped string.
   ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   assert repr('c') == "'c'"
-  result.add '\''
+  ##   ```
+  result = "'"
   # Elides string creations if not needed
   if x in {'\\', '\0'..'\31', '\127'..'\255'}:
     result.add '\\'
@@ -50,7 +54,7 @@ proc repr*(x: char): string {.noSideEffect, raises: [].} =
 proc repr*(x: string | cstring): string {.noSideEffect, raises: [].} =
   ## repr for a string argument. Returns `x`
   ## converted to a quoted and escaped string.
-  result.add '\"'
+  result = "\""
   for i in 0..<x.len:
     if x[i] in {'"', '\\', '\0'..'\31', '\127'..'\255'}:
       result.add '\\'
@@ -91,12 +95,17 @@ proc repr*(p: pointer): string =
         result[j] = HexChars[n and 0xF]
         n = n shr 4
 
-proc repr*(p: proc): string =
+proc repr*(p: proc | iterator {.closure.}): string =
   ## repr of a proc as its address
   repr(cast[ptr pointer](unsafeAddr p)[])
 
-template repr*(x: distinct): string =
-  repr(distinctBase(typeof(x))(x))
+template repr*[T: distinct|(range and not enum)](x: T): string =
+  when T is range: # add a branch to handle range
+    repr(rangeBase(typeof(x))(x))
+  elif T is distinct:
+    repr(distinctBase(typeof(x))(x))
+  else:
+    {.error: "cannot happen".}
 
 template repr*(t: typedesc): string = $t
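With the `rangeBase` branch above, `repr` of a range value falls back to its base type's representation, just as `distinct` values already did. A small illustration (type names are made up; assumes `--mm:arc`/`orc`, where this `repr` implementation is active):

```nim
type
  Distance = distinct int
  Digit = range[0..9]

echo repr(Distance(7))   # prints the underlying int's repr: 7
echo repr(Digit(3))      # range values use the base type's repr: 3
```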
 
@@ -124,11 +133,11 @@ proc reprObject[T: tuple|object](res: var string, x: T) {.noSideEffect, raises:
 proc repr*[T: tuple|object](x: T): string {.noSideEffect, raises: [].} =
   ## Generic `repr` operator for tuples that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   $(23, 45) == "(23, 45)"
   ##   $(a: 23, b: 45) == "(a: 23, b: 45)"
   ##   $() == "()"
+  ##   ```
   when T is object:
     result = $typeof(x)
   reprObject(result, x)
@@ -156,29 +165,19 @@ proc collectionToRepr[T](x: T, prefix, separator, suffix: string): string {.noSi
 proc repr*[T](x: set[T]): string =
   ## Generic `repr` operator for sets that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   ${23, 45} == "{23, 45}"
+  ##   ```
   collectionToRepr(x, "{", ", ", "}")
 
 proc repr*[T](x: seq[T]): string =
   ## Generic `repr` operator for seqs that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   $(@[23, 45]) == "@[23, 45]"
+  ##   ```
   collectionToRepr(x, "@[", ", ", "]")
 
-proc repr*[T, U](x: HSlice[T, U]): string =
-  ## Generic `repr` operator for slices that is lifted from the components
-  ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
-  ##  $(1 .. 5) == "1 .. 5"
-  result = repr(x.a)
-  result.add(" .. ")
-  result.add(repr(x.b))
-
 proc repr*[T, IDX](x: array[IDX, T]): string =
   ## Generic `repr` operator for arrays that is lifted from the components.
   collectionToRepr(x, "[", ", ", "]")
@@ -186,9 +185,9 @@ proc repr*[T, IDX](x: array[IDX, T]): string =
 proc repr*[T](x: openArray[T]): string =
   ## Generic `repr` operator for openarrays that is lifted from the components
   ## of `x`. Example:
-  ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   $(@[23, 45].toOpenArray(0, 1)) == "[23, 45]"
+  ##   ```
   collectionToRepr(x, "[", ", ", "]")
 
 proc repr*[T](x: UncheckedArray[T]): string =
diff --git a/lib/system/reprjs.nim b/lib/system/reprjs.nim
index 0818f9cc9..761d66aec 100644
--- a/lib/system/reprjs.nim
+++ b/lib/system/reprjs.nim
@@ -12,6 +12,8 @@ when defined(nimPreviewSlimSystem):
   import std/formatfloat
 
 proc reprInt(x: int64): string {.compilerproc.} = $x
+proc reprInt(x: uint64): string {.compilerproc.} = $x
+proc reprInt(x: int): string {.compilerproc.} = $x
 proc reprFloat(x: float): string {.compilerproc.} = $x
 
 proc reprPointer(p: pointer): string {.compilerproc.} =
@@ -27,7 +29,7 @@ proc reprBool(x: bool): string {.compilerRtl.} =
 proc reprEnum(e: int, typ: PNimType): string {.compilerRtl.} =
   var tmp: bool
   let item = typ.node.sons[e]
-  {.emit: "`tmp` = `item` !== undefined".}
+  {.emit: "`tmp` = `item` !== undefined;".}
   if tmp:
     result = makeNimstrLit(item.name)
   else:
@@ -134,7 +136,7 @@ proc reprArray(a: pointer, typ: PNimType,
   add(result, "]")
 
 proc isPointedToNil(p: pointer): bool =
-  {. emit: "if (`p` === null) {`result` = true};\n" .}
+  {. emit: "if (`p` === null) {`result` = true;}\n" .}
 
 proc reprRef(result: var string, p: pointer, typ: PNimType,
           cl: var ReprClosure) =
@@ -192,8 +194,12 @@ proc reprAux(result: var string, p: pointer, typ: PNimType,
     return
   dec(cl.recDepth)
   case typ.kind
-  of tyInt..tyInt64, tyUInt..tyUInt64:
+  of tyInt..tyInt32, tyUInt..tyUInt32:
     add(result, reprInt(cast[int](p)))
+  of tyInt64:
+    add(result, reprInt(cast[int64](p)))
+  of tyUInt64:
+    add(result, reprInt(cast[uint64](p)))
   of tyChar:
     add(result, reprChar(cast[char](p)))
   of tyBool:
diff --git a/lib/system/seqs_v2.nim b/lib/system/seqs_v2.nim
index 40fd50b48..572e77408 100644
--- a/lib/system/seqs_v2.nim
+++ b/lib/system/seqs_v2.nim
@@ -8,10 +8,13 @@
 #
 
 
-# import typetraits
+# import std/typetraits
 # strs already imported allocateds for us.
 
-proc supportsCopyMem(t: typedesc): bool {.magic: "TypeTrait".}
+
+# Some optimizations here may skip empty-seq initialization of some symbols, and then StrictNotNil complains.
+{.push warning[StrictNotNil]: off.}  # See https://github.com/nim-lang/Nim/issues/21401
+
 
 ## Default seq implementation used by Nim's core.
 type
@@ -27,6 +30,10 @@ type
     len: int
     p: ptr NimSeqPayload[T]
 
+  NimRawSeq = object
+    len: int
+    p: pointer
+
 const nimSeqVersion {.core.} = 2
 
 # XXX make code memory safe for overflows in '*'
@@ -41,6 +48,15 @@ proc newSeqPayload(cap, elemSize, elemAlign: int): pointer {.compilerRtl, raises
   else:
     result = nil
 
+proc newSeqPayloadUninit(cap, elemSize, elemAlign: int): pointer {.compilerRtl, raises: [].} =
+  # Used in `newSeqOfCap()`.
+  if cap > 0:
+    var p = cast[ptr NimSeqPayloadBase](alignedAlloc(align(sizeof(NimSeqPayloadBase), elemAlign) + cap * elemSize, elemAlign))
+    p.cap = cap
+    result = p
+  else:
+    result = nil
+
 template `+!`(p: pointer, s: int): pointer =
   cast[pointer](cast[int](p) +% s)
 
@@ -48,7 +64,7 @@ template `-!`(p: pointer, s: int): pointer =
   cast[pointer](cast[int](p) -% s)
 
 proc prepareSeqAdd(len: int; p: pointer; addlen, elemSize, elemAlign: int): pointer {.
-    noSideEffect, raises: [], compilerRtl.} =
+    noSideEffect, tags: [], raises: [], compilerRtl.} =
   {.noSideEffect.}:
     let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
     if addlen <= 0:
@@ -61,15 +77,48 @@ proc prepareSeqAdd(len: int; p: pointer; addlen, elemSize, elemAlign: int): poin
       var p = cast[ptr NimSeqPayloadBase](p)
       let oldCap = p.cap and not strlitFlag
       let newCap = max(resize(oldCap), len+addlen)
+      var q: ptr NimSeqPayloadBase
+      if (p.cap and strlitFlag) == strlitFlag:
+        q = cast[ptr NimSeqPayloadBase](alignedAlloc(headerSize + elemSize * newCap, elemAlign))
+        copyMem(q +! headerSize, p +! headerSize, len * elemSize)
+      else:
+        let oldSize = headerSize + elemSize * oldCap
+        let newSize = headerSize + elemSize * newCap
+        q = cast[ptr NimSeqPayloadBase](alignedRealloc(p, oldSize, newSize, elemAlign))
+
+      zeroMem(q +! headerSize +! len * elemSize, addlen * elemSize)
+      q.cap = newCap
+      result = q
+
+proc zeroNewElements(len: int; q: pointer; addlen, elemSize, elemAlign: int) {.
+    noSideEffect, tags: [], raises: [], compilerRtl.} =
+  {.noSideEffect.}:
+    let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
+    zeroMem(q +! headerSize +! len * elemSize, addlen * elemSize)
+
+proc prepareSeqAddUninit(len: int; p: pointer; addlen, elemSize, elemAlign: int): pointer {.
+    noSideEffect, tags: [], raises: [], compilerRtl.} =
+  {.noSideEffect.}:
+    let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
+    if addlen <= 0:
+      result = p
+    elif p == nil:
+      result = newSeqPayloadUninit(len+addlen, elemSize, elemAlign)
+    else:
+      # Note: this means we cannot support things that have internal pointers as
+      # they get reallocated here. This needs to be documented clearly.
+      var p = cast[ptr NimSeqPayloadBase](p)
+      let oldCap = p.cap and not strlitFlag
+      let newCap = max(resize(oldCap), len+addlen)
       if (p.cap and strlitFlag) == strlitFlag:
-        var q = cast[ptr NimSeqPayloadBase](alignedAlloc0(headerSize + elemSize * newCap, elemAlign))
+        var q = cast[ptr NimSeqPayloadBase](alignedAlloc(headerSize + elemSize * newCap, elemAlign))
         copyMem(q +! headerSize, p +! headerSize, len * elemSize)
         q.cap = newCap
         result = q
       else:
         let oldSize = headerSize + elemSize * oldCap
         let newSize = headerSize + elemSize * newCap
-        var q = cast[ptr NimSeqPayloadBase](alignedRealloc0(p, oldSize, newSize, elemAlign))
+        var q = cast[ptr NimSeqPayloadBase](alignedRealloc(p, oldSize, newSize, elemAlign))
         q.cap = newCap
         result = q
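Several hunks above mask the capacity with `and not strlitFlag` before comparing or reallocating. A hedged model of the idea (the exact bit reserved for `strlitFlag` is an implementation detail; a single high bit is assumed here): one bit of `cap` marks a payload owned by a literal, which must be copied instead of reallocated or freed.

```nim
# Illustrative model only, not the runtime's actual constant.
const strlitFlagDemo = 1 shl (sizeof(int) * 8 - 2)

proc usableCap(cap: int): int = cap and not strlitFlagDemo
proc isLiteralPayload(cap: int): bool = (cap and strlitFlagDemo) != 0

let capOfLiteral = 16 or strlitFlagDemo
assert usableCap(capOfLiteral) == 16
assert isLiteralPayload(capOfLiteral)   # must copy before mutating
assert not isLiteralPayload(16)         # heap payload: realloc in place is fine
```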
 
@@ -83,38 +132,41 @@ proc shrink*[T](x: var seq[T]; newLen: Natural) {.tags: [], raises: [].} =
       for i in countdown(x.len - 1, newLen):
         reset x[i]
     # XXX This is wrong for const seqs that were moved into 'x'!
-    cast[ptr NimSeqV2[T]](addr x).len = newLen
+    {.noSideEffect.}:
+      cast[ptr NimSeqV2[T]](addr x).len = newLen
 
-proc grow*[T](x: var seq[T]; newLen: Natural; value: T) =
+proc grow*[T](x: var seq[T]; newLen: Natural; value: T) {.nodestroy.} =
   let oldLen = x.len
   #sysAssert newLen >= x.len, "invalid newLen parameter for 'grow'"
   if newLen <= oldLen: return
   var xu = cast[ptr NimSeqV2[T]](addr x)
-  if xu.p == nil or xu.p.cap < newLen:
-    xu.p = cast[typeof(xu.p)](prepareSeqAdd(oldLen, xu.p, newLen - oldLen, sizeof(T), alignof(T)))
+  if xu.p == nil or (xu.p.cap and not strlitFlag) < newLen:
+    xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newLen - oldLen, sizeof(T), alignof(T)))
   xu.len = newLen
   for i in oldLen .. newLen-1:
-    xu.p.data[i] = value
+    wasMoved(xu.p.data[i])
+    `=copy`(xu.p.data[i], value)
 
-proc add*[T](x: var seq[T]; value: sink T) {.magic: "AppendSeqElem", noSideEffect, nodestroy.} =
+proc add*[T](x: var seq[T]; y: sink T) {.magic: "AppendSeqElem", noSideEffect, nodestroy.} =
   ## Generic proc for adding a data item `y` to a container `x`.
   ##
   ## For containers that have an order, `add` means *append*. New generic
   ## containers should also call their adding proc `add` for consistency.
   ## Generic code becomes much easier to write if the Nim naming scheme is
   ## respected.
-  let oldLen = x.len
-  var xu = cast[ptr NimSeqV2[T]](addr x)
-  if xu.p == nil or xu.p.cap < oldLen+1:
-    xu.p = cast[typeof(xu.p)](prepareSeqAdd(oldLen, xu.p, 1, sizeof(T), alignof(T)))
-  xu.len = oldLen+1
-  # .nodestroy means `xu.p.data[oldLen] = value` is compiled into a
-  # copyMem(). This is fine as know by construction that
-  # in `xu.p.data[oldLen]` there is nothing to destroy.
-  # We also save the `wasMoved + destroy` pair for the sink parameter.
-  xu.p.data[oldLen] = value
-
-proc setLen[T](s: var seq[T], newlen: Natural) =
+  {.cast(noSideEffect).}:
+    let oldLen = x.len
+    var xu = cast[ptr NimSeqV2[T]](addr x)
+    if xu.p == nil or (xu.p.cap and not strlitFlag) < oldLen+1:
+      xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, 1, sizeof(T), alignof(T)))
+    xu.len = oldLen+1
+    # .nodestroy means `xu.p.data[oldLen] = y` is compiled into a
+    # copyMem(). This is fine as we know by construction that
+    # in `xu.p.data[oldLen]` there is nothing to destroy.
+    # We also save the `wasMoved + destroy` pair for the sink parameter.
+    xu.p.data[oldLen] = y
+
+proc setLen[T](s: var seq[T], newlen: Natural) {.nodestroy.} =
   {.noSideEffect.}:
     if newlen < s.len:
       shrink(s, newlen)
@@ -122,8 +174,8 @@ proc setLen[T](s: var seq[T], newlen: Natural) =
       let oldLen = s.len
       if newlen <= oldLen: return
       var xu = cast[ptr NimSeqV2[T]](addr s)
-      if xu.p == nil or xu.p.cap < newlen:
-        xu.p = cast[typeof(xu.p)](prepareSeqAdd(oldLen, xu.p, newlen - oldLen, sizeof(T), alignof(T)))
+      if xu.p == nil or (xu.p.cap and not strlitFlag) < newlen:
+        xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newlen - oldLen, sizeof(T), alignof(T)))
       xu.len = newlen
       for i in oldLen..<newlen:
         xu.p.data[i] = default(T)
@@ -132,9 +184,9 @@ proc newSeq[T](s: var seq[T], len: Natural) =
   shrink(s, 0)
   setLen(s, len)
 
+proc sameSeqPayload(x: pointer, y: pointer): bool {.compilerRtl, inl.} =
+  result = cast[ptr NimRawSeq](x)[].p == cast[ptr NimRawSeq](y)[].p
 
-template capacityImpl(sek: NimSeqV2): int =
-  if sek.p != nil: sek.p.cap else: 0
 
 func capacity*[T](self: seq[T]): int {.inline.} =
   ## Returns the current capacity of the seq.
@@ -144,6 +196,32 @@ func capacity*[T](self: seq[T]): int {.inline.} =
     lst.add "Nim"
     assert lst.capacity == 42
 
-  {.cast(noSideEffect).}:
-    let sek = unsafeAddr self
-    result = capacityImpl(cast[ptr NimSeqV2](sek)[])
+  let sek = cast[ptr NimSeqV2[T]](unsafeAddr self)
+  result = if sek.p != nil: sek.p.cap and not strlitFlag else: 0
+
+func setLenUninit*[T](s: var seq[T], newlen: Natural) {.nodestroy.} =
+  ## Sets the length of seq `s` to `newlen`. `T` may be any sequence type.
+  ## New slots will not be initialized.
+  ##
+  ## If the current length is greater than the new length,
+  ## `s` will be truncated.
+  ##   ```nim
+  ##   var x = @[10, 20]
+  ##   x.setLenUninit(5)
+  ##   x[4] = 50
+  ##   assert x[4] == 50
+  ##   x.setLenUninit(1)
+  ##   assert x == @[10]
+  ##   ```
+  {.noSideEffect.}:
+    if newlen < s.len:
+      shrink(s, newlen)
+    else:
+      let oldLen = s.len
+      if newlen <= oldLen: return
+      var xu = cast[ptr NimSeqV2[T]](addr s)
+      if xu.p == nil or (xu.p.cap and not strlitFlag) < newlen:
+        xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newlen - oldLen, sizeof(T), alignof(T)))
+      xu.len = newlen
+
+{.pop.}  # See https://github.com/nim-lang/Nim/issues/21401
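A short usage sketch for the new `setLenUninit` beyond the doc example above: it targets the pattern where every new slot is overwritten immediately, so the default initialization done by `setLen` would be wasted work (assumes `--mm:arc`/`orc`, where this seq implementation is used):

```nim
proc squaresUpTo(n: int): seq[int] =
  result = newSeqOfCap[int](n)
  result.setLenUninit(n)          # new slots hold garbage at this point
  for i in 0 ..< n:
    result[i] = i * i             # every slot is written before it is read

assert squaresUpTo(4) == @[0, 1, 4, 9]
```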
diff --git a/lib/system/setops.nim b/lib/system/setops.nim
index b42c8b4a8..67aa3097a 100644
--- a/lib/system/setops.nim
+++ b/lib/system/setops.nim
@@ -26,9 +26,9 @@ func excl*[T](x: var set[T], y: T) {.magic: "Excl".} =
   ##
   ## This is the same as `x = x - {y}`, but it might be more efficient.
   runnableExamples:
-    var b = {2, 3, 5, 6, 12, 545}
+    var b = {2, 3, 5, 6, 12, 54}
     b.excl(5)
-    assert b == {2, 3, 6, 12, 545}
+    assert b == {2, 3, 6, 12, 54}
 
 template excl*[T](x: var set[T], y: set[T]) {.callsite.} =
   ## Excludes the set `y` from the set `x`.
diff --git a/lib/system/sets.nim b/lib/system/sets.nim
index 103c8d343..97431c296 100644
--- a/lib/system/sets.nim
+++ b/lib/system/sets.nim
@@ -9,18 +9,20 @@
 
 # set handling
 
-type
-  NimSet = array[0..4*2048-1, uint8]
 
-
-proc cardSet(s: NimSet, len: int): int {.compilerproc, inline.} =
+proc cardSetImpl(s: ptr UncheckedArray[uint8], len: int): int {.inline.} =
   var i = 0
   result = 0
+  var num = 0'u64
   when defined(x86) or defined(amd64):
     while i < len - 8:
-      inc(result, countBits64((cast[ptr uint64](s[i].unsafeAddr))[]))
+      copyMem(addr num, addr s[i], 8)
+      inc(result, countBits64(num))
       inc(i, 8)
 
   while i < len:
     inc(result, countBits32(uint32(s[i])))
     inc(i, 1)
+
+proc cardSet(s: ptr UncheckedArray[uint8], len: int): int {.compilerproc, inline.} =
+  result = cardSetImpl(s, len)
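The rewritten `cardSet` is essentially a popcount over the set's backing bytes, with `copyMem` avoiding unaligned 8-byte loads. A standalone sketch of the same counting idea using `std/bitops` (the helper name is illustrative):

```nim
import std/bitops

proc countBitsInBytes(buf: openArray[uint8]): int =
  ## Counts set bits 8 bytes at a time, then finishes byte by byte.
  var i = 0
  while i + 8 <= buf.len:
    var chunk: uint64
    copyMem(addr chunk, unsafeAddr buf[i], 8)
    inc result, countSetBits(chunk)
    inc i, 8
  while i < buf.len:
    inc result, countSetBits(buf[i])
    inc i

assert countBitsInBytes([0b1011'u8, 0xFF'u8]) == 11
```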
diff --git a/lib/system/strmantle.nim b/lib/system/strmantle.nim
index 0b2578280..89046253b 100644
--- a/lib/system/strmantle.nim
+++ b/lib/system/strmantle.nim
@@ -23,6 +23,14 @@ proc cmpStrings(a, b: string): int {.inline, compilerproc.} =
   else:
     result = alen - blen
 
+proc leStrings(a, b: string): bool {.inline, compilerproc.} =
+  # required by upcoming backends (NIR).
+  cmpStrings(a, b) <= 0
+
+proc ltStrings(a, b: string): bool {.inline, compilerproc.} =
+  # required by upcoming backends (NIR).
+  cmpStrings(a, b) < 0
+
 proc eqStrings(a, b: string): bool {.inline, compilerproc.} =
   let alen = a.len
   let blen = b.len
@@ -33,7 +41,7 @@ proc eqStrings(a, b: string): bool {.inline, compilerproc.} =
 proc hashString(s: string): int {.compilerproc.} =
   # the compiler needs exactly the same hash function!
   # this used to be used for efficient generation of string case statements
-  var h : uint = 0
+  var h = 0'u
   for i in 0..len(s)-1:
     h = h + uint(s[i])
     h = h + h shl 10
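For reference, the mixing above is the classic Jenkins one-at-a-time scheme; a self-contained sketch of the full pattern (the final avalanche steps shown are the textbook ones and may not match `hashString` byte for byte):

```nim
proc oneAtATime(s: string): uint =
  var h = 0'u
  for c in s:
    h = h + uint(ord(c))
    h = h + (h shl 10)
    h = h xor (h shr 6)
  h = h + (h shl 3)
  h = h xor (h shr 11)
  h = h + (h shl 15)
  result = h
```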
@@ -75,8 +83,8 @@ const
               1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
               1e20, 1e21, 1e22]
 
-when defined(nimHasInvariant):
-  {.push staticBoundChecks: off.}
+
+{.push staticBoundChecks: off.}
 
 proc nimParseBiggestFloat(s: openArray[char], number: var BiggestFloat,
                          ): int {.compilerproc.} =
@@ -178,7 +186,9 @@ proc nimParseBiggestFloat(s: openArray[char], number: var BiggestFloat,
 
   # if exponent greater than can be represented: +/- zero or infinity
   if absExponent > 999:
-    if expNegative:
+    if integer == 0:
+      number = 0.0
+    elif expNegative:
       number = 0.0*sign
     else:
       number = Inf*sign
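The new `integer == 0` branch makes an all-zero mantissa parse to `0.0` even when the written exponent is huge. A hedged user-level illustration (assuming `std/strutils.parseFloat` routes through this routine on the C backend):

```nim
import std/strutils

assert parseFloat("0e999999") == 0.0   # zero mantissa stays zero
assert parseFloat("2e999999") == Inf   # non-zero mantissa still overflows to Inf
```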
@@ -234,8 +244,7 @@ proc nimParseBiggestFloat(s: openArray[char], number: var BiggestFloat,
   t[ti-3] = ('0'.ord + absExponent mod 10).char
   number = c_strtod(cast[cstring](addr t), nil)
 
-when defined(nimHasInvariant):
-  {.pop.} # staticBoundChecks
+{.pop.} # staticBoundChecks
 
 proc nimBoolToStr(x: bool): string {.compilerRtl.} =
   return if x: "true" else: "false"
diff --git a/lib/system/strs_v2.nim b/lib/system/strs_v2.nim
index 429724dab..404b4f78d 100644
--- a/lib/system/strs_v2.nim
+++ b/lib/system/strs_v2.nim
@@ -34,53 +34,72 @@ template frees(s) =
     else:
       dealloc(s.p)
 
+template allocPayload(newLen: int): ptr NimStrPayload =
+  when compileOption("threads"):
+    cast[ptr NimStrPayload](allocShared(contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](alloc(contentSize(newLen)))
+
+template allocPayload0(newLen: int): ptr NimStrPayload =
+  when compileOption("threads"):
+    cast[ptr NimStrPayload](allocShared0(contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](alloc0(contentSize(newLen)))
+
+template reallocPayload(p: pointer, newLen: int): ptr NimStrPayload =
+  when compileOption("threads"):
+    cast[ptr NimStrPayload](reallocShared(p, contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](realloc(p, contentSize(newLen)))
+
+template reallocPayload0(p: pointer; oldLen, newLen: int): ptr NimStrPayload =
+  when compileOption("threads"):
+    cast[ptr NimStrPayload](reallocShared0(p, contentSize(oldLen), contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](realloc0(p, contentSize(oldLen), contentSize(newLen)))
+
 proc resize(old: int): int {.inline.} =
   if old <= 0: result = 4
   elif old <= high(int16): result = old * 2
   else: result = old * 3 div 2 # for large arrays * 3/2 is better
 
-proc prepareAdd(s: var NimStringV2; addlen: int) {.compilerRtl.} =
-  let newLen = s.len + addlen
+proc prepareAdd(s: var NimStringV2; addLen: int) {.compilerRtl.} =
+  let newLen = s.len + addLen
   if isLiteral(s):
     let oldP = s.p
     # can't mutate a literal, so we need a fresh copy here:
-    when compileOption("threads"):
-      s.p = cast[ptr NimStrPayload](allocShared0(contentSize(newLen)))
-    else:
-      s.p = cast[ptr NimStrPayload](alloc0(contentSize(newLen)))
+    s.p = allocPayload(newLen)
     s.p.cap = newLen
     if s.len > 0:
       # we are about to append, so there is no need to copy the \0 terminator:
       copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], min(s.len, newLen))
+    elif oldP == nil:
+      # In the case of `newString(0) & ""`, since `src.len == 0`, `appendString`
+      # will not set the `\0` terminator, so we set it here.
+      s.p.data[0] = '\0'
   else:
     let oldCap = s.p.cap and not strlitFlag
     if newLen > oldCap:
       let newCap = max(newLen, resize(oldCap))
-      when compileOption("threads"):
-        s.p = cast[ptr NimStrPayload](reallocShared0(s.p, contentSize(oldCap), contentSize(newCap)))
-      else:
-        s.p = cast[ptr NimStrPayload](realloc0(s.p, contentSize(oldCap), contentSize(newCap)))
+      s.p = reallocPayload(s.p, newCap)
       s.p.cap = newCap
+      if newLen < newCap:
+        zeroMem(cast[pointer](addr s.p.data[newLen+1]), newCap - newLen)
 
-proc nimAddCharV1(s: var NimStringV2; c: char) {.compilerRtl, inline.} =
+proc nimAddCharV1(s: var NimStringV2; c: char) {.compilerRtl, inl.} =
   #if (s.p == nil) or (s.len+1 > s.p.cap and not strlitFlag):
   prepareAdd(s, 1)
   s.p.data[s.len] = c
-  s.p.data[s.len+1] = '\0'
   inc s.len
+  s.p.data[s.len] = '\0'
 
 proc toNimStr(str: cstring, len: int): NimStringV2 {.compilerproc.} =
   if len <= 0:
     result = NimStringV2(len: 0, p: nil)
   else:
-    when compileOption("threads"):
-      var p = cast[ptr NimStrPayload](allocShared0(contentSize(len)))
-    else:
-      var p = cast[ptr NimStrPayload](alloc0(contentSize(len)))
+    var p = allocPayload(len)
     p.cap = len
-    if len > 0:
-      # we are about to append, so there is no need to copy the \0 terminator:
-      copyMem(unsafeAddr p.data[0], str, len)
+    copyMem(unsafeAddr p.data[0], str, len+1)
     result = NimStringV2(len: len, p: p)
 
 proc cstrToNimstr(str: cstring): NimStringV2 {.compilerRtl.} =
@@ -99,29 +118,24 @@ proc appendString(dest: var NimStringV2; src: NimStringV2) {.compilerproc, inlin
 
 proc appendChar(dest: var NimStringV2; c: char) {.compilerproc, inline.} =
   dest.p.data[dest.len] = c
-  dest.p.data[dest.len+1] = '\0'
   inc dest.len
+  dest.p.data[dest.len] = '\0'
 
 proc rawNewString(space: int): NimStringV2 {.compilerproc.} =
   # this is also 'system.newStringOfCap'.
   if space <= 0:
     result = NimStringV2(len: 0, p: nil)
   else:
-    when compileOption("threads"):
-      var p = cast[ptr NimStrPayload](allocShared0(contentSize(space)))
-    else:
-      var p = cast[ptr NimStrPayload](alloc0(contentSize(space)))
+    var p = allocPayload(space)
     p.cap = space
+    p.data[0] = '\0'
     result = NimStringV2(len: 0, p: p)
 
 proc mnewString(len: int): NimStringV2 {.compilerproc.} =
   if len <= 0:
     result = NimStringV2(len: 0, p: nil)
   else:
-    when compileOption("threads"):
-      var p = cast[ptr NimStrPayload](allocShared0(contentSize(len)))
-    else:
-      var p = cast[ptr NimStrPayload](alloc0(contentSize(len)))
+    var p = allocPayload0(len)
     p.cap = len
     result = NimStringV2(len: len, p: p)
 
@@ -129,13 +143,30 @@ proc setLengthStrV2(s: var NimStringV2, newLen: int) {.compilerRtl.} =
   if newLen == 0:
     discard "do not free the buffer here, pattern 's.setLen 0' is common for avoiding allocations"
   else:
-    if newLen > s.len or isLiteral(s):
-      prepareAdd(s, newLen - s.len)
+    if isLiteral(s):
+      let oldP = s.p
+      s.p = allocPayload(newLen)
+      s.p.cap = newLen
+      if s.len > 0:
+        copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], min(s.len, newLen))
+        if newLen > s.len:
+          zeroMem(cast[pointer](addr s.p.data[s.len]), newLen - s.len + 1)
+        else:
+          s.p.data[newLen] = '\0'
+      else:
+        zeroMem(cast[pointer](addr s.p.data[0]), newLen + 1)
+    elif newLen > s.len:
+      let oldCap = s.p.cap and not strlitFlag
+      if newLen > oldCap:
+        let newCap = max(newLen, resize(oldCap))
+        s.p = reallocPayload0(s.p, oldCap, newCap)
+        s.p.cap = newCap
+
     s.p.data[newLen] = '\0'
   s.len = newLen
 
 proc nimAsgnStrV2(a: var NimStringV2, b: NimStringV2) {.compilerRtl.} =
-  if a.p == b.p: return
+  if a.p == b.p and a.len == b.len: return
   if isLiteral(b):
     # we can shallow copy literals:
     frees(a)
@@ -147,10 +178,7 @@ proc nimAsgnStrV2(a: var NimStringV2, b: NimStringV2) {.compilerRtl.} =
       # 'let y = newStringOfCap(); var x = y'
       # on the other hand... These get turned into moves now.
       frees(a)
-      when compileOption("threads"):
-        a.p = cast[ptr NimStrPayload](allocShared0(contentSize(b.len)))
-      else:
-        a.p = cast[ptr NimStrPayload](alloc0(contentSize(b.len)))
+      a.p = allocPayload(b.len)
       a.p.cap = b.len
     a.len = b.len
     copyMem(unsafeAddr a.p.data[0], unsafeAddr b.p.data[0], b.len+1)
@@ -158,14 +186,11 @@ proc nimAsgnStrV2(a: var NimStringV2, b: NimStringV2) {.compilerRtl.} =
 proc nimPrepareStrMutationImpl(s: var NimStringV2) =
   let oldP = s.p
   # can't mutate a literal, so we need a fresh copy here:
-  when compileOption("threads"):
-    s.p = cast[ptr NimStrPayload](allocShared0(contentSize(s.len)))
-  else:
-    s.p = cast[ptr NimStrPayload](alloc0(contentSize(s.len)))
+  s.p = allocPayload(s.len)
   s.p.cap = s.len
   copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], s.len+1)
 
-proc nimPrepareStrMutationV2(s: var NimStringV2) {.compilerRtl, inline.} =
+proc nimPrepareStrMutationV2(s: var NimStringV2) {.compilerRtl, inl.} =
   if s.p != nil and (s.p.cap and strlitFlag) == strlitFlag:
     nimPrepareStrMutationImpl(s)
 
@@ -176,9 +201,16 @@ proc prepareMutation*(s: var string) {.inline.} =
     let s = unsafeAddr s
     nimPrepareStrMutationV2(cast[ptr NimStringV2](s)[])
 
+proc nimAddStrV1(s: var NimStringV2; src: NimStringV2) {.compilerRtl, inl.} =
+  #if (s.p == nil) or (s.len+1 > s.p.cap and not strlitFlag):
+  prepareAdd(s, src.len)
+  appendString s, src
+
+proc nimDestroyStrV1(s: NimStringV2) {.compilerRtl, inl.} =
+  frees(s)
 
-template capacityImpl(str: NimStringV2): int =
-  if str.p != nil: str.p.cap else: 0
+proc nimStrAtLe(s: string; idx: int; ch: char): bool {.compilerRtl, inl.} =
+  result = idx < s.len and s[idx] <= ch
 
 func capacity*(self: string): int {.inline.} =
   ## Returns the current capacity of the string.
@@ -188,6 +220,5 @@ func capacity*(self: string): int {.inline.} =
     str.add "Nim"
     assert str.capacity == 42
 
-  {.cast(noSideEffect).}:
-    let str = unsafeAddr self
-    result = capacityImpl(cast[ptr NimStringV2](str)[])
+  let str = cast[ptr NimStringV2](unsafeAddr self)
+  result = if str.p != nil: str.p.cap and not strlitFlag else: 0
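A usage sketch for `prepareMutation` in the copy-on-write scheme above: a string assigned from a literal may still share the literal's payload (`strlitFlag` set), so code that writes through `addr s[0]` should force a private copy first (assumes `--mm:arc`/`orc`):

```nim
var s = "hello"           # may still point at the read-only literal payload
prepareMutation(s)        # guarantees a private, writable payload
let src = "HELLO"
copyMem(addr s[0], unsafeAddr src[0], s.len)
assert s == "HELLO"
```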
diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim
index be32652d8..3621c4960 100644
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -17,10 +17,10 @@
 
 
 proc dataPointer(a: PGenericSeq, elemAlign: int): pointer =
-  cast[pointer](cast[ByteAddress](a) +% align(GenericSeqSize, elemAlign))
+  cast[pointer](cast[int](a) +% align(GenericSeqSize, elemAlign))
 
 proc dataPointer(a: PGenericSeq, elemAlign, elemSize, index: int): pointer =
-  cast[pointer](cast[ByteAddress](a) +% align(GenericSeqSize, elemAlign) +% (index*%elemSize))
+  cast[pointer](cast[int](a) +% align(GenericSeqSize, elemAlign) +% (index*%elemSize))
 
 proc resize(old: int): int {.inline.} =
   if old <= 0: result = 4
@@ -47,27 +47,22 @@ else:
   template allocStrNoInit(size: untyped): untyped =
     cast[NimString](newObjNoInit(addr(strDesc), size))
 
-proc rawNewStringNoInit(space: int): NimString {.compilerproc.} =
-  var s = space
-  if s < 7: s = 7
+proc rawNewStringNoInit(space: int): NimString =
+  let s = max(space, 7)
   result = allocStrNoInit(sizeof(TGenericSeq) + s + 1)
   result.reserved = s
-  result.len = 0
   when defined(gogc):
     result.elemSize = 1
 
 proc rawNewString(space: int): NimString {.compilerproc.} =
-  var s = space
-  if s < 7: s = 7
-  result = allocStr(sizeof(TGenericSeq) + s + 1)
-  result.reserved = s
+  result = rawNewStringNoInit(space)
   result.len = 0
-  when defined(gogc):
-    result.elemSize = 1
+  result.data[0] = '\0'
 
 proc mnewString(len: int): NimString {.compilerproc.} =
-  result = rawNewString(len)
+  result = rawNewStringNoInit(len)
   result.len = len
+  zeroMem(addr result.data[0], len + 1)
 
 proc copyStrLast(s: NimString, start, last: int): NimString {.compilerproc.} =
   # This is not used by most recent versions of the compiler anymore, but
@@ -75,13 +70,10 @@ proc copyStrLast(s: NimString, start, last: int): NimString {.compilerproc.} =
   let start = max(start, 0)
   if s == nil: return nil
   let len = min(last, s.len-1) - start + 1
-  if len > 0:
-    result = rawNewStringNoInit(len)
-    result.len = len
-    copyMem(addr(result.data), addr(s.data[start]), len)
-    result.data[len] = '\0'
-  else:
-    result = rawNewString(len)
+  result = rawNewStringNoInit(len)
+  result.len = len
+  copyMem(addr(result.data), addr(s.data[start]), len)
+  result.data[len] = '\0'
 
 proc copyStr(s: NimString, start: int): NimString {.compilerproc.} =
   # This is not used by most recent versions of the compiler anymore, but
@@ -96,7 +88,8 @@ proc nimToCStringConv(s: NimString): cstring {.compilerproc, nonReloadable, inli
 proc toNimStr(str: cstring, len: int): NimString {.compilerproc.} =
   result = rawNewStringNoInit(len)
   result.len = len
-  copyMem(addr(result.data), str, len + 1)
+  copyMem(addr(result.data), str, len)
+  result.data[len] = '\0'
 
 proc cstrToNimstr(str: cstring): NimString {.compilerRtl.} =
   if str == nil: NimString(nil)
@@ -201,7 +194,7 @@ proc addChar(s: NimString, c: char): NimString =
 
 proc resizeString(dest: NimString, addlen: int): NimString {.compilerRtl.} =
   if dest == nil:
-    result = rawNewStringNoInit(addlen)
+    result = rawNewString(addlen)
   elif dest.len + addlen <= dest.space:
     result = dest
   else: # slow path:
@@ -227,15 +220,18 @@ proc appendChar(dest: NimString, c: char) {.compilerproc, inline.} =
 proc setLengthStr(s: NimString, newLen: int): NimString {.compilerRtl.} =
   let n = max(newLen, 0)
   if s == nil:
-    result = mnewString(newLen)
+    if n == 0:
+      return s
+    else:
+      result = mnewString(n)
   elif n <= s.space:
     result = s
   else:
-    let sp = max(resize(s.space), newLen)
+    let sp = max(resize(s.space), n)
     result = rawNewStringNoInit(sp)
     result.len = s.len
-    copyMem(addr result.data[0], unsafeAddr(s.data[0]), s.len+1)
-    zeroMem(addr result.data[s.len], newLen - s.len)
+    copyMem(addr result.data[0], unsafeAddr(s.data[0]), s.len)
+    zeroMem(addr result.data[s.len], n - s.len)
     result.reserved = sp
   result.len = n
   result.data[n] = '\0'
@@ -308,7 +304,10 @@ proc setLengthSeqV2(s: PGenericSeq, typ: PNimType, newLen: int): PGenericSeq {.
     compilerRtl.} =
   sysAssert typ.kind == tySequence, "setLengthSeqV2: type is not a seq"
   if s == nil:
-    result = cast[PGenericSeq](newSeq(typ, newLen))
+    if newLen == 0:
+      result = s
+    else:
+      result = cast[PGenericSeq](newSeq(typ, newLen))
   else:
     let elemSize = typ.base.size
     let elemAlign = typ.base.align
@@ -340,3 +339,25 @@ proc setLengthSeqV2(s: PGenericSeq, typ: PNimType, newLen: int): PGenericSeq {.
       result = s
       zeroMem(dataPointer(result, elemAlign, elemSize, result.len), (newLen-%result.len) *% elemSize)
     result.len = newLen
+
+func capacity*(self: string): int {.inline.} =
+  ## Returns the current capacity of the string.
+  # See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var str = newStringOfCap(cap = 42)
+    str.add "Nim"
+    assert str.capacity == 42
+
+  let str = cast[NimString](self)
+  result = if str != nil: str.space else: 0
+
+func capacity*[T](self: seq[T]): int {.inline.} =
+  ## Returns the current capacity of the seq.
+  # See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var lst = newSeqOfCap[string](cap = 42)
+    lst.add "Nim"
+    assert lst.capacity == 42
+
+  let sek = cast[PGenericSeq](self)
+  result = if sek != nil: sek.space else: 0
diff --git a/lib/system/threadimpl.nim b/lib/system/threadimpl.nim
index 94db23336..285b8f5e7 100644
--- a/lib/system/threadimpl.nim
+++ b/lib/system/threadimpl.nim
@@ -20,13 +20,8 @@ when not defined(useNimRtl):
       threadType = ThreadType.NimThread
 
 when defined(gcDestructors):
-  proc allocThreadStorage(size: int): pointer =
-    result = c_malloc(csize_t size)
-    zeroMem(result, size)
-
   proc deallocThreadStorage(p: pointer) = c_free(p)
 else:
-  template allocThreadStorage(size: untyped): untyped = allocShared0(size)
   template deallocThreadStorage(p: pointer) = deallocShared(p)
 
 template afterThreadRuns() =