26 files changed, 711 insertions, 527 deletions
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index fd3ced832..ad3419808 100644
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -8,7 +8,7 @@
 #
 
 # Low level allocator for Nim. Has been designed to support the GC.
-# TODO: 
+# TODO:
 # - eliminate "used" field
 # - make searching for block O(1)
 {.push profiler:off.}
@@ -21,37 +21,37 @@
 # used with a size of 0:
 const weirdUnmap = not (defined(amd64) or defined(i386)) or defined(windows)
 
-when defined(posix): 
+when defined(posix):
   const
-    PROT_READ  = 1             # page can be read 
-    PROT_WRITE = 2             # page can be written 
-    MAP_PRIVATE = 2'i32        # Changes are private 
-  
+    PROT_READ  = 1             # page can be read
+    PROT_WRITE = 2             # page can be written
+    MAP_PRIVATE = 2'i32        # Changes are private
+
   when defined(macosx) or defined(bsd):
     const MAP_ANONYMOUS = 0x1000
-  elif defined(solaris): 
+  elif defined(solaris):
     const MAP_ANONYMOUS = 0x100
   else:
     var
       MAP_ANONYMOUS {.importc: "MAP_ANONYMOUS", header: "<sys/mman.h>".}: cint
-    
+
   proc mmap(adr: pointer, len: int, prot, flags, fildes: cint,
             off: int): pointer {.header: "<sys/mman.h>".}
 
   proc munmap(adr: pointer, len: int) {.header: "<sys/mman.h>".}
-  
-  proc osAllocPages(size: int): pointer {.inline.} = 
-    result = mmap(nil, size, PROT_READ or PROT_WRITE, 
+
+  proc osAllocPages(size: int): pointer {.inline.} =
+    result = mmap(nil, size, PROT_READ or PROT_WRITE,
                              MAP_PRIVATE or MAP_ANONYMOUS, -1, 0)
     if result == nil or result == cast[pointer](-1):
       raiseOutOfMem()
-      
+
   proc osDeallocPages(p: pointer, size: int) {.inline} =
     when reallyOsDealloc: munmap(p, size)
-  
-elif defined(windows): 
+
+elif defined(windows):
   const
-    MEM_RESERVE = 0x2000 
+    MEM_RESERVE = 0x2000
     MEM_COMMIT = 0x1000
     MEM_TOP_DOWN = 0x100000
     PAGE_READWRITE = 0x04
@@ -62,12 +62,12 @@ elif defined(windows):
   proc virtualAlloc(lpAddress: pointer, dwSize: int, flAllocationType,
                     flProtect: int32): pointer {.
                     header: "<windows.h>", stdcall, importc: "VirtualAlloc".}
-  
-  proc virtualFree(lpAddress: pointer, dwSize: int, 
+
+  proc virtualFree(lpAddress: pointer, dwSize: int,
                    dwFreeType: int32) {.header: "<windows.h>", stdcall,
                    importc: "VirtualFree".}
-  
-  proc osAllocPages(size: int): pointer {.inline.} = 
+
+  proc osAllocPages(size: int): pointer {.inline.} =
     result = virtualAlloc(nil, size, MEM_RESERVE or MEM_COMMIT,
                           PAGE_READWRITE)
     if result == nil: raiseOutOfMem()
@@ -82,7 +82,7 @@ elif defined(windows):
     when reallyOsDealloc: virtualFree(p, 0, MEM_RELEASE)
     #VirtualFree(p, size, MEM_DECOMMIT)
 
-else: 
+else:
   {.error: "Port memory manager to your platform".}
 
 # --------------------- end of non-portable code -----------------------------
@@ -97,17 +97,17 @@ const
   InitialMemoryRequest = ChunkOsReturn div 2 # < ChunkOsReturn!
   SmallChunkSize = PageSize
 
-type 
+type
   PTrunk = ptr TTrunk
-  TTrunk {.final.} = object 
+  TTrunk {.final.} = object
     next: PTrunk         # all nodes are connected with this pointer
     key: int             # start address at bit 0
     bits: array[0..IntsPerTrunk-1, int] # a bit vector
-  
+
   TTrunkBuckets = array[0..255, PTrunk]
-  TIntSet {.final.} = object 
+  TIntSet {.final.} = object
     data: TTrunkBuckets
-  
+
 type
   TAlignType = BiggestFloat
   TFreeCell {.final, pure.} = object
@@ -123,14 +123,14 @@ type
     prevSize: int        # size of previous chunk; for coalescing
     size: int            # if < PageSize it is a small chunk
     used: bool           # later will be optimized into prevSize...
-  
+
   TSmallChunk = object of TBaseChunk
     next, prev: PSmallChunk  # chunks of the same size
     freeList: ptr TFreeCell
-    free: int            # how many bytes remain    
+    free: int            # how many bytes remain
     acc: int             # accumulator for small object allocation
     data: TAlignType     # start of usable memory
-  
+
   TBigChunk = object of TBaseChunk # not necessarily > PageSize!
     next, prev: PBigChunk    # chunks of the same (or bigger) size
     align: int
@@ -139,7 +139,7 @@ type
 template smallChunkOverhead(): expr = sizeof(TSmallChunk)-sizeof(TAlignType)
 template bigChunkOverhead(): expr = sizeof(TBigChunk)-sizeof(TAlignType)
 
-proc roundup(x, v: int): int {.inline.} = 
+proc roundup(x, v: int): int {.inline.} =
   result = (x + (v-1)) and not (v-1)
   sysAssert(result >= x, "roundup: result < x")
   #return ((-x) and (v-1)) +% x
@@ -153,7 +153,7 @@ sysAssert(roundup(65, 8) == 72, "roundup broken 2")
 # endings of big chunks. This is needed by the merging operation. The only
 # remaining operation is best-fit for big chunks. Since there is a size-limit
 # for big chunks (because greater than the limit means they are returned back
-# to the OS), a fixed size array can be used. 
+# to the OS), a fixed size array can be used.
 
 type
   PLLChunk = ptr TLLChunk
@@ -163,21 +163,21 @@ type
     next: PLLChunk           # next low-level chunk; only needed for dealloc
 
   PAvlNode = ptr TAvlNode
-  TAvlNode {.pure, final.} = object 
-    link: array[0..1, PAvlNode] # Left (0) and right (1) links 
+  TAvlNode {.pure, final.} = object
+    link: array[0..1, PAvlNode] # Left (0) and right (1) links
     key, upperBound: int
     level: int
-    
+
   TMemRegion {.final, pure.} = object
     minLargeObj, maxLargeObj: int
     freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk]
     llmem: PLLChunk
     currMem, maxMem, freeMem: int # memory sizes (allocated from OS)
-    lastSize: int # needed for the case that OS gives us pages linearly 
+    lastSize: int # needed for the case that OS gives us pages linearly
     freeChunksList: PBigChunk # XXX make this a datastructure with O(1) access
     chunkStarts: TIntSet
     root, deleted, last, freeAvlNodes: PAvlNode
-  
+
 # shared:
 var
   bottomData: TAvlNode
@@ -191,7 +191,7 @@ proc initAllocator() =
     bottom.link[1] = bottom
 {.pop.}
 
-proc incCurrMem(a: var TMemRegion, bytes: int) {.inline.} = 
+proc incCurrMem(a: var TMemRegion, bytes: int) {.inline.} =
   inc(a.currMem, bytes)
 
 proc decCurrMem(a: var TMemRegion, bytes: int) {.inline.} =
@@ -199,11 +199,11 @@ proc decCurrMem(a: var TMemRegion, bytes: int) {.inline.} =
   dec(a.currMem, bytes)
 
 proc getMaxMem(a: var TMemRegion): int =
-  # Since we update maxPagesCount only when freeing pages, 
+  # Since we update maxPagesCount only when freeing pages,
   # maxPagesCount may not be up to date. Thus we use the
   # maximum of these both values here:
   result = max(a.currMem, a.maxMem)
-  
+
 proc llAlloc(a: var TMemRegion, size: int): pointer =
   # *low-level* alloc for the memory managers data structures. Deallocation
   # is done at he end of the allocator's life time.
@@ -251,15 +251,15 @@ proc llDeallocAll(a: var TMemRegion) =
     var next = it.next
     osDeallocPages(it, PageSize)
     it = next
-  
-proc intSetGet(t: TIntSet, key: int): PTrunk = 
+
+proc intSetGet(t: TIntSet, key: int): PTrunk =
   var it = t.data[key and high(t.data)]
-  while it != nil: 
+  while it != nil:
     if it.key == key: return it
     it = it.next
   result = nil
 
-proc intSetPut(a: var TMemRegion, t: var TIntSet, key: int): PTrunk = 
+proc intSetPut(a: var TMemRegion, t: var TIntSet, key: int): PTrunk =
   result = intSetGet(t, key)
   if result == nil:
     result = cast[PTrunk](llAlloc(a, sizeof(result[])))
@@ -267,20 +267,20 @@ proc intSetPut(a: var TMemRegion, t: var TIntSet, key: int): PTrunk =
     t.data[key and high(t.data)] = result
     result.key = key
 
-proc contains(s: TIntSet, key: int): bool = 
+proc contains(s: TIntSet, key: int): bool =
   var t = intSetGet(s, key shr TrunkShift)
-  if t != nil: 
+  if t != nil:
     var u = key and TrunkMask
     result = (t.bits[u shr IntShift] and (1 shl (u and IntMask))) != 0
-  else: 
+  else:
     result = false
-  
-proc incl(a: var TMemRegion, s: var TIntSet, key: int) = 
+
+proc incl(a: var TMemRegion, s: var TIntSet, key: int) =
   var t = intSetPut(a, s, key shr TrunkShift)
   var u = key and TrunkMask
   t.bits[u shr IntShift] = t.bits[u shr IntShift] or (1 shl (u and IntMask))
 
-proc excl(s: var TIntSet, key: int) = 
+proc excl(s: var TIntSet, key: int) =
   var t = intSetGet(s, key shr TrunkShift)
   if t != nil:
     var u = key and TrunkMask
@@ -304,11 +304,11 @@ iterator elements(t: TIntSet): int {.inline.} =
           w = w shr 1
         inc(i)
       r = r.next
-  
-proc isSmallChunk(c: PChunk): bool {.inline.} = 
+
+proc isSmallChunk(c: PChunk): bool {.inline.} =
   return c.size <= SmallChunkSize-smallChunkOverhead()
-  
-proc chunkUnused(c: PChunk): bool {.inline.} = 
+
+proc chunkUnused(c: PChunk): bool {.inline.} =
   result = not c.used
 
 iterator allObjects(m: TMemRegion): pointer {.inline.} =
@@ -319,7 +319,7 @@ iterator allObjects(m: TMemRegion): pointer {.inline.} =
       if not chunkUnused(c):
         if isSmallChunk(c):
           var c = cast[PSmallChunk](c)
-          
+
           let size = c.size
           var a = cast[ByteAddress](addr(c.data))
           let limit = a + c.acc
@@ -334,17 +334,17 @@ proc isCell(p: pointer): bool {.inline.} =
   result = cast[ptr TFreeCell](p).zeroField >% 1
 
 # ------------- chunk management ----------------------------------------------
-proc pageIndex(c: PChunk): int {.inline.} = 
+proc pageIndex(c: PChunk): int {.inline.} =
   result = cast[ByteAddress](c) shr PageShift
 
-proc pageIndex(p: pointer): int {.inline.} = 
+proc pageIndex(p: pointer): int {.inline.} =
   result = cast[ByteAddress](p) shr PageShift
 
-proc pageAddr(p: pointer): PChunk {.inline.} = 
+proc pageAddr(p: pointer): PChunk {.inline.} =
   result = cast[PChunk](cast[ByteAddress](p) and not PageMask)
   #sysAssert(Contains(allocator.chunkStarts, pageIndex(result)))
 
-proc requestOsChunks(a: var TMemRegion, size: int): PBigChunk = 
+proc requestOsChunks(a: var TMemRegion, size: int): PBigChunk =
   incCurrMem(a, size)
   inc(a.freeMem, size)
   result = cast[PBigChunk](osAllocPages(size))
@@ -373,7 +373,7 @@ proc requestOsChunks(a: var TMemRegion, size: int): PBigChunk =
     result.prevSize = 0 # unknown
   a.lastSize = size # for next request
 
-proc freeOsChunks(a: var TMemRegion, p: pointer, size: int) = 
+proc freeOsChunks(a: var TMemRegion, p: pointer, size: int) =
   # update next.prevSize:
   var c = cast[PChunk](p)
   var nxt = cast[ByteAddress](p) +% c.size
@@ -387,36 +387,36 @@ proc freeOsChunks(a: var TMemRegion, p: pointer, size: int) =
   dec(a.freeMem, size)
   #c_fprintf(c_stdout, "[Alloc] back to OS: %ld\n", size)
 
-proc isAccessible(a: TMemRegion, p: pointer): bool {.inline.} = 
+proc isAccessible(a: TMemRegion, p: pointer): bool {.inline.} =
   result = contains(a.chunkStarts, pageIndex(p))
 
-proc contains[T](list, x: T): bool = 
+proc contains[T](list, x: T): bool =
   var it = list
   while it != nil:
     if it == x: return true
     it = it.next
-    
+
 proc writeFreeList(a: TMemRegion) =
   var it = a.freeChunksList
   c_fprintf(c_stdout, "freeChunksList: %p\n", it)
-  while it != nil: 
-    c_fprintf(c_stdout, "it: %p, next: %p, prev: %p\n", 
+  while it != nil:
+    c_fprintf(c_stdout, "it: %p, next: %p, prev: %p\n",
               it, it.next, it.prev)
     it = it.next
 
-proc listAdd[T](head: var T, c: T) {.inline.} = 
+proc listAdd[T](head: var T, c: T) {.inline.} =
   sysAssert(c notin head, "listAdd 1")
   sysAssert c.prev == nil, "listAdd 2"
   sysAssert c.next == nil, "listAdd 3"
   c.next = head
-  if head != nil: 
+  if head != nil:
     sysAssert head.prev == nil, "listAdd 4"
     head.prev = c
   head = c
 
 proc listRemove[T](head: var T, c: T) {.inline.} =
   sysAssert(c in head, "listRemove")
-  if c == head: 
+  if c == head:
     head = c.next
     sysAssert c.prev == nil, "listRemove 2"
     if head != nil: head.prev = nil
@@ -426,15 +426,15 @@ proc listRemove[T](head: var T, c: T) {.inline.} =
     if c.next != nil: c.next.prev = c.prev
   c.next = nil
   c.prev = nil
-  
-proc updatePrevSize(a: var TMemRegion, c: PBigChunk, 
-                    prevSize: int) {.inline.} = 
+
+proc updatePrevSize(a: var TMemRegion, c: PBigChunk,
+                    prevSize: int) {.inline.} =
   var ri = cast[PChunk](cast[ByteAddress](c) +% c.size)
   sysAssert((cast[ByteAddress](ri) and PageMask) == 0, "updatePrevSize")
   if isAccessible(a, ri):
     ri.prevSize = prevSize
-  
-proc freeBigChunk(a: var TMemRegion, c: PBigChunk) = 
+
+proc freeBigChunk(a: var TMemRegion, c: PBigChunk) =
   var c = c
   sysAssert(c.size >= PageSize, "freeBigChunk")
   inc(a.freeMem, c.size)
@@ -448,7 +448,7 @@ proc freeBigChunk(a: var TMemRegion, c: PBigChunk) =
         inc(c.size, ri.size)
         excl(a.chunkStarts, pageIndex(ri))
   when coalescLeft:
-    if c.prevSize != 0: 
+    if c.prevSize != 0:
       var le = cast[PChunk](cast[ByteAddress](c) -% c.prevSize)
       sysAssert((cast[ByteAddress](le) and PageMask) == 0, "freeBigChunk 4")
       if isAccessible(a, le) and chunkUnused(le):
@@ -467,7 +467,7 @@ proc freeBigChunk(a: var TMemRegion, c: PBigChunk) =
   else:
     freeOsChunks(a, c, c.size)
 
-proc splitChunk(a: var TMemRegion, c: PBigChunk, size: int) = 
+proc splitChunk(a: var TMemRegion, c: PBigChunk, size: int) =
   var rest = cast[PBigChunk](cast[ByteAddress](c) +% size)
   sysAssert(rest notin a.freeChunksList, "splitChunk")
   rest.size = c.size - size
@@ -480,7 +480,7 @@ proc splitChunk(a: var TMemRegion, c: PBigChunk, size: int) =
   incl(a, a.chunkStarts, pageIndex(rest))
   listAdd(a.freeChunksList, rest)
 
-proc getBigChunk(a: var TMemRegion, size: int): PBigChunk = 
+proc getBigChunk(a: var TMemRegion, size: int): PBigChunk =
   # use first fit for now:
   sysAssert((size and PageMask) == 0, "getBigChunk 1")
   sysAssert(size > 0, "getBigChunk 2")
@@ -488,7 +488,7 @@ proc getBigChunk(a: var TMemRegion, size: int): PBigChunk =
   block search:
     while result != nil:
       sysAssert chunkUnused(result), "getBigChunk 3"
-      if result.size == size: 
+      if result.size == size:
         listRemove(a.freeChunksList, result)
         break search
       elif result.size > size:
@@ -497,7 +497,7 @@ proc getBigChunk(a: var TMemRegion, size: int): PBigChunk =
         break search
       result = result.next
       sysAssert result != a.freeChunksList, "getBigChunk 4"
-    if size < InitialMemoryRequest: 
+    if size < InitialMemoryRequest:
       result = requestOsChunks(a, InitialMemoryRequest)
       splitChunk(a, result, size)
     else:
@@ -507,7 +507,7 @@ proc getBigChunk(a: var TMemRegion, size: int): PBigChunk =
   incl(a, a.chunkStarts, pageIndex(result))
   dec(a.freeMem, size)
 
-proc getSmallChunk(a: var TMemRegion): PSmallChunk = 
+proc getSmallChunk(a: var TMemRegion): PSmallChunk =
   var res = getBigChunk(a, PageSize)
   sysAssert res.prev == nil, "getSmallChunk 1"
   sysAssert res.next == nil, "getSmallChunk 2"
@@ -521,15 +521,15 @@ proc allocInv(a: TMemRegion): bool =
   for s in low(a.freeSmallChunks)..high(a.freeSmallChunks):
     var c = a.freeSmallChunks[s]
     while c != nil:
-      if c.next == c: 
+      if c.next == c:
         echo "[SYSASSERT] c.next == c"
         return false
-      if c.size != s * MemAlign: 
+      if c.size != s * MemAlign:
         echo "[SYSASSERT] c.size != s * MemAlign"
         return false
       var it = c.freeList
       while it != nil:
-        if it.zeroField != 0: 
+        if it.zeroField != 0:
           echo "[SYSASSERT] it.zeroField != 0"
           c_printf("%ld %p\n", it.zeroField, it)
           return false
@@ -539,16 +539,16 @@ proc allocInv(a: TMemRegion): bool =
 
 proc rawAlloc(a: var TMemRegion, requestedSize: int): pointer =
   sysAssert(allocInv(a), "rawAlloc: begin")
-  sysAssert(roundup(65, 8) == 72, "rawAlloc 1")
-  sysAssert requestedSize >= sizeof(TFreeCell), "rawAlloc 2"
+  sysAssert(roundup(65, 8) == 72, "rawAlloc: roundup broken")
+  sysAssert(requestedSize >= sizeof(TFreeCell), "rawAlloc: requested size too small")
   var size = roundup(requestedSize, MemAlign)
   sysAssert(size >= requestedSize, "insufficient allocated size!")
   #c_fprintf(c_stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
-  if size <= SmallChunkSize-smallChunkOverhead(): 
+  if size <= SmallChunkSize-smallChunkOverhead():
     # allocate a small block: for small chunks, we use only its next pointer
     var s = size div MemAlign
     var c = a.freeSmallChunks[s]
-    if c == nil: 
+    if c == nil:
       c = getSmallChunk(a)
       c.freeList = nil
       sysAssert c.size == PageSize, "rawAlloc 3"
@@ -567,7 +567,7 @@ proc rawAlloc(a: var TMemRegion, requestedSize: int): pointer =
       #  c_fprintf(c_stdout, "csize: %lld; size %lld\n", c.size, size)
       sysAssert c.size == size, "rawAlloc 6"
       if c.freeList == nil:
-        sysAssert(c.acc + smallChunkOverhead() + size <= SmallChunkSize, 
+        sysAssert(c.acc + smallChunkOverhead() + size <= SmallChunkSize,
                   "rawAlloc 7")
         result = cast[pointer](cast[ByteAddress](addr(c.data)) +% c.acc)
         inc(c.acc, size)
@@ -621,9 +621,9 @@ proc rawDealloc(a: var TMemRegion, p: pointer) =
     f.zeroField = 0
     f.next = c.freeList
     c.freeList = f
-    when overwriteFree: 
+    when overwriteFree:
       # set to 0xff to check for usage after free bugs:
-      c_memset(cast[pointer](cast[int](p) +% sizeof(TFreeCell)), -1'i32, 
+      c_memset(cast[pointer](cast[int](p) +% sizeof(TFreeCell)), -1'i32,
                s -% sizeof(TFreeCell))
     # check if it is not in the freeSmallChunks[s] list:
     if c.free < s:
@@ -649,13 +649,13 @@ proc rawDealloc(a: var TMemRegion, p: pointer) =
   sysAssert(allocInv(a), "rawDealloc: end")
   when logAlloc: cprintf("rawDealloc: %p\n", p)
 
-proc isAllocatedPtr(a: TMemRegion, p: pointer): bool = 
+proc isAllocatedPtr(a: TMemRegion, p: pointer): bool =
   if isAccessible(a, p):
     var c = pageAddr(p)
     if not chunkUnused(c):
       if isSmallChunk(c):
         var c = cast[PSmallChunk](c)
-        var offset = (cast[ByteAddress](p) and (PageSize-1)) -% 
+        var offset = (cast[ByteAddress](p) and (PageSize-1)) -%
                      smallChunkOverhead()
         result = (c.acc >% offset) and (offset %% c.size == 0) and
           (cast[ptr TFreeCell](p).zeroField >% 1)
@@ -673,12 +673,12 @@ proc interiorAllocatedPtr(a: TMemRegion, p: pointer): pointer =
     if not chunkUnused(c):
       if isSmallChunk(c):
         var c = cast[PSmallChunk](c)
-        var offset = (cast[ByteAddress](p) and (PageSize-1)) -% 
+        var offset = (cast[ByteAddress](p) and (PageSize-1)) -%
                      smallChunkOverhead()
         if c.acc >% offset:
           sysAssert(cast[ByteAddress](addr(c.data)) +% offset ==
                     cast[ByteAddress](p), "offset is not what you think it is")
-          var d = cast[ptr TFreeCell](cast[ByteAddress](addr(c.data)) +% 
+          var d = cast[ptr TFreeCell](cast[ByteAddress](addr(c.data)) +%
                     offset -% (offset %% c.size))
           if d.zeroField >% 1:
             result = d
@@ -711,13 +711,13 @@ proc ptrSize(p: pointer): int =
   if not isSmallChunk(c):
     dec result, bigChunkOverhead()
 
-proc alloc(allocator: var TMemRegion, size: int): pointer =
+proc alloc(allocator: var TMemRegion, size: Natural): pointer =
   result = rawAlloc(allocator, size+sizeof(TFreeCell))
   cast[ptr TFreeCell](result).zeroField = 1 # mark it as used
   sysAssert(not isAllocatedPtr(allocator, result), "alloc")
   result = cast[pointer](cast[ByteAddress](result) +% sizeof(TFreeCell))
 
-proc alloc0(allocator: var TMemRegion, size: int): pointer =
+proc alloc0(allocator: var TMemRegion, size: Natural): pointer =
   result = alloc(allocator, size)
   zeroMem(result, size)
 
@@ -730,7 +730,7 @@ proc dealloc(allocator: var TMemRegion, p: pointer) =
   rawDealloc(allocator, x)
   sysAssert(not isAllocatedPtr(allocator, x), "dealloc 3")
 
-proc realloc(allocator: var TMemRegion, p: pointer, newsize: int): pointer =
+proc realloc(allocator: var TMemRegion, p: pointer, newsize: Natural): pointer =
   if newsize > 0:
     result = alloc0(allocator, newsize)
     if p != nil:
@@ -758,7 +758,7 @@ proc deallocOsPages(a: var TMemRegion) =
 
 proc getFreeMem(a: TMemRegion): int {.inline.} = result = a.freeMem
 proc getTotalMem(a: TMemRegion): int {.inline.} = result = a.currMem
-proc getOccupiedMem(a: TMemRegion): int {.inline.} = 
+proc getOccupiedMem(a: TMemRegion): int {.inline.} =
   result = a.currMem - a.freeMem
 
 # ---------------------- thread memory region -------------------------------
@@ -774,16 +774,16 @@ template instantiateForRegion(allocator: expr) =
 
   proc deallocOsPages = deallocOsPages(allocator)
 
-  proc alloc(size: int): pointer =
+  proc alloc(size: Natural): pointer =
     result = alloc(allocator, size)
 
-  proc alloc0(size: int): pointer =
+  proc alloc0(size: Natural): pointer =
     result = alloc0(allocator, size)
 
   proc dealloc(p: pointer) =
     dealloc(allocator, p)
 
-  proc realloc(p: pointer, newsize: int): pointer =
+  proc realloc(p: pointer, newsize: Natural): pointer =
     result = realloc(allocator, p, newSize)
 
   when false:
@@ -794,7 +794,7 @@ template instantiateForRegion(allocator: expr) =
         inc(result, it.size)
         it = it.next
 
-  proc getFreeMem(): int = 
+  proc getFreeMem(): int =
     result = allocator.freeMem
     #sysAssert(result == countFreeMem())
 
@@ -807,7 +807,7 @@ template instantiateForRegion(allocator: expr) =
     var heapLock: TSysLock
     initSysLock(heapLock)
 
-  proc allocShared(size: int): pointer =
+  proc allocShared(size: Natural): pointer =
     when hasThreadSupport:
       acquireSys(heapLock)
       result = alloc(sharedHeap, size)
@@ -815,20 +815,20 @@ template instantiateForRegion(allocator: expr) =
     else:
       result = alloc(size)
 
-  proc allocShared0(size: int): pointer =
+  proc allocShared0(size: Natural): pointer =
     result = allocShared(size)
     zeroMem(result, size)
 
   proc deallocShared(p: pointer) =
-    when hasThreadSupport: 
+    when hasThreadSupport:
       acquireSys(heapLock)
       dealloc(sharedHeap, p)
       releaseSys(heapLock)
     else:
       dealloc(p)
 
-  proc reallocShared(p: pointer, newsize: int): pointer =
-    when hasThreadSupport: 
+  proc reallocShared(p: pointer, newsize: Natural): pointer =
+    when hasThreadSupport:
       acquireSys(heapLock)
       result = realloc(sharedHeap, p, newsize)
       releaseSys(heapLock)
diff --git a/lib/system/arithm.nim b/lib/system/arithm.nim
index c4df287cf..f68e2dcd9 100644
--- a/lib/system/arithm.nim
+++ b/lib/system/arithm.nim
@@ -15,7 +15,7 @@ proc raiseOverflow {.compilerproc, noinline, noreturn.} =
   sysFatal(OverflowError, "over- or underflow")
 
 proc raiseDivByZero {.compilerproc, noinline, noreturn.} =
-  sysFatal(DivByZeroError, "divison by zero")
+  sysFatal(DivByZeroError, "division by zero")
 
 proc addInt64(a, b: int64): int64 {.compilerProc, inline.} =
   result = a +% b
diff --git a/lib/system/assign.nim b/lib/system/assign.nim
index 429a92d34..78995954f 100644
--- a/lib/system/assign.nim
+++ b/lib/system/assign.nim
@@ -27,7 +27,7 @@ proc genericAssignAux(dest, src: pointer, n: ptr TNimNode,
     var m = selectBranch(src, n)
     # reset if different branches are in use; note different branches also
     # imply that's not self-assignment (``x = x``)!
-    if m != dd and dd != nil: 
+    if m != dd and dd != nil:
       genericResetAux(dest, dd)
     copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset),
             n.typ.size)
@@ -205,9 +205,13 @@ proc genericReset(dest: pointer, mt: PNimType) =
   case mt.kind
   of tyString, tyRef, tySequence:
     unsureAsgnRef(cast[PPointer](dest), nil)
-  of tyObject, tyTuple:
-    # we don't need to reset m_type field for tyObject
+  of tyTuple:
+    genericResetAux(dest, mt.node)
+  of tyObject:
     genericResetAux(dest, mt.node)
+    # also reset the type field for tyObject, for correct branch switching!
+    var pint = cast[ptr PNimType](dest)
+    pint[] = nil
   of tyArray, tyArrayConstr:
     for i in 0..(mt.size div mt.base.size)-1:
       genericReset(cast[pointer](d +% i*% mt.base.size), mt.base)
diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim
index f816c8201..c97d2fc7f 100644
--- a/lib/system/atomics.nim
+++ b/lib/system/atomics.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2014 Andreas Rumpf
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -21,19 +21,19 @@ when someGcc and hasThreadSupport:
     ## synchronization with another thread.
   var ATOMIC_ACQUIRE* {.importc: "__ATOMIC_ACQUIRE", nodecl.}: AtomMemModel
     ## Barrier to hoisting of code and synchronizes with
-    ## release (or stronger) 
+    ## release (or stronger)
     ## semantic stores from another thread.
   var ATOMIC_RELEASE* {.importc: "__ATOMIC_RELEASE", nodecl.}: AtomMemModel
     ## Barrier to sinking of code and synchronizes with
-    ## acquire (or stronger) 
-    ## semantic loads from another thread. 
+    ## acquire (or stronger)
+    ## semantic loads from another thread.
   var ATOMIC_ACQ_REL* {.importc: "__ATOMIC_ACQ_REL", nodecl.}: AtomMemModel
     ## Full barrier in both directions and synchronizes
-    ## with acquire loads 
+    ## with acquire loads
     ## and release stores in another thread.
   var ATOMIC_SEQ_CST* {.importc: "__ATOMIC_SEQ_CST", nodecl.}: AtomMemModel
     ## Full barrier in both directions and synchronizes
-    ## with acquire loads 
+    ## with acquire loads
     ## and release stores in all threads.
 
   type
@@ -46,11 +46,11 @@ when someGcc and hasThreadSupport:
     ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_ACQUIRE, ATOMIC_CONSUME.
 
   proc atomicLoad*[T: TAtomType](p, ret: ptr T, mem: AtomMemModel) {.
-    importc: "__atomic_load", nodecl.}  
+    importc: "__atomic_load", nodecl.}
     ## This is the generic version of an atomic load. It returns the contents at p in ret.
 
   proc atomicStoreN*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel) {.
-    importc: "__atomic_store_n", nodecl.} 
+    importc: "__atomic_store_n", nodecl.}
     ## This proc implements an atomic store operation. It writes val at p.
     ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, and ATOMIC_RELEASE.
 
@@ -60,39 +60,39 @@ when someGcc and hasThreadSupport:
 
   proc atomicExchangeN*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_exchange_n", nodecl.}
-    ## This proc implements an atomic exchange operation. It writes val at p, 
+    ## This proc implements an atomic exchange operation. It writes val at p,
     ## and returns the previous contents at p.
     ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_ACQUIRE, ATOMIC_RELEASE, ATOMIC_ACQ_REL
 
   proc atomicExchange*[T: TAtomType](p, val, ret: ptr T, mem: AtomMemModel) {.
     importc: "__atomic_exchange", nodecl.}
-    ## This is the generic version of an atomic exchange. It stores the contents at val at p. 
+    ## This is the generic version of an atomic exchange. It stores the contents at val at p.
     ## The original value at p is copied into ret.
 
   proc atomicCompareExchangeN*[T: TAtomType](p, expected: ptr T, desired: T,
     weak: bool, success_memmodel: AtomMemModel, failure_memmodel: AtomMemModel): bool {.
-    importc: "__atomic_compare_exchange_n ", nodecl.} 
+    importc: "__atomic_compare_exchange_n ", nodecl.}
     ## This proc implements an atomic compare and exchange operation. This compares the
-    ## contents at p with the contents at expected and if equal, writes desired at p. 
-    ## If they are not equal, the current contents at p is written into expected. 
-    ## Weak is true for weak compare_exchange, and false for the strong variation. 
-    ## Many targets only offer the strong variation and ignore the parameter. 
+    ## contents at p with the contents at expected and if equal, writes desired at p.
+    ## If they are not equal, the current contents at p is written into expected.
+    ## Weak is true for weak compare_exchange, and false for the strong variation.
+    ## Many targets only offer the strong variation and ignore the parameter.
     ## When in doubt, use the strong variation.
-    ## True is returned if desired is written at p and the execution is considered 
-    ## to conform to the memory model specified by success_memmodel. There are no 
-    ## restrictions on what memory model can be used here. False is returned otherwise, 
-    ## and the execution is considered to conform to failure_memmodel. This memory model 
-    ## cannot be __ATOMIC_RELEASE nor __ATOMIC_ACQ_REL. It also cannot be a stronger model 
+    ## True is returned if desired is written at p and the execution is considered
+    ## to conform to the memory model specified by success_memmodel. There are no
+    ## restrictions on what memory model can be used here. False is returned otherwise,
+    ## and the execution is considered to conform to failure_memmodel. This memory model
+    ## cannot be __ATOMIC_RELEASE nor __ATOMIC_ACQ_REL. It also cannot be a stronger model
     ## than that specified by success_memmodel.
 
   proc atomicCompareExchange*[T: TAtomType](p, expected, desired: ptr T,
     weak: bool, success_memmodel: AtomMemModel, failure_memmodel: AtomMemModel): bool {.
-    importc: "__atomic_compare_exchange_n ", nodecl.}  
-    ## This proc implements the generic version of atomic_compare_exchange. 
-    ## The proc is virtually identical to atomic_compare_exchange_n, except the desired 
-    ## value is also a pointer. 
+    importc: "__atomic_compare_exchange", nodecl.}
+    ## This proc implements the generic version of atomic_compare_exchange.
+    ## The proc is virtually identical to atomic_compare_exchange_n, except the desired
+    ## value is also a pointer.
 
-  ## Perform the operation return the new value, all memory models are valid 
+  ## Perform the operation return the new value, all memory models are valid
   proc atomicAddFetch*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_add_fetch", nodecl.}
   proc atomicSubFetch*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
@@ -101,64 +101,64 @@ when someGcc and hasThreadSupport:
     importc: "__atomic_or_fetch ", nodecl.}
   proc atomicAndFetch*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_and_fetch", nodecl.}
-  proc atomicXorFetch*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.   
+  proc atomicXorFetch*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_xor_fetch", nodecl.}
-  proc atomicNandFetch*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.   
-    importc: "__atomic_nand_fetch ", nodecl.} 
+  proc atomicNandFetch*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+    importc: "__atomic_nand_fetch ", nodecl.}
 
-  ## Perform the operation return the old value, all memory models are valid 
+  ## Perform the operation return the old value, all memory models are valid
   proc atomicFetchAdd*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_fetch_add", nodecl.}
-  proc atomicFetchSub*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.  
+  proc atomicFetchSub*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_fetch_sub", nodecl.}
-  proc atomicFetchOr*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.  
+  proc atomicFetchOr*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_fetch_or", nodecl.}
-  proc atomicFetchAnd*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.   
+  proc atomicFetchAnd*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_fetch_and", nodecl.}
-  proc atomicFetchXor*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.  
+  proc atomicFetchXor*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_fetch_xor", nodecl.}
-  proc atomicFetchNand*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.  
-    importc: "__atomic_fetch_nand", nodecl.} 
+  proc atomicFetchNand*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+    importc: "__atomic_fetch_nand", nodecl.}
 
-  proc atomicTestAndSet*(p: pointer, mem: AtomMemModel): bool {.  
-    importc: "__atomic_test_and_set", nodecl.} 
-    ## This built-in function performs an atomic test-and-set operation on the byte at p. 
+  proc atomicTestAndSet*(p: pointer, mem: AtomMemModel): bool {.
+    importc: "__atomic_test_and_set", nodecl.}
+    ## This built-in function performs an atomic test-and-set operation on the byte at p.
     ## The byte is set to some implementation defined nonzero “set” value and the return
     ## value is true if and only if the previous contents were “set”.
     ## All memory models are valid.
 
-  proc atomicClear*(p: pointer, mem: AtomMemModel) {.  
+  proc atomicClear*(p: pointer, mem: AtomMemModel) {.
     importc: "__atomic_clear", nodecl.}
-    ## This built-in function performs an atomic clear operation at p. 
+    ## This built-in function performs an atomic clear operation at p.
     ## After the operation, at p contains 0.
     ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_RELEASE
 
-  proc atomicThreadFence*(mem: AtomMemModel) {.  
+  proc atomicThreadFence*(mem: AtomMemModel) {.
     importc: "__atomic_thread_fence", nodecl.}
-    ## This built-in function acts as a synchronization fence between threads based 
+    ## This built-in function acts as a synchronization fence between threads based
     ## on the specified memory model. All memory orders are valid.
 
-  proc atomicSignalFence*(mem: AtomMemModel) {.  
+  proc atomicSignalFence*(mem: AtomMemModel) {.
     importc: "__atomic_signal_fence", nodecl.}
-    ## This built-in function acts as a synchronization fence between a thread and 
+    ## This built-in function acts as a synchronization fence between a thread and
     ## signal handlers based in the same thread. All memory orders are valid.
 
-  proc atomicAlwaysLockFree*(size: int, p: pointer): bool {.  
+  proc atomicAlwaysLockFree*(size: int, p: pointer): bool {.
     importc: "__atomic_always_lock_free", nodecl.}
-    ## This built-in function returns true if objects of size bytes always generate 
-    ## lock free atomic instructions for the target architecture. size must resolve 
+    ## This built-in function returns true if objects of size bytes always generate
+    ## lock free atomic instructions for the target architecture. size must resolve
     ## to a compile-time constant and the result also resolves to a compile-time constant.
-    ## ptr is an optional pointer to the object that may be used to determine alignment. 
-    ## A value of 0 indicates typical alignment should be used. The compiler may also 
+    ## ptr is an optional pointer to the object that may be used to determine alignment.
+    ## A value of 0 indicates typical alignment should be used. The compiler may also
     ## ignore this parameter.
 
-  proc atomicIsLockFree*(size: int, p: pointer): bool {.  
+  proc atomicIsLockFree*(size: int, p: pointer): bool {.
     importc: "__atomic_is_lock_free", nodecl.}
-    ## This built-in function returns true if objects of size bytes always generate 
-    ## lock free atomic instructions for the target architecture. If it is not known 
+    ## This built-in function returns true if objects of size bytes always generate
+    ## lock free atomic instructions for the target architecture. If it is not known
     ## to be lock free a call is made to a runtime routine named __atomic_is_lock_free.
-    ## ptr is an optional pointer to the object that may be used to determine alignment. 
-    ## A value of 0 indicates typical alignment should be used. The compiler may also 
+    ## ptr is an optional pointer to the object that may be used to determine alignment.
+    ## A value of 0 indicates typical alignment should be used. The compiler may also
     ## ignore this parameter.
 
   template fence*() = atomicThreadFence(ATOMIC_SEQ_CST)
@@ -178,7 +178,7 @@ proc atomicInc*(memLoc: var int, x: int = 1): int =
   else:
     inc(memLoc, x)
     result = memLoc
-  
+
 proc atomicDec*(memLoc: var int, x: int = 1): int =
   when someGcc and hasThreadSupport:
     when declared(atomic_sub_fetch):
@@ -206,6 +206,9 @@ else:
 when (defined(x86) or defined(amd64)) and someGcc:
   proc cpuRelax* {.inline.} =
     {.emit: """asm volatile("pause" ::: "memory");""".}
+elif someGcc:
+  proc cpuRelax* {.inline.} =
+    {.emit: """asm volatile("" ::: "memory");""".}
 elif (defined(x86) or defined(amd64)) and defined(vcc):
   proc cpuRelax* {.importc: "YieldProcessor", header: "<windows.h>".}
 elif defined(icl):
diff --git a/lib/system/cgprocs.nim b/lib/system/cgprocs.nim
index 089846578..f3acc81f2 100644
--- a/lib/system/cgprocs.nim
+++ b/lib/system/cgprocs.nim
@@ -13,7 +13,7 @@ proc addChar(s: NimString, c: char): NimString {.compilerProc, benign.}
 
 type
   TLibHandle = pointer       # private type
-  TProcAddr = pointer        # libary loading and loading of procs:
+  TProcAddr = pointer        # library loading and loading of procs:
 
 proc nimLoadLibrary(path: string): TLibHandle {.compilerproc.}
 proc nimUnloadLibrary(lib: TLibHandle) {.compilerproc.}
diff --git a/lib/system/channels.nim b/lib/system/channels.nim
index 3e5ca0795..ebd30c353 100644
--- a/lib/system/channels.nim
+++ b/lib/system/channels.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2014 Andreas Rumpf
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -232,9 +232,10 @@ proc tryRecv*[TMsg](c: var TChannel[TMsg]): tuple[dataAvailable: bool,
   ## it returns ``(false, default(msg))``.
   var q = cast[PRawChannel](addr(c))
   if q.mask != ChannelDeadMask:
-    if tryAcquireSys(q.lock):
-      llRecv(q, addr(result.msg), cast[PNimType](getTypeInfo(result.msg)))
-      result.dataAvailable = true
+    if tryAcquireSys(q.lock):
+      if q.count > 0:
+        llRecv(q, addr(result.msg), cast[PNimType](getTypeInfo(result.msg)))
+        result.dataAvailable = true
       releaseSys(q.lock)
 
 proc peek*[TMsg](c: var TChannel[TMsg]): int =
diff --git a/lib/system/chcks.nim b/lib/system/chcks.nim
index 5c32a307a..2f6d25a12 100644
--- a/lib/system/chcks.nim
+++ b/lib/system/chcks.nim
@@ -11,7 +11,7 @@
 
 proc raiseRangeError(val: BiggestInt) {.compilerproc, noreturn, noinline.} =
   when hostOS == "standalone":
-    sysFatal(EOutOfRange, "value out of range")
+    sysFatal(RangeError, "value out of range")
   else:
     sysFatal(RangeError, "value out of range: ", $val)
 
@@ -44,7 +44,7 @@ proc chckRangeF(x, a, b: float): float =
     return x
   else:
     when hostOS == "standalone":
-      sysFatal(EOutOfRange, "value out of range")
+      sysFatal(RangeError, "value out of range")
     else:
       sysFatal(RangeError, "value out of range: ", $x)
 
diff --git a/lib/system/deepcopy.nim b/lib/system/deepcopy.nim
index fbebb17a8..093c0f3a7 100644
--- a/lib/system/deepcopy.nim
+++ b/lib/system/deepcopy.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2014 Andreas Rumpf
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -34,9 +34,9 @@ proc genericDeepCopyAux(dest, src: pointer, n: ptr TNimNode) {.benign.} =
 
 proc copyDeepString(src: NimString): NimString {.inline.} =
   if src != nil:
-    result = rawNewString(src.space)
+    result = rawNewStringNoInit(src.len)
     result.len = src.len
-    c_memcpy(result.data, src.data, (src.len + 1) * sizeof(char))
+    c_memcpy(result.data, src.data, src.len + 1)
 
 proc genericDeepCopyAux(dest, src: pointer, mt: PNimType) =
   var
diff --git a/lib/system/dyncalls.nim b/lib/system/dyncalls.nim
index e0d99cf88..44f7b67c3 100644
--- a/lib/system/dyncalls.nim
+++ b/lib/system/dyncalls.nim
@@ -8,7 +8,7 @@
 #
 
 # This file implements the ability to call native procs from libraries.
-# It is not possible to do this in a platform independant way, unfortunately.
+# It is not possible to do this in a platform independent way, unfortunately.
 # However, the interface has been designed to take platform differences into
 # account and been ported to all major platforms.
 
@@ -80,15 +80,22 @@ elif defined(windows) or defined(dos):
   # Native Windows Implementation
   # =======================================================================
   #
-  type
-    THINSTANCE {.importc: "HINSTANCE".} = pointer
+  when defined(cpp):
+    type
+      THINSTANCE {.importc: "HINSTANCE".} = object
+        x: pointer
+    proc getProcAddress(lib: THINSTANCE, name: cstring): TProcAddr {.
+        importcpp: "(void*)GetProcAddress(@)", header: "<windows.h>", stdcall.}
+  else:
+    type
+      THINSTANCE {.importc: "HINSTANCE".} = pointer
+    proc getProcAddress(lib: THINSTANCE, name: cstring): TProcAddr {.
+        importc: "GetProcAddress", header: "<windows.h>", stdcall.}
 
   proc freeLibrary(lib: THINSTANCE) {.
       importc: "FreeLibrary", header: "<windows.h>", stdcall.}
   proc winLoadLibrary(path: cstring): THINSTANCE {.
       importc: "LoadLibraryA", header: "<windows.h>", stdcall.}
-  proc getProcAddress(lib: THINSTANCE, name: cstring): TProcAddr {.
-      importc: "GetProcAddress", header: "<windows.h>", stdcall.}
 
   proc nimUnloadLibrary(lib: TLibHandle) =
     freeLibrary(cast[THINSTANCE](lib))
diff --git a/lib/system/embedded.nim b/lib/system/embedded.nim
index 9bb25b8dd..a14f43e7e 100644
--- a/lib/system/embedded.nim
+++ b/lib/system/embedded.nim
@@ -33,11 +33,11 @@ proc quitOrDebug() {.inline.} =
   quit(1)
 
 proc raiseException(e: ref Exception, ename: cstring) {.compilerRtl.} =
-  sysFatal(ENoExceptionToReraise, "exception handling is not available")
+  sysFatal(ReraiseError, "exception handling is not available")
 
 proc reraiseException() {.compilerRtl.} =
-  sysFatal(ENoExceptionToReraise, "no exception to reraise")
+  sysFatal(ReraiseError, "no exception to reraise")
 
 proc writeStackTrace() = discard
 
-proc setControlCHook(hook: proc () {.noconv.}) = discard
+proc setControlCHook(hook: proc () {.noconv.} not nil) = discard
diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim
index 237b42482..189d52f57 100644
--- a/lib/system/excpt.nim
+++ b/lib/system/excpt.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2014 Andreas Rumpf
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -58,7 +58,7 @@ proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =
 proc popSafePoint {.compilerRtl, inl.} =
   excHandler = excHandler.prev
 
-proc pushCurrentException(e: ref Exception) {.compilerRtl, inl.} = 
+proc pushCurrentException(e: ref Exception) {.compilerRtl, inl.} =
   e.parent = currException
   currException = e
 
@@ -69,12 +69,12 @@ proc popCurrentException {.compilerRtl, inl.} =
 const
   nativeStackTraceSupported* = (defined(macosx) or defined(linux)) and
                               not NimStackTrace
-  hasSomeStackTrace = NimStackTrace or 
+  hasSomeStackTrace = NimStackTrace or
     defined(nativeStackTrace) and nativeStackTraceSupported
 
 when defined(nativeStacktrace) and nativeStackTraceSupported:
   type
-    TDl_info {.importc: "Dl_info", header: "<dlfcn.h>", 
+    TDl_info {.importc: "Dl_info", header: "<dlfcn.h>",
                final, pure.} = object
       dli_fname: cstring
       dli_fbase: pointer
@@ -98,7 +98,7 @@ when defined(nativeStacktrace) and nativeStackTraceSupported:
         tempDlInfo: TDl_info
     # This is allowed to be expensive since it only happens during crashes
     # (but this way you don't need manual stack tracing)
-    var size = backtrace(cast[ptr pointer](addr(tempAddresses)), 
+    var size = backtrace(cast[ptr pointer](addr(tempAddresses)),
                          len(tempAddresses))
     var enabled = false
     for i in 0..size-1:
@@ -123,7 +123,7 @@ when defined(nativeStacktrace) and nativeStackTraceSupported:
 when not hasThreadSupport:
   var
     tempFrames: array [0..127, PFrame] # should not be alloc'd on stack
-  
+
 proc auxWriteStackTrace(f: PFrame, s: var string) =
   when hasThreadSupport:
     var
@@ -160,7 +160,7 @@ proc auxWriteStackTrace(f: PFrame, s: var string) =
     inc(i)
     b = b.prev
   for j in countdown(i-1, 0):
-    if tempFrames[j] == nil: 
+    if tempFrames[j] == nil:
       add(s, "(")
       add(s, $skipped)
       add(s, " calls omitted) ...")
@@ -175,6 +175,8 @@ proc auxWriteStackTrace(f: PFrame, s: var string) =
       add(s, tempFrames[j].procname)
     add(s, "\n")
 
+proc stackTraceAvailable*(): bool
+
 when hasSomeStackTrace:
   proc rawWriteStackTrace(s: var string) =
     when NimStackTrace:
@@ -188,6 +190,18 @@ when hasSomeStackTrace:
       auxWriteStackTraceWithBacktrace(s)
     else:
       add(s, "No stack traceback available\n")
+  proc stackTraceAvailable(): bool =
+    when NimStackTrace:
+      if framePtr == nil:
+        result = false
+      else:
+        result = true
+    elif defined(nativeStackTrace) and nativeStackTraceSupported:
+      result = true
+    else:
+      result = false
+else:
+  proc stackTraceAvailable*(): bool = result = false
 
 proc quitOrDebug() {.inline.} =
   when not defined(endb):
@@ -200,41 +214,49 @@ proc raiseExceptionAux(e: ref Exception) =
     if not localRaiseHook(e): return
   if globalRaiseHook != nil:
     if not globalRaiseHook(e): return
-  if excHandler != nil:
-    if not excHandler.hasRaiseAction or excHandler.raiseAction(e):
+  when defined(cpp):
+    if e[] of OutOfMemError:
+      showErrorMessage(e.name)
+      quitOrDebug()
+    else:
       pushCurrentException(e)
-      c_longjmp(excHandler.context, 1)
-  elif e[] of OutOfMemError:
-    showErrorMessage(e.name)
-    quitOrDebug()
+      {.emit: "throw NimException(`e`, `e`->name);".}
   else:
-    when hasSomeStackTrace:
-      var buf = newStringOfCap(2000)
-      if isNil(e.trace): rawWriteStackTrace(buf)
-      else: add(buf, e.trace)
-      add(buf, "Error: unhandled exception: ")
-      if not isNil(e.msg): add(buf, e.msg)
-      add(buf, " [")
-      add(buf, $e.name)
-      add(buf, "]\n")
-      showErrorMessage(buf)
+    if excHandler != nil:
+      if not excHandler.hasRaiseAction or excHandler.raiseAction(e):
+        pushCurrentException(e)
+        c_longjmp(excHandler.context, 1)
+    elif e[] of OutOfMemError:
+      showErrorMessage(e.name)
+      quitOrDebug()
     else:
-      # ugly, but avoids heap allocations :-)
-      template xadd(buf, s, slen: expr) =
-        if L + slen < high(buf):
-          copyMem(addr(buf[L]), cstring(s), slen)
-          inc L, slen
-      template add(buf, s: expr) =
-        xadd(buf, s, s.len)
-      var buf: array [0..2000, char]
-      var L = 0
-      add(buf, "Error: unhandled exception: ")
-      if not isNil(e.msg): add(buf, e.msg)
-      add(buf, " [")
-      xadd(buf, e.name, c_strlen(e.name))
-      add(buf, "]\n")
-      showErrorMessage(buf)
-    quitOrDebug()
+      when hasSomeStackTrace:
+        var buf = newStringOfCap(2000)
+        if isNil(e.trace): rawWriteStackTrace(buf)
+        else: add(buf, e.trace)
+        add(buf, "Error: unhandled exception: ")
+        if not isNil(e.msg): add(buf, e.msg)
+        add(buf, " [")
+        add(buf, $e.name)
+        add(buf, "]\n")
+        showErrorMessage(buf)
+      else:
+        # ugly, but avoids heap allocations :-)
+        template xadd(buf, s, slen: expr) =
+          if L + slen < high(buf):
+            copyMem(addr(buf[L]), cstring(s), slen)
+            inc L, slen
+        template add(buf, s: expr) =
+          xadd(buf, s, s.len)
+        var buf: array [0..2000, char]
+        var L = 0
+        add(buf, "Error: unhandled exception: ")
+        if not isNil(e.msg): add(buf, e.msg)
+        add(buf, " [")
+        xadd(buf, e.name, c_strlen(e.name))
+        add(buf, "]\n")
+        showErrorMessage(buf)
+      quitOrDebug()
 
 proc raiseException(e: ref Exception, ename: cstring) {.compilerRtl.} =
   e.name = ename
@@ -295,8 +317,8 @@ when not defined(noSignalHandler):
   proc signalHandler(sig: cint) {.exportc: "signalHandler", noconv.} =
     template processSignal(s, action: expr) {.immediate,  dirty.} =
       if s == SIGINT: action("SIGINT: Interrupted by Ctrl-C.\n")
-      elif s == SIGSEGV: 
-        action("SIGSEGV: Illegal storage access. (Try to compile with -d:useSysAssert -d:useGcAssert for details.)\n")
+      elif s == SIGSEGV:
+        action("SIGSEGV: Illegal storage access. (Attempt to read from nil?)\n")
       elif s == SIGABRT:
         when defined(endb):
           if dbgAborting: return # the debugger wants to abort
@@ -304,7 +326,7 @@ when not defined(noSignalHandler):
       elif s == SIGFPE: action("SIGFPE: Arithmetic error.\n")
       elif s == SIGILL: action("SIGILL: Illegal operation.\n")
       elif s == SIGBUS:
-        action("SIGBUS: Illegal storage access.  (Try to compile with -d:useSysAssert -d:useGcAssert for details.)\n")
+        action("SIGBUS: Illegal storage access. (Attempt to read from nil?)\n")
       else:
         block platformSpecificSignal:
           when declared(SIGPIPE):
@@ -341,7 +363,7 @@ when not defined(noSignalHandler):
 
   registerSignalHandler() # call it in initialization section
 
-proc setControlCHook(hook: proc () {.noconv.}) =
+proc setControlCHook(hook: proc () {.noconv.} not nil) =
   # ugly cast, but should work on all architectures:
   type TSignalHandler = proc (sig: cint) {.noconv, benign.}
   c_signal(SIGINT, cast[TSignalHandler](hook))
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index e0db3fba4..c4374d00c 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2013 Andreas Rumpf
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -33,7 +33,7 @@ const
 when withRealTime and not declared(getTicks):
   include "system/timers"
 when defined(memProfiler):
-  proc nimProfile(requestedSize: int)
+  proc nimProfile(requestedSize: int) {.benign.}
 
 const
   rcIncrement = 0b1000 # so that lowest 3 bits are not touched
@@ -48,8 +48,8 @@ type
   TWalkOp = enum
     waMarkGlobal,    # part of the backup/debug mark&sweep
     waMarkPrecise,   # part of the backup/debug mark&sweep
-    waZctDecRef, waPush, waCycleDecRef, waMarkGray, waScan, waScanBlack, 
-    waCollectWhite,
+    waZctDecRef, waPush, waCycleDecRef, waMarkGray, waScan, waScanBlack,
+    waCollectWhite #, waDebug
 
   TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
     # A ref type can have a finalizer that is called before the object's
@@ -61,9 +61,9 @@ type
     maxThreshold: int        # max threshold that has been set
     maxStackSize: int        # max stack size
     maxStackCells: int       # max stack cells in ``decStack``
-    cycleTableSize: int      # max entries in cycle table  
+    cycleTableSize: int      # max entries in cycle table
     maxPause: int64          # max measured GC pause in nanoseconds
-  
+
   TGcHeap {.final, pure.} = object # this contains the zero count and
                                    # non-zero count table
     stackBottom: pointer
@@ -88,11 +88,11 @@ var
 when not defined(useNimRtl):
   instantiateForRegion(gch.region)
 
-template acquire(gch: TGcHeap) = 
+template acquire(gch: TGcHeap) =
   when hasThreadSupport and hasSharedHeap:
     acquireSys(HeapLock)
 
-template release(gch: TGcHeap) = 
+template release(gch: TGcHeap) =
   when hasThreadSupport and hasSharedHeap:
     releaseSys(HeapLock)
 
@@ -117,7 +117,7 @@ proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
   result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(TCell)))
 
-proc canbeCycleRoot(c: PCell): bool {.inline.} =
+proc canBeCycleRoot(c: PCell): bool {.inline.} =
   result = ntfAcyclic notin c.typ.flags
 
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
@@ -163,7 +163,7 @@ when hasThreadSupport and hasSharedHeap:
   template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement
   template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement)
 else:
-  template `--`(x: expr): expr = 
+  template `--`(x: expr): expr =
     dec(x, rcIncrement)
     x <% rcIncrement
   template `++`(x: expr): stmt = inc(x, rcIncrement)
@@ -181,7 +181,7 @@ proc prepareDealloc(cell: PCell) =
     (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
     dec(gch.recGcLock)
 
-proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} = 
+proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
   when hasThreadSupport and hasSharedHeap:
     acquireSys(HeapLock)
@@ -211,7 +211,7 @@ proc decRef(c: PCell) {.inline.} =
     rtlAddCycleRoot(c)
     #writeCell("decRef", c)
 
-proc incRef(c: PCell) {.inline.} = 
+proc incRef(c: PCell) {.inline.} =
   gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
   c.refcount = c.refcount +% rcIncrement
   # and not colorMask
@@ -246,12 +246,12 @@ proc asgnRef(dest: PPointer, src: pointer) {.compilerProc, inline.} =
   dest[] = src
 
 proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerProc, inline.} =
-  # the code generator calls this proc if it is known at compile time that no 
+  # the code generator calls this proc if it is known at compile time that no
   # cycle is possible.
   if src != nil:
     var c = usrToCell(src)
     ++c.refcount
-  if dest[] != nil: 
+  if dest[] != nil:
     var c = usrToCell(dest[])
     if --c.refcount:
       rtlAddZCT(c)
@@ -269,7 +269,7 @@ proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerProc.} =
     if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[]))
   else:
     # can't be an interior pointer if it's a stack location!
-    gcAssert(interiorAllocatedPtr(gch.region, dest) == nil, 
+    gcAssert(interiorAllocatedPtr(gch.region, dest) == nil,
              "stack loc AND interior pointer")
   dest[] = src
 
@@ -321,7 +321,7 @@ when useMarkForDebug or useBackupGc:
       echo "[GC] cannot register global variable; too many global variables"
       quit 1
 
-proc cellsetReset(s: var TCellSet) = 
+proc cellsetReset(s: var TCellSet) =
   deinit(s)
   init(s)
 
@@ -336,7 +336,7 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) {.benign.} =
         if n.sons[i].typ.kind in {tyRef, tyString, tySequence}:
           doOperation(cast[PPointer](d +% n.sons[i].offset)[], op)
         else:
-          forAllChildrenAux(cast[pointer](d +% n.sons[i].offset), 
+          forAllChildrenAux(cast[pointer](d +% n.sons[i].offset),
                             n.sons[i].typ, op)
       else:
         forAllSlotsAux(dest, n.sons[i], op)
@@ -384,7 +384,7 @@ proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
   # we check the last 8 entries (cache line) for a slot that could be reused.
   # In 63% of all cases we succeed here! But we have to optimize the heck
   # out of this small linear search so that ``newObj`` is not slowed down.
-  # 
+  #
   # Slots to try          cache hit
   # 1                     32%
   # 4                     59%
@@ -461,6 +461,10 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
 
 {.pop.}
 
+proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  result = rawNewObj(typ, size, gch)
+  when defined(memProfiler): nimProfile(size)
+
 proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
   result = rawNewObj(typ, size, gch)
   zeroMem(result, size)
@@ -481,7 +485,7 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
   gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch)
   sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
-  
+
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
   sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
   sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
@@ -510,7 +514,7 @@ proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
   when defined(memProfiler): nimProfile(size)
-  
+
 proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
   acquire(gch)
   collectCT(gch)
@@ -522,34 +526,42 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
   var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
   var elemSize = 1
   if ol.typ.kind != tyString: elemSize = ol.typ.base.size
-  
+
   var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
   copyMem(res, ol, oldsize + sizeof(TCell))
   zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(TCell)),
           newsize-oldsize)
   sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
-  sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
-  #if res.refcount <% rcIncrement:
-  #  add(gch.zct, res)
-  #else: # XXX: what to do here?
-  #  decRef(ol)
-  if (ol.refcount and ZctFlag) != 0:
-    var j = gch.zct.len-1
-    var d = gch.zct.d
-    while j >= 0: 
-      if d[j] == ol:
-        d[j] = res
-        break
-      dec(j)
-  if canbeCycleRoot(ol): excl(gch.cycleRoots, ol)
+  # This can be wrong for intermediate temps that are nevertheless on the
+  # heap because of lambda lifting:
+  #gcAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
   when logGC:
     writeCell("growObj old cell", ol)
     writeCell("growObj new cell", res)
   gcTrace(ol, csZctFreed)
   gcTrace(res, csAllocated)
-  when reallyDealloc: 
+  when reallyDealloc:
     sysAssert(allocInv(gch.region), "growObj before dealloc")
-    rawDealloc(gch.region, ol)
+    if ol.refcount shr rcShift <=% 1:
+      # free immediately to save space:
+      if (ol.refcount and ZctFlag) != 0:
+        var j = gch.zct.len-1
+        var d = gch.zct.d
+        while j >= 0:
+          if d[j] == ol:
+            d[j] = res
+            break
+          dec(j)
+      if canbeCycleRoot(ol): excl(gch.cycleRoots, ol)
+      rawDealloc(gch.region, ol)
+    else:
+      # we split the old refcount in 2 parts. XXX This is still not entirely
+      # correct if the pointer that receives growObj's result is on the stack.
+      # A better fix would be to emit the location specific write barrier for
+      # 'growObj', but this is lost of more work and who knows what new problems
+      # this would create.
+      res.refcount = rcIncrement
+      decRef(ol)
   else:
     sysAssert(ol.typ != nil, "growObj: 5")
     zeroMem(ol, sizeof(TCell))
@@ -572,7 +584,7 @@ proc freeCyclicCell(gch: var TGcHeap, c: PCell) =
   prepareDealloc(c)
   gcTrace(c, csCycFreed)
   when logGC: writeCell("cycle collector dealloc cell", c)
-  when reallyDealloc: 
+  when reallyDealloc:
     sysAssert(allocInv(gch.region), "free cyclic cell")
     rawDealloc(gch.region, c)
   else:
@@ -595,9 +607,15 @@ proc scan(s: PCell) =
     else:
       s.setColor(rcWhite)
       forAllChildren(s, waScan)
-  
+
 proc collectWhite(s: PCell) =
-  if s.color == rcWhite and s notin gch.cycleRoots:
+  # This is a hacky way to deal with the following problem (bug #1796)
+  # Consider this content in cycleRoots:
+  #   x -> a; y -> a  where 'a' is an acyclic object so not included in
+  # cycleRoots itself. Then 'collectWhite' used to free 'a' twice. The
+  # 'isAllocatedPtr' check prevents this. This also means we do not need
+  # to query 's notin gch.cycleRoots' at all.
+  if isAllocatedPtr(gch.region, s) and s.color == rcWhite:
     s.setColor(rcBlack)
     forAllChildren(s, waCollectWhite)
     freeCyclicCell(gch, s)
@@ -648,6 +666,28 @@ when useMarkForDebug or useBackupGc:
       if objStart != nil:
         markS(gch, objStart)
 
+when logGC:
+  var
+    cycleCheckA: array[100, PCell]
+    cycleCheckALen = 0
+
+  proc alreadySeen(c: PCell): bool =
+    for i in 0 .. <cycleCheckALen:
+      if cycleCheckA[i] == c: return true
+    if cycleCheckALen == len(cycleCheckA):
+      gcAssert(false, "cycle detection overflow")
+      quit 1
+    cycleCheckA[cycleCheckALen] = c
+    inc cycleCheckALen
+
+  proc debugGraph(s: PCell) =
+    if alreadySeen(s):
+      writeCell("child cell (already seen) ", s)
+    else:
+      writeCell("cell {", s)
+      forAllChildren(s, waDebug)
+      c_fprintf(c_stdout, "}\n")
+
 proc doOperation(p: pointer, op: TWalkOp) =
   if p == nil: return
   var c: PCell = usrToCell(p)
@@ -690,6 +730,7 @@ proc doOperation(p: pointer, op: TWalkOp) =
   of waMarkPrecise:
     when useMarkForDebug or useBackupGc:
       add(gch.tempStack, c)
+  #of waDebug: debugGraph(c)
 
 proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
   doOperation(d, TWalkOp(op))
@@ -702,7 +743,6 @@ when useMarkForDebug or useBackupGc:
 
 proc collectRoots(gch: var TGcHeap) =
   for s in elements(gch.cycleRoots):
-    excl(gch.cycleRoots, s)
     collectWhite(s)
 
 proc collectCycles(gch: var TGcHeap) =
@@ -731,7 +771,7 @@ proc collectCycles(gch: var TGcHeap) =
       gcAssert isAllocatedPtr(gch.region, c), "addBackStackRoots"
       gcAssert c.refcount >=% rcIncrement, "addBackStackRoots: dead cell"
       if canBeCycleRoot(c):
-        #if c notin gch.cycleRoots: 
+        #if c notin gch.cycleRoots:
         inc cycleRootsLen
         incl(gch.cycleRoots, c)
       gcAssert c.typ != nil, "addBackStackRoots 2"
@@ -758,12 +798,12 @@ proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
         add(gch.decStack, cell)
   sysAssert(allocInv(gch.region), "gcMark end")
 
-proc markThreadStacks(gch: var TGcHeap) = 
+proc markThreadStacks(gch: var TGcHeap) =
   when hasThreadSupport and hasSharedHeap:
     {.error: "not fully implemented".}
     var it = threadList
     while it != nil:
-      # mark registers: 
+      # mark registers:
       for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
       var sp = cast[TAddress](it.stackBottom)
       var max = cast[TAddress](it.stackTop)
@@ -848,7 +888,7 @@ elif stackIncreases:
   var
     jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
       # a little hack to get the size of a TJmpBuf in the generated C code
-      # in a platform independant way
+      # in a platform independent way
 
   template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
     var registers: C_JmpBuf
@@ -897,7 +937,7 @@ else:
       while sp <=% max:
         gcMark(gch, cast[PPointer](sp)[])
         sp = sp +% sizeof(pointer)
-    
+
 proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
   forEachStackSlot(gch, gcMark)
 
@@ -910,13 +950,13 @@ when useMarkForDebug or useBackupGc:
 # ----------------------------------------------------------------------------
 
 proc collectZCT(gch: var TGcHeap): bool =
-  # Note: Freeing may add child objects to the ZCT! So essentially we do 
-  # deep freeing, which is bad for incremental operation. In order to 
+  # Note: Freeing may add child objects to the ZCT! So essentially we do
+  # deep freeing, which is bad for incremental operation. In order to
   # avoid a deep stack, we move objects to keep the ZCT small.
   # This is performance critical!
   const workPackage = 100
   var L = addr(gch.zct.len)
-  
+
   when withRealTime:
     var steps = workPackage
     var t0: TTicks
@@ -926,15 +966,15 @@ proc collectZCT(gch: var TGcHeap): bool =
     sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr")
     # remove from ZCT:
     gcAssert((c.refcount and ZctFlag) == ZctFlag, "collectZCT")
-    
+
     c.refcount = c.refcount and not ZctFlag
     gch.zct.d[0] = gch.zct.d[L[] - 1]
     dec(L[])
     when withRealTime: dec steps
-    if c.refcount <% rcIncrement: 
+    if c.refcount <% rcIncrement:
       # It may have a RC > 0, if it is in the hardware stack or
       # it has not been removed yet from the ZCT. This is because
-      # ``incref`` does not bother to remove the cell from the ZCT 
+      # ``incref`` does not bother to remove the cell from the ZCT
       # as this might be too slow.
       # In any case, it should be removed from the ZCT. But not
       # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
@@ -947,7 +987,7 @@ proc collectZCT(gch: var TGcHeap): bool =
       # access invalid memory. This is done by prepareDealloc():
       prepareDealloc(c)
       forAllChildren(c, waZctDecRef)
-      when reallyDealloc: 
+      when reallyDealloc:
         sysAssert(allocInv(gch.region), "collectZCT: rawDealloc")
         rawDealloc(gch.region, c)
       else:
@@ -958,7 +998,7 @@ proc collectZCT(gch: var TGcHeap): bool =
         steps = workPackage
         if gch.maxPause > 0:
           let duration = getticks() - t0
-          # the GC's measuring is not accurate and needs some cleanup actions 
+          # the GC's measuring is not accurate and needs some cleanup actions
           # (stack unmarking), so subtract some short amount of time in
           # order to miss deadlines less often:
           if duration >= gch.maxPause - 50_000:
@@ -981,7 +1021,7 @@ proc collectCTBody(gch: var TGcHeap) =
   when withRealTime:
     let t0 = getticks()
   sysAssert(allocInv(gch.region), "collectCT: begin")
-  
+
   gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   sysAssert(gch.decStack.len == 0, "collectCT")
   prepareForInteriorPointerChecking(gch.region)
@@ -1000,7 +1040,7 @@ proc collectCTBody(gch: var TGcHeap) =
         gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
   unmarkStackAndRegisters(gch)
   sysAssert(allocInv(gch.region), "collectCT: end")
-  
+
   when withRealTime:
     let duration = getticks() - t0
     gch.stat.maxPause = max(gch.stat.maxPause, duration)
@@ -1014,8 +1054,12 @@ when useMarkForDebug or useBackupGc:
     markGlobals(gch)
 
 proc collectCT(gch: var TGcHeap) =
-  if (gch.zct.len >= ZctThreshold or (cycleGC and
-      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and 
+  # stackMarkCosts prevents some pathological behaviour: Stack marking
+  # becomes more expensive with large stacks and large stacks mean that
+  # cells with RC=0 are more likely to be kept alive by the stack.
+  let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
+  if (gch.zct.len >= stackMarkCosts or (cycleGC and
+      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
       gch.recGcLock == 0:
     when useMarkForDebug:
       prepareForInteriorPointerChecking(gch.region)
@@ -1034,7 +1078,7 @@ when withRealTime:
     acquire(gch)
     gch.maxPause = us.toNano
     if (gch.zct.len >= ZctThreshold or (cycleGC and
-        getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or 
+        getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or
         strongAdvice:
       collectCTBody(gch)
     release(gch)
@@ -1042,13 +1086,13 @@ when withRealTime:
   proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)
 
 when not defined(useNimRtl):
-  proc GC_disable() = 
+  proc GC_disable() =
     when hasThreadSupport and hasSharedHeap:
       discard atomicInc(gch.recGcLock, 1)
     else:
       inc(gch.recGcLock)
   proc GC_enable() =
-    if gch.recGcLock > 0: 
+    if gch.recGcLock > 0:
       when hasThreadSupport and hasSharedHeap:
         discard atomicDec(gch.recGcLock, 1)
       else:
diff --git a/lib/system/gc2.nim b/lib/system/gc2.nim
index ee52b54f5..4e3dee51c 100644
--- a/lib/system/gc2.nim
+++ b/lib/system/gc2.nim
@@ -128,7 +128,7 @@ type
                              # cycle roots table that uses a cheap linear scan
                              # to find only possitively dead objects.
                              # One strategy is to perform it only for new objects
-                             # allocated between the invocations of CollectZCT.
+                             # allocated between the invocations of collectZCT.
                              # This index indicates the start of the range of
                              # such new objects within the table.
     when withRealTime:
@@ -140,7 +140,7 @@ var
   gch* {.rtlThreadVar.}: TGcHeap
 
 when not defined(useNimRtl):
-  InstantiateForRegion(gch.region)
+  instantiateForRegion(gch.region)
 
 template acquire(gch: TGcHeap) = 
   when hasThreadSupport and hasSharedHeap:
@@ -233,11 +233,11 @@ template addCycleRoot(cycleRoots: var TCellSeq, c: PCell) =
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[TAddress](cell)+%TAddress(sizeof(TCell)))
+  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(TCell)))
 
 proc usrToCell*(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[TAddress](usr)-%TAddress(sizeof(TCell)))
+  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(TCell)))
 
 proc canbeCycleRoot(c: PCell): bool {.inline.} =
   result = ntfAcyclic notin c.typ.flags
@@ -255,10 +255,10 @@ when BitsPerPage mod (sizeof(int)*8) != 0:
 
 # forward declarations:
 proc collectCT(gch: var TGcHeap)
-proc IsOnStack*(p: pointer): bool {.noinline.}
+proc isOnStack*(p: pointer): bool {.noinline.}
 proc forAllChildren(cell: PCell, op: TWalkOp)
 proc doOperation(p: pointer, op: TWalkOp)
-proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp)
 # we need the prototype here for debugging purposes
 
 proc prepareDealloc(cell: PCell) =
@@ -432,7 +432,7 @@ proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
     sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2")
   sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5")
 
-template doAsgnRef(dest: ppointer, src: pointer,
+template doAsgnRef(dest: PPointer, src: pointer,
                   heapType = LocalHeap, cycleFlag = MaybeCyclic): stmt =
   sysAssert(not isOnStack(dest), "asgnRef")
   # BUGFIX: first incRef then decRef!
@@ -440,20 +440,20 @@ template doAsgnRef(dest: ppointer, src: pointer,
   if dest[] != nil: doDecRef(usrToCell(dest[]), heapType, cycleFlag)
   dest[] = src
 
-proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+proc asgnRef(dest: PPointer, src: pointer) {.compilerProc, inline.} =
   # the code generator calls this proc!
   doAsgnRef(dest, src, LocalHeap, MaybeCyclic)
 
-proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerProc, inline.} =
   # the code generator calls this proc if it is known at compile time that no 
   # cycle is possible.
   doAsgnRef(dest, src, LocalHeap, Acyclic)
 
-proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerProc.} =
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerProc.} =
   # unsureAsgnRef updates the reference counters only if dest is not on the
   # stack. It is used by the code generator if it cannot decide wether a
   # reference is in the stack or not (this can happen for var parameters).
-  if not IsOnStack(dest):
+  if not isOnStack(dest):
     if src != nil: doIncRef(usrToCell(src))
     # XXX we must detect a shared heap here
     # better idea may be to just eliminate the need for unsureAsgnRef
@@ -470,16 +470,16 @@ proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerProc.} =
 
 when hasThreadSupport and hasSharedHeap:
   # shared heap version of the above procs
-  proc asgnRefSh(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+  proc asgnRefSh(dest: PPointer, src: pointer) {.compilerProc, inline.} =
     doAsgnRef(dest, src, SharedHeap, MaybeCyclic)
 
-  proc asgnRefNoCycleSh(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+  proc asgnRefNoCycleSh(dest: PPointer, src: pointer) {.compilerProc, inline.} =
     doAsgnRef(dest, src, SharedHeap, Acyclic)
 
 proc initGC() =
   when not defined(useNimRtl):
     when traceGC:
-      for i in low(TCellState)..high(TCellState): Init(states[i])
+      for i in low(TCellState)..high(TCellState): init(states[i])
     gch.cycleThreshold = InitialCycleThreshold
     gch.stat.stackScans = 0
     gch.stat.cycleCollections = 0
@@ -491,11 +491,11 @@ proc initGC() =
     init(gch.zct)
     init(gch.tempStack)
     init(gch.freeStack)
-    Init(gch.cycleRoots)
-    Init(gch.decStack)
+    init(gch.cycleRoots)
+    init(gch.decStack)
 
 proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
-  var d = cast[TAddress](dest)
+  var d = cast[ByteAddress](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
   of nkList:
@@ -503,7 +503,7 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
       # inlined for speed
       if n.sons[i].kind == nkSlot:
         if n.sons[i].typ.kind in {tyRef, tyString, tySequence}:
-          doOperation(cast[ppointer](d +% n.sons[i].offset)[], op)
+          doOperation(cast[PPointer](d +% n.sons[i].offset)[], op)
         else:
           forAllChildrenAux(cast[pointer](d +% n.sons[i].offset), 
                             n.sons[i].typ, op)
@@ -514,19 +514,19 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
     if m != nil: forAllSlotsAux(dest, m, op)
   of nkNone: sysAssert(false, "forAllSlotsAux")
 
-proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) =
-  var d = cast[TAddress](dest)
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) =
+  var d = cast[ByteAddress](dest)
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
-    case mt.Kind
+    case mt.kind
     of tyRef, tyString, tySequence: # leaf:
-      doOperation(cast[ppointer](d)[], op)
+      doOperation(cast[PPointer](d)[], op)
     of tyObject, tyTuple:
       forAllSlotsAux(dest, mt.node, op)
     of tyArray, tyArrayConstr, tyOpenArray:
       for i in 0..(mt.size div mt.base.size)-1:
         forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
-    else: nil
+    else: discard
 
 proc forAllChildren(cell: PCell, op: TWalkOp) =
   sysAssert(cell != nil, "forAllChildren: 1")
@@ -536,18 +536,18 @@ proc forAllChildren(cell: PCell, op: TWalkOp) =
   if marker != nil:
     marker(cellToUsr(cell), op.int)
   else:
-    case cell.typ.Kind
+    case cell.typ.kind
     of tyRef: # common case
       forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
     of tySequence:
-      var d = cast[TAddress](cellToUsr(cell))
+      var d = cast[ByteAddress](cellToUsr(cell))
       var s = cast[PGenericSeq](d)
       if s != nil:
         let baseAddr = d +% GenericSeqSize
         for i in 0..s.len-1:
           forAllChildrenAux(cast[pointer](baseAddr +% i *% cell.typ.base.size),
                             cell.typ.base, op)
-    else: nil
+    else: discard
 
 proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
   # we check the last 8 entries (cache line) for a slot that could be reused.
@@ -593,7 +593,7 @@ proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
         return
     add(gch.zct, res)
 
-proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap, rc1: bool): pointer =
+proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap, rc1 = false): pointer =
   # generates a new object and sets its reference counter to 0
   acquire(gch)
   sysAssert(allocInv(gch.region), "rawNewObj begin")
@@ -605,7 +605,7 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap, rc1: bool): pointer =
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
   sysAssert(allocInv(gch.region), "rawNewObj after rawAlloc")
 
-  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
   
   res.typ = typ
   
@@ -634,8 +634,6 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap, rc1: bool): pointer =
   gcTrace(res, csAllocated)
   release(gch)
   result = cellToUsr(res)
-  zeroMem(result, size)
-  when defined(memProfiler): nimProfile(size)
   sysAssert(allocInv(gch.region), "rawNewObj end")
 
 {.pop.}
@@ -670,23 +668,31 @@ template trimAt(roots: var TCellSeq, at: int): stmt =
 proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
   setStackTop(gch)
   result = rawNewObj(typ, size, gch, false)
-  
+  zeroMem(result, size)
+  when defined(memProfiler): nimProfile(size)
+
+proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  setStackTop(gch)
+  result = rawNewObj(typ, size, gch, false)
+  when defined(memProfiler): nimProfile(size)
+
 proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
   setStackTop(gch)
-  # `rawNewObj` already uses locks, so no need for them here.
+  # `newObj` already uses locks, so no need for them here.
   let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
-  result = rawNewObj(typ, size, gch, false)
+  result = newObj(typ, size)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
 
 proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
   setStackTop(gch)
   result = rawNewObj(typ, size, gch, true)
+  when defined(memProfiler): nimProfile(size)
 
 proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
   setStackTop(gch)
   let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
-  result = rawNewObj(typ, size, gch, true)
+  result = newObjRC1(typ, size)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
 
@@ -708,10 +714,10 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
   # call user-defined move code
   # call user-defined default constructor
   copyMem(res, ol, oldsize + sizeof(TCell))
-  zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)),
+  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(TCell)),
           newsize-oldsize)
 
-  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
+  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
   sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
   
   when false:
@@ -786,10 +792,10 @@ type
     FromChildren,
     FromRoot
 
-proc CollectZCT(gch: var TGcHeap): bool
+proc collectZCT(gch: var TGcHeap): bool
 
 template pseudoRecursion(typ: TRecursionType, body: stmt): stmt =
-  #
+  discard
 
 proc trimCycleRoots(gch: var TGcHeap, startIdx = gch.cycleRootsTrimIdx) =
   var i = startIdx
@@ -967,17 +973,17 @@ proc collectCycles(gch: var TGcHeap) =
       maybedeads,
       collected
 
-  Deinit(gch.cycleRoots)
-  Init(gch.cycleRoots)
+  deinit(gch.cycleRoots)
+  init(gch.cycleRoots)
 
-  Deinit(gch.freeStack)
-  Init(gch.freeStack)
+  deinit(gch.freeStack)
+  init(gch.freeStack)
 
   when MarkingSkipsAcyclicObjects:
     # Collect the acyclic objects that became unreachable due to collected
     # cyclic objects. 
-    discard CollectZCT(gch)
-    # CollectZCT may add new cycle candidates and we may decide to loop here
+    discard collectZCT(gch)
+    # collectZCT may add new cycle candidates and we may decide to loop here
     # if gch.cycleRoots.len > 0: repeat
 
 var gcDebugging* = false
@@ -988,7 +994,7 @@ proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
   sysAssert(allocInv(gch.region), "gcMark begin")
   var cell = usrToCell(p)
-  var c = cast[TAddress](cell)
+  var c = cast[ByteAddress](cell)
   if c >% PageSize:
     # fast check: does it look like a cell?
     var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
@@ -997,6 +1003,7 @@ proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
       if objStart.color != rcReallyDead:
         if gcDebugging:
           # writeCell("marking ", objStart)
+          discard
         else:
           inc objStart.refcount, rcIncrement
           gch.decStack.add objStart
@@ -1009,6 +1016,7 @@ proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
         # coincidence due to the conservative stack marking.
         when debugGC:
           # writeCell("marking dead object", objStart)
+          discard
     when false:
       if isAllocatedPtr(gch.region, cell):
         sysAssert false, "allocated pointer but not interior?"
@@ -1024,12 +1032,12 @@ proc markThreadStacks(gch: var TGcHeap) =
     while it != nil:
       # mark registers: 
       for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
-      var sp = cast[TAddress](it.stackBottom)
-      var max = cast[TAddress](it.stackTop)
+      var sp = cast[ByteAddress](it.stackBottom)
+      var max = cast[ByteAddress](it.stackTop)
       # XXX stack direction?
       # XXX unroll this loop:
       while sp <=% max:
-        gcMark(gch, cast[ppointer](sp)[])
+        gcMark(gch, cast[PPointer](sp)[])
         sp = sp +% sizeof(pointer)
       it = it.next
 
@@ -1051,8 +1059,8 @@ when not defined(useNimRtl):
     # the first init must be the one that defines the stack bottom:
     if gch.stackBottom == nil: gch.stackBottom = theStackBottom
     else:
-      var a = cast[TAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[TAddress](gch.stackBottom)
+      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
+      var b = cast[ByteAddress](gch.stackBottom)
       #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
       when stackIncreases:
         gch.stackBottom = cast[pointer](min(a, b))
@@ -1067,15 +1075,15 @@ proc stackSize(): int {.noinline.} =
 var
   jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
     # a little hack to get the size of a TJmpBuf in the generated C code
-    # in a platform independant way
+    # in a platform independent way
 
 when defined(sparc): # For SPARC architecture.
   proc isOnStack(p: pointer): bool =
     var stackTop {.volatile.}: pointer
     stackTop = addr(stackTop)
-    var b = cast[TAddress](gch.stackBottom)
-    var a = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
+    var b = cast[ByteAddress](gch.stackBottom)
+    var a = cast[ByteAddress](stackTop)
+    var x = cast[ByteAddress](p)
     result = a <=% x and x <=% b
 
   proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
@@ -1092,7 +1100,7 @@ when defined(sparc): # For SPARC architecture.
     # Addresses decrease as the stack grows.
     while sp <= max:
       gcMark(gch, sp[])
-      sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer))
+      sp = cast[PPointer](cast[ByteAddress](sp) +% sizeof(pointer))
 
 elif defined(ELATE):
   {.error: "stack marking code is to be written for this architecture".}
@@ -1104,20 +1112,20 @@ elif stackIncreases:
   proc isOnStack(p: pointer): bool =
     var stackTop {.volatile.}: pointer
     stackTop = addr(stackTop)
-    var a = cast[TAddress](gch.stackBottom)
-    var b = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
+    var a = cast[ByteAddress](gch.stackBottom)
+    var b = cast[ByteAddress](stackTop)
+    var x = cast[ByteAddress](p)
     result = a <=% x and x <=% b
   
   proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
     var registers: C_JmpBuf
     if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[TAddress](gch.stackBottom)
-      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
+      var max = cast[ByteAddress](gch.stackBottom)
+      var sp = cast[ByteAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
       # sp will traverse the JMP_BUF as well (jmp_buf size is added,
       # otherwise sp would be below the registers structure).
       while sp >=% max:
-        gcMark(gch, cast[ppointer](sp)[])
+        gcMark(gch, cast[PPointer](sp)[])
         sp = sp -% sizeof(pointer)
 
 else:
@@ -1127,9 +1135,9 @@ else:
   proc isOnStack(p: pointer): bool =
     var stackTop {.volatile.}: pointer
     stackTop = addr(stackTop)
-    var b = cast[TAddress](gch.stackBottom)
-    var a = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
+    var b = cast[ByteAddress](gch.stackBottom)
+    var a = cast[ByteAddress](stackTop)
+    var x = cast[ByteAddress](p)
     result = a <=% x and x <=% b
 
   proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
@@ -1141,18 +1149,18 @@ else:
     if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
       when MinimumStackMarking:
         # mark the registers
-        var jmpbufPtr = cast[TAddress](addr(registers))
+        var jmpbufPtr = cast[ByteAddress](addr(registers))
         var jmpbufEnd = jmpbufPtr +% jmpbufSize
       
         while jmpbufPtr <=% jmpbufEnd:
-          gcMark(gch, cast[ppointer](jmpbufPtr)[])
+          gcMark(gch, cast[PPointer](jmpbufPtr)[])
           jmpbufPtr = jmpbufPtr +% sizeof(pointer)
 
-        var sp = cast[TAddress](gch.stackTop)
+        var sp = cast[ByteAddress](gch.stackTop)
       else:
-        var sp = cast[TAddress](addr(registers))
+        var sp = cast[ByteAddress](addr(registers))
       # mark the user stack
-      var max = cast[TAddress](gch.stackBottom)
+      var max = cast[ByteAddress](gch.stackBottom)
       # loop unrolled:
       while sp <% max - 8*sizeof(pointer):
         gcMark(gch, cast[PStackSlice](sp)[0])
@@ -1166,7 +1174,7 @@ else:
         sp = sp +% sizeof(pointer)*8
       # last few entries:
       while sp <=% max:
-        gcMark(gch, cast[ppointer](sp)[])
+        gcMark(gch, cast[PPointer](sp)[])
         sp = sp +% sizeof(pointer)
 
 # ----------------------------------------------------------------------------
@@ -1202,7 +1210,7 @@ proc releaseCell(gch: var TGcHeap, cell: PCell) =
   #  recursion).
   # We can ignore it now as the ZCT cleaner will reach it soon.
 
-proc CollectZCT(gch: var TGcHeap): bool =
+proc collectZCT(gch: var TGcHeap): bool =
   const workPackage = 100
   var L = addr(gch.zct.len)
   
@@ -1213,8 +1221,8 @@ proc CollectZCT(gch: var TGcHeap): bool =
   
   while L[] > 0:
     var c = gch.zct.d[0]
-    sysAssert c.isBitUp(rcZct), "CollectZCT: rcZct missing!"
-    sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr")
+    sysAssert c.isBitUp(rcZct), "collectZCT: rcZct missing!"
+    sysAssert(isAllocatedPtr(gch.region, c), "collectZCT: isAllocatedPtr")
     
     # remove from ZCT:    
     c.clearBit(rcZct)
@@ -1263,7 +1271,7 @@ proc unmarkStackAndRegisters(gch: var TGcHeap) =
     # XXX no need for an atomic dec here:
     if c.refcount--(LocalHeap):
       # the object survived only because of a stack reference
-      # it still doesn't have heap refernces
+      # it still doesn't have heap references
       addZCT(gch.zct, c)
     
     if canbeCycleRoot(c):
@@ -1295,7 +1303,7 @@ proc collectCTBody(gch: var TGcHeap) =
         sysAssert gch.zct.len == 0, "zct is not null after collect cycles"
         inc(gch.stat.cycleCollections)
         gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
-                                 cycleIncrease)
+                                 CycleIncrease)
         gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
   unmarkStackAndRegisters(gch)
   sysAssert(allocInv(gch.region), "collectCT: end")
@@ -1344,12 +1352,12 @@ when not defined(useNimRtl):
       else:
         dec(gch.recGcLock)
 
-  proc GC_setStrategy(strategy: TGC_Strategy) =
+  proc GC_setStrategy(strategy: GC_Strategy) =
     case strategy
-    of gcThroughput: nil
-    of gcResponsiveness: nil
-    of gcOptimizeSpace: nil
-    of gcOptimizeTime: nil
+    of gcThroughput: discard
+    of gcResponsiveness: discard
+    of gcOptimizeSpace: discard
+    of gcOptimizeTime: discard
 
   proc GC_enableMarkAndSweep() =
     gch.cycleThreshold = InitialCycleThreshold
diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim
index 0eafd3255..e287bf5d9 100644
--- a/lib/system/gc_ms.nim
+++ b/lib/system/gc_ms.nim
@@ -1,19 +1,19 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2014 Andreas Rumpf
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
 
-# A simple mark&sweep garbage collector for Nim. Define the 
+# A simple mark&sweep garbage collector for Nim. Define the
 # symbol ``gcUseBitvectors`` to generate a variant of this GC.
 {.push profiler:off.}
 
 const
   InitialThreshold = 4*1024*1024 # X MB because marking&sweeping is slow
-  withBitvectors = defined(gcUseBitvectors) 
+  withBitvectors = defined(gcUseBitvectors)
   # bitvectors are significantly faster for GC-bench, but slower for
   # bootstrapping and use more memory
   rcWhite = 0
@@ -29,21 +29,21 @@ type
   TWalkOp = enum
     waMarkGlobal,  # we need to mark conservatively for global marker procs
                    # as these may refer to a global var and not to a thread
-                   # local 
+                   # local
     waMarkPrecise  # fast precise marking
 
   TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
-  
+
   TGlobalMarkerProc = proc () {.nimcall, benign.}
 
   TGcStat = object
     collections: int         # number of performed full collections
     maxThreshold: int        # max threshold that has been set
     maxStackSize: int        # max stack size
-    freedObjects: int        # max entries in cycle table  
-  
+    freedObjects: int        # max entries in cycle table
+
   TGcHeap = object           # this contains the zero count and
                              # non-zero count table
     stackBottom: pointer
@@ -64,11 +64,11 @@ var
 when not defined(useNimRtl):
   instantiateForRegion(gch.region)
 
-template acquire(gch: TGcHeap) = 
+template acquire(gch: TGcHeap) =
   when hasThreadSupport and hasSharedHeap:
     acquireSys(HeapLock)
 
-template release(gch: TGcHeap) = 
+template release(gch: TGcHeap) =
   when hasThreadSupport and hasSharedHeap:
     releaseSys(HeapLock)
 
@@ -134,7 +134,7 @@ proc prepareDealloc(cell: PCell) =
     (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
     dec(gch.recGcLock)
 
-proc nimGCref(p: pointer) {.compilerProc.} = 
+proc nimGCref(p: pointer) {.compilerProc.} =
   # we keep it from being collected by pretending it's not even allocated:
   when false:
     when withBitvectors: excl(gch.allocated, usrToCell(p))
@@ -165,7 +165,7 @@ proc initGC() =
     init(gch.tempStack)
     init(gch.additionalRoots)
     when withBitvectors:
-      Init(gch.allocated)
+      init(gch.allocated)
       init(gch.marked)
 
 var
@@ -261,6 +261,10 @@ proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
   zeroMem(result, size)
   when defined(memProfiler): nimProfile(size)
 
+proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  result = rawNewObj(typ, size, gch)
+  when defined(memProfiler): nimProfile(size)
+
 proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
   # `newObj` already uses locks, so no need for them here.
   let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
@@ -273,34 +277,36 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
   result = rawNewObj(typ, size, gch)
   zeroMem(result, size)
   when defined(memProfiler): nimProfile(size)
-  
+
 proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
   let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
   result = newObj(typ, size)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
   when defined(memProfiler): nimProfile(size)
-  
+
 proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
   acquire(gch)
   collectCT(gch)
   var ol = usrToCell(old)
   sysAssert(ol.typ != nil, "growObj: 1")
   gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
-  
+
   var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
   var elemSize = 1
   if ol.typ.kind != tyString: elemSize = ol.typ.base.size
-  
+
   var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
   copyMem(res, ol, oldsize + sizeof(TCell))
   zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(TCell)),
           newsize-oldsize)
   sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
-  when withBitvectors: excl(gch.allocated, ol)
-  when reallyDealloc: rawDealloc(gch.region, ol)
-  else:
-    zeroMem(ol, sizeof(TCell))
+  when false:
+    # this is wrong since seqs can be shared via 'shallow':
+    when withBitvectors: excl(gch.allocated, ol)
+    when reallyDealloc: rawDealloc(gch.region, ol)
+    else:
+      zeroMem(ol, sizeof(TCell))
   when withBitvectors: incl(gch.allocated, res)
   when useCellIds:
     inc gch.idGenerator
@@ -399,7 +405,7 @@ proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
     var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
     if objStart != nil:
       mark(gch, objStart)
-  
+
 # ----------------- stack management --------------------------------------
 #  inspired from Smart Eiffel
 
@@ -474,7 +480,7 @@ elif stackIncreases:
   var
     jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
       # a little hack to get the size of a TJmpBuf in the generated C code
-      # in a platform independant way
+      # in a platform independent way
 
   proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
     var registers: C_JmpBuf
@@ -534,7 +540,7 @@ proc collectCTBody(gch: var TGcHeap) =
   markStackAndRegisters(gch)
   markGlobals(gch)
   sweep(gch)
-  
+
   inc(gch.stat.collections)
   when withBitvectors:
     deinit(gch.marked)
@@ -542,19 +548,19 @@ proc collectCTBody(gch: var TGcHeap) =
   gch.cycleThreshold = max(InitialThreshold, getOccupiedMem().mulThreshold)
   gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
   sysAssert(allocInv(gch.region), "collectCT: end")
-  
+
 proc collectCT(gch: var TGcHeap) =
   if getOccupiedMem(gch.region) >= gch.cycleThreshold and gch.recGcLock == 0:
     collectCTBody(gch)
 
 when not defined(useNimRtl):
-  proc GC_disable() = 
+  proc GC_disable() =
     when hasThreadSupport and hasSharedHeap:
       atomicInc(gch.recGcLock, 1)
     else:
       inc(gch.recGcLock)
   proc GC_enable() =
-    if gch.recGcLock > 0: 
+    if gch.recGcLock > 0:
       when hasThreadSupport and hasSharedHeap:
         atomicDec(gch.recGcLock, 1)
       else:
diff --git a/lib/system/hti.nim b/lib/system/hti.nim
index e599668a7..aff0c0e6f 100644
--- a/lib/system/hti.nim
+++ b/lib/system/hti.nim
@@ -11,7 +11,7 @@ when declared(NimString):
   # we are in system module:
   {.pragma: codegenType, compilerproc.}
 else:
-  {.pragma: codegenType.}
+  {.pragma: codegenType, importc.}
 
 type 
   # This should be he same as ast.TTypeKind
@@ -26,7 +26,7 @@ type
     tyExpr,
     tyStmt,
     tyTypeDesc,
-    tyGenericInvokation, # ``T[a, b]`` for types to invoke
+    tyGenericInvocation, # ``T[a, b]`` for types to invoke
     tyGenericBody,       # ``T[a, b, body]`` last parameter is the body
     tyGenericInst,       # ``T[a, b, realInstance]`` instantiated generic type
     tyGenericParam,      # ``a`` in the example
@@ -65,7 +65,7 @@ type
     tyBigNum,
 
   TNimNodeKind = enum nkNone, nkSlot, nkList, nkCase
-  TNimNode {.codegenType, final.} = object
+  TNimNode {.codegenType.} = object
     kind: TNimNodeKind
     offset: int
     typ: ptr TNimType
@@ -78,7 +78,7 @@ type
     ntfAcyclic = 1,    # type cannot form a cycle
     ntfEnumHole = 2    # enum has holes and thus `$` for them needs the slow
                        # version
-  TNimType {.codegenType, final.} = object
+  TNimType {.codegenType.} = object
     size: int
     kind: TNimKind
     flags: set[TNimTypeFlag]
diff --git a/lib/system/inclrtl.nim b/lib/system/inclrtl.nim
index aac802229..dbc961402 100644
--- a/lib/system/inclrtl.nim
+++ b/lib/system/inclrtl.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2014 Andreas Rumpf
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
diff --git a/lib/system/jssys.nim b/lib/system/jssys.nim
index 9b4a7d556..3b55f62ca 100644
--- a/lib/system/jssys.nim
+++ b/lib/system/jssys.nim
@@ -128,7 +128,7 @@ proc raiseOverflow {.exportc: "raiseOverflow", noreturn.} =
   raise newException(OverflowError, "over- or underflow")
 
 proc raiseDivByZero {.exportc: "raiseDivByZero", noreturn.} =
-  raise newException(DivByZeroError, "divison by zero")
+  raise newException(DivByZeroError, "division by zero")
 
 proc raiseRangeError() {.compilerproc, noreturn.} =
   raise newException(RangeError, "value out of range")
@@ -322,10 +322,10 @@ when defined(kwin):
       }
       print(buf);
     """
-    
+
 elif defined(nodejs):
   proc ewriteln(x: cstring) = log(x)
-  
+
   proc rawEcho {.compilerproc, asmNoStackFrame.} =
     asm """
       var buf = "";
@@ -339,12 +339,12 @@ else:
   var
     document {.importc, nodecl.}: ref TDocument
 
-  proc ewriteln(x: cstring) = 
+  proc ewriteln(x: cstring) =
     var node = document.getElementsByTagName("body")[0]
-    if node != nil: 
+    if node != nil:
       node.appendChild(document.createTextNode(x))
       node.appendChild(document.createElement("br"))
-    else: 
+    else:
       raise newException(ValueError, "<body> element does not exist yet!")
 
   proc rawEcho {.compilerproc.} =
@@ -563,7 +563,11 @@ proc nimCopy(x: pointer, ti: PNimType): pointer =
       }
     """
   of tyString:
-    asm "`result` = `x`.slice(0);"
+    asm """
+      if (`x` !== null) {
+        `result` = `x`.slice(0);
+      }
+    """
   else:
     result = x
 
@@ -679,7 +683,7 @@ proc nimParseBiggestFloat(s: string, number: var BiggestFloat, start = 0): int {
   if s[i] == 'I' or s[i] == 'i':
     if s[i+1] == 'N' or s[i+1] == 'n':
       if s[i+2] == 'F' or s[i+2] == 'f':
-        if s[i+3] notin IdentChars: 
+        if s[i+3] notin IdentChars:
           number = Inf*sign
           return i+3 - start
     return 0
diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim
index e091c0889..a378f86e7 100644
--- a/lib/system/mmdisp.nim
+++ b/lib/system/mmdisp.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2013 Andreas Rumpf
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -23,7 +23,7 @@ const
   leakDetector = false
   overwriteFree = false
   trackAllocationSource = leakDetector
-  
+
   cycleGC = true # (de)activate the cycle GC
   reallyDealloc = true # for debugging purposes this can be set to false
   reallyOsDealloc = true
@@ -71,13 +71,13 @@ when defined(boehmgc):
     const boehmLib = "libgc.dylib"
   else:
     const boehmLib = "/usr/lib/libgc.so.1"
-    
+
   proc boehmGCinit {.importc: "GC_init", dynlib: boehmLib.}
-  proc boehmGC_disable {.importc: "GC_disable", dynlib: boehmLib.} 
-  proc boehmGC_enable {.importc: "GC_enable", dynlib: boehmLib.} 
+  proc boehmGC_disable {.importc: "GC_disable", dynlib: boehmLib.}
+  proc boehmGC_enable {.importc: "GC_enable", dynlib: boehmLib.}
   proc boehmGCincremental {.
-    importc: "GC_enable_incremental", dynlib: boehmLib.} 
-  proc boehmGCfullCollect {.importc: "GC_gcollect", dynlib: boehmLib.}  
+    importc: "GC_enable_incremental", dynlib: boehmLib.}
+  proc boehmGCfullCollect {.importc: "GC_gcollect", dynlib: boehmLib.}
   proc boehmAlloc(size: int): pointer {.
     importc: "GC_malloc", dynlib: boehmLib.}
   proc boehmAllocAtomic(size: int): pointer {.
@@ -85,7 +85,7 @@ when defined(boehmgc):
   proc boehmRealloc(p: pointer, size: int): pointer {.
     importc: "GC_realloc", dynlib: boehmLib.}
   proc boehmDealloc(p: pointer) {.importc: "GC_free", dynlib: boehmLib.}
-  
+
   proc boehmGetHeapSize: int {.importc: "GC_get_heap_size", dynlib: boehmLib.}
     ## Return the number of bytes in the heap.  Excludes collector private
     ## data structures. Includes empty blocks and fragmentation loss.
@@ -108,25 +108,25 @@ when defined(boehmgc):
     zeroMem(result, size)
 
   when not defined(useNimRtl):
-    
-    proc alloc(size: int): pointer =
+
+    proc alloc(size: Natural): pointer =
       result = boehmAlloc(size)
       if result == nil: raiseOutOfMem()
-    proc alloc0(size: int): pointer =
+    proc alloc0(size: Natural): pointer =
       result = alloc(size)
       zeroMem(result, size)
-    proc realloc(p: pointer, newsize: int): pointer =
+    proc realloc(p: pointer, newsize: Natural): pointer =
       result = boehmRealloc(p, newsize)
       if result == nil: raiseOutOfMem()
     proc dealloc(p: pointer) = boehmDealloc(p)
-    
-    proc allocShared(size: int): pointer =
+
+    proc allocShared(size: Natural): pointer =
       result = boehmAlloc(size)
       if result == nil: raiseOutOfMem()
-    proc allocShared0(size: int): pointer =
+    proc allocShared0(size: Natural): pointer =
       result = alloc(size)
       zeroMem(result, size)
-    proc reallocShared(p: pointer, newsize: int): pointer =
+    proc reallocShared(p: pointer, newsize: Natural): pointer =
       result = boehmRealloc(p, newsize)
       if result == nil: raiseOutOfMem()
     proc deallocShared(p: pointer) = boehmDealloc(p)
@@ -144,18 +144,18 @@ when defined(boehmgc):
     proc GC_disable() = boehmGC_disable()
     proc GC_enable() = boehmGC_enable()
     proc GC_fullCollect() = boehmGCfullCollect()
-    proc GC_setStrategy(strategy: TGC_Strategy) = discard
+    proc GC_setStrategy(strategy: GC_Strategy) = discard
     proc GC_enableMarkAndSweep() = discard
     proc GC_disableMarkAndSweep() = discard
     proc GC_getStatistics(): string = return ""
-    
+
     proc getOccupiedMem(): int = return boehmGetHeapSize()-boehmGetFreeBytes()
     proc getFreeMem(): int = return boehmGetFreeBytes()
     proc getTotalMem(): int = return boehmGetHeapSize()
 
     proc setStackBottom(theStackBottom: pointer) = discard
 
-  proc initGC() = 
+  proc initGC() =
     when defined(macosx): boehmGCinit()
 
   proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
@@ -171,7 +171,7 @@ when defined(boehmgc):
 
   proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
   proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
-  
+
   proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
     dest[] = src
   proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
@@ -181,39 +181,39 @@ when defined(boehmgc):
 
   type
     TMemRegion = object {.final, pure.}
-  
+
   proc alloc(r: var TMemRegion, size: int): pointer =
     result = boehmAlloc(size)
     if result == nil: raiseOutOfMem()
   proc alloc0(r: var TMemRegion, size: int): pointer =
     result = alloc(size)
     zeroMem(result, size)
-  proc dealloc(r: var TMemRegion, p: pointer) = boehmDealloc(p)  
+  proc dealloc(r: var TMemRegion, p: pointer) = boehmDealloc(p)
   proc deallocOsPages(r: var TMemRegion) {.inline.} = discard
   proc deallocOsPages() {.inline.} = discard
 
   include "system/cellsets"
 elif defined(nogc) and defined(useMalloc):
-  
+
   when not defined(useNimRtl):
-    proc alloc(size: int): pointer =
+    proc alloc(size: Natural): pointer =
       result = cmalloc(size)
       if result == nil: raiseOutOfMem()
-    proc alloc0(size: int): pointer =
+    proc alloc0(size: Natural): pointer =
       result = alloc(size)
       zeroMem(result, size)
-    proc realloc(p: pointer, newsize: int): pointer =
+    proc realloc(p: pointer, newsize: Natural): pointer =
       result = crealloc(p, newsize)
       if result == nil: raiseOutOfMem()
     proc dealloc(p: pointer) = cfree(p)
-    
-    proc allocShared(size: int): pointer =
+
+    proc allocShared(size: Natural): pointer =
       result = cmalloc(size)
       if result == nil: raiseOutOfMem()
-    proc allocShared0(size: int): pointer =
+    proc allocShared0(size: Natural): pointer =
       result = alloc(size)
       zeroMem(result, size)
-    proc reallocShared(p: pointer, newsize: int): pointer =
+    proc reallocShared(p: pointer, newsize: Natural): pointer =
       result = crealloc(p, newsize)
       if result == nil: raiseOutOfMem()
     proc deallocShared(p: pointer) = cfree(p)
@@ -221,15 +221,15 @@ elif defined(nogc) and defined(useMalloc):
     proc GC_disable() = discard
     proc GC_enable() = discard
     proc GC_fullCollect() = discard
-    proc GC_setStrategy(strategy: TGC_Strategy) = discard
+    proc GC_setStrategy(strategy: GC_Strategy) = discard
     proc GC_enableMarkAndSweep() = discard
     proc GC_disableMarkAndSweep() = discard
     proc GC_getStatistics(): string = return ""
-    
+
     proc getOccupiedMem(): int = discard
     proc getFreeMem(): int = discard
     proc getTotalMem(): int = discard
-    
+
     proc setStackBottom(theStackBottom: pointer) = discard
 
   proc initGC() = discard
@@ -240,13 +240,15 @@ elif defined(nogc) and defined(useMalloc):
     result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
     cast[PGenericSeq](result).len = len
     cast[PGenericSeq](result).reserved = len
+  proc newObjNoInit(typ: PNimType, size: int): pointer =
+    result = alloc(size)
 
   proc growObj(old: pointer, newsize: int): pointer =
     result = realloc(old, newsize)
 
   proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
   proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
-  
+
   proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
     dest[] = src
   proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
@@ -256,7 +258,7 @@ elif defined(nogc) and defined(useMalloc):
 
   type
     TMemRegion = object {.final, pure.}
-  
+
   proc alloc(r: var TMemRegion, size: int): pointer =
     result = alloc(size)
   proc alloc0(r: var TMemRegion, size: int): pointer =
@@ -272,23 +274,27 @@ elif defined(nogc):
   # object, because C does not support this operation... Even though every
   # possible implementation has to have a way to determine the object's size.
   # C just sucks.
-  when appType == "lib": 
+  when appType == "lib":
     {.warning: "nogc in a library context may not work".}
-  
+
   include "system/alloc"
 
   proc initGC() = discard
   proc GC_disable() = discard
   proc GC_enable() = discard
   proc GC_fullCollect() = discard
-  proc GC_setStrategy(strategy: TGC_Strategy) = discard
+  proc GC_setStrategy(strategy: GC_Strategy) = discard
   proc GC_enableMarkAndSweep() = discard
   proc GC_disableMarkAndSweep() = discard
   proc GC_getStatistics(): string = return ""
-  
-  
+
+
   proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
     result = alloc0(size)
+
+  proc newObjNoInit(typ: PNimType, size: int): pointer =
+    result = alloc(size)
+
   proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
     result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
     cast[PGenericSeq](result).len = len
@@ -299,7 +305,7 @@ elif defined(nogc):
   proc setStackBottom(theStackBottom: pointer) = discard
   proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
   proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
-  
+
   proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
     dest[] = src
   proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
@@ -327,6 +333,6 @@ else:
     include "system/gc"
   else:
     include "system/gc"
-  
+
 {.pop.}
 
diff --git a/lib/system/platforms.nim b/lib/system/platforms.nim
index 3ec6a270e..47a01d5fe 100644
--- a/lib/system/platforms.nim
+++ b/lib/system/platforms.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2014 Andreas Rumpf
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
diff --git a/lib/system/profiler.nim b/lib/system/profiler.nim
index 96ab6abc7..6d6863caa 100644
--- a/lib/system/profiler.nim
+++ b/lib/system/profiler.nim
@@ -51,7 +51,7 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) =
 
 when defined(memProfiler):
   type
-    TMemProfilerHook* = proc (st: TStackTrace, requestedSize: int) {.nimcall.}
+    TMemProfilerHook* = proc (st: TStackTrace, requestedSize: int) {.nimcall, benign.}
   var
     profilerHook*: TMemProfilerHook
       ## set this variable to provide a procedure that implements a profiler in
diff --git a/lib/system/repr.nim b/lib/system/repr.nim
index 2de603cea..f1029ff6a 100644
--- a/lib/system/repr.nim
+++ b/lib/system/repr.nim
@@ -30,14 +30,16 @@ proc reprStrAux(result: var string, s: string) =
     add result, "nil"
     return
   add result, reprPointer(cast[pointer](s)) & "\""
-  for c in items(s):
+  for i in 0.. <s.len:
+    let c = s[i]
     case c
     of '"': add result, "\\\""
     of '\\': add result, "\\\\" # BUGFIX: forgotten
     of '\10': add result, "\\10\"\n\"" # " \n " # better readability
     of '\128' .. '\255', '\0'..'\9', '\11'..'\31':
       add result, "\\" & reprInt(ord(c))
-    else: result.add(c)
+    else:
+      result.add(c)
   add result, "\""
 
 proc reprStr(s: string): string {.compilerRtl.} =
@@ -78,7 +80,7 @@ proc reprEnum(e: int, typ: PNimType): string {.compilerRtl.} =
 type
   PByteArray = ptr array[0.. 0xffff, int8]
 
-proc addSetElem(result: var string, elem: int, typ: PNimType) {.gcsafe.} =
+proc addSetElem(result: var string, elem: int, typ: PNimType) {.benign.} =
   case typ.kind
   of tyEnum: add result, reprEnum(elem, typ)
   of tyBool: add result, reprBool(bool(elem))
@@ -147,7 +149,7 @@ when not defined(useNimRtl):
     for i in 0..cl.indent-1: add result, ' '
 
   proc reprAux(result: var string, p: pointer, typ: PNimType,
-               cl: var TReprClosure) {.gcsafe.}
+               cl: var TReprClosure) {.benign.}
 
   proc reprArray(result: var string, p: pointer, typ: PNimType,
                  cl: var TReprClosure) =
@@ -172,7 +174,7 @@ when not defined(useNimRtl):
     add result, "]"
 
   proc reprRecordAux(result: var string, p: pointer, n: ptr TNimNode,
-                     cl: var TReprClosure) {.gcsafe.} =
+                     cl: var TReprClosure) {.benign.} =
     case n.kind
     of nkNone: sysAssert(false, "reprRecordAux")
     of nkSlot:
diff --git a/lib/system/sysio.nim b/lib/system/sysio.nim
index 7908fbe4d..3f860655e 100644
--- a/lib/system/sysio.nim
+++ b/lib/system/sysio.nim
@@ -16,7 +16,7 @@
                        # of the standard library!
 
 
-proc fputs(c: cstring, f: File) {.importc: "fputs", header: "<stdio.h>", 
+proc fputs(c: cstring, f: File) {.importc: "fputs", header: "<stdio.h>",
   tags: [WriteIOEffect].}
 proc fgets(c: cstring, n: int, f: File): cstring {.
   importc: "fgets", header: "<stdio.h>", tags: [ReadIOEffect].}
@@ -26,11 +26,30 @@ proc ungetc(c: cint, f: File) {.importc: "ungetc", header: "<stdio.h>",
   tags: [].}
 proc putc(c: char, stream: File) {.importc: "putc", header: "<stdio.h>",
   tags: [WriteIOEffect].}
-proc fprintf(f: File, frmt: cstring) {.importc: "fprintf", 
+proc fprintf(f: File, frmt: cstring) {.importc: "fprintf",
   header: "<stdio.h>", varargs, tags: [WriteIOEffect].}
 proc strlen(c: cstring): int {.
   importc: "strlen", header: "<string.h>", tags: [].}
 
+when defined(posix):
+  proc getc_unlocked(stream: File): cint {.importc: "getc_unlocked",
+    header: "<stdio.h>", tags: [ReadIOEffect].}
+
+  proc flockfile(stream: File) {.importc: "flockfile", header: "<stdio.h>",
+    tags: [ReadIOEffect].}
+
+  proc funlockfile(stream: File) {.importc: "funlockfile", header: "<stdio.h>",
+    tags: [ReadIOEffect].}
+elif false:
+  # doesn't work on Windows yet:
+  proc getc_unlocked(stream: File): cint {.importc: "_fgetc_nolock",
+    header: "<stdio.h>", tags: [ReadIOEffect].}
+
+  proc flockfile(stream: File) {.importc: "_lock_file", header: "<stdio.h>",
+    tags: [ReadIOEffect].}
+
+  proc funlockfile(stream: File) {.importc: "_unlock_file", header: "<stdio.h>",
+    tags: [ReadIOEffect].}
 
 # C routine that is used here:
 proc fread(buf: pointer, size, n: int, f: File): int {.
@@ -67,39 +86,57 @@ const
 proc raiseEIO(msg: string) {.noinline, noreturn.} =
   sysFatal(IOError, msg)
 
-proc readLine(f: File, line: var TaintedString): bool =
-  # of course this could be optimized a bit; but IO is slow anyway...
-  # and it was difficult to get this CORRECT with Ansi C's methods
-  setLen(line.string, 0) # reuse the buffer!
-  while true:
-    var c = fgetc(f)
-    if c < 0'i32:
-      if line.len > 0: break
-      else: return false
-    if c == 10'i32: break # LF
-    if c == 13'i32:  # CR
-      c = fgetc(f) # is the next char LF?
-      if c != 10'i32: ungetc(c, f) # no, put the character back
-      break
-    add line.string, chr(int(c))
-  result = true
+when declared(getc_unlocked):
+  proc readLine(f: File, line: var TaintedString): bool =
+    setLen(line.string, 0) # reuse the buffer!
+    flockfile(f)
+    while true:
+      var c = getc_unlocked(f)
+      if c < 0'i32:
+        if line.len > 0: break
+        else: return false
+      if c == 10'i32: break # LF
+      if c == 13'i32:  # CR
+        c = getc_unlocked(f) # is the next char LF?
+        if c != 10'i32: ungetc(c, f) # no, put the character back
+        break
+      add line.string, chr(int(c))
+    result = true
+    funlockfile(f)
+else:
+  proc readLine(f: File, line: var TaintedString): bool =
+    # of course this could be optimized a bit; but IO is slow anyway...
+    # and it was difficult to get this CORRECT with Ansi C's methods
+    setLen(line.string, 0) # reuse the buffer!
+    while true:
+      var c = fgetc(f)
+      if c < 0'i32:
+        if line.len > 0: break
+        else: return false
+      if c == 10'i32: break # LF
+      if c == 13'i32:  # CR
+        c = fgetc(f) # is the next char LF?
+        if c != 10'i32: ungetc(c, f) # no, put the character back
+        break
+      add line.string, chr(int(c))
+    result = true
 
 proc readLine(f: File): TaintedString =
   result = TaintedString(newStringOfCap(80))
   if not readLine(f, result): raiseEIO("EOF reached")
 
-proc write(f: File, i: int) = 
+proc write(f: File, i: int) =
   when sizeof(int) == 8:
     fprintf(f, "%lld", i)
   else:
     fprintf(f, "%ld", i)
 
-proc write(f: File, i: BiggestInt) = 
+proc write(f: File, i: BiggestInt) =
   when sizeof(BiggestInt) == 8:
     fprintf(f, "%lld", i)
   else:
     fprintf(f, "%ld", i)
-    
+
 proc write(f: File, b: bool) =
   if b: write(f, "true")
   else: write(f, "false")
@@ -110,7 +147,7 @@ proc write(f: File, c: char) = putc(c, f)
 proc write(f: File, a: varargs[string, `$`]) =
   for x in items(a): write(f, x)
 
-proc readAllBuffer(file: File): string = 
+proc readAllBuffer(file: File): string =
   # This proc is for File we want to read but don't know how many
   # bytes we need to read before the buffer is empty.
   result = ""
@@ -123,8 +160,8 @@ proc readAllBuffer(file: File): string =
       buffer.setLen(bytesRead)
       result.add(buffer)
       break
-  
-proc rawFileSize(file: File): int = 
+
+proc rawFileSize(file: File): int =
   # this does not raise an error opposed to `getFileSize`
   var oldPos = ftell(file)
   discard fseek(file, 0, 2) # seek the end of the file
@@ -132,7 +169,7 @@ proc rawFileSize(file: File): int =
   discard fseek(file, clong(oldPos), 0)
 
 proc readAllFile(file: File, len: int): string =
-  # We aquire the filesize beforehand and hope it doesn't change.
+  # We acquire the filesize beforehand and hope it doesn't change.
   # Speeds things up.
   result = newString(int(len))
   if readBuffer(file, addr(result[0]), int(len)) != len:
@@ -141,20 +178,20 @@ proc readAllFile(file: File, len: int): string =
 proc readAllFile(file: File): string =
   var len = rawFileSize(file)
   result = readAllFile(file, len)
-  
-proc readAll(file: File): TaintedString = 
+
+proc readAll(file: File): TaintedString =
   # Separate handling needed because we need to buffer when we
   # don't know the overall length of the File.
-  var len = rawFileSize(file)
-  if len >= 0:
+  let len = if file != stdin: rawFileSize(file) else: -1
+  if len > 0:
     result = readAllFile(file, len).TaintedString
   else:
     result = readAllBuffer(file).TaintedString
-  
+
 proc readFile(filename: string): TaintedString =
   var f = open(filename)
   try:
-    result = readAllFile(f).TaintedString
+    result = readAll(f).TaintedString
   finally:
     close(f)
 
@@ -183,11 +220,17 @@ proc rawEchoNL() {.inline, compilerproc.} = write(stdout, "\n")
 when (defined(windows) and not defined(useWinAnsi)) or defined(nimdoc):
   include "system/widestrs"
 
-when defined(windows) and not defined(useWinAnsi):  
-  proc wfopen(filename, mode: WideCString): pointer {.
-    importc: "_wfopen", nodecl.}
-  proc wfreopen(filename, mode: WideCString, stream: File): File {.
-    importc: "_wfreopen", nodecl.}
+when defined(windows) and not defined(useWinAnsi):
+  when defined(cpp):
+    proc wfopen(filename, mode: WideCString): pointer {.
+      importcpp: "_wfopen((const wchar_t*)#, (const wchar_t*)#)", nodecl.}
+    proc wfreopen(filename, mode: WideCString, stream: File): File {.
+      importcpp: "_wfreopen((const wchar_t*)#, (const wchar_t*)#, #)", nodecl.}
+  else:
+    proc wfopen(filename, mode: WideCString): pointer {.
+      importc: "_wfopen", nodecl.}
+    proc wfreopen(filename, mode: WideCString, stream: File): File {.
+      importc: "_wfreopen", nodecl.}
 
   proc fopen(filename, mode: cstring): pointer =
     var f = newWideCString(filename)
@@ -223,7 +266,7 @@ proc open(f: var File, filename: string,
     elif bufSize == 0:
       discard setvbuf(f, nil, IONBF, 0)
 
-proc reopen(f: File, filename: string, mode: FileMode = fmRead): bool = 
+proc reopen(f: File, filename: string, mode: FileMode = fmRead): bool =
   var p: pointer = freopen(filename, FormatOpen[mode], f)
   result = p != nil
 
@@ -237,23 +280,23 @@ proc open(f: var File, filehandle: FileHandle, mode: FileMode): bool =
 proc fwrite(buf: pointer, size, n: int, f: File): int {.
   importc: "fwrite", noDecl.}
 
-proc readBuffer(f: File, buffer: pointer, len: int): int =
+proc readBuffer(f: File, buffer: pointer, len: Natural): int =
   result = fread(buffer, 1, len, f)
 
-proc readBytes(f: File, a: var openArray[int8], start, len: int): int =
+proc readBytes(f: File, a: var openArray[int8|uint8], start, len: Natural): int =
   result = readBuffer(f, addr(a[start]), len)
 
-proc readChars(f: File, a: var openArray[char], start, len: int): int =
+proc readChars(f: File, a: var openArray[char], start, len: Natural): int =
   result = readBuffer(f, addr(a[start]), len)
 
 {.push stackTrace:off, profiler:off.}
-proc writeBytes(f: File, a: openArray[int8], start, len: int): int =
+proc writeBytes(f: File, a: openArray[int8|uint8], start, len: Natural): int =
   var x = cast[ptr array[0..1000_000_000, int8]](a)
   result = writeBuffer(f, addr(x[start]), len)
-proc writeChars(f: File, a: openArray[char], start, len: int): int =
+proc writeChars(f: File, a: openArray[char], start, len: Natural): int =
   var x = cast[ptr array[0..1000_000_000, int8]](a)
   result = writeBuffer(f, addr(x[start]), len)
-proc writeBuffer(f: File, buffer: pointer, len: int): int =
+proc writeBuffer(f: File, buffer: pointer, len: Natural): int =
   result = fwrite(buffer, 1, len, f)
 
 proc write(f: File, s: string) =
diff --git a/lib/system/sysspawn.nim b/lib/system/sysspawn.nim
index 04f30872d..6f45f1509 100644
--- a/lib/system/sysspawn.nim
+++ b/lib/system/sysspawn.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2014 Andreas Rumpf
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim
index 440d040a5..5b4020c8c 100644
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -30,18 +30,30 @@ proc eqStrings(a, b: NimString): bool {.inline, compilerProc.} =
   if a == b: return true
   if a == nil or b == nil: return false
   return a.len == b.len and
-    c_memcmp(a.data, b.data, a.len * sizeof(char)) == 0'i32
+    c_memcmp(a.data, b.data, a.len) == 0'i32
 
 when declared(allocAtomic):
   template allocStr(size: expr): expr =
     cast[NimString](allocAtomic(size))
+
+  template allocStrNoInit(size: expr): expr =
+    cast[NimString](boehmAllocAtomic(size))
 else:
   template allocStr(size: expr): expr =
     cast[NimString](newObj(addr(strDesc), size))
 
+  template allocStrNoInit(size: expr): expr =
+    cast[NimString](newObjNoInit(addr(strDesc), size))
+
+proc rawNewStringNoInit(space: int): NimString {.compilerProc.} =
+  var s = space
+  if s < 7: s = 7
+  result = allocStrNoInit(sizeof(TGenericSeq) + s + 1)
+  result.reserved = s
+
 proc rawNewString(space: int): NimString {.compilerProc.} =
   var s = space
-  if s < 8: s = 7
+  if s < 7: s = 7
   result = allocStr(sizeof(TGenericSeq) + s + 1)
   result.reserved = s
 
@@ -53,10 +65,10 @@ proc copyStrLast(s: NimString, start, last: int): NimString {.compilerProc.} =
   var start = max(start, 0)
   var len = min(last, s.len-1) - start + 1
   if len > 0:
-    result = rawNewString(len)
+    result = rawNewStringNoInit(len)
     result.len = len
-    c_memcpy(result.data, addr(s.data[start]), len * sizeof(char))
-    #result.data[len] = '\0'
+    c_memcpy(result.data, addr(s.data[start]), len)
+    result.data[len] = '\0'
   else:
     result = rawNewString(len)
 
@@ -64,10 +76,9 @@ proc copyStr(s: NimString, start: int): NimString {.compilerProc.} =
   result = copyStrLast(s, start, s.len-1)
 
 proc toNimStr(str: cstring, len: int): NimString {.compilerProc.} =
-  result = rawNewString(len)
+  result = rawNewStringNoInit(len)
   result.len = len
-  c_memcpy(result.data, str, (len+1) * sizeof(char))
-  #result.data[len] = '\0' # readline relies on this!
+  c_memcpy(result.data, str, len + 1)
 
 proc cstrToNimstr(str: cstring): NimString {.compilerRtl.} =
   result = toNimStr(str, c_strlen(str))
@@ -77,23 +88,24 @@ proc copyString(src: NimString): NimString {.compilerRtl.} =
     if (src.reserved and seqShallowFlag) != 0:
       result = src
     else:
-      result = rawNewString(src.space)
+      result = rawNewStringNoInit(src.len)
       result.len = src.len
-      c_memcpy(result.data, src.data, (src.len + 1) * sizeof(char))
+      c_memcpy(result.data, src.data, src.len + 1)
 
 proc copyStringRC1(src: NimString): NimString {.compilerRtl.} =
   if src != nil:
-    var s = src.space
-    if s < 8: s = 7
     when declared(newObjRC1):
+      var s = src.len
+      if s < 7: s = 7
       result = cast[NimString](newObjRC1(addr(strDesc), sizeof(TGenericSeq) +
                                s+1))
+      result.reserved = s
     else:
-      result = allocStr(sizeof(TGenericSeq) + s + 1)
-    result.reserved = s
+      result = rawNewStringNoInit(src.len)
     result.len = src.len
     c_memcpy(result.data, src.data, src.len + 1)
 
+
 proc hashString(s: string): int {.compilerproc.} =
   # the compiler needs exactly the same hash function!
   # this used to be used for efficient generation of string case statements
@@ -113,7 +125,7 @@ proc addChar(s: NimString, c: char): NimString =
   if result.len >= result.space:
     result.reserved = resize(result.space)
     result = cast[NimString](growObj(result,
-      sizeof(TGenericSeq) + (result.reserved+1) * sizeof(char)))
+      sizeof(TGenericSeq) + result.reserved + 1))
   result.data[result.len] = c
   result.data[result.len+1] = '\0'
   inc(result.len)
@@ -157,7 +169,7 @@ proc resizeString(dest: NimString, addlen: int): NimString {.compilerRtl.} =
     result = cast[NimString](growObj(dest, sizeof(TGenericSeq) + sp + 1))
     result.reserved = sp
     #result = rawNewString(sp)
-    #copyMem(result, dest, dest.len * sizeof(char) + sizeof(TGenericSeq))
+    #copyMem(result, dest, dest.len + sizeof(TGenericSeq))
     # DO NOT UPDATE LEN YET: dest.len = newLen
 
 proc appendString(dest, src: NimString) {.compilerproc, inline.} =
@@ -203,12 +215,12 @@ proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
                                GenericSeqSize))
   elif newLen < result.len:
     # we need to decref here, otherwise the GC leaks!
-    when not defined(boehmGC) and not defined(nogc) and 
+    when not defined(boehmGC) and not defined(nogc) and
          not defined(gcMarkAndSweep):
       when compileOption("gc", "v2"):
         for i in newLen..result.len-1:
           let len0 = gch.tempStack.len
-          forAllChildrenAux(cast[pointer](cast[TAddress](result) +%
+          forAllChildrenAux(cast[pointer](cast[ByteAddress](result) +%
                             GenericSeqSize +% (i*%elemSize)),
                             extGetCellType(result).base, waPush)
           let len1 = gch.tempStack.len
@@ -220,11 +232,11 @@ proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
           forAllChildrenAux(cast[pointer](cast[ByteAddress](result) +%
                             GenericSeqSize +% (i*%elemSize)),
                             extGetCellType(result).base, waZctDecRef)
-      
+
     # XXX: zeroing out the memory can still result in crashes if a wiped-out
-    # cell is aliased by another pointer (ie proc paramter or a let variable).
+    # cell is aliased by another pointer (ie proc parameter or a let variable).
     # This is a tought problem, because even if we don't zeroMem here, in the
-    # presense of user defined destructors, the user will expect the cell to be
+    # presence of user defined destructors, the user will expect the cell to be
     # "destroyed" thus creating the same problem. We can destoy the cell in the
     # finalizer of the sequence, but this makes destruction non-deterministic.
     zeroMem(cast[pointer](cast[ByteAddress](result) +% GenericSeqSize +%
@@ -258,13 +270,23 @@ proc nimFloatToStr(f: float): string {.compilerproc.} =
     if buf[i] == ',':
       buf[i] = '.'
       hasDot = true
-    elif buf[i] in {'a'..'z', 'A'..'Z', '.'}: 
+    elif buf[i] in {'a'..'z', 'A'..'Z', '.'}:
       hasDot = true
   if not hasDot:
     buf[n] = '.'
     buf[n+1] = '0'
     buf[n+2] = '\0'
-  result = $buf
+  # On Windows nice numbers like '1.#INF', '-1.#INF' or '1.#NAN' are produced.
+  # We want to get rid of these here:
+  if buf[n-1] == 'N':
+    result = "nan"
+  elif buf[n-1] == 'F':
+    if buf[0] == '-':
+      result = "-inf"
+    else:
+      result = "inf"
+  else:
+    result = $buf
 
 proc strtod(buf: cstring, endptr: ptr cstring): float64 {.importc,
   header: "<stdlib.h>", noSideEffect.}
@@ -299,7 +321,7 @@ proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
   template addToBuf(c) =
     if ti < t.high:
       t[ti] = c; inc(ti)
-  
+
   # Sign?
   if s[i] == '+' or s[i] == '-':
     if s[i] == '-':
@@ -320,7 +342,7 @@ proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
   if s[i] == 'I' or s[i] == 'i':
     if s[i+1] == 'N' or s[i+1] == 'n':
       if s[i+2] == 'F' or s[i+2] == 'f':
-        if s[i+3] notin IdentChars: 
+        if s[i+3] notin IdentChars:
           number = Inf*sign
           return i+3 - start
     return 0
diff --git a/lib/system/threads.nim b/lib/system/threads.nim
index 496c31af1..d8e011ecb 100644
--- a/lib/system/threads.nim
+++ b/lib/system/threads.nim
@@ -84,8 +84,18 @@ when defined(windows):
       importc: "TlsAlloc", stdcall, header: "<windows.h>".}
     proc threadVarSetValue(dwTlsIndex: TThreadVarSlot, lpTlsValue: pointer) {.
       importc: "TlsSetValue", stdcall, header: "<windows.h>".}
-    proc threadVarGetValue(dwTlsIndex: TThreadVarSlot): pointer {.
+    proc tlsGetValue(dwTlsIndex: TThreadVarSlot): pointer {.
       importc: "TlsGetValue", stdcall, header: "<windows.h>".}
+
+    proc getLastError(): uint32 {.
+      importc: "GetLastError", stdcall, header: "<windows.h>".}
+    proc setLastError(x: uint32) {.
+      importc: "SetLastError", stdcall, header: "<windows.h>".}
+
+    proc threadVarGetValue(dwTlsIndex: TThreadVarSlot): pointer =
+      let realLastError = getLastError()
+      result = tlsGetValue(dwTlsIndex)
+      setLastError(realLastError)
   else:
     proc threadVarAlloc(): TThreadVarSlot {.
       importc: "TlsAlloc", stdcall, dynlib: "kernel32".}
@@ -95,7 +105,9 @@ when defined(windows):
       importc: "TlsGetValue", stdcall, dynlib: "kernel32".}
   
 else:
-  {.passL: "-pthread".}
+  when not defined(macosx):
+    {.passL: "-pthread".}
+
   {.passC: "-pthread".}
 
   type
@@ -115,7 +127,7 @@ else:
     importc, header: "<pthread.h>".}
 
   proc pthread_create(a1: var TSysThread, a2: var TPthread_attr,
-            a3: proc (x: pointer) {.noconv.}, 
+            a3: proc (x: pointer): pointer {.noconv.}, 
             a4: pointer): cint {.importc: "pthread_create", 
             header: "<pthread.h>".}
   proc pthread_join(a1: TSysThread, a2: ptr pointer): cint {.
@@ -303,7 +315,7 @@ when defined(windows):
     threadProcWrapperBody(closure)
     # implicitly return 0
 else:
-  proc threadProcWrapper[TArg](closure: pointer) {.noconv.} = 
+  proc threadProcWrapper[TArg](closure: pointer): pointer {.noconv.} = 
     threadProcWrapperBody(closure)
 {.pop.}
 
diff --git a/lib/system/timers.nim b/lib/system/timers.nim
index e58ff7adc..e5de791ac 100644
--- a/lib/system/timers.nim
+++ b/lib/system/timers.nim
@@ -27,9 +27,9 @@ when defined(windows):
   proc `-`(a, b: TTicks): TNanos =
     var frequency: int64
     QueryPerformanceFrequency(frequency)
-    var performanceCounterRate = 1000000000.0 / toFloat(frequency.int)
+    var performanceCounterRate = 1e+9'f64 / float64(frequency)
 
-    result = ((a.int64 - b.int64).int.toFloat * performanceCounterRate).TNanos
+    result = TNanos(float64(a.int64 - b.int64) * performanceCounterRate)
 
 elif defined(macosx):
   type