11 files changed, 596 insertions, 426 deletions
diff --git a/lib/system/assign.nim b/lib/system/assign.nim
index 78995954f..55d7572e2 100644
--- a/lib/system/assign.nim
+++ b/lib/system/assign.nim
@@ -17,7 +17,7 @@ proc genericAssignAux(dest, src: pointer, n: ptr TNimNode,
     s = cast[ByteAddress](src)
   case n.kind
   of nkSlot:
-    genericAssignAux(cast[pointer](d +% n.offset), 
+    genericAssignAux(cast[pointer](d +% n.offset),
                      cast[pointer](s +% n.offset), n.typ, shallow)
   of nkList:
     for i in 0..n.len-1:
@@ -54,7 +54,7 @@ proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) =
       unsureAsgnRef(x, copyString(cast[NimString](s2)))
   of tySequence:
     var s2 = cast[PPointer](src)[]
-    var seq = cast[PGenericSeq](s2)      
+    var seq = cast[PGenericSeq](s2)
     var x = cast[PPointer](dest)
     if s2 == nil or shallow or (seq.reserved and seqShallowFlag) != 0:
       # this can happen! nil sequences are allowed
@@ -100,7 +100,7 @@ proc genericShallowAssign(dest, src: pointer, mt: PNimType) {.compilerProc.} =
 
 when false:
   proc debugNimType(t: PNimType) =
-    if t.isNil: 
+    if t.isNil:
       cprintf("nil!")
       return
     var k: cstring
@@ -170,21 +170,13 @@ proc objectInit(dest: pointer, typ: PNimType) =
     for i in 0..(typ.size div typ.base.size)-1:
       objectInit(cast[pointer](d +% i * typ.base.size), typ.base)
   else: discard # nothing to do
-  
+
 # ---------------------- assign zero -----------------------------------------
 
-when not defined(nimmixin):
-  proc destroy(x: int) = discard
-  proc nimDestroyRange*[T](r: T) =
-    # internal proc used for destroying sequences and arrays
-    for i in countup(0, r.len - 1): destroy(r[i])
-else:
-  # XXX Why is this exported and no compilerproc? -> compilerprocs cannot be
-  # generic for now
-  proc nimDestroyRange*[T](r: T) =
-    # internal proc used for destroying sequences and arrays
-    mixin destroy
-    for i in countup(0, r.len - 1): destroy(r[i])
+proc nimDestroyRange[T](r: T) {.compilerProc.} =
+  # internal proc used for destroying sequences and arrays
+  mixin `=destroy`
+  for i in countup(0, r.len - 1): `=destroy`(r[i])
 
 proc genericReset(dest: pointer, mt: PNimType) {.compilerProc, benign.}
 proc genericResetAux(dest: pointer, n: ptr TNimNode) =
@@ -198,7 +190,7 @@ proc genericResetAux(dest: pointer, n: ptr TNimNode) =
     var m = selectBranch(dest, n)
     if m != nil: genericResetAux(dest, m)
     zeroMem(cast[pointer](d +% n.offset), n.typ.size)
-  
+
 proc genericReset(dest: pointer, mt: PNimType) =
   var d = cast[ByteAddress](dest)
   sysAssert(mt != nil, "genericReset 2")
@@ -218,15 +210,15 @@ proc genericReset(dest: pointer, mt: PNimType) =
   else:
     zeroMem(dest, mt.size) # set raw bits to zero
 
-proc selectBranch(discVal, L: int, 
+proc selectBranch(discVal, L: int,
                   a: ptr array [0..0x7fff, ptr TNimNode]): ptr TNimNode =
   result = a[L] # a[L] contains the ``else`` part (but may be nil)
   if discVal <% L:
     var x = a[discVal]
     if x != nil: result = x
-  
-proc FieldDiscriminantCheck(oldDiscVal, newDiscVal: int, 
-                            a: ptr array [0..0x7fff, ptr TNimNode], 
+
+proc FieldDiscriminantCheck(oldDiscVal, newDiscVal: int,
+                            a: ptr array [0..0x7fff, ptr TNimNode],
                             L: int) {.compilerProc.} =
   var oldBranch = selectBranch(oldDiscVal, L, a)
   var newBranch = selectBranch(newDiscVal, L, a)
diff --git a/lib/system/debugger.nim b/lib/system/debugger.nim
index 63ccd770b..b18c61755 100644
--- a/lib/system/debugger.nim
+++ b/lib/system/debugger.nim
@@ -21,7 +21,7 @@ type
                            # only slots that are
                            # needed are allocated and not 10_000,
                            # except for the global data description.
-    f: Frame
+    f: TFrame
     slots: array[0..10_000, VarSlot]
 {.deprecated: [TVarSlot: VarSlot, TExtendedFrame: ExtendedFrame].}
 
@@ -66,7 +66,7 @@ var
   dbgBP: array[0..127, Breakpoint] # breakpoints
   dbgBPlen: int
   dbgBPbloom: int64  # we use a bloom filter to speed up breakpoint checking
-  
+
   dbgFilenames*: array[0..300, cstring] ## registered filenames;
                                         ## 'nil' terminated
   dbgFilenameLen: int
@@ -197,7 +197,7 @@ proc genericHashAux(dest: pointer, n: ptr TNimNode, shallow: bool,
     result = genericHashAux(cast[pointer](d +% n.offset), n.typ, shallow, h)
   of nkList:
     result = h
-    for i in 0..n.len-1: 
+    for i in 0..n.len-1:
       result = result !& genericHashAux(dest, n.sons[i], shallow, result)
   of nkCase:
     result = h !& hash(cast[pointer](d +% n.offset), n.typ.size)
@@ -205,7 +205,7 @@ proc genericHashAux(dest: pointer, n: ptr TNimNode, shallow: bool,
     if m != nil: result = genericHashAux(dest, m, shallow, result)
   of nkNone: sysAssert(false, "genericHashAux")
 
-proc genericHashAux(dest: pointer, mt: PNimType, shallow: bool, 
+proc genericHashAux(dest: pointer, mt: PNimType, shallow: bool,
                     h: Hash): Hash =
   sysAssert(mt != nil, "genericHashAux 2")
   case mt.kind
@@ -257,7 +257,7 @@ proc genericHashAux(dest: pointer, mt: PNimType, shallow: bool,
 
 proc genericHash(dest: pointer, mt: PNimType): int =
   result = genericHashAux(dest, mt, false, 0)
-  
+
 proc dbgRegisterWatchpoint(address: pointer, name: cstring,
                            typ: PNimType) {.compilerproc.} =
   let L = watchPointsLen
@@ -285,7 +285,7 @@ var
     ## Only code compiled with the ``debugger:on`` switch calls this hook.
 
   dbgWatchpointHook*: proc (watchpointName: cstring) {.nimcall.}
-  
+
 proc checkWatchpoints =
   let L = watchPointsLen
   for i in 0.. <L:
diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim
index 5d2faa2d6..df28c1493 100644
--- a/lib/system/excpt.nim
+++ b/lib/system/excpt.nim
@@ -44,10 +44,12 @@ var
     # a global variable for the root of all try blocks
   currException {.threadvar.}: ref Exception
 
+proc getFrame*(): PFrame {.compilerRtl, inl.} = framePtr
+
 proc popFrame {.compilerRtl, inl.} =
   framePtr = framePtr.prev
 
-proc setFrame(s: PFrame) {.compilerRtl, inl.} =
+proc setFrame*(s: PFrame) {.compilerRtl, inl.} =
   framePtr = s
 
 proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index ae8bb724f..0c632aeb1 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -16,6 +16,10 @@
 # Special care has been taken to avoid recursion as far as possible to avoid
 # stack overflows when traversing deep datastructures. It is well-suited
 # for soft real time applications (like games).
+
+when defined(nimCoroutines):
+  import arch
+
 {.push profiler:off.}
 
 const
@@ -64,8 +68,16 @@ type
     cycleTableSize: int      # max entries in cycle table
     maxPause: int64          # max measured GC pause in nanoseconds
 
+  GcStack {.final.} = object
+    prev: ptr GcStack
+    next: ptr GcStack
+    starts: pointer
+    pos: pointer
+    maxStackSize: int
+
   GcHeap {.final, pure.} = object # this contains the zero count and
                                    # non-zero count table
+    stack: ptr GcStack
     stackBottom: pointer
     cycleThreshold: int
     when useCellIds:
@@ -154,7 +166,7 @@ template gcTrace(cell, state: expr): stmt {.immediate.} =
 
 # forward declarations:
 proc collectCT(gch: var GcHeap) {.benign.}
-proc isOnStack*(p: pointer): bool {.noinline, benign.}
+proc isOnStack(p: pointer): bool {.noinline, benign.}
 proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
 proc doOperation(p: pointer, op: WalkOp) {.benign.}
 proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
@@ -293,20 +305,6 @@ proc initGC() =
     when useMarkForDebug or useBackupGc:
       init(gch.marked)
 
-var
-  localGcInitialized {.rtlThreadVar.}: bool
-
-proc setupForeignThreadGc*() =
-  ## call this if you registered a callback that will be run from a thread not
-  ## under your control. This has a cheap thread-local guard, so the GC for
-  ## this thread will only be initialized once per thread, no matter how often
-  ## it is called.
-  if not localGcInitialized:
-    localGcInitialized = true
-    var stackTop {.volatile.}: pointer
-    setStackBottom(addr(stackTop))
-    initGC()
-
 when useMarkForDebug or useBackupGc:
   type
     GlobalMarkerProc = proc () {.nimcall, benign.}
@@ -816,138 +814,7 @@ proc markThreadStacks(gch: var GcHeap) =
         sp = sp +% sizeof(pointer)
       it = it.next
 
-# ----------------- stack management --------------------------------------
-#  inspired from Smart Eiffel
-
-when defined(sparc):
-  const stackIncreases = false
-elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
-     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
-  const stackIncreases = true
-else:
-  const stackIncreases = false
-
-when not defined(useNimRtl):
-  {.push stack_trace: off.}
-  proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
-    # the first init must be the one that defines the stack bottom:
-    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
-    else:
-      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[ByteAddress](gch.stackBottom)
-      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](min(a, b))
-      else:
-        gch.stackBottom = cast[pointer](max(a, b))
-  {.pop.}
-
-proc stackSize(): int {.noinline.} =
-  var stackTop {.volatile.}: pointer
-  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
-
-when defined(sparc): # For SPARC architecture.
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[TAddress](gch.stackBottom)
-    var a = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
-    when defined(sparcv9):
-      asm  """"flushw \n" """
-    else:
-      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
-
-    var
-      max = gch.stackBottom
-      sp: PPointer
-      stackTop: array[0..1, pointer]
-    sp = addr(stackTop[0])
-    # Addresses decrease as the stack grows.
-    while sp <= max:
-      gcMark(gch, sp[])
-      sp = cast[PPointer](cast[TAddress](sp) +% sizeof(pointer))
-
-elif defined(ELATE):
-  {.error: "stack marking code is to be written for this architecture".}
-
-elif stackIncreases:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses increase as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var a = cast[TAddress](gch.stackBottom)
-    var b = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  var
-    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-      # a little hack to get the size of a JmpBuf in the generated C code
-      # in a platform independent way
-
-  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[TAddress](gch.stackBottom)
-      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp -% sizeof(pointer)
-
-else:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses decrease as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
-    # We use a jmp_buf buffer that is in the C stack.
-    # Used to traverse the stack and registers assuming
-    # that 'setjmp' will save registers in the C stack.
-    type PStackSlice = ptr array [0..7, pointer]
-    var registers {.noinit.}: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers))
-      when defined(amd64):
-        # words within the jmp_buf structure may not be properly aligned.
-        let regEnd = sp +% sizeof(registers)
-        while sp <% regEnd:
-          gcMark(gch, cast[PPointer](sp)[])
-          gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
-          sp = sp +% sizeof(pointer)
-      # Make sure sp is word-aligned
-      sp = sp and not (sizeof(pointer) - 1)
-      # loop unrolled:
-      while sp <% max - 8*sizeof(pointer):
-        gcMark(gch, cast[PStackSlice](sp)[0])
-        gcMark(gch, cast[PStackSlice](sp)[1])
-        gcMark(gch, cast[PStackSlice](sp)[2])
-        gcMark(gch, cast[PStackSlice](sp)[3])
-        gcMark(gch, cast[PStackSlice](sp)[4])
-        gcMark(gch, cast[PStackSlice](sp)[5])
-        gcMark(gch, cast[PStackSlice](sp)[6])
-        gcMark(gch, cast[PStackSlice](sp)[7])
-        sp = sp +% sizeof(pointer)*8
-      # last few entries:
-      while sp <=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp +% sizeof(pointer)
+include gc_common
 
 proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
   forEachStackSlot(gch, gcMark)
@@ -956,10 +823,6 @@ when useMarkForDebug or useBackupGc:
   proc markStackAndRegistersForSweep(gch: var GcHeap) =
     forEachStackSlot(gch, stackMarkS)
 
-# ----------------------------------------------------------------------------
-# end of non-portable code
-# ----------------------------------------------------------------------------
-
 proc collectZCT(gch: var GcHeap): bool =
   # Note: Freeing may add child objects to the ZCT! So essentially we do
   # deep freeing, which is bad for incremental operation. In order to
@@ -1033,7 +896,8 @@ proc collectCTBody(gch: var GcHeap) =
     let t0 = getticks()
   sysAssert(allocInv(gch.region), "collectCT: begin")
 
-  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+  when not defined(nimCoroutines):
+    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   sysAssert(gch.decStack.len == 0, "collectCT")
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
@@ -1064,11 +928,19 @@ when useMarkForDebug or useBackupGc:
     markStackAndRegistersForSweep(gch)
     markGlobals(gch)
 
+when defined(nimCoroutines):
+  proc currentStackSizes(): int =
+    for stack in items(gch.stack):
+      result = result + stackSize(stack.starts, stack.pos)
+
 proc collectCT(gch: var GcHeap) =
   # stackMarkCosts prevents some pathological behaviour: Stack marking
   # becomes more expensive with large stacks and large stacks mean that
   # cells with RC=0 are more likely to be kept alive by the stack.
-  let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
+  when defined(nimCoroutines):
+    let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold)
+  else:
+    let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
   if (gch.zct.len >= stackMarkCosts or (cycleGC and
       getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
       gch.recGcLock == 0:
@@ -1137,8 +1009,13 @@ when not defined(useNimRtl):
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
              "[GC] zct capacity: " & $gch.zct.cap & "\n" &
              "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
              "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
+    when defined(nimCoroutines):
+      result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
+      for stack in items(gch.stack):
+        result = result & "[GC]   stack " & stack.starts.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
+    else:
+      result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
     GC_enable()
 
 {.pop.}
diff --git a/lib/system/gc_common.nim b/lib/system/gc_common.nim
new file mode 100644
index 000000000..c7dd667e4
--- /dev/null
+++ b/lib/system/gc_common.nim
@@ -0,0 +1,275 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Rokas Kupstys
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+proc len(stack: ptr GcStack): int =
+  if stack == nil:
+    return 0
+
+  var s = stack
+  result = 1
+  while s.next != nil:
+    inc(result)
+    s = s.next
+
+when defined(nimCoroutines):
+  proc stackSize(stackBottom: pointer, pos: pointer=nil): int {.noinline.} =
+    var sp: pointer
+    if pos == nil:
+      var stackTop {.volatile.}: pointer
+      sp = addr(stackTop)
+    else:
+      sp = pos
+    result = abs(cast[int](sp) - cast[int](stackBottom))
+
+  proc GC_addStack*(starts: pointer) {.cdecl, exportc.} =
+    var sp {.volatile.}: pointer
+    var stack = cast[ptr GcStack](alloc0(sizeof(GcStack)))
+    stack.starts = starts
+    stack.pos = addr sp
+    if gch.stack == nil:
+      gch.stack = stack
+    else:
+      stack.next = gch.stack
+      gch.stack.prev = stack
+      gch.stack = stack
+    # c_fprintf(c_stdout, "[GC] added stack 0x%016X\n", starts)
+
+  proc GC_removeStack*(starts: pointer) {.cdecl, exportc.} =
+    var stack = gch.stack
+    while stack != nil:
+      if stack.starts == starts:
+        if stack.prev == nil:
+          if stack.next != nil:
+            stack.next.prev = nil
+          gch.stack = stack.next
+        else:
+          stack.prev.next = stack.next
+          if stack.next != nil:
+              stack.next.prev = stack.prev
+        dealloc(stack)
+        # echo "[GC] removed stack ", starts.repr
+        break
+      else:
+        stack = stack.next
+
+  proc GC_setCurrentStack*(starts, pos: pointer) {.cdecl, exportc.} =
+    var stack = gch.stack
+    while stack != nil:
+      if stack.starts == starts:
+        stack.pos = pos
+        stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts, pos))
+        return
+      stack = stack.next
+    gcAssert(false, "Current stack position does not belong to registered stack")
+else:
+  proc stackSize(): int {.noinline.} =
+    var stackTop {.volatile.}: pointer
+    result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
+
+iterator items(stack: ptr GcStack): ptr GcStack =
+  var s = stack
+  while not isNil(s):
+    yield s
+    s = s.next
+
+var
+  localGcInitialized {.rtlThreadVar.}: bool
+
+proc setupForeignThreadGc*() =
+  ## call this if you registered a callback that will be run from a thread not
+  ## under your control. This has a cheap thread-local guard, so the GC for
+  ## this thread will only be initialized once per thread, no matter how often
+  ## it is called.
+  if not localGcInitialized:
+    localGcInitialized = true
+    var stackTop {.volatile.}: pointer
+    setStackBottom(addr(stackTop))
+    initGC()
+
+# ----------------- stack management --------------------------------------
+#  inspired from Smart Eiffel
+
+when defined(sparc):
+  const stackIncreases = false
+elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
+     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
+  const stackIncreases = true
+else:
+  const stackIncreases = false
+
+when not defined(useNimRtl):
+  {.push stack_trace: off.}
+  proc setStackBottom(theStackBottom: pointer) =
+    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
+    # the first init must be the one that defines the stack bottom:
+    when defined(nimCoroutines):
+      GC_addStack(theStackBottom)
+    else:
+      if gch.stackBottom == nil: gch.stackBottom = theStackBottom
+      else:
+        var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
+        var b = cast[ByteAddress](gch.stackBottom)
+        #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
+        when stackIncreases:
+          gch.stackBottom = cast[pointer](min(a, b))
+        else:
+          gch.stackBottom = cast[pointer](max(a, b))
+  {.pop.}
+
+when defined(sparc): # For SPARC architecture.
+  when defined(nimCoroutines):
+    {.error: "Nim coroutines are not supported on this platform."}
+
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var b = cast[TAddress](gch.stackBottom)
+    var a = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+    when defined(sparcv9):
+      asm  """"flushw \n" """
+    else:
+      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
+
+    var
+      max = gch.stackBottom
+      sp: PPointer
+      stackTop: array[0..1, pointer]
+    sp = addr(stackTop[0])
+    # Addresses decrease as the stack grows.
+    while sp <= max:
+      gcMark(gch, sp[])
+      sp = cast[PPointer](cast[TAddress](sp) +% sizeof(pointer))
+
+elif defined(ELATE):
+  {.error: "stack marking code is to be written for this architecture".}
+
+elif stackIncreases:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses increase as the stack grows.
+  # ---------------------------------------------------------------------------
+  when defined(nimCoroutines):
+    {.error: "Nim coroutines are not supported on this platform."}
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var a = cast[TAddress](gch.stackBottom)
+    var b = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  var
+    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
+      # a little hack to get the size of a JmpBuf in the generated C code
+      # in a platform independent way
+
+  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+    var registers: C_JmpBuf
+    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      var max = cast[TAddress](gch.stackBottom)
+      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
+      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
+      # otherwise sp would be below the registers structure).
+      while sp >=% max:
+        gcMark(gch, cast[ppointer](sp)[])
+        sp = sp -% sizeof(pointer)
+
+else:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses decrease as the stack grows.
+  # ---------------------------------------------------------------------------
+  when defined(nimCoroutines):
+    proc isOnStack(p: pointer): bool =
+      var stackTop {.volatile.}: pointer
+      stackTop = addr(stackTop)
+      for stack in items(gch.stack):
+        var b = cast[ByteAddress](stack.starts)
+        var a = cast[ByteAddress](stack.starts) - stack.maxStackSize
+        var x = cast[ByteAddress](p)
+        if a <=% x and x <=% b:
+          return true
+  
+    template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+      # We use a jmp_buf buffer that is in the C stack.
+      # Used to traverse the stack and registers assuming
+      # that 'setjmp' will save registers in the C stack.
+      type PStackSlice = ptr array [0..7, pointer]
+      var registers {.noinit.}: Registers
+      getRegisters(registers)
+      for i in registers.low .. registers.high:
+        gcMark(gch, cast[PPointer](registers[i]))
+  
+      for stack in items(gch.stack):
+        stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts))
+        var max = cast[ByteAddress](stack.starts)
+        var sp = cast[ByteAddress](stack.pos)
+        # loop unrolled:
+        while sp <% max - 8*sizeof(pointer):
+          gcMark(gch, cast[PStackSlice](sp)[0])
+          gcMark(gch, cast[PStackSlice](sp)[1])
+          gcMark(gch, cast[PStackSlice](sp)[2])
+          gcMark(gch, cast[PStackSlice](sp)[3])
+          gcMark(gch, cast[PStackSlice](sp)[4])
+          gcMark(gch, cast[PStackSlice](sp)[5])
+          gcMark(gch, cast[PStackSlice](sp)[6])
+          gcMark(gch, cast[PStackSlice](sp)[7])
+          sp = sp +% sizeof(pointer)*8
+        # last few entries:
+        while sp <=% max:
+          gcMark(gch, cast[PPointer](sp)[])
+          sp = sp +% sizeof(pointer)
+  else:
+    proc isOnStack(p: pointer): bool =
+      var stackTop {.volatile.}: pointer
+      stackTop = addr(stackTop)
+      var b = cast[ByteAddress](gch.stackBottom)
+      var a = cast[ByteAddress](stackTop)
+      var x = cast[ByteAddress](p)
+      result = a <=% x and x <=% b
+
+    template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+      # We use a jmp_buf buffer that is in the C stack.
+      # Used to traverse the stack and registers assuming
+      # that 'setjmp' will save registers in the C stack.
+      type PStackSlice = ptr array [0..7, pointer]
+      var registers {.noinit.}: C_JmpBuf
+      if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+        var max = cast[ByteAddress](gch.stackBottom)
+        var sp = cast[ByteAddress](addr(registers))
+        when defined(amd64):
+          # words within the jmp_buf structure may not be properly aligned.
+          let regEnd = sp +% sizeof(registers)
+          while sp <% regEnd:
+            gcMark(gch, cast[PPointer](sp)[])
+            gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
+            sp = sp +% sizeof(pointer)
+        # Make sure sp is word-aligned
+        sp = sp and not (sizeof(pointer) - 1)
+        # loop unrolled:
+        while sp <% max - 8*sizeof(pointer):
+          gcMark(gch, cast[PStackSlice](sp)[0])
+          gcMark(gch, cast[PStackSlice](sp)[1])
+          gcMark(gch, cast[PStackSlice](sp)[2])
+          gcMark(gch, cast[PStackSlice](sp)[3])
+          gcMark(gch, cast[PStackSlice](sp)[4])
+          gcMark(gch, cast[PStackSlice](sp)[5])
+          gcMark(gch, cast[PStackSlice](sp)[6])
+          gcMark(gch, cast[PStackSlice](sp)[7])
+          sp = sp +% sizeof(pointer)*8
+        # last few entries:
+        while sp <=% max:
+          gcMark(gch, cast[PPointer](sp)[])
+          sp = sp +% sizeof(pointer)
+
+# ----------------------------------------------------------------------------
+# end of non-portable code
+# ----------------------------------------------------------------------------
diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim
index ee80c61e9..d1aecb7a2 100644
--- a/lib/system/gc_ms.nim
+++ b/lib/system/gc_ms.nim
@@ -9,6 +9,10 @@
 
 # A simple mark&sweep garbage collector for Nim. Define the
 # symbol ``gcUseBitvectors`` to generate a variant of this GC.
+
+when defined(nimCoroutines):
+  import arch
+
 {.push profiler:off.}
 
 const
@@ -44,8 +48,16 @@ type
     maxStackSize: int        # max stack size
     freedObjects: int        # max entries in cycle table
 
+  GcStack {.final.} = object
+    prev: ptr GcStack
+    next: ptr GcStack
+    starts: pointer
+    pos: pointer
+    maxStackSize: int
+
   GcHeap = object            # this contains the zero count and
                              # non-zero count table
+    stack: ptr GcStack
     stackBottom: pointer
     cycleThreshold: int
     when useCellIds:
@@ -118,7 +130,6 @@ when BitsPerPage mod (sizeof(int)*8) != 0:
 
 # forward declarations:
 proc collectCT(gch: var GcHeap) {.benign.}
-proc isOnStack*(p: pointer): bool {.noinline, benign.}
 proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
 proc doOperation(p: pointer, op: WalkOp) {.benign.}
 proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
@@ -169,20 +180,6 @@ proc initGC() =
       init(gch.allocated)
       init(gch.marked)
 
-var
-  localGcInitialized {.rtlThreadVar.}: bool
-
-proc setupForeignThreadGc*() =
-  ## call this if you registered a callback that will be run from a thread not
-  ## under your control. This has a cheap thread-local guard, so the GC for
-  ## this thread will only be initialized once per thread, no matter how often
-  ## it is called.
-  if not localGcInitialized:
-    localGcInitialized = true
-    var stackTop {.volatile.}: pointer
-    setStackBottom(addr(stackTop))
-    initGC()
-
 proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
   var d = cast[ByteAddress](dest)
   case n.kind
@@ -407,145 +404,14 @@ proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
     if objStart != nil:
       mark(gch, objStart)
 
-# ----------------- stack management --------------------------------------
-#  inspired from Smart Eiffel
+include gc_common
 
-when defined(sparc):
-  const stackIncreases = false
-elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
-     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
-  const stackIncreases = true
-else:
-  const stackIncreases = false
-
-when not defined(useNimRtl):
-  {.push stack_trace: off.}
-  proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
-    # the first init must be the one that defines the stack bottom:
-    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
-    else:
-      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[ByteAddress](gch.stackBottom)
-      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](min(a, b))
-      else:
-        gch.stackBottom = cast[pointer](max(a, b))
-  {.pop.}
-
-proc stackSize(): int {.noinline.} =
-  var stackTop {.volatile.}: pointer
-  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
-
-when defined(sparc): # For SPARC architecture.
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    when defined(sparcv9):
-      asm  """"flushw \n" """
-    else:
-      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
-
-    var
-      max = gch.stackBottom
-      sp: PPointer
-      stackTop: array[0..1, pointer]
-    sp = addr(stackTop[0])
-    # Addresses decrease as the stack grows.
-    while sp <= max:
-      gcMark(gch, sp[])
-      sp = cast[ppointer](cast[ByteAddress](sp) +% sizeof(pointer))
-
-elif defined(ELATE):
-  {.error: "stack marking code is to be written for this architecture".}
-
-elif stackIncreases:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses increase as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var a = cast[ByteAddress](gch.stackBottom)
-    var b = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  var
-    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-      # a little hack to get the size of a JmpBuf in the generated C code
-      # in a platform independent way
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp -% sizeof(pointer)
-
-else:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses decrease as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    # We use a jmp_buf buffer that is in the C stack.
-    # Used to traverse the stack and registers assuming
-    # that 'setjmp' will save registers in the C stack.
-    type PStackSlice = ptr array [0..7, pointer]
-    var registers {.noinit.}: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers))
-      when defined(amd64):
-        # words within the jmp_buf structure may not be properly aligned.
-        let regEnd = sp +% sizeof(registers)
-        while sp <% regEnd:
-          gcMark(gch, cast[PPointer](sp)[])
-          gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
-          sp = sp +% sizeof(pointer)
-      # Make sure sp is word-aligned
-      sp = sp and not (sizeof(pointer) - 1)
-      # loop unrolled:
-      while sp <% max - 8*sizeof(pointer):
-        gcMark(gch, cast[PStackSlice](sp)[0])
-        gcMark(gch, cast[PStackSlice](sp)[1])
-        gcMark(gch, cast[PStackSlice](sp)[2])
-        gcMark(gch, cast[PStackSlice](sp)[3])
-        gcMark(gch, cast[PStackSlice](sp)[4])
-        gcMark(gch, cast[PStackSlice](sp)[5])
-        gcMark(gch, cast[PStackSlice](sp)[6])
-        gcMark(gch, cast[PStackSlice](sp)[7])
-        sp = sp +% sizeof(pointer)*8
-      # last few entries:
-      while sp <=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp +% sizeof(pointer)
-
-# ----------------------------------------------------------------------------
-# end of non-portable code
-# ----------------------------------------------------------------------------
+proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
+  forEachStackSlot(gch, gcMark)
 
 proc collectCTBody(gch: var GcHeap) =
-  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+  when not defined(nimCoroutines):
+    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
   markGlobals(gch)
@@ -599,8 +465,13 @@ when not defined(useNimRtl):
              "[GC] occupied memory: " & $getOccupiedMem() & "\n" &
              "[GC] collections: " & $gch.stat.collections & "\n" &
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
-             "[GC] freed objects: " & $gch.stat.freedObjects & "\n" &
-             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
+             "[GC] freed objects: " & $gch.stat.freedObjects & "\n"
+    when defined(nimCoroutines):
+      result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
+      for stack in items(gch.stack):
+        result = result & "[GC]   stack " & stack.starts.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
+    else:
+      result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
     GC_enable()
 
 {.pop.}
diff --git a/lib/system/inclrtl.nim b/lib/system/inclrtl.nim
index dbc961402..d0dc38284 100644
--- a/lib/system/inclrtl.nim
+++ b/lib/system/inclrtl.nim
@@ -33,7 +33,7 @@ elif defined(useNimRtl):
   when defined(windows): 
     const nimrtl* = "nimrtl.dll"
   elif defined(macosx):
-    const nimrtl* = "nimrtl.dylib"
+    const nimrtl* = "libnimrtl.dylib"
   else: 
     const nimrtl* = "libnimrtl.so"
   {.pragma: rtl, importc: "nimrtl_$1", dynlib: nimrtl, gcsafe.}
diff --git a/lib/system/jssys.nim b/lib/system/jssys.nim
index 242f42c16..a6839c48e 100644
--- a/lib/system/jssys.nim
+++ b/lib/system/jssys.nim
@@ -532,15 +532,17 @@ proc nimMax(a, b: int): int {.compilerproc.} = return if a >= b: a else: b
 type NimString = string # hack for hti.nim
 include "system/hti"
 
+type JSRef = int # Fake type.
+
 proc isFatPointer(ti: PNimType): bool =
   # This has to be consistent with the code generator!
   return ti.base.kind notin {tyObject,
     tyArray, tyArrayConstr, tyTuple,
     tyOpenArray, tySet, tyVar, tyRef, tyPtr}
 
-proc nimCopy(dest, src: pointer, ti: PNimType): pointer {.compilerproc.}
+proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef {.compilerproc.}
 
-proc nimCopyAux(dest, src: pointer, n: ptr TNimNode) {.compilerproc.} =
+proc nimCopyAux(dest, src: JSRef, n: ptr TNimNode) {.compilerproc.} =
   case n.kind
   of nkNone: sysAssert(false, "nimCopyAux")
   of nkSlot:
@@ -562,7 +564,7 @@ proc nimCopyAux(dest, src: pointer, n: ptr TNimNode) {.compilerproc.} =
       }
     """
 
-proc nimCopy(dest, src: pointer, ti: PNimType): pointer =
+proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
   case ti.kind
   of tyPtr, tyRef, tyVar, tyNil:
     if not isFatPointer(ti):
@@ -603,12 +605,11 @@ proc nimCopy(dest, src: pointer, ti: PNimType): pointer =
   else:
     result = src
 
-proc genericReset(x: pointer, ti: PNimType): pointer {.compilerproc.} =
+proc genericReset(x: JSRef, ti: PNimType): JSRef {.compilerproc.} =
+  asm "`result` = null;"
   case ti.kind
   of tyPtr, tyRef, tyVar, tyNil:
-    if not isFatPointer(ti):
-      result = nil
-    else:
+    if isFatPointer(ti):
       asm """
         `result` = [null, 0];
       """
@@ -633,9 +634,9 @@ proc genericReset(x: pointer, ti: PNimType): pointer {.compilerproc.} =
       }
     """
   else:
-    result = nil
+    discard
 
-proc arrayConstr(len: int, value: pointer, typ: PNimType): pointer {.
+proc arrayConstr(len: int, value: JSRef, typ: PNimType): JSRef {.
                  asmNoStackFrame, compilerproc.} =
   # types are fake
   asm """
diff --git a/lib/system/nimscript.nim b/lib/system/nimscript.nim
new file mode 100644
index 000000000..c714f88ee
--- /dev/null
+++ b/lib/system/nimscript.nim
@@ -0,0 +1,164 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+
+# Nim's configuration system now uses Nim for scripting. This module provides
+# a few things that are required for this to work.
+
+template builtin = discard
+
+proc listDirs*(dir: string): seq[string] = builtin
+proc listFiles*(dir: string): seq[string] = builtin
+
+proc removeDir(dir: string) = builtin
+proc removeFile(dir: string) = builtin
+proc moveFile(src, dest: string) = builtin
+proc createDir(dir: string) = builtin
+proc getOsError: string = builtin
+proc setCurrentDir(dir: string) = builtin
+proc getCurrentDir(): string = builtin
+proc paramStr*(i: int): string = builtin
+proc paramCount*(): int = builtin
+
+proc switch*(key: string, val="") = builtin
+proc getCommand*(): string = builtin
+proc setCommand*(cmd: string) = builtin
+proc cmpIgnoreStyle(a, b: string): int = builtin
+
+proc strip(s: string): string =
+  var i = 0
+  while s[i] in {' ', '\c', '\L'}: inc i
+  result = s.substr(i)
+
+template `--`*(key, val: untyped) = switch(astToStr(key), strip astToStr(val))
+template `--`*(key: untyped) = switch(astToStr(key), "")
+
+type
+  ScriptMode* {.pure.} = enum
+    Silent,
+    Verbose,
+    Whatif
+
+var
+  mode*: ScriptMode ## Set this to influence how mkDir, rmDir, rmFile etc.
+                    ## behave
+
+template checkOsError =
+  let err = getOsError()
+  if err.len > 0: raise newException(OSError, err)
+
+template log(msg: string, body: untyped) =
+  if mode == ScriptMode.Verbose or mode == ScriptMode.Whatif:
+    echo "[NimScript] ", msg
+  if mode != ScriptMode.WhatIf:
+    body
+
+proc rmDir*(dir: string) {.raises: [OSError].} =
+  log "rmDir: " & dir:
+    removeDir dir
+    checkOsError()
+
+proc rmFile*(dir: string) {.raises: [OSError].} =
+  log "rmFile: " & dir:
+    removeFile dir
+    checkOsError()
+
+proc mkDir*(dir: string) {.raises: [OSError].} =
+  log "mkDir: " & dir:
+    createDir dir
+    checkOsError()
+
+proc mvFile*(`from`, to: string) {.raises: [OSError].} =
+  log "mvFile: " & `from` & ", " & to:
+    moveFile `from`, to
+    checkOsError()
+
+proc exec*(command: string, input = "", cache = "") =
+  ## Executes an external process.
+  log "exec: " & command:
+    echo staticExec(command, input, cache)
+
+proc put*(key, value: string) =
+  ## Sets a configuration 'key' like 'gcc.options.always' to its value.
+  builtin
+
+proc get*(key: string): string =
+  ## Retrieves a configuration 'key' like 'gcc.options.always'.
+  builtin
+
+proc exists*(key: string): bool =
+  ## Checks for the existance of a configuration 'key'
+  ## like 'gcc.options.always'.
+  builtin
+
+proc nimcacheDir*(): string =
+  ## Retrieves the location of 'nimcache'.
+  builtin
+
+proc thisDir*(): string =
+  ## Retrieves the location of the current ``nims`` script file.
+  builtin
+
+proc cd*(dir: string) {.raises: [OSError].} =
+  ## Changes the current directory.
+  ##
+  ## The change is permanent for the rest of the execution, since this is just
+  ## a shortcut for `os.setCurrentDir()
+  ## <http://nim-lang.org/os.html#setCurrentDir,string>`_ . Use the `withDir()
+  ## <#withDir>`_ template if you want to perform a temporary change only.
+  setCurrentDir(dir)
+  checkOsError()
+
+template withDir*(dir: string; body: untyped): untyped =
+  ## Changes the current directory temporarily.
+  ##
+  ## If you need a permanent change, use the `cd() <#cd>`_ proc. Usage example:
+  ##
+  ## .. code-block:: nimrod
+  ##   withDir "foo":
+  ##     # inside foo
+  ##   #back to last dir
+  var curDir = getCurrentDir()
+  try:
+    cd(dir)
+    body
+  finally:
+    cd(curDir)
+
+template `==?`(a, b: string): bool = cmpIgnoreStyle(a, b) == 0
+
+proc writeTask(name, desc: string) =
+  if desc.len > 0:
+    var spaces = " "
+    for i in 0 ..< 20 - name.len: spaces.add ' '
+    echo name, spaces, desc
+
+template task*(name: untyped; description: string; body: untyped): untyped =
+  ## Defines a task. Hidden tasks are supported via an empty description.
+  proc `name Task`() = body
+
+  let cmd = getCommand()
+  if cmd.len == 0 or cmd ==? "help":
+    setCommand "help"
+    writeTask(astToStr(name), description)
+  elif cmd ==? astToStr(name):
+    setCommand "nop"
+    `name Task`()
+
+var
+  packageName* = ""
+  version*, author*, description*, license*, srcdir*,
+    binDir*, backend*: string
+
+  skipDirs*, skipFiles*, skipExt*, installDirs*, installFiles*,
+    installExt*, bin*: seq[string]
+  requiresData*: seq[string]
+
+proc requires*(deps: varargs[string]) =
+  for d in deps: requiresData.add(d)
diff --git a/lib/system/sysio.nim b/lib/system/sysio.nim
index 5464ee126..f51354da8 100644
--- a/lib/system/sysio.nim
+++ b/lib/system/sysio.nim
@@ -31,26 +31,6 @@ proc fprintf(f: File, frmt: cstring) {.importc: "fprintf",
 proc strlen(c: cstring): int {.
   importc: "strlen", header: "<string.h>", tags: [].}
 
-when defined(posix):
-  proc getc_unlocked(stream: File): cint {.importc: "getc_unlocked",
-    header: "<stdio.h>", tags: [ReadIOEffect].}
-
-  proc flockfile(stream: File) {.importc: "flockfile", header: "<stdio.h>",
-    tags: [ReadIOEffect].}
-
-  proc funlockfile(stream: File) {.importc: "funlockfile", header: "<stdio.h>",
-    tags: [ReadIOEffect].}
-elif false:
-  # doesn't work on Windows yet:
-  proc getc_unlocked(stream: File): cint {.importc: "_fgetc_nolock",
-    header: "<stdio.h>", tags: [ReadIOEffect].}
-
-  proc flockfile(stream: File) {.importc: "_lock_file", header: "<stdio.h>",
-    tags: [ReadIOEffect].}
-
-  proc funlockfile(stream: File) {.importc: "_unlock_file", header: "<stdio.h>",
-    tags: [ReadIOEffect].}
-
 # C routine that is used here:
 proc fread(buf: pointer, size, n: int, f: File): int {.
   importc: "fread", header: "<stdio.h>", tags: [ReadIOEffect].}
@@ -59,6 +39,10 @@ proc fseek(f: File, offset: clong, whence: int): int {.
 proc ftell(f: File): int {.importc: "ftell", header: "<stdio.h>", tags: [].}
 proc setvbuf(stream: File, buf: pointer, typ, size: cint): cint {.
   importc, header: "<stdio.h>", tags: [].}
+proc memchr(s: pointer, c: cint, n: csize): pointer {.
+  importc: "memchr", header: "<string.h>", tags: [].}
+proc memset(s: pointer, c: cint, n: csize) {.
+  header: "<string.h>", importc: "memset", tags: [].}
 
 {.push stackTrace:off, profiler:off.}
 proc write(f: File, c: cstring) = fputs(c, f)
@@ -86,40 +70,35 @@ const
 proc raiseEIO(msg: string) {.noinline, noreturn.} =
   sysFatal(IOError, msg)
 
-when declared(getc_unlocked):
-  proc readLine(f: File, line: var TaintedString): bool =
-    setLen(line.string, 0) # reuse the buffer!
-    flockfile(f)
-    while true:
-      var c = getc_unlocked(f)
-      if c < 0'i32:
-        if line.len > 0: break
-        else: return false
-      if c == 10'i32: break # LF
-      if c == 13'i32:  # CR
-        c = getc_unlocked(f) # is the next char LF?
-        if c != 10'i32: ungetc(c, f) # no, put the character back
-        break
-      add line.string, chr(int(c))
-    result = true
-    funlockfile(f)
-else:
-  proc readLine(f: File, line: var TaintedString): bool =
-    # of course this could be optimized a bit; but IO is slow anyway...
-    # and it was difficult to get this CORRECT with Ansi C's methods
-    setLen(line.string, 0) # reuse the buffer!
-    while true:
-      var c = fgetc(f)
-      if c < 0'i32:
-        if line.len > 0: break
-        else: return false
-      if c == 10'i32: break # LF
-      if c == 13'i32:  # CR
-        c = fgetc(f) # is the next char LF?
-        if c != 10'i32: ungetc(c, f) # no, put the character back
-        break
-      add line.string, chr(int(c))
-    result = true
+proc readLine(f: File, line: var TaintedString): bool =
+  var pos = 0
+  # Use the currently reserved space for a first try
+  when defined(nimscript):
+    var space = 80
+  else:
+    var space = cast[PGenericSeq](line.string).space
+  line.string.setLen(space)
+
+  while true:
+    # memset to \l so that we can tell how far fgets wrote, even on EOF, where
+    # fgets doesn't append an \l
+    memset(addr line.string[pos], '\l'.ord, space)
+    if fgets(addr line.string[pos], space, f) == nil:
+      line.string.setLen(0)
+      return false
+    let m = memchr(addr line.string[pos], '\l'.ord, space)
+    if m != nil:
+      # \l found: Could be our own or the one by fgets, in any case, we're done
+      let last = cast[ByteAddress](m) - cast[ByteAddress](addr line.string[0])
+      if last > 0 and line.string[last-1] == '\c':
+        line.string.setLen(last-1)
+        return true
+      line.string.setLen(last)
+      return true
+    # No \l found: Increase buffer and read more
+    inc pos, space
+    space = 128 # read in 128 bytes at a time
+    line.string.setLen(pos+space)
 
 proc readLine(f: File): TaintedString =
   result = TaintedString(newStringOfCap(80))
@@ -182,7 +161,10 @@ proc readAllFile(file: File): string =
 proc readAll(file: File): TaintedString =
   # Separate handling needed because we need to buffer when we
   # don't know the overall length of the File.
-  let len = if file != stdin: rawFileSize(file) else: -1
+  when declared(stdin):
+    let len = if file != stdin: rawFileSize(file) else: -1
+  else:
+    let len = rawFileSize(file)
   if len > 0:
     result = readAllFile(file, len).TaintedString
   else:
@@ -216,9 +198,9 @@ proc writeLine[Ty](f: File, x: varargs[Ty, `$`]) =
   for i in items(x): write(f, i)
   write(f, "\n")
 
-
-proc rawEcho(x: string) {.inline, compilerproc.} = write(stdout, x)
-proc rawEchoNL() {.inline, compilerproc.} = write(stdout, "\n")
+when declared(stdout):
+  proc rawEcho(x: string) {.inline, compilerproc.} = write(stdout, x)
+  proc rawEchoNL() {.inline, compilerproc.} = write(stdout, "\n")
 
 # interface to the C procs:
 
diff --git a/lib/system/widestrs.nim b/lib/system/widestrs.nim
index e7b7f3972..77310b289 100644
--- a/lib/system/widestrs.nim
+++ b/lib/system/widestrs.nim
@@ -114,41 +114,47 @@ proc newWideCString*(s: cstring): WideCString =
 proc newWideCString*(s: string): WideCString =
   result = newWideCString(s, s.len)
 
-proc `$`*(w: WideCString, estimate: int): string =
+proc `$`*(w: WideCString, estimate: int, replacement: int = 0xFFFD): string =
   result = newStringOfCap(estimate + estimate shr 2)
 
   var i = 0
   while w[i].int16 != 0'i16:
-    var ch = w[i].int
+    var ch = int(cast[uint16](w[i]))
     inc i
-    if ch >=% UNI_SUR_HIGH_START and ch <=% UNI_SUR_HIGH_END:
+    if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_HIGH_END:
       # If the 16 bits following the high surrogate are in the source buffer...
-      let ch2 = w[i].int
+      let ch2 = int(cast[uint16](w[i]))
+      
       # If it's a low surrogate, convert to UTF32:
-      if ch2 >=% UNI_SUR_LOW_START and ch2 <=% UNI_SUR_LOW_END:
-        ch = ((ch -% UNI_SUR_HIGH_START) shr halfShift) +%
-              (ch2 -% UNI_SUR_LOW_START) +% halfBase
+      if ch2 >= UNI_SUR_LOW_START and ch2 <= UNI_SUR_LOW_END:
+        ch = (((ch and halfMask) shl halfShift) + (ch2 and halfMask)) + halfBase
         inc i
-        
-    if ch <=% 127:
+      else:
+        #invalid UTF-16
+        ch = replacement
+    elif ch >= UNI_SUR_LOW_START and ch <= UNI_SUR_LOW_END:
+      #invalid UTF-16
+      ch = replacement
+      
+    if ch < 0x80:
       result.add chr(ch)
-    elif ch <=% 0x07FF:
-      result.add chr((ch shr 6) or 0b110_00000)
-      result.add chr((ch and ones(6)) or 0b10_000000)
-    elif ch <=% 0xFFFF:
-      result.add chr(ch shr 12 or 0b1110_0000)
-      result.add chr(ch shr 6 and ones(6) or 0b10_0000_00)
-      result.add chr(ch and ones(6) or 0b10_0000_00)
-    elif ch <=% 0x0010FFFF:
-      result.add chr(ch shr 18 or 0b1111_0000)
-      result.add chr(ch shr 12 and ones(6) or 0b10_0000_00)
-      result.add chr(ch shr 6 and ones(6) or 0b10_0000_00)
-      result.add chr(ch and ones(6) or 0b10_0000_00)
+    elif ch < 0x800:
+      result.add chr((ch shr 6) or 0xc0)
+      result.add chr((ch and 0x3f) or 0x80)
+    elif ch < 0x10000:
+      result.add chr((ch shr 12) or 0xe0)
+      result.add chr(((ch shr 6) and 0x3f) or 0x80)
+      result.add chr((ch and 0x3f) or 0x80)
+    elif ch <= 0x10FFFF:
+      result.add chr((ch shr 18) or 0xf0)
+      result.add chr(((ch shr 12) and 0x3f) or 0x80)
+      result.add chr(((ch shr 6) and 0x3f) or 0x80)
+      result.add chr((ch and 0x3f) or 0x80)
     else:
-      # replacement char:
+      # replacement char(in case user give very large number):
       result.add chr(0xFFFD shr 12 or 0b1110_0000)
       result.add chr(0xFFFD shr 6 and ones(6) or 0b10_0000_00)
       result.add chr(0xFFFD and ones(6) or 0b10_0000_00)
-
+      
 proc `$`*(s: WideCString): string =
   result = s $ 80