57 files changed, 3382 insertions, 1155 deletions
diff --git a/compiler/ast.nim b/compiler/ast.nim
index fc21235bc..0e4700065 100755
--- a/compiler/ast.nim
+++ b/compiler/ast.nim
@@ -520,7 +520,7 @@ type
   TNodeSeq* = seq[PNode]
   PType* = ref TType
   PSym* = ref TSym
-  TNode*{.acyclic, final.} = object # on a 32bit machine, this takes 32 bytes
+  TNode*{.final.} = object # on a 32bit machine, this takes 32 bytes
     typ*: PType
     comment*: string
     info*: TLineInfo
@@ -579,6 +579,9 @@ type
     flags*: TLocFlags         # location's flags
     t*: PType                 # type of location
     r*: PRope                 # rope value of location (code generators)
+    heapRoot*: PRope          # keeps track of the enclosing heap object that
+                              # owns this location (required by GC algorithms
+                              # employing heap snapshots or sliding views)
     a*: int                   # location's "address", i.e. slot for temporaries
 
   # ---------------- end of backend information ------------------------------
@@ -590,11 +593,33 @@ type
     generated*: bool          # needed for the backends:
     name*: PRope
     path*: PNode              # can be a string literal!
-    
-
+  
+  TInstantiation* = object
+    sym*: PSym
+    concreteTypes*: seq[PType]
+    usedBy*: seq[int32]       # list of modules using the generic
+                              # needed in caas mode for purging the cache
+                              # XXX: it's possible to switch to a 
+                              # simple ref count here
+  
+  PInstantiation* = ref TInstantiation
+      
   PLib* = ref TLib
   TSym* = object of TIdObj
-    kind*: TSymKind
+    # proc and type instantiations are cached in the generic symbol
+    case kind*: TSymKind
+    of skType:
+      typeInstCache*: seq[PType]
+    of routineKinds:
+      procInstCache*: seq[PInstantiation]
+    of skModule:
+      # modules keep track of the generic symbols they use from other modules.
+      # this is because in incremental compilation, when a module is about to
+      # be replaced with a newer version, we must decrement the usage count
+      # of all previously used generics.
+      usedGenerics*: seq[PInstantiation]
+    else: nil
+
     magic*: TMagic
     typ*: PType
     name*: PIdent
@@ -618,8 +643,9 @@ type
                               # for a conditional:
                               # 1 iff the symbol is defined, else 0
                               # (or not in symbol table)
-                              # for modules, a unique index corresponding
-                              # to the order of compilation 
+                              # for modules, an unique index corresponding
+                              # to the module's fileIdx
+
     offset*: int              # offset of record field
     loc*: TLoc
     annex*: PLib              # additional fields (seldom used, so we use a
@@ -827,6 +853,14 @@ proc linkTo*(s: PSym, t: PType): PSym {.discardable.} =
   s.typ = t
   result = s
 
+template fileIdx*(c: PSym): int32 =
+  # XXX: this should be used only on module symbols
+  c.position.int32
+
+template filename*(c: PSym): string =
+  # XXX: this should be used only on module symbols
+  c.position.int32.toFileName
+
 proc appendToModule*(m: PSym, n: PNode) =
   ## The compiler will use this internally to add nodes that will be
   ## appended to the module after the sem pass
diff --git a/compiler/ccgcalls.nim b/compiler/ccgcalls.nim
index 53c371b3d..71e4fe39b 100644
--- a/compiler/ccgcalls.nim
+++ b/compiler/ccgcalls.nim
@@ -20,12 +20,12 @@ proc hasNoInit(call: PNode): bool {.inline.} =
 
 proc fixupCall(p: BProc, le, ri: PNode, d: var TLoc,
                callee, params: PRope) =
-  var pl = con(callee, "(".toRope, params)
+  var pl = con(callee, ~"(", params)
   # getUniqueType() is too expensive here:
   var typ = skipTypes(ri.sons[0].typ, abstractInst)
   if typ.sons[0] != nil:
     if isInvalidReturnType(typ.sons[0]):
-      if params != nil: pl.app(", ")
+      if params != nil: pl.app(~", ")
       # beware of 'result = p(result)'. We may need to allocate a temporary:
       if d.k in {locTemp, locNone} or not leftAppearsOnRightSide(le, ri):
         # Great, we can use 'd':
@@ -34,17 +34,17 @@ proc fixupCall(p: BProc, le, ri: PNode, d: var TLoc,
           # reset before pass as 'result' var:
           resetLoc(p, d)
         app(pl, addrLoc(d))
-        appf(pl, ");$n")
+        app(pl, ~");$n")
         line(p, cpsStmts, pl)
       else:
         var tmp: TLoc
         getTemp(p, typ.sons[0], tmp)
         app(pl, addrLoc(tmp))
-        appf(pl, ");$n")
+        app(pl, ~");$n")
         line(p, cpsStmts, pl)
         genAssignment(p, d, tmp, {}) # no need for deep copying
     else:
-      app(pl, ")")
+      app(pl, ~")")
       if d.k == locNone: getTemp(p, typ.sons[0], d)
       assert(d.t != nil)        # generate an assignment to d:
       var list: TLoc
@@ -52,7 +52,7 @@ proc fixupCall(p: BProc, le, ri: PNode, d: var TLoc,
       list.r = pl
       genAssignment(p, d, list, {}) # no need for deep copying
   else:
-    appf(pl, ");$n")
+    app(pl, ~");$n")
     line(p, cpsStmts, pl)
 
 proc isInCurrentFrame(p: BProc, n: PNode): bool =
@@ -130,7 +130,7 @@ proc genPrefixCall(p: BProc, le, ri: PNode, d: var TLoc) =
   var length = sonsLen(ri)
   for i in countup(1, length - 1):
     if ri.sons[i].typ.isCompileTimeOnly: continue
-    if params != nil: app(params, ", ")
+    if params != nil: app(params, ~", ")
     if i < sonsLen(typ):
       assert(typ.n.sons[i].kind == nkSym)
       app(params, genArg(p, ri.sons[i], typ.n.sons[i].sym))
@@ -144,7 +144,7 @@ proc genClosureCall(p: BProc, le, ri: PNode, d: var TLoc) =
     result = getClosureType(p.module, t, clHalf)
 
   proc addComma(r: PRope): PRope =
-    result = if r == nil: r else: con(r, ", ")
+    result = if r == nil: r else: con(r, ~", ")
 
   const CallPattern = "$1.ClEnv? $1.ClPrc($3$1.ClEnv) : (($4)($1.ClPrc))($2)"
   var op: TLoc
@@ -161,7 +161,7 @@ proc genClosureCall(p: BProc, le, ri: PNode, d: var TLoc) =
       app(pl, genArg(p, ri.sons[i], typ.n.sons[i].sym))
     else:
       app(pl, genArgNoParam(p, ri.sons[i]))
-    if i < length - 1: app(pl, ", ")
+    if i < length - 1: app(pl, ~", ")
   
   template genCallPattern =
     lineF(p, cpsStmts, CallPattern & ";$n", op.r, pl, pl.addComma, rawProc)
@@ -169,7 +169,7 @@ proc genClosureCall(p: BProc, le, ri: PNode, d: var TLoc) =
   let rawProc = getRawProcType(p, typ)
   if typ.sons[0] != nil:
     if isInvalidReturnType(typ.sons[0]):
-      if sonsLen(ri) > 1: app(pl, ", ")
+      if sonsLen(ri) > 1: app(pl, ~", ")
       # beware of 'result = p(result)'. We may need to allocate a temporary:
       if d.k in {locTemp, locNone} or not leftAppearsOnRightSide(le, ri):
         # Great, we can use 'd':
@@ -208,12 +208,12 @@ proc genInfixCall(p: BProc, le, ri: PNode, d: var TLoc) =
   var param = typ.n.sons[1].sym
   app(pl, genArg(p, ri.sons[1], param))
   
-  if skipTypes(param.typ, {tyGenericInst}).kind == tyPtr: app(pl, "->")
-  else: app(pl, ".")
+  if skipTypes(param.typ, {tyGenericInst}).kind == tyPtr: app(pl, ~"->")
+  else: app(pl, ~".")
   app(pl, op.r)
   var params: PRope
   for i in countup(2, length - 1):
-    if params != nil: params.app(", ")
+    if params != nil: params.app(~", ")
     assert(sonsLen(typ) == sonsLen(typ.n))
     if i < sonsLen(typ):
       assert(typ.n.sons[i].kind == nkSym)
@@ -226,7 +226,7 @@ proc genNamedParamCall(p: BProc, ri: PNode, d: var TLoc) =
   # generates a crappy ObjC call
   var op, a: TLoc
   initLocExpr(p, ri.sons[0], op)
-  var pl = toRope"["
+  var pl = ~"["
   # getUniqueType() is too expensive here:
   var typ = skipTypes(ri.sons[0].typ, abstractInst)
   assert(typ.kind == tyProc)
@@ -235,10 +235,10 @@ proc genNamedParamCall(p: BProc, ri: PNode, d: var TLoc) =
   
   if length > 1:
     app(pl, genArg(p, ri.sons[1], typ.n.sons[1].sym))
-    app(pl, " ")
+    app(pl, ~" ")
   app(pl, op.r)
   if length > 2:
-    app(pl, ": ")
+    app(pl, ~": ")
     app(pl, genArg(p, ri.sons[2], typ.n.sons[2].sym))
   for i in countup(3, length-1):
     assert(sonsLen(typ) == sonsLen(typ.n))
@@ -246,30 +246,30 @@ proc genNamedParamCall(p: BProc, ri: PNode, d: var TLoc) =
       InternalError(ri.info, "varargs for objective C method?")
     assert(typ.n.sons[i].kind == nkSym)
     var param = typ.n.sons[i].sym
-    app(pl, " ")
+    app(pl, ~" ")
     app(pl, param.name.s)
-    app(pl, ": ")
+    app(pl, ~": ")
     app(pl, genArg(p, ri.sons[i], param))
   if typ.sons[0] != nil:
     if isInvalidReturnType(typ.sons[0]):
-      if sonsLen(ri) > 1: app(pl, " ")
+      if sonsLen(ri) > 1: app(pl, ~" ")
       # beware of 'result = p(result)'. We always allocate a temporary:
       if d.k in {locTemp, locNone}:
         # We already got a temp. Great, special case it:
         if d.k == locNone: getTemp(p, typ.sons[0], d)
-        app(pl, "Result: ")
+        app(pl, ~"Result: ")
         app(pl, addrLoc(d))
-        appf(pl, "];$n")
+        app(pl, ~"];$n")
         line(p, cpsStmts, pl)
       else:
         var tmp: TLoc
         getTemp(p, typ.sons[0], tmp)
         app(pl, addrLoc(tmp))
-        appf(pl, "];$n")
+        app(pl, ~"];$n")
         line(p, cpsStmts, pl)
         genAssignment(p, d, tmp, {}) # no need for deep copying
     else:
-      app(pl, "]")
+      app(pl, ~"]")
       if d.k == locNone: getTemp(p, typ.sons[0], d)
       assert(d.t != nil)        # generate an assignment to d:
       var list: TLoc
@@ -277,7 +277,7 @@ proc genNamedParamCall(p: BProc, ri: PNode, d: var TLoc) =
       list.r = pl
       genAssignment(p, d, list, {}) # no need for deep copying
   else:
-    appf(pl, "];$n")
+    app(pl, ~"];$n")
     line(p, cpsStmts, pl)
 
 proc genCall(p: BProc, e: PNode, d: var TLoc) =
diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim
index 7781040d9..20636f122 100755
--- a/compiler/ccgexprs.nim
+++ b/compiler/ccgexprs.nim
@@ -19,15 +19,15 @@ proc intLiteral(i: biggestInt): PRope =
     result = toRope(i)
   elif i == low(int32):
     # Nimrod has the same bug for the same reasons :-)
-    result = toRope("(-2147483647 -1)")
+    result = ~"(-2147483647 -1)"
   elif i > low(int64):
-    result = ropef("IL64($1)", [toRope(i)])
+    result = rfmt(nil, "IL64($1)", toRope(i))
   else:
-    result = toRope("(IL64(-9223372036854775807) - IL64(1))")
+    result = ~"(IL64(-9223372036854775807) - IL64(1))"
 
 proc int32Literal(i: Int): PRope =
   if i == int(low(int32)):
-    result = toRope("(-2147483647 -1)")
+    result = ~"(-2147483647 -1)"
   else:
     result = toRope(i)
 
@@ -57,8 +57,8 @@ proc genLiteral(p: BProc, n: PNode, ty: PType): PRope =
       else:
         result = intLiteral(n.intVal)
     of tyBool:
-      if n.intVal != 0: result = toRope("NIM_TRUE")
-      else: result = toRope("NIM_FALSE")
+      if n.intVal != 0: result = ~"NIM_TRUE"
+      else: result = ~"NIM_FALSE"
     else:
       result = ropef("(($1) $2)", [getTypeDesc(p.module,
           skipTypes(ty, abstractVarRange)), intLiteral(n.intVal)])
@@ -165,7 +165,7 @@ proc getStorageLoc(n: PNode): TStorageLoc =
 
 proc genRefAssign(p: BProc, dest, src: TLoc, flags: TAssignmentFlags) =
   if dest.s == OnStack or optRefcGC notin gGlobalOptions:
-    lineF(p, cpsStmts, "$1 = $2;$n", [rdLoc(dest), rdLoc(src)])
+    linefmt(p, cpsStmts, "$1 = $2;$n", rdLoc(dest), rdLoc(src))
     if needToKeepAlive in flags: keepAlive(p, dest)
   elif dest.s == OnHeap:
     # location is on heap
@@ -185,14 +185,14 @@ proc genRefAssign(p: BProc, dest, src: TLoc, flags: TAssignmentFlags) =
     #      lineF(p, cpsStmts, 'if ($1) nimGCunref($1);$n', [rdLoc(dest)])
     #    lineF(p, cpsStmts, '$1 = $2;$n', [rdLoc(dest), rdLoc(src)])
     if canFormAcycle(dest.t):
-      lineCg(p, cpsStmts, "#asgnRef((void**) $1, $2);$n",
-           [addrLoc(dest), rdLoc(src)])
+      linefmt(p, cpsStmts, "#asgnRef((void**) $1, $2);$n",
+              addrLoc(dest), rdLoc(src))
     else:
-      lineCg(p, cpsStmts, "#asgnRefNoCycle((void**) $1, $2);$n",
-           [addrLoc(dest), rdLoc(src)])
+      linefmt(p, cpsStmts, "#asgnRefNoCycle((void**) $1, $2);$n",
+              addrLoc(dest), rdLoc(src))
   else:
-    lineCg(p, cpsStmts, "#unsureAsgnRef((void**) $1, $2);$n",
-         [addrLoc(dest), rdLoc(src)])
+    linefmt(p, cpsStmts, "#unsureAsgnRef((void**) $1, $2);$n",
+            addrLoc(dest), rdLoc(src))
     if needToKeepAlive in flags: keepAlive(p, dest)
 
 proc genGenericAsgn(p: BProc, dest, src: TLoc, flags: TAssignmentFlags) =
@@ -205,23 +205,23 @@ proc genGenericAsgn(p: BProc, dest, src: TLoc, flags: TAssignmentFlags) =
   if needToCopy notin flags or 
       tfShallow in skipTypes(dest.t, abstractVarRange).flags:
     if dest.s == OnStack or optRefcGC notin gGlobalOptions:
-      lineCg(p, cpsStmts,
+      linefmt(p, cpsStmts,
            "memcpy((void*)$1, (NIM_CONST void*)$2, sizeof($3));$n",
-           [addrLoc(dest), addrLoc(src), rdLoc(dest)])
+           addrLoc(dest), addrLoc(src), rdLoc(dest))
       if needToKeepAlive in flags: keepAlive(p, dest)
     else:
-      lineCg(p, cpsStmts, "#genericShallowAssign((void*)$1, (void*)$2, $3);$n",
-           [addrLoc(dest), addrLoc(src), genTypeInfo(p.module, dest.t)])
+      linefmt(p, cpsStmts, "#genericShallowAssign((void*)$1, (void*)$2, $3);$n",
+              addrLoc(dest), addrLoc(src), genTypeInfo(p.module, dest.t))
   else:
-    lineCg(p, cpsStmts, "#genericAssign((void*)$1, (void*)$2, $3);$n",
-         [addrLoc(dest), addrLoc(src), genTypeInfo(p.module, dest.t)])
+    linefmt(p, cpsStmts, "#genericAssign((void*)$1, (void*)$2, $3);$n",
+            addrLoc(dest), addrLoc(src), genTypeInfo(p.module, dest.t))
 
 proc genAssignment(p: BProc, dest, src: TLoc, flags: TAssignmentFlags) =
   # This function replaces all other methods for generating
   # the assignment operation in C.
   if src.t != nil and src.t.kind == tyPtr:
     # little HACK to support the new 'var T' as return type:
-    lineCg(p, cpsStmts, "$1 = $2;$n", [rdLoc(dest), rdLoc(src)])
+    linefmt(p, cpsStmts, "$1 = $2;$n", rdLoc(dest), rdLoc(src))
     return
   var ty = skipTypes(dest.t, abstractRange)
   case ty.kind
@@ -231,59 +231,59 @@ proc genAssignment(p: BProc, dest, src: TLoc, flags: TAssignmentFlags) =
     if needToCopy notin flags:
       genRefAssign(p, dest, src, flags)
     else:
-      lineCg(p, cpsStmts, "#genericSeqAssign($1, $2, $3);$n",
-           [addrLoc(dest), rdLoc(src), genTypeInfo(p.module, dest.t)])
+      linefmt(p, cpsStmts, "#genericSeqAssign($1, $2, $3);$n",
+              addrLoc(dest), rdLoc(src), genTypeInfo(p.module, dest.t))
   of tyString:
     if needToCopy notin flags:
       genRefAssign(p, dest, src, flags)
     else:
       if dest.s == OnStack or optRefcGC notin gGlobalOptions:
-        lineCg(p, cpsStmts, "$1 = #copyString($2);$n", [dest.rdLoc, src.rdLoc])
+        linefmt(p, cpsStmts, "$1 = #copyString($2);$n", dest.rdLoc, src.rdLoc)
         if needToKeepAlive in flags: keepAlive(p, dest)
       elif dest.s == OnHeap:
         # we use a temporary to care for the dreaded self assignment:
         var tmp: TLoc
         getTemp(p, ty, tmp)
-        lineCg(p, cpsStmts, "$3 = $1; $1 = #copyStringRC1($2);$n",
-             [dest.rdLoc, src.rdLoc, tmp.rdLoc])
-        lineCg(p, cpsStmts, "if ($1) #nimGCunrefNoCycle($1);$n", tmp.rdLoc)
+        linefmt(p, cpsStmts, "$3 = $1; $1 = #copyStringRC1($2);$n",
+                dest.rdLoc, src.rdLoc, tmp.rdLoc)
+        linefmt(p, cpsStmts, "if ($1) #nimGCunrefNoCycle($1);$n", tmp.rdLoc)
       else:
-        lineCg(p, cpsStmts, "#unsureAsgnRef((void**) $1, #copyString($2));$n",
-             [addrLoc(dest), rdLoc(src)])
+        linefmt(p, cpsStmts, "#unsureAsgnRef((void**) $1, #copyString($2));$n",
+               addrLoc(dest), rdLoc(src))
         if needToKeepAlive in flags: keepAlive(p, dest)
   of tyTuple, tyObject, tyProc:
     # XXX: check for subtyping?
     if needsComplexAssignment(dest.t):
       genGenericAsgn(p, dest, src, flags)
     else:
-      lineCg(p, cpsStmts, "$1 = $2;$n", [rdLoc(dest), rdLoc(src)])
+      linefmt(p, cpsStmts, "$1 = $2;$n", rdLoc(dest), rdLoc(src))
   of tyArray, tyArrayConstr:
     if needsComplexAssignment(dest.t):
       genGenericAsgn(p, dest, src, flags)
     else:
-      lineCg(p, cpsStmts,
+      linefmt(p, cpsStmts,
            "memcpy((void*)$1, (NIM_CONST void*)$2, sizeof($1));$n",
-           [rdLoc(dest), rdLoc(src)])
+           rdLoc(dest), rdLoc(src))
   of tyOpenArray, tyVarargs:
     # open arrays are always on the stack - really? What if a sequence is
     # passed to an open array?
     if needsComplexAssignment(dest.t):
-      lineCg(p, cpsStmts,     # XXX: is this correct for arrays?
+      linefmt(p, cpsStmts,     # XXX: is this correct for arrays?
            "#genericAssignOpenArray((void*)$1, (void*)$2, $1Len0, $3);$n",
-           [addrLoc(dest), addrLoc(src), genTypeInfo(p.module, dest.t)])
+           addrLoc(dest), addrLoc(src), genTypeInfo(p.module, dest.t))
     else:
-      lineCg(p, cpsStmts,
+      linefmt(p, cpsStmts,
            "memcpy((void*)$1, (NIM_CONST void*)$2, sizeof($1[0])*$1Len0);$n",
-           [rdLoc(dest), rdLoc(src)])
+           rdLoc(dest), rdLoc(src))
   of tySet:
     if mapType(ty) == ctArray:
-      lineCg(p, cpsStmts, "memcpy((void*)$1, (NIM_CONST void*)$2, $3);$n",
-           [rdLoc(dest), rdLoc(src), toRope(getSize(dest.t))])
+      linefmt(p, cpsStmts, "memcpy((void*)$1, (NIM_CONST void*)$2, $3);$n",
+              rdLoc(dest), rdLoc(src), toRope(getSize(dest.t)))
     else:
-      lineCg(p, cpsStmts, "$1 = $2;$n", [rdLoc(dest), rdLoc(src)])
+      linefmt(p, cpsStmts, "$1 = $2;$n", rdLoc(dest), rdLoc(src))
   of tyPtr, tyPointer, tyChar, tyBool, tyEnum, tyCString,
      tyInt..tyUInt64, tyRange, tyVar:
-    lineCg(p, cpsStmts, "$1 = $2;$n", [rdLoc(dest), rdLoc(src)])
+    linefmt(p, cpsStmts, "$1 = $2;$n", rdLoc(dest), rdLoc(src))
   else: InternalError("genAssignment(" & $ty.kind & ')')
 
 proc expr(p: BProc, e: PNode, d: var TLoc)
@@ -322,7 +322,7 @@ proc binaryStmt(p: BProc, e: PNode, d: var TLoc, frmt: string) =
   if d.k != locNone: InternalError(e.info, "binaryStmt")
   InitLocExpr(p, e.sons[1], d)
   InitLocExpr(p, e.sons[2], b)
-  lineCg(p, cpsStmts, frmt, [rdLoc(d), rdLoc(b)])
+  lineCg(p, cpsStmts, frmt, rdLoc(d), rdLoc(b))
 
 proc unaryStmt(p: BProc, e: PNode, d: var TLoc, frmt: string) =
   var a: TLoc
@@ -387,12 +387,12 @@ proc binaryArithOverflow(p: BProc, e: PNode, d: var TLoc, m: TMagic) =
     else:
       storage = getTypeDesc(p.module, t)
     var tmp = getTempName()
-    lineCg(p, cpsLocals, "$1 $2;$n", [storage, tmp])
-    lineCg(p, cpsStmts, "$1 = #$2($3, $4);$n", [tmp, toRope(prc[m]), 
-                                             rdLoc(a), rdLoc(b)])
+    linefmt(p, cpsLocals, "$1 $2;$n", storage, tmp)
+    lineCg(p, cpsStmts, "$1 = #$2($3, $4);$n",
+                         tmp, toRope(prc[m]), rdLoc(a), rdLoc(b))
     if size < platform.IntSize or t.kind in {tyRange, tyEnum, tySet}:
-      lineCg(p, cpsStmts, "if ($1 < $2 || $1 > $3) #raiseOverflow();$n",
-           [tmp, intLiteral(firstOrd(t)), intLiteral(lastOrd(t))])
+      linefmt(p, cpsStmts, "if ($1 < $2 || $1 > $3) #raiseOverflow();$n",
+              tmp, intLiteral(firstOrd(t)), intLiteral(lastOrd(t)))
     putIntoDest(p, d, e.typ, ropef("(NI$1)($2)", [toRope(getSize(t)*8), tmp]))
 
 proc unaryArithOverflow(p: BProc, e: PNode, d: var TLoc, m: TMagic) =
@@ -409,8 +409,8 @@ proc unaryArithOverflow(p: BProc, e: PNode, d: var TLoc, m: TMagic) =
   InitLocExpr(p, e.sons[1], a)
   t = skipTypes(e.typ, abstractRange)
   if optOverflowCheck in p.options:
-    lineCg(p, cpsStmts, "if ($1 == $2) #raiseOverflow();$n",
-         [rdLoc(a), intLiteral(firstOrd(t))])
+    linefmt(p, cpsStmts, "if ($1 == $2) #raiseOverflow();$n",
+            rdLoc(a), intLiteral(firstOrd(t)))
   putIntoDest(p, d, e.typ, ropef(opr[m], [rdLoc(a), toRope(getSize(t) * 8)]))
 
 proc binaryArith(p: BProc, e: PNode, d: var TLoc, op: TMagic) =
@@ -562,10 +562,15 @@ proc genAddr(p: BProc, e: PNode, d: var TLoc) =
     InitLocExpr(p, e.sons[0], a)
     putIntoDest(p, d, e.typ, addrLoc(a))
 
+template inheritLocation(d: var TLoc, a: TLoc) =
+  if d.k == locNone: d.s = a.s
+  if d.heapRoot == nil:
+    d.heapRoot = if a.heapRoot != nil: a.heapRoot else: a.r
+  
 proc genRecordFieldAux(p: BProc, e: PNode, d, a: var TLoc): PType =
   initLocExpr(p, e.sons[0], a)
   if e.sons[1].kind != nkSym: InternalError(e.info, "genRecordFieldAux")
-  if d.k == locNone: d.s = a.s
+  d.inheritLocation(a)
   discard getTypeDesc(p.module, a.t) # fill the record's fields.loc
   result = a.t
 
@@ -598,7 +603,7 @@ proc genTupleElem(p: BProc, e: PNode, d: var TLoc) =
     a: TLoc
     i: int
   initLocExpr(p, e.sons[0], a)
-  if d.k == locNone: d.s = a.s
+  d.inheritLocation(a)
   discard getTypeDesc(p.module, a.t) # fill the record's fields.loc
   var ty = a.t
   var r = rdLoc(a)
@@ -655,14 +660,14 @@ proc genCheckedRecordField(p: BProc, e: PNode, d: var TLoc) =
       if id == gBackendId: strLit = getStrLit(p.module, field.name.s)
       else: strLit = con("TMP", toRope(id))
       if op.magic == mNot:
-        lineCg(p, cpsStmts,
-             "if ($1) #raiseFieldError(((#NimStringDesc*) &$2));$n",
-             [rdLoc(test), strLit])
+        linefmt(p, cpsStmts,
+                "if ($1) #raiseFieldError(((#NimStringDesc*) &$2));$n",
+                rdLoc(test), strLit)
       else:
-        lineCg(p, cpsStmts,
-             "if (!($1)) #raiseFieldError(((#NimStringDesc*) &$2));$n",
-             [rdLoc(test), strLit])
-    appf(r, ".$1", [field.loc.r])
+        linefmt(p, cpsStmts,
+                "if (!($1)) #raiseFieldError(((#NimStringDesc*) &$2));$n",
+                rdLoc(test), strLit)
+    app(r, rfmt(nil, ".$1", field.loc.r))
     putIntoDest(p, d, field.typ, r)
   else:
     genRecordField(p, e.sons[0], d)
@@ -679,18 +684,18 @@ proc genArrayElem(p: BProc, e: PNode, d: var TLoc) =
       # semantic pass has already checked for const index expressions
       if firstOrd(ty) == 0:
         if (firstOrd(b.t) < firstOrd(ty)) or (lastOrd(b.t) > lastOrd(ty)):
-          lineCg(p, cpsStmts, "if ((NU)($1) > (NU)($2)) #raiseIndexError();$n",
-               [rdCharLoc(b), intLiteral(lastOrd(ty))])
+          linefmt(p, cpsStmts, "if ((NU)($1) > (NU)($2)) #raiseIndexError();$n",
+                  rdCharLoc(b), intLiteral(lastOrd(ty)))
       else:
-        lineCg(p, cpsStmts, "if ($1 < $2 || $1 > $3) #raiseIndexError();$n",
-             [rdCharLoc(b), first, intLiteral(lastOrd(ty))])
+        linefmt(p, cpsStmts, "if ($1 < $2 || $1 > $3) #raiseIndexError();$n",
+                rdCharLoc(b), first, intLiteral(lastOrd(ty)))
     else:
       let idx = getOrdValue(e.sons[1])
       if idx < firstOrd(ty) or idx > lastOrd(ty):
         localError(e.info, errIndexOutOfBounds)
-  if d.k == locNone: d.s = a.s
+  d.inheritLocation(a)
   putIntoDest(p, d, elemType(skipTypes(ty, abstractVar)),
-              ropef("$1[($2)- $3]", [rdLoc(a), rdCharLoc(b), first]))
+              rfmt(nil, "$1[($2)- $3]", rdLoc(a), rdCharLoc(b), first))
 
 proc genCStringElem(p: BProc, e: PNode, d: var TLoc) =
   var a, b: TLoc
@@ -699,18 +704,18 @@ proc genCStringElem(p: BProc, e: PNode, d: var TLoc) =
   var ty = skipTypes(a.t, abstractVarRange)
   if d.k == locNone: d.s = a.s
   putIntoDest(p, d, elemType(skipTypes(ty, abstractVar)),
-              ropef("$1[$2]", [rdLoc(a), rdCharLoc(b)]))
+              rfmt(nil, "$1[$2]", rdLoc(a), rdCharLoc(b)))
 
 proc genOpenArrayElem(p: BProc, e: PNode, d: var TLoc) =
   var a, b: TLoc
   initLocExpr(p, e.sons[0], a)
   initLocExpr(p, e.sons[1], b) # emit range check:
   if optBoundsCheck in p.options:
-    lineCg(p, cpsStmts, "if ((NU)($1) >= (NU)($2Len0)) #raiseIndexError();$n",
-         [rdLoc(b), rdLoc(a)]) # BUGFIX: ``>=`` and not ``>``!
+    linefmt(p, cpsStmts, "if ((NU)($1) >= (NU)($2Len0)) #raiseIndexError();$n",
+            rdLoc(b), rdLoc(a)) # BUGFIX: ``>=`` and not ``>``!
   if d.k == locNone: d.s = a.s
   putIntoDest(p, d, elemType(skipTypes(a.t, abstractVar)),
-              ropef("$1[$2]", [rdLoc(a), rdCharLoc(b)]))
+              rfmt(nil, "$1[$2]", rdLoc(a), rdCharLoc(b)))
 
 proc genSeqElem(p: BPRoc, e: PNode, d: var TLoc) =
   var a, b: TLoc
@@ -721,18 +726,19 @@ proc genSeqElem(p: BPRoc, e: PNode, d: var TLoc) =
     ty = skipTypes(ty.sons[0], abstractVarRange) # emit range check:
   if optBoundsCheck in p.options:
     if ty.kind == tyString:
-      lineCg(p, cpsStmts,
+      linefmt(p, cpsStmts,
            "if ((NU)($1) > (NU)($2->$3)) #raiseIndexError();$n",
-           [rdLoc(b), rdLoc(a), lenField()])
+           rdLoc(b), rdLoc(a), lenField())
     else:
-      lineCg(p, cpsStmts,
+      linefmt(p, cpsStmts,
            "if ((NU)($1) >= (NU)($2->$3)) #raiseIndexError();$n",
-           [rdLoc(b), rdLoc(a), lenField()])
+           rdLoc(b), rdLoc(a), lenField())
   if d.k == locNone: d.s = OnHeap
+  d.heapRoot = a.r
   if skipTypes(a.t, abstractVar).kind in {tyRef, tyPtr}:
-    a.r = ropef("(*$1)", [a.r])
+    a.r = rfmt(nil, "(*$1)", a.r)
   putIntoDest(p, d, elemType(skipTypes(a.t, abstractVar)),
-              ropef("$1->data[$2]", [rdLoc(a), rdCharLoc(b)]))
+              rfmt(nil, "$1->data[$2]", rdLoc(a), rdCharLoc(b)))
 
 proc genAndOr(p: BProc, e: PNode, d: var TLoc, m: TMagic) =
   # how to generate code?
@@ -818,8 +824,8 @@ proc genEcho(p: BProc, n: PNode) =
   for i in countup(1, n.len-1):
     initLocExpr(p, n.sons[i], a)
     appf(args, ", ($1)->data", [rdLoc(a)])
-  lineCg(p, cpsStmts, "printf($1$2);$n", [
-    makeCString(repeatStr(n.len-1, "%s") & tnl), args])
+  linefmt(p, cpsStmts, "printf($1$2);$n",
+          makeCString(repeatStr(n.len-1, "%s") & tnl), args)
 
 include ccgcalls
 
@@ -850,14 +856,14 @@ proc genStrConcat(p: BProc, e: PNode, d: var TLoc) =
     initLocExpr(p, e.sons[i + 1], a)
     if skipTypes(e.sons[i + 1].Typ, abstractVarRange).kind == tyChar:
       Inc(L)
-      appLineCg(p, appends, "#appendChar($1, $2);$n", [tmp.r, rdLoc(a)])
+      app(appends, rfmt(p.module, "#appendChar($1, $2);$n", tmp.r, rdLoc(a)))
     else:
       if e.sons[i + 1].kind in {nkStrLit..nkTripleStrLit}:
         Inc(L, len(e.sons[i + 1].strVal))
       else:
         appf(lens, "$1->$2 + ", [rdLoc(a), lenField()])
-      appLineCg(p, appends, "#appendString($1, $2);$n", [tmp.r, rdLoc(a)])
-  lineCg(p, cpsStmts, "$1 = #rawNewString($2$3);$n", [tmp.r, lens, toRope(L)])
+      app(appends, rfmt(p.module, "#appendString($1, $2);$n", tmp.r, rdLoc(a)))
+  linefmt(p, cpsStmts, "$1 = #rawNewString($2$3);$n", tmp.r, lens, toRope(L))
   app(p.s(cpsStmts), appends)
   if d.k == locNone:
     d = tmp
@@ -888,17 +894,17 @@ proc genStrAppend(p: BProc, e: PNode, d: var TLoc) =
     initLocExpr(p, e.sons[i + 2], a)
     if skipTypes(e.sons[i + 2].Typ, abstractVarRange).kind == tyChar:
       Inc(L)
-      appLineCg(p, appends, "#appendChar($1, $2);$n",
-            [rdLoc(dest), rdLoc(a)])
+      app(appends, rfmt(p.module, "#appendChar($1, $2);$n",
+                        rdLoc(dest), rdLoc(a)))
     else:
       if e.sons[i + 2].kind in {nkStrLit..nkTripleStrLit}:
         Inc(L, len(e.sons[i + 2].strVal))
       else:
         appf(lens, "$1->$2 + ", [rdLoc(a), lenField()])
-      appLineCg(p, appends, "#appendString($1, $2);$n",
-            [rdLoc(dest), rdLoc(a)])
-  lineCg(p, cpsStmts, "$1 = #resizeString($1, $2$3);$n",
-       [rdLoc(dest), lens, toRope(L)])
+      app(appends, rfmt(p.module, "#appendString($1, $2);$n",
+                        rdLoc(dest), rdLoc(a)))
+  linefmt(p, cpsStmts, "$1 = #resizeString($1, $2$3);$n",
+          rdLoc(dest), lens, toRope(L))
   keepAlive(p, dest)
   app(p.s(cpsStmts), appends)
 
@@ -920,14 +926,14 @@ proc genSeqElemAppend(p: BProc, e: PNode, d: var TLoc) =
       getTypeDesc(p.module, skipTypes(e.sons[2].Typ, abstractVar))])
   keepAlive(p, a)
   initLoc(dest, locExpr, b.t, OnHeap)
-  dest.r = ropef("$1->data[$1->$2-1]", [rdLoc(a), lenField()])
+  dest.r = rfmt(nil, "$1->data[$1->$2-1]", rdLoc(a), lenField())
   genAssignment(p, dest, b, {needToCopy, afDestIsNil})
 
 proc genReset(p: BProc, n: PNode) = 
   var a: TLoc
   InitLocExpr(p, n.sons[1], a)
-  lineCg(p, cpsStmts, "#genericReset((void*)$1, $2);$n", 
-       [addrLoc(a), genTypeInfo(p.module, skipTypes(a.t, abstractVarRange))])
+  linefmt(p, cpsStmts, "#genericReset((void*)$1, $2);$n",
+          addrLoc(a), genTypeInfo(p.module, skipTypes(a.t, abstractVarRange)))
 
 proc genNew(p: BProc, e: PNode) =
   var
@@ -951,11 +957,11 @@ proc genNew(p: BProc, e: PNode) =
   if a.s == OnHeap and optRefcGc in gGlobalOptions:
     # use newObjRC1 as an optimization; and we don't need 'keepAlive' either
     if canFormAcycle(a.t):
-      lineCg(p, cpsStmts, "if ($1) #nimGCunref($1);$n", a.rdLoc)
+      linefmt(p, cpsStmts, "if ($1) #nimGCunref($1);$n", a.rdLoc)
     else:
-      lineCg(p, cpsStmts, "if ($1) #nimGCunrefNoCycle($1);$n", a.rdLoc)
+      linefmt(p, cpsStmts, "if ($1) #nimGCunrefNoCycle($1);$n", a.rdLoc)
     b.r = ropecg(p.module, "($1) #newObjRC1($2, $3)", args)
-    lineCg(p, cpsStmts, "$1 = $2;$n", a.rdLoc, b.rdLoc)
+    linefmt(p, cpsStmts, "$1 = $2;$n", a.rdLoc, b.rdLoc)
   else:
     b.r = ropecg(p.module, "($1) #newObj($2, $3)", args)
     genAssignment(p, a, b, {needToKeepAlive})  # set the object type:
@@ -969,9 +975,9 @@ proc genNewSeqAux(p: BProc, dest: TLoc, length: PRope) =
   var call: TLoc
   initLoc(call, locExpr, dest.t, OnHeap)
   if dest.s == OnHeap and optRefcGc in gGlobalOptions:
-    lineCg(p, cpsStmts, "if ($1) #nimGCunrefNoCycle($1);$n", dest.rdLoc)
+    linefmt(p, cpsStmts, "if ($1) #nimGCunrefNoCycle($1);$n", dest.rdLoc)
     call.r = ropecg(p.module, "($1) #newSeqRC1($2, $3)", args)
-    lineCg(p, cpsStmts, "$1 = $2;$n", dest.rdLoc, call.rdLoc)
+    linefmt(p, cpsStmts, "$1 = $2;$n", dest.rdLoc, call.rdLoc)
   else:
     call.r = ropecg(p.module, "($1) #newSeq($2, $3)", args)
     genAssignment(p, dest, call, {needToKeepAlive})
@@ -990,7 +996,7 @@ proc genSeqConstr(p: BProc, t: PNode, d: var TLoc) =
   genNewSeqAux(p, d, intLiteral(sonsLen(t)))
   for i in countup(0, sonsLen(t) - 1):
     initLoc(arr, locExpr, elemType(skipTypes(t.typ, abstractInst)), OnHeap)
-    arr.r = ropef("$1->data[$2]", [rdLoc(d), intLiteral(i)])
+    arr.r = rfmt(nil, "$1->data[$2]", rdLoc(d), intLiteral(i))
     arr.s = OnHeap            # we know that sequences are on the heap
     expr(p, t.sons[i], arr)
 
@@ -1009,10 +1015,10 @@ proc genArrToSeq(p: BProc, t: PNode, d: var TLoc) =
   initLocExpr(p, t.sons[1], a)
   for i in countup(0, L - 1):
     initLoc(elem, locExpr, elemType(skipTypes(t.typ, abstractInst)), OnHeap)
-    elem.r = ropef("$1->data[$2]", [rdLoc(d), intLiteral(i)])
+    elem.r = rfmt(nil, "$1->data[$2]", rdLoc(d), intLiteral(i))
     elem.s = OnHeap # we know that sequences are on the heap
     initLoc(arr, locExpr, elemType(skipTypes(t.sons[1].typ, abstractInst)), a.s)
-    arr.r = ropef("$1[$2]", [rdLoc(a), intLiteral(i)])
+    arr.r = rfmt(nil, "$1[$2]", rdLoc(a), intLiteral(i))
     genAssignment(p, elem, arr, {afDestIsNil, needToCopy})
   
 proc genNewFinalize(p: BProc, e: PNode) =
@@ -1043,21 +1049,21 @@ proc genOf(p: BProc, x: PNode, typ: PType, d: var TLoc) =
   var t = skipTypes(a.t, abstractInst)
   while t.kind in {tyVar, tyPtr, tyRef}:
     if t.kind != tyVar: nilCheck = r
-    r = ropef("(*$1)", [r])
+    r = rfmt(nil, "(*$1)", r)
     t = skipTypes(t.sons[0], typedescInst)
   if gCmd != cmdCompileToCpp:
     while (t.kind == tyObject) and (t.sons[0] != nil):
-      app(r, ".Sup")
+      app(r, ~".Sup")
       t = skipTypes(t.sons[0], typedescInst)
   if isObjLackingTypeField(t):
     GlobalError(x.info, errGenerated, 
       "no 'of' operator available for pure objects")
   if nilCheck != nil:
-    r = ropecg(p.module, "(($1) && #isObj($2.m_type, $3))",
-              [nilCheck, r, genTypeInfo(p.module, dest)])
+    r = rfmt(p.module, "(($1) && #isObj($2.m_type, $3))",
+             nilCheck, r, genTypeInfo(p.module, dest))
   else:
-    r = ropecg(p.module, "#isObj($1.m_type, $2)", 
-              [r, genTypeInfo(p.module, dest)])
+    r = rfmt(p.module, "#isObj($1.m_type, $2)",
+             r, genTypeInfo(p.module, dest))
   putIntoDest(p, d, getSysType(tyBool), r)
 
 proc genOf(p: BProc, n: PNode, d: var TLoc) =
@@ -1373,12 +1379,12 @@ proc genStrEquals(p: BProc, e: PNode, d: var TLoc) =
     binaryExpr(p, e, d, "($1 == $2)")
   elif (a.kind in {nkStrLit..nkTripleStrLit}) and (a.strVal == ""):
     initLocExpr(p, e.sons[2], x)
-    putIntoDest(p, d, e.typ, 
-      ropef("(($1) && ($1)->$2 == 0)", [rdLoc(x), lenField()]))
+    putIntoDest(p, d, e.typ,
+      rfmt(nil, "(($1) && ($1)->$2 == 0)", rdLoc(x), lenField()))
   elif (b.kind in {nkStrLit..nkTripleStrLit}) and (b.strVal == ""):
     initLocExpr(p, e.sons[1], x)
-    putIntoDest(p, d, e.typ, 
-      ropef("(($1) && ($1)->$2 == 0)", [rdLoc(x), lenField()]))
+    putIntoDest(p, d, e.typ,
+      rfmt(nil, "(($1) && ($1)->$2 == 0)", rdLoc(x), lenField()))
   else:
     binaryExpr(p, e, d, "#eqStrings($1, $2)")
 
@@ -1390,12 +1396,12 @@ proc binaryFloatArith(p: BProc, e: PNode, d: var TLoc, m: TMagic) =
     assert(e.sons[2].typ != nil)
     InitLocExpr(p, e.sons[1], a)
     InitLocExpr(p, e.sons[2], b)
-    putIntoDest(p, d, e.typ, ropef("($2 $1 $3)", [
-                toRope(opr[m]), rdLoc(a), rdLoc(b)]))
+    putIntoDest(p, d, e.typ, rfmt(nil, "($2 $1 $3)",
+                                  toRope(opr[m]), rdLoc(a), rdLoc(b)))
     if optNanCheck in p.options:
-      lineCg(p, cpsStmts, "#nanCheck($1);$n", [rdLoc(d)])
+      linefmt(p, cpsStmts, "#nanCheck($1);$n", rdLoc(d))
     if optInfCheck in p.options:
-      lineCg(p, cpsStmts, "#infCheck($1);$n", [rdLoc(d)])
+      linefmt(p, cpsStmts, "#infCheck($1);$n", rdLoc(d))
   else:
     binaryArith(p, e, d, m)
 
@@ -1586,8 +1592,8 @@ proc genClosure(p: BProc, n: PNode, d: var TLoc) =
     initLocExpr(p, n.sons[0], a)
     initLocExpr(p, n.sons[1], b)
     getTemp(p, n.typ, tmp)
-    lineCg(p, cpsStmts, "$1.ClPrc = $2; $1.ClEnv = $3;$n",
-          tmp.rdLoc, a.rdLoc, b.rdLoc)
+    linefmt(p, cpsStmts, "$1.ClPrc = $2; $1.ClEnv = $3;$n",
+            tmp.rdLoc, a.rdLoc, b.rdLoc)
     putLocIntoDest(p, d, tmp)
 
 proc genArrayConstr(p: BProc, n: PNode, d: var TLoc) =
@@ -1626,11 +1632,11 @@ proc upConv(p: BProc, n: PNode, d: var TLoc) =
         app(r, ".Sup")
         t = skipTypes(t.sons[0], abstractInst)
     if nilCheck != nil:
-      lineCg(p, cpsStmts, "if ($1) #chckObj($2.m_type, $3);$n",
-           [nilCheck, r, genTypeInfo(p.module, dest)])
+      linefmt(p, cpsStmts, "if ($1) #chckObj($2.m_type, $3);$n",
+              nilCheck, r, genTypeInfo(p.module, dest))
     else:
-      lineCg(p, cpsStmts, "#chckObj($1.m_type, $2);$n",
-           [r, genTypeInfo(p.module, dest)])
+      linefmt(p, cpsStmts, "#chckObj($1.m_type, $2);$n",
+              r, genTypeInfo(p.module, dest))
   if n.sons[0].typ.kind != tyObject:
     putIntoDest(p, d, n.typ,
                 ropef("(($1) ($2))", [getTypeDesc(p.module, n.typ), rdLoc(a)]))
diff --git a/compiler/ccgmerge.nim b/compiler/ccgmerge.nim
index 027cd5780..baf4f5586 100644
--- a/compiler/ccgmerge.nim
+++ b/compiler/ccgmerge.nim
@@ -44,9 +44,12 @@ const
     cpsStmts: "NIM_merge_PROC_BODY"
   ]
   NimMergeEndMark = "/*\tNIM_merge_END:*/"
-  
+
+template mergeSectionsEnabled: expr =
+  {optCaasEnabled, optSymbolFiles} * gGlobalOptions != {}
+
 proc genSectionStart*(fs: TCFileSection): PRope =
-  if optSymbolFiles in gGlobalOptions:
+  if mergeSectionsEnabled:
     result = toRope(tnl)
     app(result, "/*\t")
     app(result, CFileSectionNames[fs])
@@ -54,11 +57,11 @@ proc genSectionStart*(fs: TCFileSection): PRope =
     app(result, tnl)
 
 proc genSectionEnd*(fs: TCFileSection): PRope =
-  if optSymbolFiles in gGlobalOptions:
+  if mergeSectionsEnabled:
     result = toRope(NimMergeEndMark & tnl)
 
 proc genSectionStart*(ps: TCProcSection): PRope =
-  if optSymbolFiles in gGlobalOptions:
+  if mergeSectionsEnabled:
     result = toRope(tnl)
     app(result, "/*\t")
     app(result, CProcSectionNames[ps])
@@ -66,7 +69,7 @@ proc genSectionStart*(ps: TCProcSection): PRope =
     app(result, tnl)
 
 proc genSectionEnd*(ps: TCProcSection): PRope =
-  if optSymbolFiles in gGlobalOptions:
+  if mergeSectionsEnabled:
     result = toRope(NimMergeEndMark & tnl)
 
 proc writeTypeCache(a: TIdTable, s: var string) =
diff --git a/compiler/ccgstmts.nim b/compiler/ccgstmts.nim
index 9d5659f5e..2f07d24cb 100755
--- a/compiler/ccgstmts.nim
+++ b/compiler/ccgstmts.nim
@@ -72,14 +72,12 @@ proc endBlock(p: BProc, blockEnd: PRope) =
   # properly indented when pretty printing is enabled
   line(p, cpsStmts, blockEnd)
 
-var gBlockEndBracket = ropef("}$n")
-
 proc endBlock(p: BProc) =
   let topBlock = p.blocks.len - 1  
   let blockEnd = if p.blocks[topBlock].label != nil:
-      ropef("} $1: ;$n", [p.blocks[topBlock].label])
+      rfmt(nil, "} $1: ;$n", p.blocks[topBlock].label)
     else:
-      gBlockEndBracket
+      ~"}$n"
   endBlock(p, blockEnd)
 
 proc genSimpleBlock(p: BProc, stmts: PNode) {.inline.} =
@@ -95,7 +93,7 @@ template preserveBreakIdx(body: stmt): stmt {.immediate.} =
 proc genState(p: BProc, n: PNode) =
   internalAssert n.len == 1 and n.sons[0].kind == nkIntLit
   let idx = n.sons[0].intVal
-  lineCg(p, cpsStmts, "STATE$1: ;$n", [idx.toRope])
+  linefmt(p, cpsStmts, "STATE$1: ;$n", idx.toRope)
 
 proc genGotoState(p: BProc, n: PNode) =
   # we resist the temptation to translate it into duff's device as it later
@@ -248,7 +246,7 @@ proc blockLeaveActions(p: BProc, howMany: int) =
       if alreadyPoppedCnt > 0:
         dec alreadyPoppedCnt
       else:
-        lineCg(p, cpsStmts, "#popSafePoint();$n", [])
+        linefmt(p, cpsStmts, "#popSafePoint();$n")
     var finallyStmt = lastSon(tryStmt)
     if finallyStmt.kind == nkFinally: 
       genStmts(p, finallyStmt.sons[0])
@@ -257,7 +255,7 @@ proc blockLeaveActions(p: BProc, howMany: int) =
     p.nestedTryStmts.add(stack[i])
   if gCmd != cmdCompileToCpp:
     for i in countdown(p.inExceptBlock-1, 0):
-      lineCg(p, cpsStmts, "#popCurrentException();$n", [])
+      linefmt(p, cpsStmts, "#popCurrentException();$n")
 
 proc genReturnStmt(p: BProc, t: PNode) =
   p.beforeRetNeeded = true
@@ -287,7 +285,7 @@ proc genWhileStmt(p: BProc, t: PNode) =
 
     if optProfiler in p.options:
       # invoke at loop body exit:
-      lineCg(p, cpsStmts, "#nimProfile();$n")
+      linefmt(p, cpsStmts, "#nimProfile();$n")
     endBlock(p)
 
   dec(p.withinLoop)
@@ -375,9 +373,9 @@ proc genRaiseStmt(p: BProc, t: PNode) =
     genLineDir(p, t)
     # reraise the last exception:
     if gCmd == cmdCompileToCpp:
-      lineCg(p, cpsStmts, "throw;$n")
+      line(p, cpsStmts, ~"throw;$n")
     else:
-      lineCg(p, cpsStmts, "#reraiseException();$n")
+      linefmt(p, cpsStmts, "#reraiseException();$n")
 
 proc genCaseGenericBranch(p: BProc, b: PNode, e: TLoc, 
                           rangeFormat, eqFormat: TFormatStr, labl: TLabel) = 
@@ -464,8 +462,8 @@ proc genStringCase(p: BProc, t: PNode) =
       else: 
         # else statement: nothing to do yet
         # but we reserved a label, which we use later
-    lineCg(p, cpsStmts, "switch (#hashString($1) & $2) {$n", 
-         [rdLoc(a), toRope(bitMask)])
+    linefmt(p, cpsStmts, "switch (#hashString($1) & $2) {$n", 
+            rdLoc(a), toRope(bitMask))
     for j in countup(0, high(branches)):
       when false:
         let interior = cast[int](interiorAllocatedPtr(addr(branches[0])))+
@@ -604,7 +602,7 @@ proc genTryStmtCpp(p: BProc, t: PNode) =
   length = sonsLen(t)
   endBlock(p, ropecg(p.module, "} catch (NimException& $1) {$n", [exc]))
   if optStackTrace in p.Options:
-    lineCg(p, cpsStmts, "#setFrame((TFrame*)&F);$n")
+    linefmt(p, cpsStmts, "#setFrame((TFrame*)&F);$n")
   inc p.inExceptBlock
   i = 1
   var catchAllPresent = false
@@ -635,7 +633,7 @@ proc genTryStmtCpp(p: BProc, t: PNode) =
     var finallyBlock = t.lastSon
     if finallyBlock.kind == nkFinally:
       genStmts(p, finallyBlock.sons[0])
-    lineCg(p, cpsStmts, "throw;$n")
+    line(p, cpsStmts, ~"throw;$n")
     endBlock(p)
   
   lineF(p, cpsStmts, "}$n") # end of catch block
@@ -677,19 +675,19 @@ proc genTryStmt(p: BProc, t: PNode) =
   genLineDir(p, t)
   var safePoint = getTempName()
   discard cgsym(p.module, "E_Base")
-  lineCg(p, cpsLocals, "#TSafePoint $1;$n", [safePoint])
-  lineCg(p, cpsStmts, "#pushSafePoint(&$1);$n", [safePoint])
-  lineF(p, cpsStmts, "$1.status = setjmp($1.context);$n", [safePoint])
+  linefmt(p, cpsLocals, "#TSafePoint $1;$n", safePoint)
+  linefmt(p, cpsStmts, "#pushSafePoint(&$1);$n", safePoint)
+  linefmt(p, cpsStmts, "$1.status = setjmp($1.context);$n", safePoint)
   startBlock(p, "if ($1.status == 0) {$n", [safePoint])
   var length = sonsLen(t)
   add(p.nestedTryStmts, t)
   genStmts(p, t.sons[0])
-  linecg(p, cpsStmts, "#popSafePoint();$n")
+  linefmt(p, cpsStmts, "#popSafePoint();$n")
   endBlock(p)
   startBlock(p, "else {$n")
-  lineCg(p, cpsStmts, "#popSafePoint();$n")
+  linefmt(p, cpsStmts, "#popSafePoint();$n")
   if optStackTrace in p.Options:
-    lineCg(p, cpsStmts, "#setFrame((TFrame*)&F);$n")
+    linefmt(p, cpsStmts, "#setFrame((TFrame*)&F);$n")
   inc p.inExceptBlock
   var i = 1
   while (i < length) and (t.sons[i].kind == nkExceptBranch):
@@ -698,9 +696,9 @@ proc genTryStmt(p: BProc, t: PNode) =
       # general except section:
       if i > 1: lineF(p, cpsStmts, "else")
       startBlock(p)
-      lineCg(p, cpsStmts, "$1.status = 0;$n", [safePoint])
+      linefmt(p, cpsStmts, "$1.status = 0;$n", safePoint)
       genStmts(p, t.sons[i].sons[0])
-      lineCg(p, cpsStmts, "#popCurrentException();$n")
+      linefmt(p, cpsStmts, "#popCurrentException();$n")
       endBlock(p)
     else:
       var orExpr: PRope = nil
@@ -712,9 +710,9 @@ proc genTryStmt(p: BProc, t: PNode) =
               [genTypeInfo(p.module, t.sons[i].sons[j].typ)])
       if i > 1: line(p, cpsStmts, "else ")
       startBlock(p, "if ($1) {$n", [orExpr])
-      lineCg(p, cpsStmts, "$1.status = 0;$n", [safePoint])
+      linefmt(p, cpsStmts, "$1.status = 0;$n", safePoint)
       genStmts(p, t.sons[i].sons[blen-1])
-      lineCg(p, cpsStmts, "#popCurrentException();$n")
+      linefmt(p, cpsStmts, "#popCurrentException();$n")
       endBlock(p)
     inc(i)
   dec p.inExceptBlock
@@ -722,7 +720,7 @@ proc genTryStmt(p: BProc, t: PNode) =
   endBlock(p) # end of else block
   if i < length and t.sons[i].kind == nkFinally:
     genSimpleBlock(p, t.sons[i].sons[0])
-  lineCg(p, cpsStmts, "if ($1.status != 0) #reraiseException();$n", [safePoint])
+  linefmt(p, cpsStmts, "if ($1.status != 0) #reraiseException();$n", safePoint)
 
 proc genAsmOrEmitStmt(p: BProc, t: PNode): PRope = 
   for i in countup(0, sonsLen(t) - 1): 
diff --git a/compiler/ccgtrav.nim b/compiler/ccgtrav.nim
index 5f6f14548..938330f1c 100644
--- a/compiler/ccgtrav.nim
+++ b/compiler/ccgtrav.nim
@@ -65,9 +65,9 @@ proc genTraverseProc(c: var TTraversalClosure, accessor: PRope, typ: PType) =
     let arraySize = lengthOrd(typ.sons[0])
     var i: TLoc
     getTemp(p, getSysType(tyInt), i)
-    lineF(p, cpsStmts, "for ($1 = 0; $1 < $2; $1++) {$n",
-        i.r, arraySize.toRope)
-    genTraverseProc(c, ropef("$1[$2]", accessor, i.r), typ.sons[1])
+    linefmt(p, cpsStmts, "for ($1 = 0; $1 < $2; $1++) {$n",
+            i.r, arraySize.toRope)
+    genTraverseProc(c, rfmt(nil, "$1[$2]", accessor, i.r), typ.sons[1])
     lineF(p, cpsStmts, "}$n")
   of tyObject:
     for i in countup(0, sonsLen(typ) - 1):
@@ -76,12 +76,12 @@ proc genTraverseProc(c: var TTraversalClosure, accessor: PRope, typ: PType) =
   of tyTuple:
     let typ = GetUniqueType(typ)
     for i in countup(0, sonsLen(typ) - 1):
-      genTraverseProc(c, ropef("$1.Field$2", accessor, i.toRope), typ.sons[i])
+      genTraverseProc(c, rfmt(nil, "$1.Field$2", accessor, i.toRope), typ.sons[i])
   of tyRef, tyString, tySequence:
     lineCg(p, cpsStmts, c.visitorFrmt, accessor)
   of tyProc:
     if typ.callConv == ccClosure:
-      lineCg(p, cpsStmts, c.visitorFrmt, ropef("$1.ClEnv", accessor))
+      lineCg(p, cpsStmts, c.visitorFrmt, rfmt(nil, "$1.ClEnv", accessor))
   else:
     nil
 
diff --git a/compiler/ccgtypes.nim b/compiler/ccgtypes.nim
index 62426a435..5a9427a7b 100755
--- a/compiler/ccgtypes.nim
+++ b/compiler/ccgtypes.nim
@@ -70,12 +70,12 @@ proc mangleName(s: PSym): PRope =
     if gCmd == cmdCompileToLLVM: 
       case s.kind
       of skProc, skMethod, skConverter, skConst, skIterator: 
-        result = toRope("@")
+        result = ~"@"
       of skVar, skForVar, skResult, skLet: 
-        if sfGlobal in s.flags: result = toRope("@")
-        else: result = toRope("%")
+        if sfGlobal in s.flags: result = ~"@"
+        else: result = ~"%"
       of skTemp, skParam, skType, skEnumField, skModule: 
-        result = toRope("%")
+        result = ~"%"
       else: InternalError(s.info, "mangleName")
     when oKeepVariableNames:
       let keepOrigName = s.kind in skLocalVars - {skForVar} and 
@@ -126,14 +126,14 @@ proc mangleName(s: PSym): PRope =
       # These are not properly scoped now - we need to add blocks
       # around for loops in transf
       if keepOrigName:
-        result = s.name.s.mangle.toRope
+        result = s.name.s.mangle.newRope
       else:
-        app(result, toRope(mangle(s.name.s)))
-        app(result, "_")
+        app(result, newRope(mangle(s.name.s)))
+        app(result, ~"_")
         app(result, toRope(s.id))
     else:
-      app(result, toRope(mangle(s.name.s)))
-      app(result, "_")
+      app(result, newRope(mangle(s.name.s)))
+      app(result, ~"_")
       app(result, toRope(s.id))
     s.loc.r = result
 
@@ -148,11 +148,9 @@ proc containsCompileTimeOnly(t: PType): bool =
         return true
   return false
 
-var anonTypeName = toRope"TY"
-
 proc typeName(typ: PType): PRope =
   result = if typ.sym != nil: typ.sym.name.s.mangle.toRope
-           else: anonTypeName
+           else: ~"TY"
 
 proc getTypeName(typ: PType): PRope = 
   if (typ.sym != nil) and ({sfImportc, sfExportc} * typ.sym.flags != {}) and
@@ -161,7 +159,7 @@ proc getTypeName(typ: PType): PRope =
   else:
     if typ.loc.r == nil:
       typ.loc.r = if gCmd != cmdCompileToLLVM: con(typ.typeName, typ.id.toRope)
-                  else: con(["%".toRope, typ.typeName, typ.id.toRope])
+                  else: con([~"%", typ.typeName, typ.id.toRope])
     result = typ.loc.r
   if result == nil: InternalError("getTypeName: " & $typ.kind)
   
@@ -249,10 +247,10 @@ proc CacheGetType(tab: TIdTable, key: PType): PRope =
   result = PRope(IdTableGet(tab, key))
 
 proc getTempName(): PRope = 
-  result = ropeff("TMP$1", "%TMP$1", [toRope(backendId())])
+  result = rfmt(nil, "TMP$1", toRope(backendId()))
 
 proc getGlobalTempName(): PRope = 
-  result = ropeff("TMP$1", "@TMP$1", [toRope(backendId())])
+  result = rfmt(nil, "TMP$1", toRope(backendId()))
 
 proc ccgIntroducedPtr(s: PSym): bool = 
   var pt = skipTypes(s.typ, abstractInst)
@@ -273,7 +271,7 @@ proc ccgIntroducedPtr(s: PSym): bool =
   else: result = false
   
 proc fillResult(param: PSym) = 
-  fillLoc(param.loc, locParam, param.typ, ropeff("Result", "%Result", []), 
+  fillLoc(param.loc, locParam, param.typ, ~"Result",
           OnStack)
   if (mapReturnType(param.typ) != ctArray) and IsInvalidReturnType(param.typ): 
     incl(param.loc.flags, lfIndirect)
@@ -287,25 +285,32 @@ proc getParamTypeDesc(m: BModule, t: PType, check: var TIntSet): PRope =
         return getTypeDescAux(m, b, check)
   result = getTypeDescAux(m, t, check)
 
+proc paramStorageLoc(param: PSym): TStorageLoc =
+  if param.typ.skipTypes({tyVar}).kind notin {tyArray, tyOpenArray}:
+    result = OnStack
+  else:
+    result = OnUnknown
+
 proc genProcParams(m: BModule, t: PType, rettype, params: var PRope, 
                    check: var TIntSet, declareEnvironment=true) = 
   params = nil
   if (t.sons[0] == nil) or isInvalidReturnType(t.sons[0]): 
-    rettype = toRope("void")
+    rettype = ~"void"
   else: 
     rettype = getTypeDescAux(m, t.sons[0], check)
   for i in countup(1, sonsLen(t.n) - 1): 
     if t.n.sons[i].kind != nkSym: InternalError(t.n.info, "genProcParams")
     var param = t.n.sons[i].sym
     if isCompileTimeOnly(param.typ): continue
-    if params != nil: app(params, ", ")
-    fillLoc(param.loc, locParam, param.typ, mangleName(param), OnStack)
+    if params != nil: app(params, ~", ")
+    fillLoc(param.loc, locParam, param.typ, mangleName(param),
+            param.paramStorageLoc)
     app(params, getParamTypeDesc(m, param.typ, check))
     if ccgIntroducedPtr(param): 
-      app(params, "*")
+      app(params, ~"*")
       incl(param.loc.flags, lfIndirect)
       param.loc.s = OnUnknown
-    app(params, " ")
+    app(params, ~" ")
     app(params, param.loc.r)
     # declare the len field for open arrays:
     var arr = param.typ
diff --git a/compiler/ccgutils.nim b/compiler/ccgutils.nim
index ad7f22bbc..90696825b 100755
--- a/compiler/ccgutils.nim
+++ b/compiler/ccgutils.nim
@@ -59,6 +59,12 @@ var
 proc initTypeTables() = 
   for i in countup(low(TTypeKind), high(TTypeKind)): InitIdTable(gTypeTable[i])
 
+proc resetCaches* =
+  ## XXX: fix that more properly
+  initTypeTables()
+  for i in low(gCanonicalTypes)..high(gCanonicalTypes):
+    gCanonicalTypes[i] = nil
+
 when false:
   proc echoStats*() =
     for i in countup(low(TTypeKind), high(TTypeKind)): 
@@ -148,36 +154,11 @@ proc TableGetType*(tab: TIdTable, key: PType): PObject =
         if sameType(t, key): 
           return tab.data[h].val
 
-proc toCChar*(c: Char): string = 
-  case c
-  of '\0'..'\x1F', '\x80'..'\xFF': result = '\\' & toOctal(c)
-  of '\'', '\"', '\\': result = '\\' & c
-  else: result = $(c)
-
 proc makeSingleLineCString*(s: string): string =
   result = "\""
   for c in items(s):
     result.add(c.toCChar)
   result.add('\"')
-  
-proc makeCString*(s: string): PRope = 
-  # BUGFIX: We have to split long strings into many ropes. Otherwise
-  # this could trigger an InternalError(). See the ropes module for
-  # further information.
-  const 
-    MaxLineLength = 64
-  result = nil
-  var res = "\""
-  for i in countup(0, len(s) - 1):
-    if (i + 1) mod MaxLineLength == 0:
-      add(res, '\"')
-      add(res, tnl)
-      app(result, toRope(res)) # reset:
-      setlen(res, 1)
-      res[0] = '\"'
-    add(res, toCChar(s[i]))
-  add(res, '\"')
-  app(result, toRope(res))
 
 proc makeLLVMString*(s: string): PRope = 
   const MaxLineLength = 64
diff --git a/compiler/cgen.nim b/compiler/cgen.nim
index 001a6fbee..399785c82 100755
--- a/compiler/cgen.nim
+++ b/compiler/cgen.nim
@@ -19,7 +19,6 @@ import
 when options.hasTinyCBackend:
   import tccgen
 
-proc cgenPass*(): TPass
 # implementation
 
 var
@@ -38,6 +37,10 @@ proc addForwardedProc(m: BModule, prc: PSym) =
   m.forwardedProcs.add(prc)
   inc(gForwardedProcsCounter)
 
+proc getCgenModule(s: PSym): BModule =
+  result = if s.position >= 0 and s.position < gModules.len: gModules[s.position]
+           else: nil
+
 proc findPendingModule(m: BModule, s: PSym): BModule = 
   var ms = getModule(s)
   result = gModules[ms.position]
@@ -103,10 +106,10 @@ proc ropecg(m: BModule, frmt: TFormatStr, args: varargs[PRope]): PRope =
           internalError("ropes: invalid format string $" & $j)
         app(result, args[j-1])
       of 'n':
-        if optLineDir notin gOptions: app(result, tnl)
+        if optLineDir notin gOptions: app(result, rnl)
         inc(i)
       of 'N': 
-        app(result, tnl)
+        app(result, rnl)
         inc(i)
       else: InternalError("ropes: invalid format string $" & frmt[i])
     elif frmt[i] == '#' and frmt[i+1] in IdentStartChars:
@@ -130,6 +133,80 @@ proc ropecg(m: BModule, frmt: TFormatStr, args: varargs[PRope]): PRope =
     if i - 1 >= start: 
       app(result, substr(frmt, start, i - 1))
 
+const compileTimeRopeFmt = not defined(booting)
+
+when compileTimeRopeFmt:
+  import macros
+
+  type TFmtFragmentKind = enum
+    ffSym,
+    ffLit,
+    ffParam
+
+  type TFragment = object
+    case kind: TFmtFragmentKind
+    of ffSym, ffLit:
+      value: string
+    of ffParam:
+      intValue: int
+
+  iterator fmtStringFragments(s: string): tuple[kind: TFmtFragmentKind,
+                                                value: string,
+                                                intValue: int] =
+    # This is a bit less featured version of the ropecg's algorithm
+    # (be careful when replacing ropecg calls)
+    var
+      i = 0
+      length = s.len
+
+    while i < length:
+      var start = i
+      case s[i]
+      of '$':
+        let n = s[i+1]
+        case n
+        of '$':
+          inc i, 2
+        of '0'..'9':
+          # XXX: use the new case object construction syntax when it's ready
+          yield (kind: ffParam, value: "", intValue: n.ord - ord('1'))
+          inc i, 2
+          start = i
+        else:
+          inc i
+      of '#':
+        inc i
+        var j = i
+        while s[i] in IdentChars: inc i
+        yield (kind: ffSym, value: substr(s, j, i-1), intValue: 0)
+        start = i
+      else: nil
+
+      while i < length:
+        if s[i] != '$' and s[i] != '#': inc i
+        else: break
+
+      if i - 1 >= start:
+        yield (kind: ffLit, value: substr(s, start, i-1), intValue: 0)
+
+  macro rfmt(m: BModule, fmt: expr[string], args: varargs[PRope]): expr =
+    ## Experimental optimized rope-formatting operator
+    ## The run-time code it produces will be very fast, but will it speed up
+    ## the compilation of nimrod itself or will the macro execution time
+    ## offset the gains?
+    result = newCall(bindSym"ropeConcat")
+    for frag in fmtStringFragments(fmt.strVal):
+      case frag.kind
+      of ffSym:
+        result.add(newCall(bindSym"cgsym", m, newStrLitNode(frag.value)))
+      of ffLit:
+        result.add(newCall(bindSym"~", newStrLitNode(frag.value)))
+      of ffParam:
+        result.add(args[frag.intValue])
+else:
+  template rfmt(m: BModule, fmt: expr[string], args: varargs[PRope]): expr =
+    ropecg(m, fmt, args)
+
 proc appcg(m: BModule, c: var PRope, frmt: TFormatStr, 
            args: varargs[PRope]) = 
   app(c, ropecg(m, frmt, args))
@@ -161,6 +238,15 @@ proc lineCg(p: BProc, s: TCProcSection, frmt: TFormatStr,
                args: varargs[PRope]) =
   app(p.s(s), indentLine(p, ropecg(p.module, frmt, args)))
 
+when compileTimeRopeFmt:
+  template linefmt(p: BProc, s: TCProcSection, frmt: TFormatStr,
+                   args: varargs[PRope]) =
+    line(p, s, rfmt(p.module, frmt, args))
+else:
+  proc linefmt(p: BProc, s: TCProcSection, frmt: TFormatStr,
+               args: varargs[PRope]) =
+    app(p.s(s), indentLine(p, ropecg(p.module, frmt, args)))
+
 proc appLineCg(p: BProc, r: var PRope, frmt: TFormatStr,
                args: varargs[PRope]) =
   app(r, indentLine(p, ropecg(p.module, frmt, args)))
@@ -185,15 +271,18 @@ proc genCLineDir(r: var PRope, info: TLineInfo) =
 
 proc genLineDir(p: BProc, t: PNode) = 
   var line = t.info.safeLineNm
+  if optEmbedOrigSrc in gGlobalOptions:
+    app(p.s(cpsStmts), con(~"//", t.info.sourceLine, rnl))
   genCLineDir(p.s(cpsStmts), t.info.toFullPath, line)
   if ({optStackTrace, optEndb} * p.Options == {optStackTrace, optEndb}) and
       (p.prc == nil or sfPure notin p.prc.flags): 
-    lineCg(p, cpsStmts, "#endb($1);$n", [toRope(line)])
+    linefmt(p, cpsStmts, "#endb($1);$n", toRope(line))
   elif ({optLineTrace, optStackTrace} * p.Options ==
       {optLineTrace, optStackTrace}) and
-      (p.prc == nil or sfPure notin p.prc.flags): 
-    lineF(p, cpsStmts, "F.line = $1;F.filename = $2;$n", 
-        [toRope(line), makeCString(toFilename(t.info).extractFilename)])
+      (p.prc == nil or sfPure notin p.prc.flags):
+   
+    linefmt(p, cpsStmts, "nimln($1, $2);$n",
+            line.toRope, t.info.quotedFilename)
 
 include "ccgtypes.nim"
 
@@ -228,11 +317,11 @@ proc genObjectInit(p: BProc, section: TCProcSection, t: PType, a: TLoc,
       while (s.kind == tyObject) and (s.sons[0] != nil):
         app(r, ".Sup")
         s = skipTypes(s.sons[0], abstractInst)
-    lineCg(p, section, "$1.m_type = $2;$n", [r, genTypeInfo(p.module, t)])
+    linefmt(p, section, "$1.m_type = $2;$n", r, genTypeInfo(p.module, t))
   of frEmbedded:
     # worst case for performance:
     var r = if takeAddr: addrLoc(a) else: rdLoc(a)
-    lineCg(p, section, "#objectInit($1, $2);$n", [r, genTypeInfo(p.module, t)])
+    linefmt(p, section, "#objectInit($1, $2);$n", r, genTypeInfo(p.module, t))
 
 type
   TAssignmentFlag = enum
@@ -255,27 +344,27 @@ proc resetLoc(p: BProc, loc: var TLoc) =
       nilLoc.r = toRope("NIM_NIL")
       genRefAssign(p, loc, nilLoc, {afSrcIsNil})
     else:
-      lineF(p, cpsStmts, "$1 = 0;$n", [rdLoc(loc)])
+      linefmt(p, cpsStmts, "$1 = 0;$n", rdLoc(loc))
   else:
     if loc.s != OnStack:
-      lineCg(p, cpsStmts, "#genericReset((void*)$1, $2);$n",
-        [addrLoc(loc), genTypeInfo(p.module, loc.t)])
+      linefmt(p, cpsStmts, "#genericReset((void*)$1, $2);$n",
+              addrLoc(loc), genTypeInfo(p.module, loc.t))
       # XXX: generated reset procs should not touch the m_type
       # field, so disabling this should be safe:
       genObjectInit(p, cpsStmts, loc.t, loc, true)
     else:
-      lineF(p, cpsStmts, "memset((void*)$1, 0, sizeof($2));$n",
-        [addrLoc(loc), rdLoc(loc)])
+      linefmt(p, cpsStmts, "memset((void*)$1, 0, sizeof($2));$n",
+              addrLoc(loc), rdLoc(loc))
       # XXX: We can be extra clever here and call memset only 
       # on the bytes following the m_type field?
       genObjectInit(p, cpsStmts, loc.t, loc, true)
 
 proc constructLoc(p: BProc, loc: TLoc, section = cpsStmts) =
   if not isComplexValueType(skipTypes(loc.t, abstractRange)):
-    lineF(p, section, "$1 = 0;$n", [rdLoc(loc)])
+    linefmt(p, section, "$1 = 0;$n", rdLoc(loc))
   else:
-    lineF(p, section, "memset((void*)$1, 0, sizeof($2));$n",
-       [addrLoc(loc), rdLoc(loc)])
+    linefmt(p, section, "memset((void*)$1, 0, sizeof($2));$n",
+            addrLoc(loc), rdLoc(loc))
     genObjectInit(p, section, loc.t, loc, true)
 
 proc initLocalVar(p: BProc, v: PSym, immediateAsgn: bool) =
@@ -302,7 +391,7 @@ proc getTemp(p: BProc, t: PType, result: var TLoc) =
     result.r = con("%LOC", toRope(p.labels))
   else: 
     result.r = con("LOC", toRope(p.labels))
-    lineF(p, cpsLocals, "$1 $2;$n", [getTypeDesc(p.module, t), result.r])
+    linefmt(p, cpsLocals, "$1 $2;$n", getTypeDesc(p.module, t), result.r)
   result.k = locTemp
   result.a = - 1
   result.t = getUniqueType(t)
@@ -328,11 +417,11 @@ proc keepAlive(p: BProc, toKeepAlive: TLoc) =
     result.flags = {}
 
     if not isComplexValueType(skipTypes(toKeepAlive.t, abstractVarRange)):
-      lineF(p, cpsStmts, "$1 = $2;$n", [rdLoc(result), rdLoc(toKeepAlive)])
+      linefmt(p, cpsStmts, "$1 = $2;$n", rdLoc(result), rdLoc(toKeepAlive))
     else:
-      lineCg(p, cpsStmts,
+      linefmt(p, cpsStmts,
            "memcpy((void*)$1, (NIM_CONST void*)$2, sizeof($3));$n",
-           [addrLoc(result), addrLoc(toKeepAlive), rdLoc(result)])
+           addrLoc(result), addrLoc(toKeepAlive), rdLoc(result))
 
 proc initGCFrame(p: BProc): PRope =
   if p.gcFrameId > 0: result = ropef("struct {$1} GCFRAME;$n", p.gcFrameType)
@@ -599,36 +688,20 @@ proc generateHeaders(m: BModule) =
       appf(m.s[cfsHeaders], "$N#include $1$N", [toRope(it.data)])
     it = PStrEntry(it.Next)
 
-proc getFrameDecl(p: BProc) = 
-  var slots: PRope
-  if p.frameLen > 0: 
-    discard cgsym(p.module, "TVarSlot")
-    slots = ropeff("  TVarSlot s[$1];$n", ", [$1 x %TVarSlot]", 
-                   [toRope(p.frameLen)])
-  else: 
-    slots = nil
-  lineFF(p, cpsLocals, "volatile struct {TFrame* prev;" &
-      "NCSTRING procname;NI line;NCSTRING filename;" &
-      "NI len;$1} F;$n",
-      "%TF = type {%TFrame*, i8*, %NI, %NI$1}$n" & 
-      "%F = alloca %TF$n", [slots])
-  inc(p.labels)
-  prepend(p.s(cpsInit), indentLine(p, ropeff("F.len = $1;$n", 
-      "%LOC$2 = getelementptr %TF %F, %NI 4$n" &
-      "store %NI $1, %NI* %LOC$2$n", [toRope(p.frameLen), toRope(p.labels)])))
-
 proc retIsNotVoid(s: PSym): bool = 
   result = (s.typ.sons[0] != nil) and not isInvalidReturnType(s.typ.sons[0])
 
-proc initFrame(p: BProc, procname, filename: PRope): PRope = 
-  result = ropecg(p.module, 
-    "\tF.procname = $1;$n" &
-    "\tF.filename = $2;$n" & 
-    "\tF.line = 0;$n" & 
-    "\t#pushFrame((TFrame*)&F);$n", [procname, filename])
+proc initFrame(p: BProc, procname, filename: PRope): PRope =
+  discard cgsym(p.module, "pushFrame")
+  if p.frameLen > 0:
+    discard cgsym(p.module, "TVarSlot")
+    result = rfmt(nil, "\tnimfrs($1, $2, $3)$N",
+                  procname, filename, p.frameLen.toRope)
+  else:
+    result = rfmt(nil, "\tnimfr($1, $2)$N", procname, filename)
 
 proc deinitFrame(p: BProc): PRope =
-  result = ropecg(p.module, "\t#popFrame();$n")
+  result = rfmt(p.module, "\t#popFrame();$n")
 
 proc closureSetup(p: BProc, prc: PSym) =
   if tfCapturesEnv notin prc.typ.flags: return
@@ -640,8 +713,8 @@ proc closureSetup(p: BProc, prc: PSym) =
   #echo "created environment: ", env.id, " for ", prc.name.s
   assignLocalVar(p, env)
   # generate cast assignment:
-  lineCg(p, cpsStmts, "$1 = ($2) ClEnv;$n", rdLoc(env.loc),
-         getTypeDesc(p.module, env.typ))
+  linefmt(p, cpsStmts, "$1 = ($2) ClEnv;$n",
+          rdLoc(env.loc), getTypeDesc(p.module, env.typ))
 
 proc genProcAux(m: BModule, prc: PSym) =
   var p = newProc(prc, m)
@@ -655,7 +728,7 @@ proc genProcAux(m: BModule, prc: PSym) =
       # declare the result symbol:
       assignLocalVar(p, res)
       assert(res.loc.r != nil)
-      returnStmt = ropeff("\treturn $1;$n", "ret $1$n", [rdLoc(res.loc)])
+      returnStmt = rfmt(nil, "\treturn $1;$n", rdLoc(res.loc))
       initLocalVar(p, res, immediateAsgn=false)
     else:
       fillResult(res)
@@ -671,17 +744,15 @@ proc genProcAux(m: BModule, prc: PSym) =
   genStmts(p, prc.getBody) # modifies p.locals, p.init, etc.
   var generatedProc: PRope
   if sfPure in prc.flags: 
-    generatedProc = ropeff("$N$1 {$n$2$3$4}$N$N", "define $1 {$n$2$3$4}$N",
-        [header, p.s(cpsLocals), p.s(cpsInit), p.s(cpsStmts)])
+    generatedProc = rfmt(nil, "$N$1 {$n$2$3$4}$N$N",
+                         header, p.s(cpsLocals), p.s(cpsInit), p.s(cpsStmts))
   else:
-    generatedProc = ropeff("$N$1 {$N", "$Ndefine $1 {$N", [header])
+    generatedProc = rfmt(nil, "$N$1 {$N", header)
     app(generatedProc, initGCFrame(p))
     if optStackTrace in prc.options: 
-      getFrameDecl(p)
       app(generatedProc, p.s(cpsLocals))
       var procname = CStringLit(p, generatedProc, prc.name.s)
-      var filename = CStringLit(p, generatedProc, toFilename(prc.info))
-      app(generatedProc, initFrame(p, procname, filename))
+      app(generatedProc, initFrame(p, procname, prc.info.quotedFilename))
     else: 
       app(generatedProc, p.s(cpsLocals))
     if (optProfiler in prc.options) and (gCmd != cmdCompileToLLVM):
@@ -689,11 +760,11 @@ proc genProcAux(m: BModule, prc: PSym) =
       appcg(p, cpsInit, "\t#nimProfile();$n", [])
     app(generatedProc, p.s(cpsInit))
     app(generatedProc, p.s(cpsStmts))
-    if p.beforeRetNeeded: appf(generatedProc, "\tBeforeRet: ;$n")
+    if p.beforeRetNeeded: app(generatedProc, ~"\tBeforeRet: ;$n")
     app(generatedProc, deinitGCFrame(p))
     if optStackTrace in prc.options: app(generatedProc, deinitFrame(p))
     app(generatedProc, returnStmt)
-    appf(generatedProc, "}$N")
+    app(generatedProc, ~"}$N")
   app(m.s[cfsProcs], generatedProc)
   
 proc genProcPrototype(m: BModule, sym: PSym) = 
@@ -702,11 +773,11 @@ proc genProcPrototype(m: BModule, sym: PSym) =
   if lfDynamicLib in sym.loc.Flags:
     if getModule(sym).id != m.module.id and
         not ContainsOrIncl(m.declaredThings, sym.id): 
-      appf(m.s[cfsVars], "extern $1 $2;$n", 
-           [getTypeDesc(m, sym.loc.t), mangleDynLibProc(sym)])
+      app(m.s[cfsVars], rfmt(nil, "extern $1 $2;$n",
+                        getTypeDesc(m, sym.loc.t), mangleDynLibProc(sym)))
       if gCmd == cmdCompileToLLVM: incl(sym.loc.flags, lfIndirect)
   elif not ContainsOrIncl(m.declaredProtos, sym.id): 
-    appf(m.s[cfsProcHeaders], "$1;$n", [genProcHeader(m, sym)])
+    app(m.s[cfsProcHeaders], rfmt(nil, "$1;$n", genProcHeader(m, sym)))
 
 proc genProcNoForward(m: BModule, prc: PSym) = 
   fillProcLoc(prc)
@@ -915,11 +986,6 @@ proc genInitCode(m: BModule) =
   if m.nimTypes > 0: 
     appcg(m, m.s[cfsTypeInit1], "static #TNimType $1[$2];$n", 
           [m.nimTypesName, toRope(m.nimTypes)])
-  if optStackTrace in m.initProc.options and not m.FrameDeclared:
-    # BUT: the generated init code might depend on a current frame, so
-    # declare it nevertheless:
-    m.FrameDeclared = true
-    getFrameDecl(m.initProc)
   
   app(prc, initGCFrame(m.initProc))
  
@@ -928,11 +994,16 @@ proc genInitCode(m: BModule) =
   app(prc, m.preInitProc.s(cpsLocals))
   app(prc, genSectionEnd(cpsLocals))
 
-  if optStackTrace in m.initProc.options and not m.PreventStackTrace: 
-    var procname = CStringLit(m.initProc, prc, m.module.name.s)
-    var filename = CStringLit(m.initProc, prc, toFilename(m.module.info))
-    app(prc, initFrame(m.initProc, procname, filename))
- 
+  if optStackTrace in m.initProc.options and not m.FrameDeclared:
+    # BUT: the generated init code might depend on a current frame, so
+    # declare it nevertheless:
+    m.FrameDeclared = true
+    if not m.PreventStackTrace:
+      var procname = CStringLit(m.initProc, prc, m.module.name.s)
+      app(prc, initFrame(m.initProc, procname, m.module.info.quotedFilename))
+    else:
+      app(prc, ~"\tvolatile TFrame F; F.len = 0;$N")
+    
   app(prc, genSectionStart(cpsInit))
   app(prc, m.preInitProc.s(cpsInit))
   app(prc, m.initProc.s(cpsInit))
@@ -979,8 +1050,8 @@ proc genModule(m: BModule, cfilenoext: string): PRope =
     app(result, m.s[i])
     app(result, genSectionEnd(i))
   app(result, m.s[cfsInitProc])
-  
-proc rawNewModule(module: PSym, filename: string): BModule = 
+
+proc rawNewModule(module: PSym, filename: string): BModule =
   new(result)
   InitLinkedList(result.headerFiles)
   result.declaredThings = initIntSet()
@@ -1001,17 +1072,67 @@ proc rawNewModule(module: PSym, filename: string): BModule =
   result.nimTypesName = getTempName()
   result.PreventStackTrace = sfSystemModule in module.flags
 
-proc newModule(module: PSym, filename: string): BModule = 
-  result = rawNewModule(module, filename)
-  if gModules.len <= module.position: gModules.setLen(module.position + 1)
+proc nullify[T](arr: var T) =
+  for i in low(arr)..high(arr):
+    arr[i] = nil
+
+proc resetModule*(m: var BModule) =
+  # between two compilations in CAAS mode, we can throw
+  # away all the data that was written to disk
+  InitLinkedList(m.headerFiles)
+  m.declaredProtos = initIntSet()
+  initIdTable(m.forwTypeCache)
+  m.initProc = newProc(nil, m)
+  m.initProc.options = gOptions
+  m.preInitProc = newProc(nil, m)
+  initNodeTable(m.dataCache)
+  m.typeStack = @[]
+  m.forwardedProcs = @[]
+  m.typeNodesName = getTempName()
+  m.nimTypesName = getTempName()
+  m.PreventStackTrace = sfSystemModule in m.module.flags
+  nullify m.s
+  m.usesThreadVars = false
+  m.typeNodes = 0
+  m.nimTypes = 0
+  nullify m.extensionLoaders
+  
+  # indicate that this is now cached module
+  # the cache will be invalidated by nullifying gModules
+  m.fromCache = true
+  
+  # we keep only the "merge info" information for the module
+  # and the properties that can't change:
+  # m.filename
+  # m.cfilename
+  # m.isHeaderFile
+  # m.module ?
+  # m.typeCache
+  # m.declaredThings
+  # m.typeInfoMarker
+  # m.labels
+  # m.FrameDeclared
+
+proc resetCgenModules* =
+  for m in cgenModules(): resetModule(m)
+
+proc rawNewModule(module: PSym): BModule =
+  result = rawNewModule(module, module.filename)
+
+proc newModule(module: PSym): BModule =
+  # we should create only one cgen module for each module sym
+  InternalAssert getCgenModule(module) == nil
+
+  result = rawNewModule(module)
+  growCache gModules, module.position
   gModules[module.position] = result
 
   if (optDeadCodeElim in gGlobalOptions): 
     if (sfDeadCodeElim in module.flags): 
-      InternalError("added pending module twice: " & filename)
-  
-proc myOpen(module: PSym, filename: string): PPassContext = 
-  result = newModule(module, filename)
+      InternalError("added pending module twice: " & module.filename)
+
+proc myOpen(module: PSym): PPassContext = 
+  result = newModule(module)
   if optGenIndex in gGlobalOptions and generatedHeader == nil:
     let f = if headerFile.len > 0: headerFile else: gProjectFull
     generatedHeader = rawNewModule(module,
@@ -1041,9 +1162,9 @@ proc writeHeader(m: BModule) =
 proc getCFile(m: BModule): string =
   result = changeFileExt(completeCFilePath(m.cfilename), cExt)
 
-proc myOpenCached(module: PSym, filename: string, 
-                  rd: PRodReader): PPassContext = 
-  var m = newModule(module, filename)
+proc myOpenCached(module: PSym, rd: PRodReader): PPassContext =
+  assert optSymbolFiles in gGlobalOptions
+  var m = newModule(module)
   readMergeInfo(getCFile(m), m)
   result = m
 
@@ -1116,8 +1237,28 @@ proc writeModule(m: BModule, pending: bool) =
     # ``system.c`` but then compilation fails due to an error. This means
     # that ``system.o`` is missing, so we need to call the C compiler for it:
     addFileToCompile(cfilenoext)
+  
+  addFileToLink(cfilenoext)
+
+proc updateCachedModule(m: BModule) =
+  let cfile = getCFile(m)
+  let cfilenoext = changeFileExt(cfile, "")
+  
+  if mergeRequired(m):
+    mergeFiles(cfile, m)
+    genInitCode(m)
+    finishTypeDescriptions(m)
+    var code = genModule(m, cfilenoext)
+    writeRope(code, cfile)
+    addFileToCompile(cfilenoext)
+
   addFileToLink(cfilenoext)
 
+proc cgenCaasUpdate* =
+  # XXX(zah): clean-up the fromCache mess
+  for m in cgenModules():
+    if m.fromCache: m.updateCachedModule
+
 proc myClose(b: PPassContext, n: PNode): PNode = 
   result = n
   if b == nil or passes.skipCodegen(n): return 
@@ -1136,18 +1277,15 @@ proc myClose(b: PPassContext, n: PNode): PNode =
     # deps are allowed (and the system module is processed in the wrong
     # order anyway)
     if generatedHeader != nil: finishModule(generatedHeader)
-    while gForwardedProcsCounter > 0: 
-      for i in countup(0, high(gModules)): 
-        finishModule(gModules[i])
-    for i in countup(0, high(gModules)): 
-      writeModule(gModules[i], pending=true)
+    while gForwardedProcsCounter > 0:
+      for m in cgenModules():
+        if not m.fromCache:
+          finishModule(m)
+    for m in cgenModules():
+      if not m.fromCache:
+        writeModule(m, pending=true)
     writeMapping(gMapping)
     if generatedHeader != nil: writeHeader(generatedHeader)
 
-proc cgenPass(): TPass = 
-  initPass(result)
-  result.open = myOpen
-  result.openCached = myOpenCached
-  result.process = myProcess
-  result.close = myClose
+const cgenPass* = makePass(myOpen, myOpenCached, myProcess, myClose)
 
diff --git a/compiler/cgendata.nim b/compiler/cgendata.nim
index d0cc07097..32e9fb4ce 100644
--- a/compiler/cgendata.nim
+++ b/compiler/cgendata.nim
@@ -141,3 +141,10 @@ proc newProc*(prc: PSym, module: BModule): BProc =
   newSeq(result.blocks, 1)
   result.nestedTryStmts = @[]
 
+iterator cgenModules*: var BModule =
+  for i in 0..high(gModules):
+    # ultimately, we are iterating over the file ids here.
+    # some "files" won't have an associated cgen module (like stdin)
+    # and we must skip over them.
+    if gModules[i] != nil: yield gModules[i]
+
diff --git a/compiler/commands.nim b/compiler/commands.nim
index d999dc065..a26626cc4 100755
--- a/compiler/commands.nim
+++ b/compiler/commands.nim
@@ -23,6 +23,7 @@ type
 
 proc ProcessCommand*(switch: string, pass: TCmdLinePass)
 proc processSwitch*(switch, arg: string, pass: TCmdlinePass, info: TLineInfo)
+
 # implementation
 
 const
@@ -247,6 +248,9 @@ proc processSwitch(switch, arg: string, pass: TCmdlinePass, info: TLineInfo) =
   of "debuginfo": 
     expectNoArg(switch, arg, pass, info)
     incl(gGlobalOptions, optCDebug)
+  of "embedsrc":
+    expectNoArg(switch, arg, pass, info)
+    incl(gGlobalOptions, optEmbedOrigSrc)
   of "compileonly", "c": 
     expectNoArg(switch, arg, pass, info)
     incl(gGlobalOptions, optCompileOnly)
@@ -449,6 +453,9 @@ proc processSwitch(switch, arg: string, pass: TCmdlinePass, info: TLineInfo) =
   of "def":
     expectNoArg(switch, arg, pass, info)
     incl(gGlobalOptions, optDef)
+  of "eval":
+    expectArg(switch, arg, pass, info)
+    gEvalExpr = arg
   of "context":
     expectNoArg(switch, arg, pass, info)
     incl(gGlobalOptions, optContext)
diff --git a/compiler/depends.nim b/compiler/depends.nim
index b9d38236b..1468cbdb9 100755
--- a/compiler/depends.nim
+++ b/compiler/depends.nim
@@ -12,13 +12,11 @@
 import 
   os, options, ast, astalgo, msgs, ropes, idents, passes, importer
 
-proc genDependPass*(): TPass
 proc generateDot*(project: string)
 
 type 
   TGen = object of TPassContext
     module*: PSym
-    filename*: string
   PGen = ref TGen
 
 var gDotGraph: PRope # the generated DOT file; we need a global variable
@@ -48,14 +46,11 @@ proc generateDot(project: string) =
       toRope(changeFileExt(extractFileName(project), "")), gDotGraph]), 
             changeFileExt(project, "dot"))
 
-proc myOpen(module: PSym, filename: string): PPassContext = 
+proc myOpen(module: PSym): PPassContext =
   var g: PGen
   new(g)
   g.module = module
-  g.filename = filename
   result = g
 
-proc gendependPass(): TPass = 
-  initPass(result)
-  result.open = myOpen
-  result.process = addDotDependency
+const gendependPass* = makePass(open = myOpen, process = addDotDependency)
+
diff --git a/compiler/docgen.nim b/compiler/docgen.nim
index 4911f3103..2b7c567c6 100755
--- a/compiler/docgen.nim
+++ b/compiler/docgen.nim
@@ -360,7 +360,7 @@ proc writeOutput*(d: PDoc, filename, outExt: string, useWarning = false) =
     writeRope(content, getOutFile(filename, outExt), useWarning)
 
 proc CommandDoc*() =
-  var ast = parseFile(addFileExt(gProjectFull, nimExt))
+  var ast = parseFile(gProjectMainIdx)
   if ast == nil: return 
   var d = newDocumentor(gProjectFull, options.gConfigVars)
   d.hasToc = true
diff --git a/compiler/docgen2.nim b/compiler/docgen2.nim
index 2d175adbf..d48f53d15 100644
--- a/compiler/docgen2.nim
+++ b/compiler/docgen2.nim
@@ -17,14 +17,13 @@ type
   TGen = object of TPassContext
     doc: PDoc
     module: PSym
-    filename: string
   PGen = ref TGen
 
 proc close(p: PPassContext, n: PNode): PNode =
   var g = PGen(p)
   let useWarning = sfMainModule notin g.module.flags
   if gWholeProject or sfMainModule in g.module.flags:
-    writeOutput(g.doc, g.filename, HtmlExt, useWarning)
+    writeOutput(g.doc, g.module.filename, HtmlExt, useWarning)
     try:
       generateIndex(g.doc)
     except EIO:
@@ -35,21 +34,16 @@ proc processNode(c: PPassContext, n: PNode): PNode =
   var g = PGen(c)
   generateDoc(g.doc, n)
 
-proc myOpen(module: PSym, filename: string): PPassContext = 
+proc myOpen(module: PSym): PPassContext = 
   var g: PGen
   new(g)
   g.module = module
-  g.filename = filename
-  var d = newDocumentor(filename, options.gConfigVars)
+  var d = newDocumentor(module.filename, options.gConfigVars)
   d.hasToc = true
   g.doc = d
   result = g
 
-proc docgen2Pass*(): TPass = 
-  initPass(result)
-  result.open = myOpen
-  result.process = processNode
-  result.close = close
+const docgen2Pass* = makePass(open = myOpen, process = processNode, close = close)
 
 proc finishDoc2Pass*(project: string) = 
   nil
diff --git a/compiler/ecmasgen.nim b/compiler/ecmasgen.nim
index 9883acf6d..34b3b4ff5 100755
--- a/compiler/ecmasgen.nim
+++ b/compiler/ecmasgen.nim
@@ -17,12 +17,10 @@ import
   times, ropes, math, passes, ccgutils, wordrecg, renderer, rodread, rodutils,
   intsets, cgmeth
 
-proc ecmasgenPass*(): TPass
 # implementation
 
 type 
   TEcmasGen = object of TPassContext
-    filename: string
     module: PSym
 
   BModule = ref TEcmasGen
@@ -1597,9 +1595,8 @@ proc gen(p: var TProc, n: PNode, r: var TCompRes) =
   
 var globals: PGlobals
 
-proc newModule(module: PSym, filename: string): BModule = 
+proc newModule(module: PSym): BModule = 
   new(result)
-  result.filename = filename
   result.module = module
   if globals == nil: globals = newGlobals()
   
@@ -1657,19 +1654,14 @@ proc myClose(b: PPassContext, n: PNode): PNode =
 
     # write the file:
     var code = con(globals.typeInfo, globals.code)
-    var outfile = changeFileExt(completeCFilePath(m.filename), "js")
+    var outfile = changeFileExt(completeCFilePath(m.module.filename), "js")
     discard writeRopeIfNotEqual(con(genHeader(), code), outfile)
 
-proc myOpenCached(s: PSym, filename: string, rd: PRodReader): PPassContext = 
+proc myOpenCached(s: PSym, rd: PRodReader): PPassContext = 
   InternalError("symbol files are not possible with the Ecmas code generator")
   result = nil
 
-proc myOpen(s: PSym, filename: string): PPassContext = 
-  result = newModule(s, filename)
+proc myOpen(s: PSym): PPassContext = 
+  result = newModule(s)
 
-proc ecmasgenPass(): TPass = 
-  InitPass(result)
-  result.open = myOpen
-  result.close = myClose
-  result.openCached = myOpenCached
-  result.process = myProcess
+const ecmasgenPass* = makePass(myOpen, myOpenCached, myProcess, myClose)
diff --git a/compiler/evals.nim b/compiler/evals.nim
index 4b42f5ada..4b83cb703 100755
--- a/compiler/evals.nim
+++ b/compiler/evals.nim
@@ -72,8 +72,7 @@ proc newStackFrame*(): PStackFrame =
   initIdNodeTable(result.mapping)
   result.params = @[]
 
-proc newEvalContext*(module: PSym, filename: string, 
-                     mode: TEvalMode): PEvalContext = 
+proc newEvalContext*(module: PSym, mode: TEvalMode): PEvalContext =
   new(result)
   result.module = module
   result.mode = mode
@@ -1438,7 +1437,7 @@ proc eval*(c: PEvalContext, n: PNode): PNode =
       stackTrace(c, result, errCannotInterpretNodeX, renderTree(n))
 
 proc evalConstExprAux(module: PSym, e: PNode, mode: TEvalMode): PNode = 
-  var p = newEvalContext(module, "", mode)
+  var p = newEvalContext(module, mode)
   var s = newStackFrame()
   s.call = e
   pushStackFrame(p, s)
@@ -1478,8 +1477,8 @@ proc evalMacroCall(c: PEvalContext, n, nOrig: PNode, sym: PSym): PNode =
   dec(evalTemplateCounter)
   c.callsite = nil
 
-proc myOpen(module: PSym, filename: string): PPassContext = 
-  var c = newEvalContext(module, filename, emRepl)
+proc myOpen(module: PSym): PPassContext =
+  var c = newEvalContext(module, emRepl)
   c.features = {allowCast, allowFFI, allowInfiniteLoops}
   pushStackFrame(c, newStackFrame())
   result = c
@@ -1487,9 +1486,5 @@ proc myOpen(module: PSym, filename: string): PPassContext =
 proc myProcess(c: PPassContext, n: PNode): PNode = 
   result = eval(PEvalContext(c), n)
 
-proc evalPass*(): TPass = 
-  initPass(result)
-  result.open = myOpen
-  result.close = myProcess
-  result.process = myProcess
+const evalPass* = makePass(myOpen, nil, myProcess, myProcess)
 
diff --git a/compiler/extccomp.nim b/compiler/extccomp.nim
index 7b3dc0b3e..11e89fee0 100755
--- a/compiler/extccomp.nim
+++ b/compiler/extccomp.nim
@@ -379,9 +379,18 @@ proc toObjFile*(filenameWithoutExt: string): string =
   # Object file for compilation
   result = changeFileExt(filenameWithoutExt, cc[ccompiler].objExt)
 
-proc addFileToCompile*(filename: string) = 
+proc addFileToCompile*(filename: string) =
   appendStr(toCompile, filename)
 
+proc resetCompilationLists* =
+  initLinkedList(toCompile)
+  ## XXX: we must associate these with their originating module
+  # when the module is loaded/unloaded it adds/removes its items
+  # That's because we still need to CRC check the external files
+  # Maybe we can do that in checkDep on the other hand?
+  initLinkedList(externalToCompile)
+  initLinkedList(toLink)
+
 proc footprint(filename: string): TCrc32 =
   result = crcFromFile(filename) >< 
       platform.OS[targetOS].name ><
@@ -405,11 +414,11 @@ proc externalFileChanged(filename: string): bool =
       f.writeln($currentCrc)
       close(f)
 
-proc addExternalFileToCompile*(filename: string) = 
+proc addExternalFileToCompile*(filename: string) =
   if optForceFullMake in gGlobalOptions or externalFileChanged(filename):
     appendStr(externalToCompile, filename)
 
-proc addFileToLink*(filename: string) = 
+proc addFileToLink*(filename: string) =
   prependStr(toLink, filename) 
   # BUGFIX: was ``appendStr``
 
@@ -540,7 +549,7 @@ proc CompileCFile(list: TLinkedList, script: var PRope, cmds: var TStringSeq,
       app(script, tnl)
     it = PStrEntry(it.next)
 
-proc CallCCompiler*(projectfile: string) = 
+proc CallCCompiler*(projectfile: string) =
   var 
     linkCmd, buildgui, builddll: string
   if gGlobalOptions * {optCompileOnly, optGenScript} == {optCompileOnly}: 
diff --git a/compiler/idents.nim b/compiler/idents.nim
index 838d445d3..a50c5269c 100755
--- a/compiler/idents.nim
+++ b/compiler/idents.nim
@@ -25,7 +25,7 @@ type
     next*: PIdent             # for hash-table chaining
     h*: THash                 # hash value of s
   
-var buckets: array[0..4096 * 2 - 1, PIdent]
+var buckets*: array[0..4096 * 2 - 1, PIdent]
 
 proc cmpIgnoreStyle(a, b: cstring, blen: int): int =
   var i = 0
@@ -102,5 +102,5 @@ proc getIdent*(identifier: string, h: THash): PIdent =
 proc IdentEq*(id: PIdent, name: string): bool = 
   result = id.id == getIdent(name).id
 
-let idAnon* = getIdent":anonymous"
+var idAnon* = getIdent":anonymous"
 
diff --git a/compiler/importer.nim b/compiler/importer.nim
index 774bcea6a..d274b4693 100755
--- a/compiler/importer.nim
+++ b/compiler/importer.nim
@@ -34,12 +34,15 @@ proc getModuleName*(n: PNode): string =
     #  "invalide module name: '$1'" % renderTree(n))
     #result = ""
 
-proc checkModuleName*(n: PNode): string =
+proc checkModuleName*(n: PNode): int32 =
   # This returns the full canonical path for a given module import
-  var modulename = n.getModuleName
-  result = findModule(modulename)
-  if result.len == 0:
+  let modulename = n.getModuleName
+  let fullPath = findModule(modulename)
+  if fullPath.len == 0:
     LocalError(n.info, errCannotOpenFile, modulename)
+    result = InvalidFileIDX
+  else:
+    result = fullPath.fileInfoIdx
 
 proc rawImportSymbol(c: PContext, s: PSym) =
   # This does not handle stubs, because otherwise loading on demand would be
@@ -137,8 +140,8 @@ proc evalImport(c: PContext, n: PNode): PNode =
   var emptySet: TIntSet
   for i in countup(0, sonsLen(n) - 1): 
     var f = checkModuleName(n.sons[i])
-    if f.len > 0:
-      var m = gImportModule(f)
+    if f != InvalidFileIDX:
+      var m = gImportModule(c.module, f)
       if sfDeprecated in m.flags: 
         Message(n.sons[i].info, warnDeprecated, m.name.s) 
       # ``addDecl`` needs to be done before ``importAllSymbols``!
@@ -150,8 +153,8 @@ proc evalFrom(c: PContext, n: PNode): PNode =
   result = n
   checkMinSonsLen(n, 2)
   var f = checkModuleName(n.sons[0])
-  if f.len > 0:
-    var m = gImportModule(f)
+  if f != InvalidFileIDX:
+    var m = gImportModule(c.module, f)
     n.sons[0] = newSymNode(m)
     addDecl(c, m)               # add symbol to symbol table of module
     for i in countup(1, sonsLen(n) - 1): importSymbol(c, n.sons[i], m)
@@ -160,8 +163,8 @@ proc evalImportExcept*(c: PContext, n: PNode): PNode =
   result = n
   checkMinSonsLen(n, 2)
   var f = checkModuleName(n.sons[0])
-  if f.len > 0:
-    var m = gImportModule(f)
+  if f != InvalidFileIDX:
+    var m = gImportModule(c.module, f)
     n.sons[0] = newSymNode(m)
     addDecl(c, m)               # add symbol to symbol table of module
     var exceptSet = initIntSet()
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index abb25541b..02f63aee8 100755
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -126,7 +126,7 @@ proc pushInd*(L: var TLexer, indent: int)
 
 proc popInd*(L: var TLexer)
 proc isKeyword*(kind: TTokType): bool
-proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream)
+proc openLexer*(lex: var TLexer, fileidx: int32, inputstream: PLLStream)
 proc rawGetTok*(L: var TLexer, tok: var TToken)
   # reads in the next token into tok and skips it
 proc getColumn*(L: TLexer): int
@@ -135,6 +135,9 @@ proc closeLexer*(lex: var TLexer)
 proc PrintTok*(tok: TToken)
 proc tokToStr*(tok: TToken): string
 
+proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream) =
+  OpenLexer(lex, filename.fileInfoIdx, inputStream)
+
 proc lexMessage*(L: TLexer, msg: TMsgKind, arg = "")
 
 proc isKeyword(kind: TTokType): bool = 
@@ -211,10 +214,10 @@ proc fillToken(L: var TToken) =
   L.base = base10
   L.ident = dummyIdent
   
-proc openLexer(lex: var TLexer, filename: string, inputstream: PLLStream) = 
+proc openLexer(lex: var TLexer, fileIdx: int32, inputstream: PLLStream) = 
   openBaseLexer(lex, inputstream)
   lex.indentStack = @[0]
-  lex.fileIdx = filename.fileInfoIdx
+  lex.fileIdx = fileIdx
   lex.indentAhead = - 1
   inc(lex.Linenumber, inputstream.lineOffset) 
 
@@ -505,16 +508,31 @@ proc getEscapedChar(L: var TLexer, tok: var TToken) =
     if (xi <= 255): add(tok.literal, Chr(xi))
     else: lexMessage(L, errInvalidCharacterConstant)
   else: lexMessage(L, errInvalidCharacterConstant)
+
+proc newString(s: cstring, len: int): string =
+  ## XXX, how come there is no support for this?
+  result = newString(len)
+  for i in 0 .. <len:
+    result[i] = s[i]
+
+proc HandleCRLF(L: var TLexer, pos: int): int =
+  template registerLine =
+    let col = L.getColNumber(pos)
+    
+    if col > MaxLineLength:
+      lexMessagePos(L, hintLineTooLong, pos)
+
+    if optEmbedOrigSrc in gGlobalOptions:
+      let lineStart = cast[TAddress](L.buf) + L.lineStart
+      let line = newString(cast[cstring](lineStart), col)
+      addSourceLine(L.fileIdx, line)
   
-proc HandleCRLF(L: var TLexer, pos: int): int = 
   case L.buf[pos]
-  of CR: 
-    if getColNumber(L, pos) > MaxLineLength: 
-      lexMessagePos(L, hintLineTooLong, pos)
+  of CR:
+    registerLine()
     result = lexbase.HandleCR(L, pos)
-  of LF: 
-    if getColNumber(L, pos) > MaxLineLength: 
-      lexMessagePos(L, hintLineTooLong, pos)
+  of LF:
+    registerLine()
     result = lexbase.HandleLF(L, pos)
   else: result = pos
   
diff --git a/compiler/magicsys.nim b/compiler/magicsys.nim
index 2681f93d6..6ff714dc7 100755
--- a/compiler/magicsys.nim
+++ b/compiler/magicsys.nim
@@ -77,6 +77,15 @@ proc getSysType(kind: TTypeKind): PType =
 var
   intTypeCache: array[-5..64, PType]
 
+proc resetSysTypes* =
+  systemModule = nil
+  initStrTable(compilerprocs)
+  for i in low(gSysTypes)..high(gSysTypes):
+    gSysTypes[i] = nil
+
+  for i in low(intTypeCache)..high(intTypeCache):
+    intTypeCache[i] = nil
+
 proc getIntLitType*(literal: PNode): PType =
   # we cache some common integer literal types for performance:
   let value = literal.intVal
diff --git a/compiler/main.nim b/compiler/main.nim
index 0a76b967a..cba96a104 100755
--- a/compiler/main.nim
+++ b/compiler/main.nim
@@ -12,11 +12,11 @@
 
 import 
   llstream, strutils, ast, astalgo, lexer, syntaxes, renderer, options, msgs, 
-  os, lists, condsyms, rodread, rodwrite, ropes, trees, 
+  os, lists, condsyms, rodread, rodwrite, ropes, trees, times,
   wordrecg, sem, semdata, idents, passes, docgen, extccomp,
-  cgen, ecmasgen,
+  cgen, ecmasgen, cgendata,
   platform, nimconf, importer, passaux, depends, evals, types, idgen,
-  tables, docgen2, service
+  tables, docgen2, service, magicsys, parser, crc, ccgutils
 
 const
   has_LLVM_Backend = false
@@ -28,60 +28,166 @@ proc MainCommand*()
 
 # ------------------ module handling -----------------------------------------
 
+type
+  TNeedRecompile = enum Maybe, No, Yes, Probing, Recompiled
+  TCrcStatus = enum crcNotTaken, crcCached, crcHasChanged, crcNotChanged
+
+  TModuleInMemory = object
+    compiledAt: float
+    crc: TCrc32
+    deps: seq[int32] ## XXX: slurped files are not currently tracked
+    needsRecompile: TNeedRecompile
+    crcStatus: TCrcStatus
+
 var
-  compMods = initTable[string, PSym]() # all compiled modules
+  gCompiledModules: seq[PSym] = @[]
+  gMemCacheData: seq[TModuleInMemory] = @[]
+    ## XXX: we should implement recycling of file IDs
+    ## if the user keeps renaming modules, the file IDs will keep growing
+
+proc getModule(fileIdx: int32): PSym =
+  if fileIdx >= 0 and fileIdx < gCompiledModules.len:
+    result = gCompiledModules[fileIdx]
+  else:
+    result = nil
+
+template compiledAt(x: PSym): expr =
+  gMemCacheData[x.position].compiledAt
+
+template crc(x: PSym): expr =
+  gMemCacheData[x.position].crc
+
+proc crcChanged(fileIdx: int32): bool =
+  InternalAssert fileIdx >= 0 and fileIdx < gMemCacheData.len
+  
+  template updateStatus =
+    gMemCacheData[fileIdx].crcStatus = if result: crcHasChanged
+                                       else: crcNotChanged
+    # echo "TESTING CRC: ", fileIdx.toFilename, " ", result
+    
+  case gMemCacheData[fileIdx].crcStatus:
+  of crcHasChanged:
+    result = true
+  of crcNotChanged:
+    result = false
+  of crcCached:
+    let newCrc = crcFromFile(fileIdx.toFilename)
+    result = newCrc != gMemCacheData[fileIdx].crc
+    gMemCacheData[fileIdx].crc = newCrc
+    updateStatus()
+  of crcNotTaken:
+    gMemCacheData[fileIdx].crc = crcFromFile(fileIdx.toFilename)
+    result = true
+    updateStatus()
+
+proc doCRC(fileIdx: int32) =
+  if gMemCacheData[fileIdx].crcStatus == crcNotTaken:
+    # echo "FIRST CRC: ", fileIdx.ToFilename
+    gMemCacheData[fileIdx].crc = crcFromFile(fileIdx.toFilename)
+
+proc addDep(x: Psym, dep: int32) =
+  growCache gMemCacheData, dep
+  gMemCacheData[x.position].deps.safeAdd(dep)
+
+proc ResetModule(fileIdx: int32) =
+  echo "HARD RESETTING ", fileIdx.toFilename
+  gMemCacheData[fileIdx].needsRecompile = Yes
+  gCompiledModules[fileIdx] = nil
+  cgendata.gModules[fileIdx] = nil
+
+proc ResetAllModules =
+  for i in 0..gCompiledModules.high:
+    if gCompiledModules[i] != nil:
+      ResetModule(i.int32)
+
+  for m in cgenModules():
+    echo "CGEN MODULE FOUND"
+
+proc checkDepMem(fileIdx: int32): TNeedRecompile  =
+  template markDirty =
+    ResetModule(fileIdx)
+    return Yes
 
-# This expects a normalized module path
-proc registerModule(filename: string, module: PSym) =
-  compMods[filename] = module
+  if gMemCacheData[fileIdx].needsRecompile != Maybe:
+    return gMemCacheData[fileIdx].needsRecompile
 
-# This expects a normalized module path
-proc getModule(filename: string): PSym =
-  result = compMods[filename]
+  if optForceFullMake in gGlobalOptions or
+     curCaasCmd != lastCaasCmd or
+     crcChanged(fileIdx): markDirty
+  
+  if gMemCacheData[fileIdx].deps != nil:
+    gMemCacheData[fileIdx].needsRecompile = Probing
+    for dep in gMemCacheData[fileIdx].deps:
+      let d = checkDepMem(dep)
+      if d in { Yes, Recompiled }:
+        echo fileIdx.toFilename, " depends on ", dep.toFilename, " ", d
+        markDirty
+  
+  gMemCacheData[fileIdx].needsRecompile = No
+  return No
 
-var gModulesCount = 0
-proc newModule(filename: string): PSym = 
+proc newModule(fileIdx: int32): PSym =
   # We cannot call ``newSym`` here, because we have to circumvent the ID
   # mechanism, which we do in order to assign each module a persistent ID. 
   new(result)
   result.id = - 1             # for better error checking
   result.kind = skModule
+  let filename = fileIdx.toFilename
   result.name = getIdent(splitFile(filename).name)
   if not isNimrodIdentifier(result.name.s):
     rawMessage(errInvalidModuleName, result.name.s)
   
   result.owner = result       # a module belongs to itself
-  result.info = newLineInfo(filename, 1, 1)
-  result.position = gModulesCount
-  inc gModulesCount
+  result.info = newLineInfo(fileIdx, 1, 1)
+  result.position = fileIdx
+  
+  growCache gMemCacheData, fileIdx
+  growCache gCompiledModules, fileIdx
+  gCompiledModules[result.position] = result
+  
   incl(result.flags, sfUsed)
   initStrTable(result.tab)
-  RegisterModule(filename, result)
   StrTableAdd(result.tab, result) # a module knows itself
-  
-proc CompileModule(filename: string, flags: TSymFlags): PSym
-proc importModule(filename: string): PSym = 
+
+proc compileModule(fileIdx: int32, flags: TSymFlags): PSym =
+  result = getModule(fileIdx)
+  if result == nil:
+    growCache gMemCacheData, fileIdx
+    gMemCacheData[fileIdx].needsRecompile = Probing
+    result = newModule(fileIdx)
+    var rd = handleSymbolFile(result)
+    result.flags = result.flags + flags
+    if gCmd in {cmdCompileToC, cmdCompileToCpp, cmdCheck, cmdIdeTools}:
+      rd = handleSymbolFile(result)
+      if result.id < 0: 
+        InternalError("handleSymbolFile should have set the module\'s ID")
+        return
+    else:
+      result.id = getID()
+    processModule(result, nil, rd)
+    if optCaasEnabled in gGlobalOptions:
+      gMemCacheData[fileIdx].compiledAt = gLastCmdTime
+      gMemCacheData[fileIdx].needsRecompile = Recompiled
+      doCRC fileIdx
+  else:
+    if checkDepMem(fileIdx) == Yes:
+      result = CompileModule(fileIdx, flags)
+    else:
+      result = gCompiledModules[fileIdx]
+
+proc importModule(s: PSym, fileIdx: int32): PSym =
   # this is called by the semantic checking phase
-  result = getModule(filename)
-  if result == nil: 
-    # compile the module
-    result = compileModule(filename, {})
-  elif sfSystemModule in result.flags: 
+  result = compileModule(fileIdx, {})
+  if optCaasEnabled in gGlobalOptions: addDep(s, fileIdx)
+  if sfSystemModule in result.flags:
     LocalError(result.info, errAttemptToRedefine, result.Name.s)
-  
-proc CompileModule(filename: string, flags: TSymFlags): PSym =
-  var rd: PRodReader = nil
-  var f = addFileExt(filename, nimExt)
-  result = newModule(f)
-  result.flags = result.flags + flags
-  if gCmd in {cmdCompileToC, cmdCompileToCpp, cmdCheck, cmdIdeTools}: 
-    rd = handleSymbolFile(result, f)
-    if result.id < 0: 
-      InternalError("handleSymbolFile should have set the module\'s ID")
-      return
-  else:
-    result.id = getID()
-  processModule(result, f, nil, rd)
+
+proc includeModule(s: PSym, fileIdx: int32): PNode =
+  result = syntaxes.parseFile(fileIdx)
+  if optCaasEnabled in gGlobalOptions:
+    growCache gMemCacheData, fileIdx
+    addDep(s, fileIdx)
+    doCrc(fileIdx)
 
 proc `==^`(a, b: string): bool =
   try:
@@ -89,22 +195,31 @@ proc `==^`(a, b: string): bool =
   except EOS:
     result = false
 
-proc CompileProject(projectFile = gProjectFull) =
-  let systemFile = options.libpath / "system"
-  if projectFile.addFileExt(nimExt) ==^ systemFile.addFileExt(nimExt):
+proc compileSystemModule =
+  if magicsys.SystemModule == nil:
+    SystemFileIdx = fileInfoIdx(options.libpath/"system.nim")
+    discard CompileModule(SystemFileIdx, {sfSystemModule})
+
+proc CompileProject(projectFile = gProjectMainIdx) =
+  let systemFileIdx = fileInfoIdx(options.libpath / "system.nim")
+  if projectFile == SystemFileIdx:
     discard CompileModule(projectFile, {sfMainModule, sfSystemModule})
   else:
-    discard CompileModule(systemFile, {sfSystemModule})
+    compileSystemModule()
     discard CompileModule(projectFile, {sfMainModule})
 
+proc rodPass =
+  if optSymbolFiles in gGlobalOptions:
+    registerPass(rodwritePass)
+
 proc semanticPasses =
-  registerPass(verbosePass())
-  registerPass(sem.semPass())
+  registerPass verbosePass
+  registerPass semPass
 
 proc CommandGenDepend =
   semanticPasses()
-  registerPass(genDependPass())
-  registerPass(cleanupPass())
+  registerPass(genDependPass)
+  registerPass(cleanupPass)
   compileProject()
   generateDot(gProjectFull)
   execExternalProgram("dot -Tpng -o" & changeFileExt(gProjectFull, "png") &
@@ -113,31 +228,76 @@ proc CommandGenDepend =
 proc CommandCheck =
   msgs.gErrorMax = high(int)  # do not stop after first error
   semanticPasses()            # use an empty backend for semantic checking only
-  registerPass(rodwrite.rodwritePass())
-  compileProject(mainCommandArg())
+  rodPass()
+  compileProject()
 
 proc CommandDoc2 =
   msgs.gErrorMax = high(int)  # do not stop after first error
   semanticPasses()
-  registerPass(docgen2Pass())
+  registerPass(docgen2Pass)
   #registerPass(cleanupPass())
-  compileProject(mainCommandArg())
-  finishDoc2Pass(gProjectFull)
+  compileProject()
+  finishDoc2Pass(gProjectName)
 
 proc CommandCompileToC =
   semanticPasses()
-  registerPass(cgen.cgenPass())
-  registerPass(rodwrite.rodwritePass())
+  registerPass(cgenPass)
+  rodPass()
   #registerPass(cleanupPass())
+  if optCaasEnabled in gGlobalOptions:
+    # echo "BEFORE CHECK DEP"
+    # discard checkDepMem(gProjectMainIdx)
+    # echo "CHECK DEP COMPLETE"
+
   compileProject()
+
+  if optCaasEnabled in gGlobalOptions:
+    cgenCaasUpdate()
+
   if gCmd != cmdRun:
     extccomp.CallCCompiler(changeFileExt(gProjectFull, ""))
 
+  if optCaasEnabled in gGlobalOptions:
+    # caas will keep track only of the compilation commands
+    lastCaasCmd = curCaasCmd
+    resetCgenModules()
+    for i in 0 .. <gMemCacheData.len:
+      gMemCacheData[i].crcStatus = crcCached
+      gMemCacheData[i].needsRecompile = Maybe
+
+      # XXX: clean these global vars
+      # ccgstmts.gBreakpoints
+      # ccgthreadvars.nimtv
+      # ccgthreadvars.nimtVDeps
+      # ccgthreadvars.nimtvDeclared
+      # cgendata
+      # cgmeth?
+      # condsyms?
+      # depends?
+      # lexer.gLinesCompiled
+      # msgs - error counts
+      # magicsys, when system.nim changes
+      # rodread.rodcompilerProcs
+      # rodread.gTypeTable
+      # rodread.gMods
+      
+      # !! ropes.cache
+      # semthreads.computed?
+      #
+      # suggest.usageSym
+      #
+      # XXX: can we run out of IDs?
+      # XXX: detect config reloading (implement as error/require restart)
+      # XXX: options are appended (they will accumulate over time)
+    resetCompilationLists()
+    ccgutils.resetCaches()
+    GC_fullCollect()
+
 when has_LLVM_Backend:
   proc CommandCompileToLLVM =
     semanticPasses()
     registerPass(llvmgen.llvmgenPass())
-    registerPass(rodwrite.rodwritePass())
+    rodPass()
     #registerPass(cleanupPass())
     compileProject()
 
@@ -148,31 +308,52 @@ proc CommandCompileToEcmaScript =
   DefineSymbol("nimrod") # 'nimrod' is always defined
   DefineSymbol("ecmascript")
   semanticPasses()
-  registerPass(ecmasgenPass())
+  registerPass(ecmasgenPass)
   compileProject()
 
-proc CommandInteractive =
-  msgs.gErrorMax = high(int)  # do not stop after first error
+proc InteractivePasses =
+  incl(gGlobalOptions, optSafeCode)
   #setTarget(osNimrodVM, cpuNimrodVM)
   initDefines()
   DefineSymbol("nimrodvm")
-  when hasFFI:
-    DefineSymbol("nimffi")
+  when hasFFI: DefineSymbol("nimffi")
+  registerPass(verbosePass)
+  registerPass(semPass)
+  registerPass(evalPass)
+
+var stdinModule: PSym
+proc makeStdinModule: PSym =
+  if stdinModule == nil:
+    stdinModule = newModule(fileInfoIdx"stdin")
+    stdinModule.id = getID()
+  result = stdinModule
 
-  registerPass(verbosePass())
-  registerPass(sem.semPass())
-  registerPass(evals.evalPass()) # load system module:
-  discard CompileModule(options.libpath /"system", {sfSystemModule})
+proc CommandInteractive =
+  msgs.gErrorMax = high(int)  # do not stop after first error
+  InteractivePasses()
+  compileSystemModule()
   if commandArgs.len > 0:
-    discard CompileModule(mainCommandArg(), {})
+    discard CompileModule(fileInfoIdx(gProjectFull), {})
   else:
-    var m = newModule("stdin")
-    m.id = getID()
+    var m = makeStdinModule()
     incl(m.flags, sfMainModule)
-    processModule(m, "stdin", LLStreamOpenStdIn(), nil)
+    processModule(m, LLStreamOpenStdIn(), nil)
+
+const evalPasses = [verbosePass, semPass, evalPass]
+
+proc evalNim(nodes: PNode, module: PSym) =
+  carryPasses(nodes, module, evalPasses)
+
+proc commandEval(exp: string) =
+  if SystemModule == nil:
+    InteractivePasses()
+    compileSystemModule()
+  var echoExp = "echo \"eval\\t\", " & "repr(" & exp & ")"
+  evalNim(echoExp.parseString, makeStdinModule())
 
 proc CommandPretty =
-  var module = parseFile(addFileExt(mainCommandArg(), NimExt))
+  var projectFile = addFileExt(mainCommandArg(), NimExt)
+  var module = parseFile(projectFile.fileInfoIdx)
   if module != nil: 
     renderModule(module, getOutFile(mainCommandArg(), "pretty." & NimExt))
   
@@ -196,20 +377,83 @@ proc CommandScan =
 proc CommandSuggest =
   msgs.gErrorMax = high(int)  # do not stop after first error
   semanticPasses()
-  registerPass(rodwrite.rodwritePass())
+  rodPass()
   compileProject()
 
 proc wantMainModule =
   if gProjectFull.len == 0:
     Fatal(gCmdLineInfo, errCommandExpectsFilename)
+  gProjectMainIdx = addFileExt(gProjectFull, nimExt).fileInfoIdx
+
+var oss: PGenericSeq
+proc dbgseqimp(x: PGenericSeq) {.cdecl.} =
+  oss = x
+
+seqdbg = dbgseqimp
+
+proc resetMemory =
+  resetCompilationLists()
+  ccgutils.resetCaches()
+  ResetAllModules()
+  resetRopeCache()
+  resetSysTypes()
+  gOwners = @[]
+  rangeDestructorProc = nil
+  for i in low(buckets)..high(buckets):
+    buckets[i] = nil
+  idAnon = nil
+  
+  # XXX: clean these global vars
+  # ccgstmts.gBreakpoints
+  # ccgthreadvars.nimtv
+  # ccgthreadvars.nimtVDeps
+  # ccgthreadvars.nimtvDeclared
+  # cgendata
+  # cgmeth?
+  # condsyms?
+  # depends?
+  # lexer.gLinesCompiled
+  # msgs - error counts
+  # magicsys, when system.nim changes
+  # rodread.rodcompilerProcs
+  # rodread.gTypeTable
+  # rodread.gMods
   
+  # !! ropes.cache
+  # semthreads.computed?
+  #
+  # suggest.usageSym
+  #
+  # XXX: can we run out of IDs?
+  # XXX: detect config reloading (implement as error/require restart)
+  # XXX: options are appended (they will accumulate over time)
+  # vis = visimpl
+  gcDebugging = true
+  echo "COLLECT 1"
+  GC_fullCollect()
+  echo "COLLECT 2"
+  GC_fullCollect()
+  echo "COLLECT 3"
+  GC_fullCollect()
+  echo GC_getStatistics()
+
+const
+  SimiluateCaasMemReset = false
+  PrintRopeCacheStats = false
+
 proc MainCommand =
+  when SimiluateCaasMemReset:
+    gGlobalOptions.incl(optCaasEnabled)
+      
+  # In "nimrod serve" scenario, each command must reset the registered passes
+  clearPasses()
+  gLastCmdTime = epochTime()
   appendStr(searchPaths, options.libpath)
   if gProjectFull.len != 0:
     # current path is always looked first for modules
     prependStr(searchPaths, gProjectPath)
   setID(100)
-  passes.gIncludeFile = syntaxes.parseFile
+  passes.gIncludeFile = includeModule
   passes.gImportModule = importModule
   case command.normalize
   of "c", "cc", "compile", "compiletoc": 
@@ -292,7 +536,7 @@ proc MainCommand =
   of "parse": 
     gCmd = cmdParse
     wantMainModule()
-    discard parseFile(addFileExt(gProjectFull, nimExt))
+    discard parseFile(gProjectMainIdx)
   of "scan": 
     gCmd = cmdScan
     wantMainModule()
@@ -301,20 +545,37 @@ proc MainCommand =
   of "i": 
     gCmd = cmdInteractive
     CommandInteractive()
+  of "e":
+    # XXX: temporary command for easier testing
+    commandEval(mainCommandArg())
+  of "reset":
+    resetMemory()
   of "idetools":
     gCmd = cmdIdeTools
-    wantMainModule()
-    CommandSuggest()
+    if gEvalExpr != "":
+      commandEval(gEvalExpr)
+    else:
+      wantMainModule()
+      CommandSuggest()
   of "serve":
-    gCmd = cmdIdeTools
-    msgs.gErrorMax = high(int)  # do not stop after first error
-    semanticPasses()
-    # no need to write rod files and would slow down things:
-    #registerPass(rodwrite.rodwritePass())
-    discard CompileModule(options.libpath / "system", {sfSystemModule})
-    service.serve(proc () =
-      let projectFile = mainCommandArg()
-      discard CompileModule(projectFile, {sfMainModule})
-    )
-  else: rawMessage(errInvalidCommandX, command)
+    gGlobalOptions.incl(optCaasEnabled)
+    msgs.gErrorMax = high(int)  # do not stop after first error     
+    serve(MainCommand)
+  else:
+    rawMessage(errInvalidCommandX, command)
+  
+  if msgs.gErrorCounter == 0 and gCmd notin {cmdInterpret, cmdRun}:
+    rawMessage(hintSuccessX, [$gLinesCompiled,
+               formatFloat(epochTime() - gLastCmdTime, ffDecimal, 3),
+               formatSize(getTotalMem())])
+
+  when PrintRopeCacheStats:
+    echo "rope cache stats: "
+    echo "  tries : ", gCacheTries
+    echo "  misses: ", gCacheMisses
+    echo "  int tries: ", gCacheIntTries
+    echo "  efficiency: ", formatFloat(1-(gCacheMisses.float/gCacheTries.float), ffDecimal, 3)
+
+  when SimiluateCaasMemReset:
+    resetMemory()
 
diff --git a/compiler/msgs.nim b/compiler/msgs.nim
index 9858b1b9e..0f2affc36 100755
--- a/compiler/msgs.nim
+++ b/compiler/msgs.nim
@@ -8,7 +8,7 @@
 #
 
 import
-  options, strutils, os, tables, sockets
+  options, strutils, os, tables, sockets, ropes, platform
 
 type 
   TMsgKind* = enum 
@@ -403,6 +403,14 @@ type
   TFileInfo*{.final.} = object 
     fullPath*: string          # This is a canonical full filesystem path
     projPath*: string          # This is relative to the project's root
+    
+    quotedName*: PRope         # cached quoted short name for codegen
+                               # purpoes
+    
+    lines*: seq[PRope]         # the source code of the module
+                               #   used for better error messages and
+                               #   embedding the original source in the
+                               #   generated code
 
   TLineInfo*{.final.} = object # This is designed to be as small as possible,
                                # because it is used
@@ -416,15 +424,48 @@ type
   ERecoverableError* = object of EInvalidValue
   ESuggestDone* = object of EBase
 
+const
+  InvalidFileIDX* = int32(-1)
+
 var
   filenameToIndexTbl = initTable[string, int32]()
-  fileInfos: seq[TFileInfo] = @[]
+  fileInfos*: seq[TFileInfo] = @[]
+  SystemFileIdx*: int32
+
+proc toCChar*(c: Char): string = 
+  case c
+  of '\0'..'\x1F', '\x80'..'\xFF': result = '\\' & toOctal(c)
+  of '\'', '\"', '\\': result = '\\' & c
+  else: result = $(c)
+
+proc makeCString*(s: string): PRope =
+  # BUGFIX: We have to split long strings into many ropes. Otherwise
+  # this could trigger an InternalError(). See the ropes module for
+  # further information.
+  const 
+    MaxLineLength = 64
+  result = nil
+  var res = "\""
+  for i in countup(0, len(s) - 1):
+    if (i + 1) mod MaxLineLength == 0:
+      add(res, '\"')
+      add(res, tnl)
+      app(result, toRope(res)) # reset:
+      setlen(res, 1)
+      res[0] = '\"'
+    add(res, toCChar(s[i]))
+  add(res, '\"')
+  app(result, toRope(res))
+
 
 proc newFileInfo(fullPath, projPath: string): TFileInfo =
   result.fullPath = fullPath
   #shallow(result.fullPath)
   result.projPath = projPath
   #shallow(result.projPath)
+  result.quotedName = projPath.extractFilename.makeCString
+  if optEmbedOrigSrc in gGlobalOptions or true:
+    result.lines = @[]
 
 proc fileInfoIdx*(filename: string): int32 =
   var
@@ -444,7 +485,8 @@ proc fileInfoIdx*(filename: string): int32 =
     result = filenameToIndexTbl[canon]
   else:
     result = fileInfos.len.int32
-    fileInfos.add(newFileInfo(canon, if pseudoPath: "" else: canon.shortenDir))
+    fileInfos.add(newFileInfo(canon, if pseudoPath: filename
+                                     else: canon.shortenDir))
     filenameToIndexTbl[canon] = result
 
 proc newLineInfo*(fileInfoIdx: int32, line, col: int): TLineInfo =
@@ -520,19 +562,21 @@ proc getInfoContext*(index: int): TLineInfo =
   if i >=% L: result = UnknownLineInfo()
   else: result = msgContext[i]
 
-proc ToFilename*(info: TLineInfo): string =
-  if info.fileIndex < 0: result = "???"
-  else: result = fileInfos[info.fileIndex].projPath
-
-proc ToFilename*(fileIdx: int32): string =
+proc toFilename*(fileIdx: int32): string =
   if fileIdx < 0: result = "???"
   else: result = fileInfos[fileIdx].projPath
 
-proc toFullPath*(info: TLineInfo): string =
-  if info.fileIndex < 0: result = "???"
-  else: result = fileInfos[info.fileIndex].fullPath
+proc toFullPath*(fileIdx: int32): string =
+  if fileIdx < 0: result = "???"
+  else: result = fileInfos[fileIdx].fullPath
+
+template toFilename*(info: TLineInfo): string =
+  info.fileIndex.toFilename
 
-proc ToMsgFilename*(info: TLineInfo): string =
+template toFullPath*(info: TLineInfo): string =
+  info.fileIndex.toFullPath
+
+proc toMsgFilename*(info: TLineInfo): string =
   if info.fileIndex < 0: result = "???"
   else:
     if gListFullPaths:
@@ -540,7 +584,7 @@ proc ToMsgFilename*(info: TLineInfo): string =
     else:
       result = fileInfos[info.fileIndex].projPath
 
-proc ToLinenumber*(info: TLineInfo): int {.inline.} = 
+proc toLinenumber*(info: TLineInfo): int {.inline.} = 
   result = info.line
 
 proc toColumn*(info: TLineInfo): int {.inline.} = 
@@ -721,3 +765,28 @@ template AssertNotNil*(e: expr): expr =
 
 template InternalAssert*(e: bool): stmt =
   if not e: InternalError($InstantiationInfo())
+
+proc addSourceLine*(fileIdx: int32, line: string) =
+  fileInfos[fileIdx].lines.add line.toRope
+
+proc sourceLine*(i: TLineInfo): PRope =
+  if i.fileIndex < 0: return nil
+  InternalAssert i.fileIndex < fileInfos.len and
+                 i.line <= fileInfos[i.fileIndex].lines.len
+
+  result = fileInfos[i.fileIndex].lines[i.line-1]
+
+proc quotedFilename*(i: TLineInfo): PRope =
+  InternalAssert i.fileIndex >= 0
+  result = fileInfos[i.fileIndex].quotedName
+
+ropes.ErrorHandler = proc (err: TRopesError, msg: string, useWarning: bool) =
+  case err
+  of rInvalidFormatStr:
+    internalError("ropes: invalid format string: " & msg)
+  of rTokenTooLong:
+    internalError("ropes: token too long: " & msg)
+  of rCannotOpenFile:
+    rawMessage(if useWarning: warnCannotOpenFile else: errCannotOpenFile,
+               msg)
+ 
diff --git a/compiler/nimconf.nim b/compiler/nimconf.nim
index f4f6d0a42..0f0b76827 100755
--- a/compiler/nimconf.nim
+++ b/compiler/nimconf.nim
@@ -235,15 +235,16 @@ proc LoadConfigs*(cfg: string) =
   if optSkipParentConfigFiles notin gGlobalOptions:
     for dir in parentDirs(pd, fromRoot=true, inclusive=false):
       readConfigFile(dir / cfg)
-    
-  if optSkipProjConfigFile notin gGlobalOptions and gProjectName.len != 0:
+  
+  if optSkipProjConfigFile notin gGlobalOptions:
     readConfigFile(pd / cfg)
     
-    var conffile = changeFileExt(gProjectFull, "cfg")
-    if conffile != pd / cfg and existsFile(conffile):
-      readConfigFile(conffile)
-      rawMessage(warnConfigDeprecated, conffile)
-    
-    # new project wide config file:
-    readConfigFile(changeFileExt(gProjectFull, "nimrod.cfg"))
+    if gProjectName.len != 0:
+      var conffile = changeFileExt(gProjectFull, "cfg")
+      if conffile != pd / cfg and existsFile(conffile):
+        readConfigFile(conffile)
+        rawMessage(warnConfigDeprecated, conffile)
+      
+      # new project wide config file:
+      readConfigFile(changeFileExt(gProjectFull, "nimrod.cfg"))
  
diff --git a/compiler/nimrod.cfg b/compiler/nimrod.cfg
index 42a5ed5f5..aa49729b9 100755
--- a/compiler/nimrod.cfg
+++ b/compiler/nimrod.cfg
@@ -1,9 +1,12 @@
 # Special configuration file for the Nimrod project
 
---hint[XDeclaredButNotUsed]=off
-path="llvm"
-path="$projectPath/.."
+mainModule:"nimrod.nim"
 
-path="$nimrod/packages/docutils"
+hint[XDeclaredButNotUsed]:off
+path:"llvm"
+path:"$projectPath/.."
+
+path:"$nimrod/packages/docutils"
+
+define:booting
 
---define:booting
diff --git a/compiler/nimrod.nim b/compiler/nimrod.nim
index 24b8518d7..6c999128c 100755
--- a/compiler/nimrod.nim
+++ b/compiler/nimrod.nim
@@ -14,7 +14,7 @@ when defined(gcc) and defined(windows):
     {.link: "icons/nimrod_icon.o".}
 
 import 
-  times, commands, lexer, condsyms, options, msgs, nversion, nimconf, ropes, 
+  commands, lexer, condsyms, options, msgs, nversion, nimconf, ropes, 
   extccomp, strutils, os, platform, main, parseopt, service
 
 when hasTinyCBackend:
@@ -32,7 +32,6 @@ proc prependCurDir(f: string): string =
     result = f
 
 proc HandleCmdLine() =
-  var start = epochTime()
   if paramCount() == 0:
     writeCommandLineUsage()
   else:
@@ -60,10 +59,6 @@ proc HandleCmdLine() =
       when hasTinyCBackend:
         if gCmd == cmdRun:
           tccgen.run()
-      if gCmd notin {cmdInterpret, cmdRun}:
-        rawMessage(hintSuccessX, [$gLinesCompiled,
-                   formatFloat(epochTime() - start, ffDecimal, 3),
-                   formatSize(getTotalMem())])
       if optRun in gGlobalOptions:
         if gCmd == cmdCompileToEcmaScript:
           var ex = quoteIfContainsWhite(
@@ -78,6 +73,7 @@ proc HandleCmdLine() =
 
 when defined(GC_setMaxPause):
   GC_setMaxPause 2_000
+GC_disableMarkAndSweep()
 condsyms.InitDefines()
 HandleCmdLine()
 quit(options.gExitcode)
diff --git a/compiler/options.nim b/compiler/options.nim
index 0d783cee2..7350f81b7 100755
--- a/compiler/options.nim
+++ b/compiler/options.nim
@@ -31,6 +31,7 @@ type                          # please make sure we have under 32 options
     optImplicitStatic,        # optimization: implicit at compile time
                               # evaluation
     optPatterns               # en/disable pattern matching
+
   TOptions* = set[TOption]
   TGlobalOption* = enum       # **keep binary compatible**
     gloptNone, optForceFullMake, optBoehmGC, optRefcGC, optDeadCodeElim, 
@@ -44,6 +45,7 @@ type                          # please make sure we have under 32 options
     optGenMapping,            # generate a mapping file
     optRun,                   # run the compiled project
     optSymbolFiles,           # use symbol files for speeding up compilation
+    optCaasEnabled            # compiler-as-a-service is running
     optSkipConfigFile,        # skip the general config file
     optSkipProjConfigFile,    # skip the project's config file
     optSkipUserConfigFile,    # skip the users's config file
@@ -59,8 +61,9 @@ type                          # please make sure we have under 32 options
     optTaintMode,             # taint mode turned on
     optTlsEmulation,          # thread var emulation turned on
     optGenIndex               # generate index file for documentation;
+    optEmbedOrigSrc           # embed the original source in the generated code
                               # also: generate header file
-
+   
   TGlobalOptions* = set[TGlobalOption]
   TCommands* = enum           # Nimrod's commands
                               # **keep binary compatible**
@@ -95,8 +98,9 @@ var
   gCmd*: TCommands = cmdNone  # the command
   gVerbosity*: int            # how verbose the compiler is
   gNumberOfProcessors*: int   # number of processors
-
   gWholeProject*: bool # for 'doc2': output any dependency
+  gEvalExpr*: string          # expression for idetools --eval
+  gLastCmdTime*: float        # when caas is enabled, we measure each command
   gListFullPaths*: bool
   
 proc importantComments*(): bool {.inline.} = gCmd in {cmdDoc, cmdIdeTools}
@@ -119,6 +123,7 @@ var
   gProjectName* = "" # holds a name like 'nimrod'
   gProjectPath* = "" # holds a path like /home/alice/projects/nimrod/compiler/
   gProjectFull* = "" # projectPath/projectName
+  gProjectMainIdx*: int32 # the canonical path id of the main module
   nimcacheDir* = ""
   command* = "" # the main command (e.g. cc, check, scan, etc)
   commandArgs*: seq[string] = @[] # any arguments after the main command
@@ -260,7 +265,8 @@ proc binaryStrSearch*(x: openarray[string], y: string): int =
   result = - 1
 
 # Can we keep this? I'm using it all the time
-template nimdbg*: expr = c.filename.endsWith"hallo.nim"
-template cnimdbg*: expr = p.module.filename.endsWith"hallo.nim"
-template enimdbg*: expr = c.module.name.s == "hallo"
-template pnimdbg*: expr = p.lex.fileIdx.ToFilename.endsWith"hallo.nim"
+template nimdbg*: expr = c.module.fileIdx == gProjectMainIdx
+template cnimdbg*: expr = p.module.module.fileIdx == gProjectMainIdx
+template pnimdbg*: expr = p.lex.fileIdx == gProjectMainIdx
+template lnimdbg*: expr = L.fileIdx == gProjectMainIdx
+
diff --git a/compiler/parser.nim b/compiler/parser.nim
index bfc6b3b65..a2c7f71d2 100755
--- a/compiler/parser.nim
+++ b/compiler/parser.nim
@@ -64,11 +64,14 @@ proc parseCase(p: var TParser): PNode
 proc getTok(p: var TParser) = 
   rawGetTok(p.lex, p.tok)
 
-proc OpenParser(p: var TParser, filename: string, inputStream: PLLStream) = 
+proc OpenParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream) =
   initToken(p.tok)
-  OpenLexer(p.lex, filename, inputstream)
+  OpenLexer(p.lex, fileIdx, inputstream)
   getTok(p)                   # read the first token
-  
+
+proc OpenParser*(p: var TParser, filename: string, inputStream: PLLStream) =
+  openParser(p, filename.fileInfoIdx, inputStream)
+
 proc CloseParser(p: var TParser) = 
   CloseLexer(p.lex)
 
diff --git a/compiler/passaux.nim b/compiler/passaux.nim
index 1ee6023c8..4a85c994c 100755
--- a/compiler/passaux.nim
+++ b/compiler/passaux.nim
@@ -12,7 +12,7 @@
 import 
   strutils, ast, astalgo, passes, msgs, options, idgen
 
-proc verboseOpen(s: PSym, filename: string): PPassContext = 
+proc verboseOpen(s: PSym): PPassContext =
   #MessageOut('compiling ' + s.name.s);
   result = nil                # we don't need a context
   if gVerbosity > 0: rawMessage(hintProcessing, s.name.s)
@@ -26,13 +26,10 @@ proc verboseProcess(context: PPassContext, n: PNode): PNode =
     incl(msgs.gNotes, hintProcessing)
     Message(n.info, hintProcessing, $idgen.gBackendId)
   
-proc verbosePass*(): TPass = 
-  initPass(result)
-  result.open = verboseOpen
-  result.process = verboseProcess
+const verbosePass* = makePass(open = verboseOpen, process = verboseProcess)
 
 proc cleanUp(c: PPassContext, n: PNode): PNode = 
-  result = n                  
+  result = n
   # we cannot clean up if dead code elimination is activated
   if optDeadCodeElim in gGlobalOptions or n == nil: return 
   case n.kind
@@ -46,7 +43,5 @@ proc cleanUp(c: PPassContext, n: PNode): PNode =
   else: 
     nil
 
-proc cleanupPass*(): TPass = 
-  initPass(result)
-  result.process = cleanUp
-  result.close = cleanUp
+const cleanupPass* = makePass(process = cleanUp, close = cleanUp)
+
diff --git a/compiler/passes.nim b/compiler/passes.nim
index 9b4a1b365..8d228fe9a 100755
--- a/compiler/passes.nim
+++ b/compiler/passes.nim
@@ -18,32 +18,44 @@ import
 type  
   TPassContext* = object of TObject # the pass's context
     fromCache*: bool  # true if created by "openCached"
-    
+   
   PPassContext* = ref TPassContext
-  TPass* = tuple[
-    open: proc (module: PSym, filename: string): PPassContext {.nimcall.},
-    openCached: proc (module: PSym, filename: string,
-                     rd: PRodReader): PPassContext {.nimcall.},
-    close: proc (p: PPassContext, n: PNode): PNode {.nimcall.},
-    process: proc (p: PPassContext, topLevelStmt: PNode): PNode {.nimcall.}]
-    
+
+  TPassOpen* = proc (module: PSym): PPassContext {.nimcall.}
+  TPassOpenCached* =
+    proc (module: PSym, rd: PRodReader): PPassContext {.nimcall.}
+  TPassClose* = proc (p: PPassContext, n: PNode): PNode {.nimcall.}
+  TPassProcess* = proc (p: PPassContext, topLevelStmt: PNode): PNode {.nimcall.}
+
+  TPass* = tuple[open: TPassOpen, openCached: TPassOpenCached,
+                 process: TPassProcess, close: TPassClose]
+
+  TPassData* = tuple[input: PNode, closeOutput: Pnode]
+  TPasses* = openarray[TPass]
+
 # a pass is a tuple of procedure vars ``TPass.close`` may produce additional 
 # nodes. These are passed to the other close procedures. 
 # This mechanism used to be used for the instantiation of generics.
 
-proc registerPass*(p: TPass)
-proc initPass*(p: var TPass)
+proc makePass*(open: TPassOpen = nil,
+               openCached: TPassOpenCached = nil,
+               process: TPassProcess = nil,
+               close: TPassClose = nil): TPass =
+  result.open = open
+  result.openCached = openCached
+  result.close = close
+  result.process = process
+
   # This implements a memory preserving scheme: Top level statements are
   # processed in a pipeline. The compiler never looks at a whole module
   # any longer. However, this is simple to change, as new passes may perform
   # whole program optimizations. For now, we avoid it to save a lot of memory.
-proc processModule*(module: PSym, filename: string, stream: PLLStream, 
-                    rd: PRodReader)
+proc processModule*(module: PSym, stream: PLLStream, rd: PRodReader)
 
 # the semantic checker needs these:
 var 
-  gImportModule*: proc (filename: string): PSym {.nimcall.}
-  gIncludeFile*: proc (filename: string): PNode {.nimcall.}
+  gImportModule*: proc (m: PSym, fileIdx: int32): PSym {.nimcall.}
+  gIncludeFile*: proc (m: PSym, fileIdx: int32): PNode {.nimcall.}
 
 # implementation
 
@@ -74,23 +86,37 @@ type
 
 var 
   gPasses: array[0..maxPasses - 1, TPass]
-  gPassesLen: int
+  gPassesLen*: int
+
+proc clearPasses* =
+  gPassesLen = 0
 
-proc registerPass(p: TPass) = 
+proc registerPass*(p: TPass) = 
   gPasses[gPassesLen] = p
   inc(gPassesLen)
 
-proc openPasses(a: var TPassContextArray, module: PSym, filename: string) = 
+proc carryPass*(p: TPass, module: PSym, m: TPassData): TPassData =
+  var c = p.open(module)
+  result.input = p.process(c, m.input)
+  result.closeOutput = if p.close != nil: p.close(c, m.closeOutput)
+                       else: m.closeOutput
+
+proc carryPasses*(nodes: PNode, module: PSym, passes: TPasses) =
+  var passdata: TPassData
+  passdata.input = nodes
+  for pass in passes:
+    passdata = carryPass(pass, module, passdata)
+
+proc openPasses(a: var TPassContextArray, module: PSym) =
   for i in countup(0, gPassesLen - 1): 
     if not isNil(gPasses[i].open): 
-      a[i] = gPasses[i].open(module, filename)
+      a[i] = gPasses[i].open(module)
     else: a[i] = nil
   
-proc openPassesCached(a: var TPassContextArray, module: PSym, filename: string, 
-                      rd: PRodReader) = 
+proc openPassesCached(a: var TPassContextArray, module: PSym, rd: PRodReader) =
   for i in countup(0, gPassesLen - 1): 
     if not isNil(gPasses[i].openCached): 
-      a[i] = gPasses[i].openCached(module, filename, rd)
+      a[i] = gPasses[i].openCached(module, rd)
       if a[i] != nil: 
         a[i].fromCache = true
     else:
@@ -133,23 +159,24 @@ proc processImplicits(implicits: seq[string], nodeKind: TNodeKind,
     importStmt.addSon str
     if not processTopLevelStmt(importStmt, a): break
   
-proc processModule(module: PSym, filename: string, stream: PLLStream, 
-                   rd: PRodReader) = 
+proc processModule(module: PSym, stream: PLLStream, rd: PRodReader) =
   var 
     p: TParsers
     a: TPassContextArray
     s: PLLStream
+    fileIdx = module.fileIdx
   if rd == nil: 
-    openPasses(a, module, filename)
+    openPasses(a, module)
     if stream == nil: 
+      let filename = fileIdx.toFullPath
       s = LLStreamOpen(filename, fmRead)
       if s == nil: 
         rawMessage(errCannotOpenFile, filename)
-        return 
+        return
     else: 
       s = stream
     while true: 
-      openParsers(p, filename, s)
+      openParsers(p, fileIdx, s)
 
       if sfSystemModule notin module.flags:
         # XXX what about caching? no processing then? what if I change the 
@@ -170,13 +197,8 @@ proc processModule(module: PSym, filename: string, stream: PLLStream,
     # id synchronization point for more consistent code generation:
     IDsynchronizationPoint(1000)
   else:
-    openPassesCached(a, module, filename, rd)
+    openPassesCached(a, module, rd)
     var n = loadInitSection(rd)
     for i in countup(0, sonsLen(n) - 1): processTopLevelStmtCached(n.sons[i], a)
     closePassesCached(a)
 
-proc initPass(p: var TPass) = 
-  p.open = nil
-  p.openCached = nil
-  p.close = nil
-  p.process = nil
diff --git a/compiler/rodread.nim b/compiler/rodread.nim
index 722887299..5dccee9a7 100755
--- a/compiler/rodread.nim
+++ b/compiler/rodread.nim
@@ -126,7 +126,7 @@ type
     s: cstring               # mmap'ed file contents
     options: TOptions
     reason: TReasonForRecompile
-    modDeps: TStringSeq
+    modDeps: seq[int32]
     files: TStringSeq
     dataIdx: int             # offset of start of data section
     convertersIdx: int       # offset of start of converters section
@@ -145,7 +145,7 @@ type
 
 var rodCompilerprocs*: TStrTable
 
-proc handleSymbolFile*(module: PSym, filename: string): PRodReader
+proc handleSymbolFile*(module: PSym): PRodReader
 # global because this is needed by magicsys
 proc loadInitSection*(r: PRodReader): PNode
 
@@ -602,7 +602,7 @@ proc processRodFile(r: PRodReader, crc: TCrc32) =
     of "DEPS":
       inc(r.pos)              # skip ':'
       while r.s[r.pos] > '\x0A': 
-        r.modDeps.add r.files[decodeVInt(r.s, r.pos)]
+        r.modDeps.add int32(decodeVInt(r.s, r.pos))
         if r.s[r.pos] == ' ': inc(r.pos)
     of "INTERF": 
       r.interfIdx = r.pos + 2
@@ -699,10 +699,11 @@ type
     reason*: TReasonForRecompile
     rd*: PRodReader
     crc*: TCrc32
+    crcDone*: bool
 
   TFileModuleMap = seq[TFileModuleRec]
 
-var gMods: TFileModuleMap = @[]
+var gMods*: TFileModuleMap = @[]
 
 proc decodeSymSafePos(rd: PRodReader, offset: int, info: TLineInfo): PSym = 
   # all compiled modules
@@ -720,6 +721,10 @@ proc findSomeWhere(id: int) =
       if d != invalidKey:
         echo "found id ", id, " in ", gMods[i].filename
 
+proc getReader(moduleId: int): PRodReader =
+  InternalAssert moduleId >= 0 and moduleId < gMods.len
+  result = gMods[moduleId].rd
+
 proc rrGetSym(r: PRodReader, id: int, info: TLineInfo): PSym = 
   result = PSym(IdTableGet(r.syms, id))
   if result == nil: 
@@ -732,25 +737,15 @@ proc rrGetSym(r: PRodReader, id: int, info: TLineInfo): PSym =
         var x = ""
         encodeVInt(id, x)
         InternalError(info, "missing from both indexes: +" & x)
-      # find the reader with the correct moduleID:
-      for i in countup(0, high(gMods)): 
-        var rd = gMods[i].rd
-        if rd != nil: 
-          if rd.moduleID == moduleID: 
-            d = IITableGet(rd.index.tab, id)
-            if d != invalidKey: 
-              result = decodeSymSafePos(rd, d, info)
-              break 
-            else:
-              var x = ""
-              encodeVInt(id, x)
-              when false: findSomeWhere(id)
-              InternalError(info, "rrGetSym: no reader found: +" & x)
-          else:
-            #if IiTableGet(rd.index.tab, id) <> invalidKey then
-            # XXX expensive check!
-            #InternalError(info,
-            #'id found in other module: +' + ropeToStr(encodeInt(id)))
+      var rd = getReader(moduleID)
+      d = IITableGet(rd.index.tab, id)
+      if d != invalidKey: 
+        result = decodeSymSafePos(rd, d, info)
+      else:
+        var x = ""
+        encodeVInt(id, x)
+        when false: findSomeWhere(id)
+        InternalError(info, "rrGetSym: no reader found: +" & x)
     else: 
       # own symbol:
       result = decodeSymSafePos(r, d, info)
@@ -789,27 +784,32 @@ proc loadMethods(r: PRodReader) =
     var d = decodeVInt(r.s, r.pos)
     r.methods.add(rrGetSym(r, d, UnknownLineInfo()))
     if r.s[r.pos] == ' ': inc(r.pos)
+
+proc GetCRC*(fileIdx: int32): TCrc32 =
+  InternalAssert fileIdx >= 0 and fileIdx < gMods.len
+
+  if gMods[fileIdx].crcDone:
+    return gMods[fileIdx].crc
   
-proc getModuleIdx(filename: string): int = 
-  for i in countup(0, high(gMods)): 
-    if gMods[i].filename == filename: return i
-  result = len(gMods)
-  setlen(gMods, result + 1)
-
-proc checkDep(filename: string): TReasonForRecompile =
-  assert(not isNil(filename)) 
-  var idx = getModuleIdx(filename)
-  if gMods[idx].reason != rrEmpty: 
+  result = crcFromFile(fileIdx.toFilename)
+  gMods[fileIdx].crc = result
+
+template growCache*(cache, pos) =
+  if cache.len <= pos: cache.setLen(pos+1)
+
+proc checkDep(fileIdx: int32): TReasonForRecompile =
+  assert fileIdx != InvalidFileIDX
+  growCache gMods, fileIdx
+  if gMods[fileIdx].reason != rrEmpty: 
     # reason has already been computed for this module:
-    return gMods[idx].reason
-  var crc: TCrc32 = crcFromFile(filename)
-  gMods[idx].reason = rrNone  # we need to set it here to avoid cycles
-  gMods[idx].filename = filename
-  gMods[idx].crc = crc
+    return gMods[fileIdx].reason
+  let filename = fileIdx.toFilename
+  var crc = GetCRC(fileIdx)
+  gMods[fileIdx].reason = rrNone  # we need to set it here to avoid cycles
   result = rrNone
   var r: PRodReader = nil
   var rodfile = toGeneratedFile(filename, RodExt)
-  r = newRodReader(rodfile, crc, idx)
+  r = newRodReader(rodfile, crc, fileIdx)
   if r == nil: 
     result = (if ExistsFile(rodfile): rrRodInvalid else: rrRodDoesNotExist)
   else:
@@ -819,7 +819,7 @@ proc checkDep(filename: string): TReasonForRecompile =
       # NOTE: we need to process the entire module graph so that no ID will
       # be used twice! However, compilation speed does not suffer much from
       # this, since results are cached.
-      var res = checkDep(options.libpath / addFileExt("system", nimExt))
+      var res = checkDep(SystemFileIdx)
       if res != rrNone: result = rrModDeps
       for i in countup(0, high(r.modDeps)): 
         res = checkDep(r.modDeps[i])
@@ -832,19 +832,19 @@ proc checkDep(filename: string): TReasonForRecompile =
     # recompilation is necessary:
     if r != nil: memfiles.close(r.memFile)
     r = nil
-  gMods[idx].rd = r
-  gMods[idx].reason = result  # now we know better
+  gMods[fileIdx].rd = r
+  gMods[fileIdx].reason = result  # now we know better
   
-proc handleSymbolFile(module: PSym, filename: string): PRodReader = 
+proc handleSymbolFile(module: PSym): PRodReader = 
+  let fileIdx = module.fileIdx
   if optSymbolFiles notin gGlobalOptions: 
     module.id = getID()
     return nil
   idgen.loadMaxIds(options.gProjectPath / options.gProjectName)
 
-  discard checkDep(filename)
-  var idx = getModuleIdx(filename)
-  if gMods[idx].reason == rrEmpty: InternalError("handleSymbolFile")
-  result = gMods[idx].rd
+  discard checkDep(fileIdx)
+  if gMods[fileIdx].reason == rrEmpty: InternalError("handleSymbolFile")
+  result = gMods[fileIdx].rd
   if result != nil: 
     module.id = result.moduleID
     IdTablePut(result.syms, module, module)
@@ -854,14 +854,6 @@ proc handleSymbolFile(module: PSym, filename: string): PRodReader =
     loadMethods(result)
   else:
     module.id = getID()
-  
-proc GetCRC*(filename: string): TCrc32 = 
-  for i in countup(0, high(gMods)): 
-    if gMods[i].filename == filename: return gMods[i].crc
-  
-  result = crcFromFile(filename)
-  #var idx = getModuleIdx(filename)
-  #result = gMods[idx].crc
 
 proc rawLoadStub(s: PSym) =
   if s.kind != skStub: InternalError("loadStub")
diff --git a/compiler/rodwrite.nim b/compiler/rodwrite.nim
index 0a361d4dd..c0a0cc4eb 100755
--- a/compiler/rodwrite.nim
+++ b/compiler/rodwrite.nim
@@ -15,7 +15,6 @@ import
   intsets, os, options, strutils, nversion, ast, astalgo, msgs, platform,
   condsyms, ropes, idents, crc, rodread, passes, importer, idgen, rodutils
 
-proc rodwritePass*(): TPass
 # implementation
 
 type 
@@ -32,14 +31,13 @@ type
     converters, methods: string
     init: string
     data: string
-    filename: string
     sstack: TSymSeq          # a stack of symbols to process
     tstack: TTypeSeq         # a stack of types to process
     files: TStringSeq
 
   PRodWriter = ref TRodWriter
 
-proc newRodWriter(modfilename: string, crc: TCrc32, module: PSym): PRodWriter
+proc newRodWriter(crc: TCrc32, module: PSym): PRodWriter
 proc addModDep(w: PRodWriter, dep: string)
 proc addInclDep(w: PRodWriter, dep: string)
 proc addInterfaceSym(w: PRodWriter, s: PSym)
@@ -64,7 +62,10 @@ proc fileIdx(w: PRodWriter, filename: string): int =
   setlen(w.files, result + 1)
   w.files[result] = filename
 
-proc newRodWriter(modfilename: string, crc: TCrc32, module: PSym): PRodWriter = 
+template filename*(w: PRodWriter): string =
+  w.module.filename
+
+proc newRodWriter(crc: TCrc32, module: PSym): PRodWriter = 
   new(result)
   result.sstack = @[]
   result.tstack = @[]
@@ -72,7 +73,6 @@ proc newRodWriter(modfilename: string, crc: TCrc32, module: PSym): PRodWriter =
   InitIITable(result.imports.tab)
   result.index.r = ""
   result.imports.r = ""
-  result.filename = modfilename
   result.crc = crc
   result.module = module
   result.defines = getDefines()
@@ -571,9 +571,9 @@ proc process(c: PPassContext, n: PNode): PNode =
   else: 
     nil
 
-proc myOpen(module: PSym, filename: string): PPassContext =
+proc myOpen(module: PSym): PPassContext =
   if module.id < 0: InternalError("rodwrite: module ID not set")
-  var w = newRodWriter(filename, rodread.GetCRC(module.info.toFullPath), module)
+  var w = newRodWriter(module.fileIdx.GetCRC, module)
   rawAddInterfaceSym(w, module)
   result = w
 
@@ -583,10 +583,5 @@ proc myClose(c: PPassContext, n: PNode): PNode =
   writeRod(w)
   idgen.saveMaxIds(options.gProjectPath / options.gProjectName)
 
-proc rodwritePass(): TPass = 
-  initPass(result)
-  if optSymbolFiles in gGlobalOptions: 
-    result.open = myOpen
-    result.close = myClose
-    result.process = process
+const rodwritePass* = makePass(open = myOpen, close = myClose, process = process)
 
diff --git a/compiler/ropes.nim b/compiler/ropes.nim
index 50c89e4d9..707c29123 100755
--- a/compiler/ropes.nim
+++ b/compiler/ropes.nim
@@ -56,7 +56,7 @@
 #  To cache them they are inserted in a `cache` array.
 
 import 
-  msgs, strutils, platform, hashes, crc, options
+  strutils, platform, hashes, crc, options
 
 type
   TFormatStr* = string # later we may change it to CString for better
@@ -72,6 +72,11 @@ type
   
   TRopeSeq* = seq[PRope]
 
+  TRopesError* = enum
+    rCannotOpenFile
+    rInvalidFormatStr
+    rTokenTooLong
+
 proc con*(a, b: PRope): PRope
 proc con*(a: PRope, b: string): PRope
 proc con*(a: string, b: PRope): PRope
@@ -92,11 +97,14 @@ proc RopeInvariant*(r: PRope): bool
   # exported for debugging
 # implementation
 
+var ErrorHandler*: proc(err: TRopesError, msg: string, useWarning = false)
+  # avoid dependency on msgs.nim
+  
 proc ropeLen(a: PRope): int = 
   if a == nil: result = 0
   else: result = a.length
   
-proc newRope(data: string = nil): PRope = 
+proc newRope*(data: string = nil): PRope = 
   new(result)
   if data != nil: 
     result.length = len(data)
@@ -114,6 +122,10 @@ proc freezeMutableRope*(r: PRope) {.inline.} =
 var 
   cache: array[0..2048*2 -1, PRope]
 
+proc resetRopeCache* =
+  for i in low(cache)..high(cache):
+    cache[i] = nil
+
 proc RopeInvariant(r: PRope): bool = 
   if r == nil: 
     result = true
@@ -127,10 +139,16 @@ proc RopeInvariant(r: PRope): bool =
                   #      if result then result := ropeInvariant(r.right);
                   #    end 
 
+var gCacheTries* = 0
+var gCacheMisses* = 0
+var gCacheIntTries* = 0
+
 proc insertInCache(s: string): PRope = 
+  inc gCacheTries
   var h = hash(s) and high(cache)
   result = cache[h]
   if isNil(result) or result.data != s:
+    inc gCacheMisses
     result = newRope(s)
     cache[h] = result
   
@@ -186,7 +204,13 @@ proc con(a: string, b: PRope): PRope = result = con(toRope(a), b)
 proc con(a: varargs[PRope]): PRope = 
   for i in countup(0, high(a)): result = con(result, a[i])
 
-proc toRope(i: BiggestInt): PRope = result = toRope($i)
+proc ropeConcat*(a: varargs[PRope]): PRope =
+  # not overloaded version of concat to speed-up `rfmt` a little bit
+  for i in countup(0, high(a)): result = con(result, a[i])
+
+proc toRope(i: BiggestInt): PRope =
+  inc gCacheIntTries
+  result = toRope($i)
 
 proc app(a: var PRope, b: PRope) = a = con(a, b)
 proc app(a: var PRope, b: string) = a = con(a, b)
@@ -209,8 +233,11 @@ proc WriteRope*(head: PRope, filename: string, useWarning = false) =
     if head != nil: WriteRope(f, head)
     close(f)
   else:
-    rawMessage(if useWarning: warnCannotOpenFile else: errCannotOpenFile,
-               filename)
+    ErrorHandler(rCannotOpenFile, filename, useWarning)
+
+var
+  rnl* = tnl.newRope
+  softRnl* = tnl.newRope
 
 proc ropef(frmt: TFormatStr, args: varargs[PRope]): PRope = 
   var i = 0
@@ -235,17 +262,18 @@ proc ropef(frmt: TFormatStr, args: varargs[PRope]): PRope =
           inc(i)
           if (i > length + 0 - 1) or not (frmt[i] in {'0'..'9'}): break 
         num = j
-        if j > high(args) + 1: 
-          internalError("ropes: invalid format string $" & $(j))
+        if j > high(args) + 1:
+          ErrorHandler(rInvalidFormatStr, $(j))
         else:
           app(result, args[j - 1])
       of 'n':
-        if optLineDir notin gOptions: app(result, tnl)
+        app(result, softRnl)
         inc i
       of 'N':
-        app(result, tnl)
+        app(result, rnl)
         inc(i)
-      else: InternalError("ropes: invalid format string $" & frmt[i])
+      else:
+        ErrorHandler(rInvalidFormatStr, $(frmt[i]))
     var start = i
     while i < length:
       if frmt[i] != '$': inc(i)
@@ -254,6 +282,14 @@ proc ropef(frmt: TFormatStr, args: varargs[PRope]): PRope =
       app(result, substr(frmt, start, i - 1))
   assert(RopeInvariant(result))
 
+{.push stack_trace: off, line_trace: off.}
+proc `~`*(r: expr[string]): PRope =
+  # this is the new optimized "to rope" operator
+  # the mnemonic is that `~` looks a bit like a rope :)
+  var r {.global.} = r.ropef
+  return r
+{.pop.}
+
 proc appf(c: var PRope, frmt: TFormatStr, args: varargs[PRope]) = 
   app(c, ropef(frmt, args))
 
@@ -262,8 +298,8 @@ const
 
 proc auxRopeEqualsFile(r: PRope, bin: var tfile, buf: Pointer): bool = 
   if r.data != nil:
-    if r.length > bufSize: 
-      internalError("ropes: token too long")
+    if r.length > bufSize:
+      ErrorHandler(rTokenTooLong, r.data)
       return
     var readBytes = readBuffer(bin, buf, r.length)
     result = readBytes == r.length and
diff --git a/compiler/sem.nim b/compiler/sem.nim
index 77e93a066..555f5e7b7 100755
--- a/compiler/sem.nim
+++ b/compiler/sem.nim
@@ -17,7 +17,6 @@ import
   semthreads, intsets, transf, evals, idgen, aliases, cgmeth, lambdalifting,
   evaltempl, patterns, parampatterns, sempass2
 
-proc semPass*(): TPass
 # implementation
 
 type 
@@ -161,7 +160,7 @@ proc semMacroExpr(c: PContext, n, nOrig: PNode, sym: PSym,
     GlobalError(n.info, errRecursiveDependencyX, sym.name.s)
 
   if c.evalContext == nil:
-    c.evalContext = newEvalContext(c.module, "", emStatic)
+    c.evalContext = newEvalContext(c.module, emStatic)
     c.evalContext.getType = proc (n: PNode): PNode =
       var e = tryExpr(c, n)
       if e == nil:
@@ -191,21 +190,21 @@ proc semConstBoolExpr(c: PContext, n: PNode): PNode =
 
 include semtypes, semtempl, semgnrc, semstmts, semexprs
 
-proc addCodeForGenerics(c: PContext, n: PNode) = 
-  for i in countup(c.generics.lastGenericIdx, Len(c.generics.generics) - 1):
-    var prc = c.generics.generics[i].instSym
-    if prc.kind in {skProc, skMethod, skConverter} and prc.magic == mNone: 
-      if prc.ast == nil or prc.ast.sons[bodyPos] == nil: 
+proc addCodeForGenerics(c: PContext, n: PNode) =
+  for i in countup(c.lastGenericIdx, c.generics.len - 1):
+    var prc = c.generics[i].inst.sym
+    if prc.kind in {skProc, skMethod, skConverter} and prc.magic == mNone:
+      if prc.ast == nil or prc.ast.sons[bodyPos] == nil:
         InternalError(prc.info, "no code for " & prc.name.s)
       else:
         addSon(n, prc.ast)
-  c.generics.lastGenericIdx = Len(c.generics.generics)
+  c.lastGenericIdx = c.generics.len
 
 proc semExprNoFlags(c: PContext, n: PNode): PNode {.procvar.} = 
   result = semExpr(c, n, {})
 
-proc myOpen(module: PSym, filename: string): PPassContext = 
-  var c = newContext(module, filename)
+proc myOpen(module: PSym): PPassContext =
+  var c = newContext(module)
   if c.p != nil: InternalError(module.info, "sem.myOpen")
   c.semConstExpr = semConstExpr
   c.semExpr = semExprNoFlags
@@ -225,15 +224,14 @@ proc myOpen(module: PSym, filename: string): PPassContext =
   openScope(c.tab)            # scope for the module's symbols  
   result = c
 
-proc myOpenCached(module: PSym, filename: string, 
-                  rd: PRodReader): PPassContext = 
-  result = myOpen(module, filename)
+proc myOpenCached(module: PSym, rd: PRodReader): PPassContext =
+  result = myOpen(module)
   for m in items(rd.methods): methodDef(m, true)
 
 proc SemStmtAndGenerateGenerics(c: PContext, n: PNode): PNode = 
   result = semStmt(c, n)
   # BUGFIX: process newly generated generics here, not at the end!
-  if c.generics.lastGenericIdx < Len(c.generics.generics):
+  if c.lastGenericIdx < c.generics.len:
     var a = newNodeI(nkStmtList, n.info)
     addCodeForGenerics(c, a)
     if sonsLen(a) > 0: 
@@ -290,9 +288,5 @@ proc myClose(context: PPassContext, n: PNode): PNode =
   popOwner()
   popProcCon(c)
 
-proc semPass(): TPass =
-  initPass(result)
-  result.open = myOpen
-  result.openCached = myOpenCached
-  result.close = myClose
-  result.process = myProcess
+const semPass* = makePass(myOpen, myOpenCached, myProcess, myClose)
+
diff --git a/compiler/semdata.nim b/compiler/semdata.nim
index 4ead9cf13..f5d5a9604 100755
--- a/compiler/semdata.nim
+++ b/compiler/semdata.nim
@@ -36,25 +36,14 @@ type
                               # in standalone ``except`` and ``finally``
     next*: PProcCon           # used for stacking procedure contexts
   
-  TInstantiatedSymbol* {.final.} = object
-    genericSym*, instSym*: PSym
-    concreteTypes*: seq[PType]
-  
-  # If we generate an instance of a generic, we'd like to re-use that
-  # instance if possible across module boundaries. However, this is not
-  # possible if the compilation cache is enabled. So we give up then and use
-  # the caching of generics only per module, not per project.
-  TGenericsCache* {.final.} = object
-    InstTypes*: TIdTable # map PType to PType
-    generics*: seq[TInstantiatedSymbol] # a list of the things to compile
-    lastGenericIdx*: int      # used for the generics stack
-  
-  PGenericsCache* = ref TGenericsCache
+  TInstantiationPair* = object
+    genericSym*: PSym
+    inst*: PInstantiation
+    
   PContext* = ref TContext
   TContext* = object of TPassContext # a context represents a module
     module*: PSym              # the module sym belonging to the context
     p*: PProcCon               # procedure context
-    generics*: PGenericsCache  # may point to a global or module-local structure
     friendModule*: PSym        # current friend module; may access private data;
                                # this is used so that generic instantiations
                                # can access private object fields
@@ -81,21 +70,23 @@ type
                               filter: TSymKinds): PNode {.nimcall.}
     semTypeNode*: proc(c: PContext, n: PNode, prev: PType): PType {.nimcall.}
     includedFiles*: TIntSet    # used to detect recursive include files
-    filename*: string          # the module's filename
     userPragmas*: TStrTable
     evalContext*: PEvalContext
     UnknownIdents*: TIntSet    # ids of all unknown identifiers to prevent
                                # naming it multiple times
+    generics*: seq[TInstantiationPair] # pending list of instantiated generics to compile
+    lastGenericIdx*: int      # used for the generics stack
+    
 
-var
-  gGenericsCache: PGenericsCache # save for modularity
+proc makeInstPair*(s: PSym, inst: PInstantiation): TInstantiationPair =
+  result.genericSym = s
+  result.inst = inst
 
-proc newGenericsCache*(): PGenericsCache =
-  new(result)
-  initIdTable(result.InstTypes)
-  result.generics = @[]
+proc filename*(c: PContext): string =
+  # the module's filename
+  return c.module.filename
 
-proc newContext*(module: PSym, nimfile: string): PContext
+proc newContext*(module: PSym): PContext
 
 proc lastOptionEntry*(c: PContext): POptionEntry
 proc newOptionEntry*(): POptionEntry
@@ -152,7 +143,7 @@ proc newOptionEntry(): POptionEntry =
   result.dynlib = nil
   result.notes = gNotes
 
-proc newContext(module: PSym, nimfile: string): PContext = 
+proc newContext(module: PSym): PContext =
   new(result)
   InitSymTab(result.tab)
   result.AmbiguousSymbols = initIntset()
@@ -164,18 +155,9 @@ proc newContext(module: PSym, nimfile: string): PContext =
   result.threadEntries = @[]
   result.converters = @[]
   result.patterns = @[]
-  result.filename = nimfile
   result.includedFiles = initIntSet()
   initStrTable(result.userPragmas)
-  if optSymbolFiles notin gGlobalOptions:
-    # re-usage of generic instantiations across module boundaries is
-    # very nice for code size:
-    if gGenericsCache == nil: gGenericsCache = newGenericsCache()
-    result.generics = gGenericsCache
-  else:
-    # we have to give up and use a per-module cache for generic instantiations:
-    result.generics = newGenericsCache()
-    assert gGenericsCache == nil
+  result.generics = @[]
   result.UnknownIdents = initIntSet()
 
 proc inclSym(sq: var TSymSeq, s: PSym) =
diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim
index 4b4cb5bdf..48fe5b4d7 100755
--- a/compiler/semexprs.nim
+++ b/compiler/semexprs.nim
@@ -1361,7 +1361,7 @@ proc tryExpr(c: PContext, n: PNode, flags: TExprFlags = {}): PNode =
   let oldInUnrolledContext = c.InUnrolledContext
   let oldInGenericInst = c.InGenericInst
   let oldProcCon = c.p
-  c.generics = newGenericsCache()
+  c.generics = @[]
   try:
     result = semExpr(c, n, flags)
     if msgs.gErrorCounter != oldErrorCount: result = nil
diff --git a/compiler/seminst.nim b/compiler/seminst.nim
index b54170435..95a394a09 100755
--- a/compiler/seminst.nim
+++ b/compiler/seminst.nim
@@ -11,7 +11,7 @@
 # included from sem.nim
 
 proc instantiateGenericParamList(c: PContext, n: PNode, pt: TIdTable,
-                                 entry: var TInstantiatedSymbol) = 
+                                 entry: var TInstantiation) = 
   if n.kind != nkGenericParams: 
     InternalError(n.info, "instantiateGenericParamList; no generic params")
   newSeq(entry.concreteTypes, n.len)
@@ -43,22 +43,18 @@ proc instantiateGenericParamList(c: PContext, n: PNode, pt: TIdTable,
     addDecl(c, s)
     entry.concreteTypes[i] = t
 
-proc sameInstantiation(a, b: TInstantiatedSymbol): bool =
-  if a.genericSym.id == b.genericSym.id and 
-      a.concreteTypes.len == b.concreteTypes.len:
-    for i in 0 .. < a.concreteTypes.len:
+proc sameInstantiation(a, b: TInstantiation): bool =
+  if a.concreteTypes.len == b.concreteTypes.len:
+    for i in 0..a.concreteTypes.high:
       if not compareTypes(a.concreteTypes[i], b.concreteTypes[i],
                           flags = {TypeDescExactMatch}): return
     result = true
 
-proc GenericCacheGet(c: PContext, entry: var TInstantiatedSymbol): PSym = 
-  for i in countup(0, Len(c.generics.generics) - 1):
-    if sameInstantiation(entry, c.generics.generics[i]):
-      result = c.generics.generics[i].instSym
-      # checking for the concrete parameter list is wrong and unnecessary!
-      #if equalParams(b.typ.n, instSym.typ.n) == paramsEqual:
-      #echo "found in cache: ", getProcHeader(result)
-      return
+proc GenericCacheGet(genericSym: Psym, entry: TInstantiation): PSym =
+  if genericSym.procInstCache != nil:
+    for inst in genericSym.procInstCache:
+      if sameInstantiation(entry, inst[]):
+        return inst.sym
 
 proc removeDefaultParamValues(n: PNode) = 
   # we remove default params, because they cannot be instantiated properly
@@ -106,9 +102,9 @@ proc instantiateBody(c: PContext, n: PNode, result: PSym) =
     popProcCon(c)
 
 proc fixupInstantiatedSymbols(c: PContext, s: PSym) =
-  for i in countup(0, Len(c.generics.generics) - 1):
-    if c.generics.generics[i].genericSym.id == s.id:
-      var oldPrc = c.generics.generics[i].instSym
+  for i in countup(0, c.generics.len - 1):
+    if c.generics[i].genericSym.id == s.id:
+      var oldPrc = c.generics[i].inst.sym
       pushInfoContext(oldPrc.info)
       openScope(c.tab)
       var n = oldPrc.ast
@@ -153,10 +149,9 @@ proc generateInstance(c: PContext, fn: PSym, pt: TIdTable,
     InternalError(n.info, "generateInstance")
   n.sons[namePos] = newSymNode(result)
   pushInfoContext(info)
-  var entry: TInstantiatedSymbol
-  entry.instSym = result
-  entry.genericSym = fn
-  instantiateGenericParamList(c, n.sons[genericParamsPos], pt, entry)
+  var entry = TInstantiation.new
+  entry.sym = result
+  instantiateGenericParamList(c, n.sons[genericParamsPos], pt, entry[])
   n.sons[genericParamsPos] = ast.emptyNode
   # semantic checking for the parameters:
   if n.sons[paramsPos].kind != nkEmpty:
@@ -167,10 +162,10 @@ proc generateInstance(c: PContext, fn: PSym, pt: TIdTable,
     rawAddSon(result.typ, nil)
   result.typ.callConv = fn.typ.callConv
   if result.kind == skIterator: result.typ.flags.incl(tfIterator)
-
-  var oldPrc = GenericCacheGet(c, entry)
+  var oldPrc = GenericCacheGet(fn, entry[])
   if oldPrc == nil:
-    c.generics.generics.add(entry)
+    fn.procInstCache.safeAdd(entry)
+    c.generics.add(makeInstPair(fn, entry))
     if n.sons[pragmasPos].kind != nkEmpty:
       pragma(c, result, n.sons[pragmasPos], allRoutinePragmas)
     if isNil(n.sons[bodyPos]):
diff --git a/compiler/semstmts.nim b/compiler/semstmts.nim
index e3956b918..c38e2f3ad 100755
--- a/compiler/semstmts.nim
+++ b/compiler/semstmts.nim
@@ -916,13 +916,12 @@ proc evalInclude(c: PContext, n: PNode): PNode =
   addSon(result, n)
   for i in countup(0, sonsLen(n) - 1): 
     var f = checkModuleName(n.sons[i])
-    if f.len > 0:
-      var fileIndex = f.fileInfoIdx
-      if ContainsOrIncl(c.includedFiles, fileIndex): 
-        LocalError(n.info, errRecursiveDependencyX, f.extractFilename)
+    if f != InvalidFileIDX:
+      if ContainsOrIncl(c.includedFiles, f): 
+        LocalError(n.info, errRecursiveDependencyX, f.toFilename)
       else:
-        addSon(result, semStmt(c, gIncludeFile(f)))
-        Excl(c.includedFiles, fileIndex)
+        addSon(result, semStmt(c, gIncludeFile(c.module, f)))
+        Excl(c.includedFiles, f)
   
 proc setLine(n: PNode, info: TLineInfo) =
   for i in 0 .. <safeLen(n): setLine(n.sons[i], info)
@@ -958,7 +957,7 @@ var
   destructorName = getIdent"destroy_"
   destructorParam = getIdent"this_"
   destructorPragma = newIdentNode(getIdent"destructor", UnknownLineInfo())
-  rangeDestructorProc: PSym
+  rangeDestructorProc*: PSym
 
 proc destroyField(c: PContext, field: PSym, holder: PNode): PNode =
   if instantiateDestructor(c, field.typ):
diff --git a/compiler/semtypinst.nim b/compiler/semtypinst.nim
index e12640945..26341525c 100755
--- a/compiler/semtypinst.nim
+++ b/compiler/semtypinst.nim
@@ -31,24 +31,35 @@ proc checkConstructedType*(info: TLineInfo, typ: PType) =
       if t.sons[0].kind != tyObject or tfFinal in t.sons[0].flags: 
         localError(info, errInheritanceOnlyWithNonFinalObjects)
 
-proc searchInstTypes(tab: TIdTable, key: PType): PType = 
-  # returns nil if we need to declare this type
-  result = PType(IdTableGet(tab, key))
-  if result == nil and tab.counter > 0: 
-    # we have to do a slow linear search because types may need
-    # to be compared by their structure:
-    for h in countup(0, high(tab.data)): 
-      var t = PType(tab.data[h].key)
-      if t != nil: 
-        if key.containerId == t.containerId: 
-          var match = true
-          for j in countup(0, sonsLen(t) - 1): 
-            # XXX sameType is not really correct for nested generics?
-            if not sameType(t.sons[j], key.sons[j]): 
-              match = false
-              break 
-          if match: 
-            return PType(tab.data[h].val)
+proc searchInstTypes(key: PType): PType =
+  let genericTyp = key.sons[0]
+  InternalAssert genericTyp.kind == tyGenericBody and
+                 key.sons[0] == genericTyp and
+                 genericTyp.sym != nil
+
+  if genericTyp.sym.typeInstCache == nil:
+    return
+
+  for inst in genericTyp.sym.typeInstCache:
+    if inst.id == key.id: return inst
+    if inst.sons.len < key.sons.len:
+      # XXX: This happens for prematurely cached
+      # types such as TChannel[empty]. Why?
+      # See the notes for PActor in handleGenericInvokation
+      return
+    block MatchType:
+      for j in 1 .. high(key.sons):
+        # XXX sameType is not really correct for nested generics?
+        if not sameType(inst.sons[j], key.sons[j]):
+          break MatchType
+      
+      return inst
+
+proc cacheTypeInst(inst: PType) =
+  # XXX: add to module's generics
+  #      update the refcount
+  let genericTyp = inst.sons[0]
+  genericTyp.sym.typeInstCache.safeAdd(inst)
 
 type
   TReplTypeVars* {.final.} = object 
@@ -132,74 +143,51 @@ proc handleGenericInvokation(cl: var TReplTypeVars, t: PType): PType =
   var body = t.sons[0]
   if body.kind != tyGenericBody: InternalError(cl.info, "no generic body")
   var header: PType = nil
-  when true:
-    # search for some instantiation here:
-    result = searchInstTypes(cl.c.generics.InstTypes, t)
-    if result != nil: return
-    for i in countup(1, sonsLen(t) - 1):
-      var x = t.sons[i]
-      if x.kind == tyGenericParam: 
-        x = lookupTypeVar(cl, x)
-        if header == nil: header = copyType(t, t.owner, false)
-        header.sons[i] = x
-        #idTablePut(cl.typeMap, body.sons[i-1], x)
-    if header != nil:
-      # search again after first pass:
-      result = searchInstTypes(cl.c.generics.InstTypes, header)
-      if result != nil: return
-    else:
-      header = copyType(t, t.owner, false)
-    # ugh need another pass for deeply recursive generic types (e.g. PActor)
-    # we need to add the candidate here, before it's fully instantiated for
-    # recursive instantions:
-    result = newType(tyGenericInst, t.sons[0].owner)
-    idTablePut(cl.c.generics.InstTypes, header, result)
-
-    for i in countup(1, sonsLen(t) - 1):
-      var x = replaceTypeVarsT(cl, t.sons[i])
-      assert x.kind != tyGenericInvokation
+  # search for some instantiation here:
+  result = searchInstTypes(t)
+  if result != nil: return
+  for i in countup(1, sonsLen(t) - 1):
+    var x = t.sons[i]
+    if x.kind == tyGenericParam: 
+      x = lookupTypeVar(cl, x)
+      if header == nil: header = copyType(t, t.owner, false)
       header.sons[i] = x
-      idTablePut(cl.typeMap, body.sons[i-1], x)
-    
-    for i in countup(0, sonsLen(t) - 1): 
-      # if one of the params is not concrete, we cannot do anything
-      # but we already raised an error!
-      rawAddSon(result, header.sons[i])
-    
-    var newbody = ReplaceTypeVarsT(cl, lastSon(body))
-    newbody.flags = newbody.flags + t.flags + body.flags
-    result.flags = result.flags + newbody.flags
-    newbody.callConv = body.callConv
-    newbody.n = ReplaceTypeVarsN(cl, lastSon(body).n)
-    # This type may be a generic alias and we want to resolve it here.
-    # One step is enough, because the recursive nature of
-    # handleGenericInvokation will handle the alias-to-alias-to-alias case
-    if newbody.isGenericAlias: newbody = newbody.skipGenericAlias
-    rawAddSon(result, newbody)
-    checkPartialConstructedType(cl.info, newbody)
+      #idTablePut(cl.typeMap, body.sons[i-1], x)
+  if header != nil:
+    # search again after first pass:
+    result = searchInstTypes(header)
+    if result != nil: return
   else:
-    for i in countup(1, sonsLen(t) - 1):
-      if PType(idTableGet(cl.typeMap, t.sons[i])) == nil: debug(t)
-      var x = replaceTypeVarsT(cl, t.sons[i])
-      if t.sons[i].kind == tyGenericParam: 
-        if header == nil: header = copyType(t, t.owner, false)
-        header.sons[i] = x
-      assert x.kind != tyGenericInvokation
-      idTablePut(cl.typeMap, body.sons[i-1], x)
-    if header == nil: header = t
-    result = searchInstTypes(cl.c.generics.InstTypes, header)
-    if result != nil: return 
-    result = newType(tyGenericInst, t.sons[0].owner)
-    for i in countup(0, sonsLen(t) - 1): 
-      # if one of the params is not concrete, we cannot do anything
-      # but we already raised an error!
-      addSon(result, header.sons[i])
-    idTablePut(cl.c.generics.InstTypes, header, result)
-    var newbody = ReplaceTypeVarsT(cl, lastSon(body))
-    newbody.flags = newbody.flags + t.flags + body.flags
-    newbody.n = ReplaceTypeVarsN(cl, lastSon(body).n)
-    addSon(result, newbody)
-    checkPartialConstructedType(cl.info, newbody)
+    header = copyType(t, t.owner, false)
+  # ugh need another pass for deeply recursive generic types (e.g. PActor)
+  # we need to add the candidate here, before it's fully instantiated for
+  # recursive instantions:
+  result = newType(tyGenericInst, t.sons[0].owner)
+  result.rawAddSon(header.sons[0])
+  cacheTypeInst(result)
+
+  for i in countup(1, sonsLen(t) - 1):
+    var x = replaceTypeVarsT(cl, t.sons[i])
+    assert x.kind != tyGenericInvokation
+    header.sons[i] = x
+    idTablePut(cl.typeMap, body.sons[i-1], x)
+  
+  for i in countup(1, sonsLen(t) - 1): 
+    # if one of the params is not concrete, we cannot do anything
+    # but we already raised an error!
+    rawAddSon(result, header.sons[i])
+  
+  var newbody = ReplaceTypeVarsT(cl, lastSon(body))
+  newbody.flags = newbody.flags + t.flags + body.flags
+  result.flags = result.flags + newbody.flags
+  newbody.callConv = body.callConv
+  newbody.n = ReplaceTypeVarsN(cl, lastSon(body).n)
+  # This type may be a generic alias and we want to resolve it here.
+  # One step is enough, because the recursive nature of
+  # handleGenericInvokation will handle the alias-to-alias-to-alias case
+  if newbody.isGenericAlias: newbody = newbody.skipGenericAlias
+  rawAddSon(result, newbody)
+  checkPartialConstructedType(cl.info, newbody)
   
 proc ReplaceTypeVarsT*(cl: var TReplTypeVars, t: PType): PType = 
   result = t
@@ -212,7 +200,7 @@ proc ReplaceTypeVarsT*(cl: var TReplTypeVars, t: PType): PType =
       result = handleGenericInvokation(cl, result)
   of tyGenericInvokation: 
     result = handleGenericInvokation(cl, t)
-  of tyGenericBody: 
+  of tyGenericBody:
     InternalError(cl.info, "ReplaceTypeVarsT: tyGenericBody")
     result = ReplaceTypeVarsT(cl, lastSon(t))
   of tyInt:
diff --git a/compiler/service.nim b/compiler/service.nim
index b1741b7bd..defdbceb7 100644
--- a/compiler/service.nim
+++ b/compiler/service.nim
@@ -12,15 +12,20 @@
 import 
   sockets,
   times, commands, options, msgs, nimconf,
-  extccomp, strutils, os, platform, main, parseopt
+  extccomp, strutils, os, platform, parseopt
 
 # We cache modules and the dependency graph. However, we don't check for
 # file changes but expect the client to tell us about them, otherwise the
 # repeated CRC calculations may turn out to be too slow.
 
-var 
-  arguments*: string = ""     # the arguments to be passed to the program that
-                              # should be run
+var
+  curCaasCmd* = ""
+  lastCaasCmd* = ""
+    # in caas mode, the list of defines and options will be given at start-up?
+    # it's enough to check that the previous compilation command is the same?
+  arguments* = ""
+    # the arguments to be passed to the program that
+    # should be run
 
 proc ProcessCmdLine*(pass: TCmdLinePass, cmd: string) =
   var p = parseopt.initOptParser(cmd)
@@ -56,17 +61,33 @@ proc ProcessCmdLine*(pass: TCmdLinePass, cmd: string) =
       rawMessage(errArgsNeedRunOption, [])
 
 proc serve*(action: proc (){.nimcall.}) =
-  var server = Socket()
-  let p = getConfigVar("server.port")
-  let port = if p.len > 0: parseInt(p).TPort else: 6000.TPort
-  server.bindAddr(port, getConfigVar("server.address"))
-  var inp = "".TaintedString
-  server.listen()
-  new(stdoutSocket)
-  while true:
-    accept(server, stdoutSocket)
-    discard stdoutSocket.recvLine(inp)
-    processCmdLine(passCmd2, inp.string)
+  template execute(cmd) =
+    curCaasCmd = cmd
+    processCmdLine(passCmd2, cmd)
     action()
-    stdoutSocket.send("\c\L")
-    stdoutSocket.close()
+
+  let typ = getConfigVar("server.type")
+  case typ
+  of "stdin":
+    while true:
+      var line = stdin.readLine.string
+      if line == "quit": quit()
+      execute line
+  of "tcp", "":
+    var server = Socket()
+    let p = getConfigVar("server.port")
+    let port = if p.len > 0: parseInt(p).TPort else: 6000.TPort
+    server.bindAddr(port, getConfigVar("server.address"))
+    var inp = "".TaintedString
+    server.listen()
+    new(stdoutSocket)
+    while true:
+      accept(server, stdoutSocket)
+      discard stdoutSocket.recvLine(inp)
+      execute inp.string
+      stdoutSocket.send("\c\L")
+      stdoutSocket.close()
+  else:
+    echo "Invalid server.type:", typ
+    quit 1
+
diff --git a/compiler/syntaxes.nim b/compiler/syntaxes.nim
index 1124e6444..3965cb3fe 100755
--- a/compiler/syntaxes.nim
+++ b/compiler/syntaxes.nim
@@ -30,8 +30,8 @@ type
     parser*: TParser
 
 
-proc ParseFile*(filename: string): PNode{.procvar.}
-proc openParsers*(p: var TParsers, filename: string, inputstream: PLLStream)
+proc parseFile*(fileIdx: int32): PNode{.procvar.}
+proc openParsers*(p: var TParsers, fileIdx: int32, inputstream: PLLStream)
 proc closeParsers*(p: var TParsers)
 proc parseAll*(p: var TParsers): PNode
 proc parseTopLevelStmt*(p: var TParsers): PNode
@@ -40,14 +40,15 @@ proc parseTopLevelStmt*(p: var TParsers): PNode
 
 # implementation
 
-proc ParseFile(filename: string): PNode = 
+proc ParseFile(fileIdx: int32): PNode =
   var 
     p: TParsers
     f: tfile
-  if not open(f, filename): 
+  let filename = fileIdx.toFullPath
+  if not open(f, filename):
     rawMessage(errCannotOpenFile, filename)
     return 
-  OpenParsers(p, filename, LLStreamOpen(f))
+  OpenParsers(p, fileIdx, LLStreamOpen(f))
   result = ParseAll(p)
   CloseParsers(p)
 
@@ -160,15 +161,16 @@ proc evalPipe(p: var TParsers, n: PNode, filename: string,
   else: 
     result = applyFilter(p, n, filename, result)
   
-proc openParsers(p: var TParsers, filename: string, inputstream: PLLStream) = 
+proc openParsers(p: var TParsers, fileIdx: int32, inputstream: PLLStream) = 
   var s: PLLStream
   p.skin = skinStandard
+  let filename = fileIdx.toFullPath
   var pipe = parsePipe(filename, inputStream)
   if pipe != nil: s = evalPipe(p, pipe, filename, inputStream)
   else: s = inputStream
   case p.skin
-  of skinStandard, skinBraces, skinEndX: 
-    parser.openParser(p.parser, filename, s)
+  of skinStandard, skinBraces, skinEndX:
+    parser.openParser(p.parser, fileIdx, s)
   
 proc closeParsers(p: var TParsers) = 
   parser.closeParser(p.parser)
diff --git a/compiler/transf.nim b/compiler/transf.nim
index 6633d7755..679f7d12f 100755
--- a/compiler/transf.nim
+++ b/compiler/transf.nim
@@ -696,12 +696,8 @@ when false:
     result = openTransf(module, filename)
     for m in items(rd.methods): methodDef(m, true)
 
-  proc transfPass(): TPass = 
-    initPass(result)
-    result.open = openTransf
-    result.openCached = openTransfCached
-    result.process = processTransf
-    result.close = processTransf # we need to process generics too!
+  const transfPass* = makePass(openTransf, openTransfCached,
+    processTransf, processTransf) # we need to process generics too!
   
 proc transformBody*(module: PSym, n: PNode, prc: PSym): PNode =
   if nfTransf in n.flags or prc.kind in {skTemplate, skMacro}:
diff --git a/doc/advopt.txt b/doc/advopt.txt
index 5d3544a76..52c2c8636 100755
--- a/doc/advopt.txt
+++ b/doc/advopt.txt
@@ -16,6 +16,7 @@ Advanced commands:
     --def                   list all possible definitions at position
     --context               list possible invokation context
     --usages                list all usages of the symbol at position
+    --eval                  evaluates an expression
 
 Advanced options:
   -m, --mainmodule:FILE     set the project main module
@@ -51,6 +52,8 @@ Advanced options:
                             (Nimrod, mangled) identifier pairs
   --project                 document the whole project (doc2)
   --lineDir:on|off          generation of #line directive on|off
+  --embedsrc                embeds the original source code as comments
+                            in the generated output
   --threadanalysis:on|off   turn thread analysis on|off
   --tlsEmulation:on|off     turn thread local storage emulation on|off
   --taintMode:on|off        turn taint mode on|off
diff --git a/lib/nimbase.h b/lib/nimbase.h
index 7fb70a60c..573ad16f7 100755
--- a/lib/nimbase.h
+++ b/lib/nimbase.h
@@ -436,6 +436,17 @@ struct TFrame {
   NI len;
 };
 
+#define nimfr(proc, file) \
+  volatile TFrame F; \
+  F.procname = proc; F.filename = file; F.line = 0; F.len = 0; pushFrame(&F);
+
+#define nimfrs(proc, file, slots) \
+  volatile struct {TFrame* prev;NCSTRING procname;NI line;NCSTRING filename; NI len; TVarSlot s[slots];} F; \
+  F.procname = proc; F.filename = file; F.line = 0; F.len = slots; pushFrame((TFrame*)&F);
+
+#define nimln(n, file) \
+  F.line = n; F.filename = file;
+
 #define NIM_POSIX_INIT  __attribute__((constructor)) 
 
 #if defined(_MSCVER) && defined(__i386__)
diff --git a/lib/pure/streams.nim b/lib/pure/streams.nim
index 212aab493..232205ebd 100755
--- a/lib/pure/streams.nim
+++ b/lib/pure/streams.nim
@@ -180,8 +180,12 @@ proc readLine*(s: PStream): TaintedString =
     if c == '\c': 
       c = readChar(s)
       break
-    elif c == '\L' or c == '\0': break
-    result.string.add(c)
+    if c == '\b':
+      result.string.setLen(result.len - 1)
+    elif c == '\L' or c == '\0':
+      break
+    else:
+      result.string.add(c)
 
 type
   PStringStream* = ref TStringStream ## a stream that encapsulates a string
diff --git a/lib/system.nim b/lib/system.nim
index 892f4f8c5..b4c265f62 100755
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -186,9 +186,9 @@ when not defined(niminheritable):
 
 when not defined(EcmaScript) and not defined(NimrodVM):
   type
-    TGenericSeq {.compilerproc, pure, inheritable.} = object
+    TGenericSeq* {.compilerproc, pure, inheritable.} = object
       len, reserved: int
-    PGenericSeq {.exportc.} = ptr TGenericSeq
+    PGenericSeq* {.exportc.} = ptr TGenericSeq
     # len and space without counting the terminating zero:
     NimStringDesc {.compilerproc, final.} = object of TGenericSeq
       data: array[0..100_000_000, char]
@@ -565,7 +565,7 @@ proc abs*(x: int64): int64 {.magic: "AbsI64", noSideEffect.}
   ## checking is turned on).
 
 type
-  IntMax32 = int|int8|int16|int32
+  IntMax32 = bool|int|int8|int16|int32
 
 proc `+%` *(x, y: IntMax32): IntMax32 {.magic: "AddU", noSideEffect.}
 proc `+%` *(x, y: Int64): Int64 {.magic: "AddU", noSideEffect.}
@@ -2486,3 +2486,17 @@ proc compiles*(x: expr): bool {.magic: "Compiles", noSideEffect.} =
 
 when defined(initDebugger):
   initDebugger()
+
+# XXX: make these the default (or implement the NilObject optimization)
+proc safeAdd*[T](x: var seq[T], y: T) {.noSideEffect.} =
+  if x == nil: x = @[y]
+  else: x.add(y)
+
+proc safeAdd*(x: var string, y: char) =
+  if x == nil: x = ""
+  x.add(y)
+
+proc safeAdd*(x: var string, y: string) =
+  if x == nil: x = y
+  else: x.add(y)
+
diff --git a/lib/system/cellsets.nim b/lib/system/cellsets.nim
index b860ef38c..5de4ca811 100755
--- a/lib/system/cellsets.nim
+++ b/lib/system/cellsets.nim
@@ -10,10 +10,12 @@
 # Efficient set of pointers for the GC (and repr)
 
 type
+  TRefCount = int
+
   TCell {.pure.} = object
-    refcount: int  # the refcount and some flags
+    refcount: TRefCount  # the refcount and some flags
     typ: PNimType
-    when leakDetector:
+    when trackAllocationSource:
       filename: cstring
       line: int
 
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index ec656e0ef..bc3474adf 100755
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -32,20 +32,73 @@ when defined(memProfiler):
   proc nimProfile(requestedSize: int)
 
 const
-  rcIncrement = 0b1000 # so that lowest 3 bits are not touched
-  # NOTE: Most colors are currently unused
-  rcBlack = 0b000  # cell is colored black; in use or free
-  rcGray = 0b001   # possible member of a cycle
-  rcWhite = 0b010  # member of a garbage cycle
-  rcPurple = 0b011 # possible root of a cycle
-  rcZct = 0b100    # in ZCT
-  rcRed = 0b101    # Candidate cycle undergoing sigma-computation
-  rcOrange = 0b110 # Candidate cycle awaiting epoch boundary
-  rcShift = 3      # shift by rcShift to get the reference counter
-  colorMask = 0b111
+  rcShift = 6 # the reference count is shifted so we can use
+              # the least significat bits for additinal flags:
+
+  rcAlive = 0b00000           # object is reachable.
+                              # color *black* in the original paper
+                              
+  rcCycleCandidate = 0b00001  # possible root of a cycle. *purple*
+
+  rcDecRefApplied = 0b00010   # the first dec-ref phase of the
+                              # collector was already applied to this
+                              # object. *gray*
+                              
+  rcMaybeDead = 0b00011       # this object is a candidate for deletion
+                              # during the collect cycles algorithm.
+                              # *white*.
+                              
+  rcReallyDead = 0b00100      # this is proved to be garbage
+  
+  rcRetiredBuffer = 0b00101   # this is a seq or string buffer that
+                              # was replaced by a resize operation.
+                              # see growObj for details
+
+  rcColorMask = TRefCount(0b00111)
+
+  rcZct = 0b01000             # already added to ZCT
+  rcInCycleRoots = 0b10000    # already buffered as cycle candidate
+  rcHasStackRef = 0b100000    # the object had a stack ref in the last
+                              # cycle collection
+
+  rcMarkBit = rcHasStackRef   # this is currently used for leak detection
+                              # when traceGC is on
+
+  rcBufferedAnywhere = rcZct or rcInCycleRoots
+
+  rcIncrement = 1 shl rcShift # don't touch the color bits
+
+const
+  NewObjectsAreCycleRoots = true
+    # the alternative is to use the old strategy of adding cycle roots
+    # in incRef (in the compiler itself, this doesn't change much)
+
+  IncRefRemovesCandidates = false
+    # this is safe only if we can reliably track the fact that the object
+    # has stack references. This could be easily done by adding another bit
+    # to the refcount field and setting it up in unmarkStackAndRegisters.
+    # The bit must also be set for new objects that are not rc1 and it must be
+    # examined in the decref loop in collectCycles.
+    # XXX: not implemented yet as tests didn't show any improvement from this
+   
+  MarkingSkipsAcyclicObjects = false
+    # Acyclic objects can be safely ignored in the mark and scan phases, 
+    # because they cannot contribute to the internal count.
+    # XXX: if we generate specialized `markCyclic` and `markAcyclic`
+    # procs we can further optimize this as there won't be need for any
+    # checks in the code
+  
+  MinimumStackMarking = false
+    # Try to scan only the user stack and ignore the part of the stack
+    # belonging to the GC itself. see setStackTop for further info.
+    # XXX: still has problems in release mode in the compiler itself.
+    # investigate how it affects growObj
+
+  CollectCyclesStats = false
+
 type
   TWalkOp = enum
-    waZctDecRef, waPush, waCycleDecRef
+    waPush
 
   TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
     # A ref type can have a finalizer that is called before the object's
@@ -63,19 +116,28 @@ type
   TGcHeap {.final, pure.} = object # this contains the zero count and
                                    # non-zero count table
     stackBottom: pointer
+    stackTop: pointer
     cycleThreshold: int
     zct: TCellSeq            # the zero count table
     decStack: TCellSeq       # cells in the stack that are to decref again
-    cycleRoots: TCellSet
+    cycleRoots: TCellSeq
     tempStack: TCellSeq      # temporary stack for recursion elimination
+    freeStack: TCellSeq      # objects ready to be freed
     recGcLock: int           # prevent recursion via finalizers; no thread lock
+    cycleRootsTrimIdx: int   # Trimming is a light-weight collection of the 
+                             # cycle roots table that uses a cheap linear scan
+                             # to find only possitively dead objects.
+                             # One strategy is to perform it only for new objects
+                             # allocated between the invocations of CollectZCT.
+                             # This index indicates the start of the range of
+                             # such new objects within the table.
     when withRealTime:
       maxPause: TNanos       # max allowed pause in nanoseconds; active if > 0
     region: TMemRegion       # garbage collected region
     stat: TGcStat
 
 var
-  gch {.rtlThreadVar.}: TGcHeap
+  gch* {.rtlThreadVar.}: TGcHeap
 
 when not defined(useNimRtl):
   InstantiateForRegion(gch.region)
@@ -88,16 +150,92 @@ template release(gch: TGcHeap) =
   when hasThreadSupport and hasSharedHeap:
     releaseSys(HeapLock)
 
-proc addZCT(s: var TCellSeq, c: PCell) {.noinline.} =
-  if (c.refcount and rcZct) == 0:
-    c.refcount = c.refcount and not colorMask or rcZct
-    add(s, c)
+template setColor(c: PCell, color) =
+  c.refcount = (c.refcount and not rcColorMask) or color
+
+template color(c: PCell): expr =
+  c.refcount and rcColorMask
+
+template isBitDown(c: PCell, bit): expr =
+  (c.refcount and bit) == 0
+
+template isBitUp(c: PCell, bit): expr =
+  (c.refcount and bit) != 0
+
+template setBit(c: PCell, bit): expr =
+  c.refcount = c.refcount or bit
+
+template isDead(c: Pcell): expr =
+  c.isBitUp(rcReallyDead) # also covers rcRetiredBuffer
+
+template clearBit(c: PCell, bit): expr =
+  c.refcount = c.refcount and (not TRefCount(bit))
+
+when debugGC:
+  var gcCollectionIdx = 0
+
+  proc colorStr(c: PCell): cstring =
+    let color = c.color
+    case color
+    of rcAlive: return "alive"
+    of rcMaybeDead: return "maybedead"
+    of rcCycleCandidate: return "candidate"
+    of rcDecRefApplied: return "marked"
+    of rcRetiredBuffer: return "retired"
+    of rcReallyDead: return "dead"
+    else: return "unknown?"
+  
+  proc inCycleRootsStr(c: PCell): cstring =
+    if c.isBitUp(rcInCycleRoots): result = "cycleroot"
+    else: result = ""
+
+  proc inZctStr(c: PCell): cstring =
+    if c.isBitUp(rcZct): result = "zct"
+    else: result = ""
+
+  proc writeCell*(msg: CString, c: PCell, force = false) =
+    var kind = -1
+    if c.typ != nil: kind = ord(c.typ.kind)
+    when trackAllocationSource:
+      c_fprintf(c_stdout, "[GC %d] %s: %p %d rc=%ld %s %s %s from %s(%ld)\n",
+                gcCollectionIdx,
+                msg, c, kind, c.refcount shr rcShift,
+                c.colorStr, c.inCycleRootsStr, c.inZctStr,
+                c.filename, c.line)
+    else:
+      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
+                msg, c, kind, c.refcount shr rcShift)
+
+proc addZCT(zct: var TCellSeq, c: PCell) {.noinline.} =
+  if c.isBitDown(rcZct):
+    c.setBit rcZct
+    zct.add c
+
+template setStackTop(gch) =
+  # This must be called immediately after we enter the GC code
+  # to minimize the size of the scanned stack. The stack consumed
+  # by the GC procs may amount to 200-400 bytes depending on the
+  # build settings and this contributes to false-positives
+  # in the conservative stack marking
+  when MinimumStackMarking:
+    var stackTop {.volatile.}: pointer
+    gch.stackTop = addr(stackTop)
+
+template addCycleRoot(cycleRoots: var TCellSeq, c: PCell) =
+  if c.color != rcCycleCandidate:
+    c.setColor rcCycleCandidate
+    
+    # the object may be buffered already. for example, consider:
+    # decref; incref; decref
+    if c.isBitDown(rcInCycleRoots):
+      c.setBit rcInCycleRoots
+      cycleRoots.add c
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
   result = cast[pointer](cast[TAddress](cell)+%TAddress(sizeof(TCell)))
 
-proc usrToCell(usr: pointer): PCell {.inline.} =
+proc usrToCell*(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
   result = cast[PCell](cast[TAddress](usr)-%TAddress(sizeof(TCell)))
 
@@ -115,22 +253,30 @@ proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
 when BitsPerPage mod (sizeof(int)*8) != 0:
   {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
 
-when debugGC:
-  proc writeCell(msg: CString, c: PCell) =
-    var kind = -1
-    if c.typ != nil: kind = ord(c.typ.kind)
-    when leakDetector:
-      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
-                msg, c, kind, c.refcount shr rcShift, c.filename, c.line)
-    else:
-      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
-                msg, c, kind, c.refcount shr rcShift)
+# forward declarations:
+proc collectCT(gch: var TGcHeap)
+proc IsOnStack*(p: pointer): bool {.noinline.}
+proc forAllChildren(cell: PCell, op: TWalkOp)
+proc doOperation(p: pointer, op: TWalkOp)
+proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
+# we need the prototype here for debugging purposes
+
+proc prepareDealloc(cell: PCell) =
+  if cell.typ.finalizer != nil:
+    # the finalizer could invoke something that
+    # allocates memory; this could trigger a garbage
+    # collection. Since we are already collecting we
+    # prevend recursive entering here by a lock.
+    # XXX: we should set the cell's children to nil!
+    inc(gch.recGcLock)
+    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
+    dec(gch.recGcLock)
 
 when traceGC:
   # traceGC is a special switch to enable extensive debugging
   type
     TCellState = enum
-      csAllocated, csZctFreed, csCycFreed
+      csAllocated, csFreed
   var
     states: array[TCellState, TCellSet]
 
@@ -140,155 +286,197 @@ when traceGC:
       if c in states[csAllocated]:
         writeCell("attempt to alloc an already allocated cell", c)
         sysAssert(false, "traceCell 1")
-      excl(states[csCycFreed], c)
-      excl(states[csZctFreed], c)
-    of csZctFreed:
-      if c in states[csZctFreed]:
-        writeCell("attempt to free zct cell twice", c)
+      excl(states[csFreed], c)
+      # writecell("allocated", c)
+    of csFreed:
+      if c in states[csFreed]:
+        writeCell("attempt to free a cell twice", c)
         sysAssert(false, "traceCell 2")
-      if c in states[csCycFreed]:
-        writeCell("attempt to free with zct, but already freed with cyc", c)
-        sysAssert(false, "traceCell 3")
       if c notin states[csAllocated]:
         writeCell("attempt to free not an allocated cell", c)
-        sysAssert(false, "traceCell 4")
-      excl(states[csAllocated], c)
-    of csCycFreed:
-      if c notin states[csAllocated]:
-        writeCell("attempt to free a not allocated cell", c)
-        sysAssert(false, "traceCell 5")
-      if c in states[csCycFreed]:
-        writeCell("attempt to free cyc cell twice", c)
-        sysAssert(false, "traceCell 6")
-      if c in states[csZctFreed]:
-        writeCell("attempt to free with cyc, but already freed with zct", c)
-        sysAssert(false, "traceCell 7")
+        sysAssert(false, "traceCell 3")
       excl(states[csAllocated], c)
+      # writecell("freed", c)
     incl(states[state], c)
 
-  proc writeLeakage() =
-    var z = 0
-    var y = 0
-    var e = 0
+  proc computeCellWeight(c: PCell): int =
+    var x: TCellSet
+    x.init
+
+    let startLen = gch.tempStack.len
+    c.forAllChildren waPush
+    
+    while startLen != gch.tempStack.len:
+      dec gch.tempStack.len
+      var c = gch.tempStack.d[gch.tempStack.len]
+      if c in states[csFreed]: continue
+      inc result
+      if c notin x:
+        x.incl c
+        c.forAllChildren waPush
+
+  template markChildrenRec(cell) =
+    let startLen = gch.tempStack.len
+    cell.forAllChildren waPush
+    let isMarked = cell.isBitUp(rcMarkBit)
+    while startLen != gch.tempStack.len:
+      dec gch.tempStack.len
+      var c = gch.tempStack.d[gch.tempStack.len]
+      if c in states[csFreed]: continue
+      if c.isBitDown(rcMarkBit):
+        c.setBit rcMarkBit
+        c.forAllChildren waPush
+    if c.isBitUp(rcMarkBit) and not isMarked:
+      writecell("cyclic cell", cell)
+      cprintf "Weight %d\n", cell.computeCellWeight
+      
+  proc writeLeakage(onlyRoots: bool) =
+    if onlyRoots:
+      for c in elements(states[csAllocated]):
+        if c notin states[csFreed]:
+          markChildrenRec(c)
+    var f = 0
+    var a = 0
     for c in elements(states[csAllocated]):
-      inc(e)
-      if c in states[csZctFreed]: inc(z)
-      elif c in states[csCycFreed]: inc(y)
-      else: writeCell("leak", c)
-    cfprintf(cstdout, "Allocations: %ld; ZCT freed: %ld; CYC freed: %ld\n",
-             e, z, y)
+      inc a
+      if c in states[csFreed]: inc f
+      elif c.isBitDown(rcMarkBit):
+        writeCell("leak", c)
+        cprintf "Weight %d\n", c.computeCellWeight
+    cfprintf(cstdout, "Allocations: %ld; freed: %ld\n", a, f)
 
 template gcTrace(cell, state: expr): stmt {.immediate.} =
+  when logGC: writeCell($state, cell)
   when traceGC: traceCell(cell, state)
 
-# forward declarations:
-proc collectCT(gch: var TGcHeap)
-proc IsOnStack*(p: pointer): bool {.noinline.}
-proc forAllChildren(cell: PCell, op: TWalkOp)
-proc doOperation(p: pointer, op: TWalkOp)
-proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
-# we need the prototype here for debugging purposes
-
-when hasThreadSupport and hasSharedHeap:
-  template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement
-  template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement)
-else:
-  template `--`(x: expr): expr = 
-    Dec(x, rcIncrement)
-    x <% rcIncrement
-  template `++`(x: expr): stmt = Inc(x, rcIncrement)
-
-proc prepareDealloc(cell: PCell) =
-  if cell.typ.finalizer != nil:
-    # the finalizer could invoke something that
-    # allocates memory; this could trigger a garbage
-    # collection. Since we are already collecting we
-    # prevend recursive entering here by a lock.
-    # XXX: we should set the cell's children to nil!
-    inc(gch.recGcLock)
-    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
-    dec(gch.recGcLock)
+template WithHeapLock(blk: stmt): stmt =
+  when hasThreadSupport and hasSharedHeap: AcquireSys(HeapLock)
+  blk
+  when hasThreadSupport and hasSharedHeap: ReleaseSys(HeapLock)
 
 proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} = 
   # we MUST access gch as a global here, because this crosses DLL boundaries!
-  when hasThreadSupport and hasSharedHeap:
-    AcquireSys(HeapLock)
-  incl(gch.cycleRoots, c)
-  when hasThreadSupport and hasSharedHeap:
-    ReleaseSys(HeapLock)
+  WithHeapLock: addCycleRoot(gch.cycleRoots, c)
 
 proc rtlAddZCT(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
-  when hasThreadSupport and hasSharedHeap:
-    AcquireSys(HeapLock)
-  addZCT(gch.zct, c)
-  when hasThreadSupport and hasSharedHeap:
-    ReleaseSys(HeapLock)
+  WithHeapLock: addZCT(gch.zct, c)
+
+type
+  TCyclicMode = enum
+    Cyclic,
+    Acyclic,
+    MaybeCyclic
+
+  TReleaseType = enum
+    AddToZTC
+    FreeImmediately
+
+  THeapType = enum
+    LocalHeap
+    SharedHeap
+
+template `++` (rc: TRefCount, heapType: THeapType): stmt =
+  when heapType == SharedHeap:
+    discard atomicInc(rc, rcIncrement)
+  else:
+    inc rc, rcIncrement
 
-proc decRef(c: PCell) {.inline.} =
+template `--`(rc: TRefCount): expr =
+  dec rc, rcIncrement
+  rc <% rcIncrement
+
+template `--` (rc: TRefCount, heapType: THeapType): expr =
+  (when heapType == SharedHeap: atomicDec(rc, rcIncrement) <% rcIncrement
+   else: --rc)
+
+template doDecRef(cc: PCell,
+                  heapType = LocalHeap,
+                  cycleFlag = MaybeCyclic): stmt =
+  var c = cc
   sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
+  # XXX: move this elesewhere
+
   sysAssert(c.refcount >=% rcIncrement, "decRef")
-  if --c.refcount:
+  if c.refcount--(heapType):
+    # this is the last reference from the heap
+    # add to a zero-count-table that will be matched against stack pointers
     rtlAddZCT(c)
-  elif canBeCycleRoot(c):
-    # unfortunately this is necessary here too, because a cycle might just
-    # have been broken up and we could recycle it.
-    rtlAddCycleRoot(c) 
-
-proc incRef(c: PCell) {.inline.} = 
-  sysAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
-  ++c.refcount
-  if canBeCycleRoot(c):
-    rtlAddCycleRoot(c)
-
-proc nimGCref(p: pointer) {.compilerProc, inline.} = incRef(usrToCell(p))
-proc nimGCunref(p: pointer) {.compilerProc, inline.} = decRef(usrToCell(p))
+  else:
+    when cycleFlag != Acyclic:
+      if cycleFlag == Cyclic or canBeCycleRoot(c):
+        # a cycle may have been broken
+        rtlAddCycleRoot(c)
+
+template doIncRef(cc: PCell,
+                 heapType = LocalHeap,
+                 cycleFlag = MaybeCyclic): stmt =
+  var c = cc
+  c.refcount++(heapType)
+  when cycleFlag != Acyclic:
+    when NewObjectsAreCycleRoots:
+      if canbeCycleRoot(c):
+        addCycleRoot(gch.cycleRoots, c)
+    elif IncRefRemovesCandidates:
+      c.setColor rcAlive
+  # XXX: this is not really atomic enough!
+  
+proc nimGCref(p: pointer) {.compilerProc, inline.} = doIncRef(usrToCell(p))
+proc nimGCunref(p: pointer) {.compilerProc, inline.} = doDecRef(usrToCell(p))
 
 proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
   sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle")
   var c = usrToCell(p)
   sysAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
-  if --c.refcount:
+  if c.refcount--(LocalHeap):
     rtlAddZCT(c)
     sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2")
   sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5")
 
-proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} =
-  # the code generator calls this proc!
+template doAsgnRef(dest: ppointer, src: pointer,
+                  heapType = LocalHeap, cycleFlag = MaybeCyclic): stmt =
   sysAssert(not isOnStack(dest), "asgnRef")
   # BUGFIX: first incRef then decRef!
-  if src != nil: incRef(usrToCell(src))
-  if dest[] != nil: decRef(usrToCell(dest[]))
+  if src != nil: doIncRef(usrToCell(src), heapType, cycleFlag)
+  if dest[] != nil: doDecRef(usrToCell(dest[]), heapType, cycleFlag)
   dest[] = src
 
+proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+  # the code generator calls this proc!
+  doAsgnRef(dest, src, LocalHeap, MaybeCyclic)
+
 proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerProc, inline.} =
   # the code generator calls this proc if it is known at compile time that no 
   # cycle is possible.
-  if src != nil:
-    var c = usrToCell(src)
-    ++c.refcount
-  if dest[] != nil: 
-    var c = usrToCell(dest[])
-    if --c.refcount:
-      rtlAddZCT(c)
-  dest[] = src
+  doAsgnRef(dest, src, LocalHeap, Acyclic)
 
 proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerProc.} =
   # unsureAsgnRef updates the reference counters only if dest is not on the
   # stack. It is used by the code generator if it cannot decide wether a
   # reference is in the stack or not (this can happen for var parameters).
   if not IsOnStack(dest):
-    if src != nil: incRef(usrToCell(src))
+    if src != nil: doIncRef(usrToCell(src))
+    # XXX we must detect a shared heap here
+    # better idea may be to just eliminate the need for unsureAsgnRef
+    #
     # XXX finally use assembler for the stack checking instead!
     # the test for '!= nil' is correct, but I got tired of the segfaults
     # resulting from the crappy stack checking:
-    if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[]))
+    if cast[int](dest[]) >=% PageSize: doDecRef(usrToCell(dest[]))
   else:
     # can't be an interior pointer if it's a stack location!
-    sysAssert(interiorAllocatedPtr(gch.region, dest)==nil, 
+    sysAssert(interiorAllocatedPtr(gch.region, dest)==nil,
               "stack loc AND interior pointer")
   dest[] = src
 
+when hasThreadSupport and hasSharedHeap:
+  # shared heap version of the above procs
+  proc asgnRefSh(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+    doAsgnRef(dest, src, SharedHeap, MaybeCyclic)
+
+  proc asgnRefNoCycleSh(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+    doAsgnRef(dest, src, SharedHeap, Acyclic)
+
 proc initGC() =
   when not defined(useNimRtl):
     when traceGC:
@@ -303,6 +491,7 @@ proc initGC() =
     # init the rt
     init(gch.zct)
     init(gch.tempStack)
+    init(gch.freeStack)
     Init(gch.cycleRoots)
     Init(gch.decStack)
 
@@ -355,9 +544,10 @@ proc forAllChildren(cell: PCell, op: TWalkOp) =
       var d = cast[TAddress](cellToUsr(cell))
       var s = cast[PGenericSeq](d)
       if s != nil:
+        let baseAddr = d +% GenericSeqSize
         for i in 0..s.len-1:
-          forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +%
-            GenericSeqSize), cell.typ.base, op)
+          forAllChildrenAux(cast[pointer](baseAddr +% i *% cell.typ.base.size),
+                            cell.typ.base, op)
     else: nil
 
 proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
@@ -378,7 +568,7 @@ proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
     template replaceZctEntry(i: expr) =
       c = d[i]
       if c.refcount >=% rcIncrement:
-        c.refcount = c.refcount and not colorMask
+        c.clearBit(rcZct)
         d[i] = res
         return
     if L > 8:
@@ -399,84 +589,108 @@ proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
     for i in countdown(L-1, max(0, L-8)):
       var c = d[i]
       if c.refcount >=% rcIncrement:
-        c.refcount = c.refcount and not colorMask
+        c.clearBit(rcZct)
         d[i] = res
         return
     add(gch.zct, res)
 
-proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
+proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap, rc1: bool): pointer =
   # generates a new object and sets its reference counter to 0
   acquire(gch)
+  sysAssert(allocInv(gch.region), "rawNewObj begin")
   sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  
   collectCT(gch)
-  sysAssert(allocInv(gch.region), "rawNewObj begin")
+  sysAssert(allocInv(gch.region), "rawNewObj after collect")
+
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
+  sysAssert(allocInv(gch.region), "rawNewObj after rawAlloc")
+
   sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
-  # now it is buffered in the ZCT
+  
   res.typ = typ
-  when leakDetector and not hasThreadSupport:
-    if framePtr != nil and framePtr.prev != nil:
-      res.filename = framePtr.prev.filename
-      res.line = framePtr.prev.line
-  res.refcount = rcZct # refcount is zero, but mark it to be in the ZCT  
+  
+  when trackAllocationSource and not hasThreadSupport:
+    if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil:
+      res.filename = framePtr.prev.prev.filename
+      res.line = framePtr.prev.prev.line
+    else:
+      res.filename = "nofile"
+  
+  if rc1:
+    res.refcount = rcIncrement # refcount is 1
+  else:
+    # its refcount is zero, so add it to the ZCT:
+    res.refcount = rcZct
+    addNewObjToZCT(res, gch)
+
+    if NewObjectsAreCycleRoots and canBeCycleRoot(res):
+      res.setBit(rcInCycleRoots)
+      res.setColor rcCycleCandidate
+      gch.cycleRoots.add res
+    
   sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-  # its refcount is zero, so add it to the ZCT:
-  addNewObjToZCT(res, gch)
+  
   when logGC: writeCell("new cell", res)
   gcTrace(res, csAllocated)
   release(gch)
   result = cellToUsr(res)
+  zeroMem(result, size)
+  when defined(memProfiler): nimProfile(size)
   sysAssert(allocInv(gch.region), "rawNewObj end")
 
 {.pop.}
 
-proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = rawNewObj(typ, size, gch)
-  zeroMem(result, size)
-  when defined(memProfiler): nimProfile(size)
+proc freeCell(gch: var TGcHeap, c: PCell) =
+  # prepareDealloc(c)
+  gcTrace(c, csFreed)
+
+  when reallyDealloc: rawDealloc(gch.region, c)
+  else:
+    sysAssert(c.typ != nil, "collectCycles")
+    zeroMem(c, sizeof(TCell))
+
+template eraseAt(cells: var TCellSeq, at: int): stmt =
+  cells.d[at] = cells.d[cells.len - 1]
+  dec cells.len
+
+template trimAt(roots: var TCellSeq, at: int): stmt =
+  # This will remove a cycle root candidate during trimming.
+  # a candidate is removed either because it received a refup and
+  # it's no longer a candidate or because it received further refdowns
+  # and now it's dead for sure.
+  let c = roots.d[at]
+  c.clearBit(rcInCycleRoots)
+  roots.eraseAt(at)
+  if c.isBitUp(rcReallyDead) and c.refcount <% rcIncrement:
+    # This case covers both dead objects and retired buffers
+    # That's why we must also check the refcount (it may be
+    # kept possitive by stack references).
+    freeCell(gch, c)
 
+proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  setStackTop(gch)
+  result = rawNewObj(typ, size, gch, false)
+  
 proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  # `newObj` already uses locks, so no need for them here.
+  setStackTop(gch)
+  # `rawNewObj` already uses locks, so no need for them here.
   let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
-  result = newObj(typ, size)
+  result = rawNewObj(typ, size, gch, false)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
-  when defined(memProfiler): nimProfile(size)
 
 proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  # generates a new object and sets its reference counter to 1
-  sysAssert(allocInv(gch.region), "newObjRC1 begin")
-  acquire(gch)
-  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
-  collectCT(gch)
-  sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
-  
-  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
-  sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
-  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
-  # now it is buffered in the ZCT
-  res.typ = typ
-  when leakDetector and not hasThreadSupport:
-    if framePtr != nil and framePtr.prev != nil:
-      res.filename = framePtr.prev.filename
-      res.line = framePtr.prev.line
-  res.refcount = rcIncrement # refcount is 1
-  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-  when logGC: writeCell("new cell", res)
-  gcTrace(res, csAllocated)
-  release(gch)
-  result = cellToUsr(res)
-  zeroMem(result, size)
-  sysAssert(allocInv(gch.region), "newObjRC1 end")
-  when defined(memProfiler): nimProfile(size)
+  setStackTop(gch)
+  result = rawNewObj(typ, size, gch, true)
 
 proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
+  setStackTop(gch)
   let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
-  result = newObjRC1(typ, size)
+  result = rawNewObj(typ, size, gch, true)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
-  when defined(memProfiler): nimProfile(size)
-  
+
 proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
   acquire(gch)
   collectCT(gch)
@@ -486,43 +700,73 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
   sysAssert(allocInv(gch.region), "growObj begin")
 
   var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
-  var elemSize = 1
-  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
+  var elemSize = if ol.typ.kind != tyString: ol.typ.base.size
+                 else: 1
   
   var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
+  
+  # XXX: This should happen outside
+  # call user-defined move code
+  # call user-defined default constructor
   copyMem(res, ol, oldsize + sizeof(TCell))
   zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)),
           newsize-oldsize)
+
   sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
   sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
-  #if res.refcount <% rcIncrement:
-  #  add(gch.zct, res)
-  #else: # XXX: what to do here?
-  #  decRef(ol)
-  if (ol.refcount and colorMask) == rcZct:
-    var j = gch.zct.len-1
-    var d = gch.zct.d
-    while j >= 0: 
-      if d[j] == ol:
-        d[j] = res
-        break
-      dec(j)
-  if canBeCycleRoot(ol): excl(gch.cycleRoots, ol)
-  when logGC:
-    writeCell("growObj old cell", ol)
-    writeCell("growObj new cell", res)
-  gcTrace(ol, csZctFreed)
-  gcTrace(res, csAllocated)
-  when reallyDealloc: rawDealloc(gch.region, ol)
+  
+  when false:
+    if ol.isBitUp(rcZct):
+      var j = gch.zct.len-1
+      var d = gch.zct.d
+      while j >= 0: 
+        if d[j] == ol:
+          d[j] = res
+          break
+        dec(j)
+    
+    if ol.isBitUp(rcInCycleRoots):
+      for i in 0 .. <gch.cycleRoots.len:
+        if gch.cycleRoots.d[i] == ol:
+          eraseAt(gch.cycleRoots, i)
+
+    freeCell(gch, ol)
+  
   else:
-    sysAssert(ol.typ != nil, "growObj: 5")
-    zeroMem(ol, sizeof(TCell))
+    # the new buffer inherits the GC state of the old one
+    if res.isBitUp(rcZct): gch.zct.add res
+    if res.isBitUp(rcInCycleRoots): gch.cycleRoots.add res
+
+    # Pay attention to what's going on here! We're not releasing the old memory.
+    # This is because at this point there may be an interior pointer pointing
+    # into this buffer somewhere on the stack (due to `var` parameters now and
+    # and `let` and `var:var` stack locations in the future).
+    # We'll release the memory in the next GC cycle. If we release it here,
+    # we cannot guarantee that no memory will be corrupted when only safe
+    # language features are used. Accessing the memory after the seq/string
+    # has been invalidated may still result in logic errors in the user code.
+    # We may improve on that by protecting the page in debug builds or
+    # by providing a warning when we detect a stack pointer into it.
+    let bufferFlags = ol.refcount and rcBufferedAnywhere
+    if bufferFlags == 0:
+      # we need this in order to collect it safely later
+      ol.refcount = rcRetiredBuffer or rcZct
+      gch.zct.add ol
+    else:
+      ol.refcount = rcRetiredBuffer or bufferFlags
+
+    when logGC:
+      writeCell("growObj old cell", ol)
+      writeCell("growObj new cell", res)
+
+  gcTrace(res, csAllocated)
   release(gch)
   result = cellToUsr(res)
   sysAssert(allocInv(gch.region), "growObj end")
   when defined(memProfiler): nimProfile(newsize-oldsize)
 
 proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
+  setStackTop(gch)
   result = growObj(old, newsize, gch)
 
 {.push profiler:off.}
@@ -533,70 +777,214 @@ proc doOperation(p: pointer, op: TWalkOp) =
   if p == nil: return
   var c: PCell = usrToCell(p)
   sysAssert(c != nil, "doOperation: 1")
-  case op # faster than function pointers because of easy prediction
-  of waZctDecRef:
-    #if not isAllocatedPtr(gch.region, c):
-    #  return
-    #  c_fprintf(c_stdout, "[GC] decref bug: %p", c) 
-    sysAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
-    sysAssert(c.refcount >=% rcIncrement, "doOperation 2")
-    c.refcount = c.refcount -% rcIncrement
-    when logGC: writeCell("decref (from doOperation)", c)
-    if c.refcount <% rcIncrement: addZCT(gch.zct, c)
-  of waPush:
-    add(gch.tempStack, c)
-  of waCycleDecRef:
-    sysAssert(c.refcount >=% rcIncrement, "doOperation 3")
-    c.refcount = c.refcount -% rcIncrement
-
+  gch.tempStack.add c
+  
 proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
   doOperation(d, TWalkOp(op))
 
+type
+  TRecursionType = enum 
+    FromChildren,
+    FromRoot
+
+proc CollectZCT(gch: var TGcHeap): bool
+
+template pseudoRecursion(typ: TRecursionType, body: stmt): stmt =
+  #
+
+proc trimCycleRoots(gch: var TGcHeap, startIdx = gch.cycleRootsTrimIdx) =
+  var i = startIdx
+  while i < gch.cycleRoots.len:
+    if gch.cycleRoots.d[i].color != rcCycleCandidate:
+      gch.cycleRoots.trimAt i
+    else:
+      inc i
+
+  gch.cycleRootsTrimIdx = gch.cycleRoots.len
+
 # we now use a much simpler and non-recursive algorithm for cycle removal
 proc collectCycles(gch: var TGcHeap) =
-  var tabSize = 0
-  for c in elements(gch.cycleRoots):
-    inc(tabSize)
-    forallChildren(c, waCycleDecRef)
-  if tabSize == 0: return
-  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)
-
-  # restore reference counts (a depth-first traversal is needed):
-  var marker: TCellSet
-  Init(marker)
-  for c in elements(gch.cycleRoots):
-    if c.refcount >=% rcIncrement:
-      if not containsOrIncl(marker, c):
-        gch.tempStack.len = 0
-        forAllChildren(c, waPush)
-        while gch.tempStack.len > 0:
-          dec(gch.tempStack.len)
-          var d = gch.tempStack.d[gch.tempStack.len]
-          d.refcount = d.refcount +% rcIncrement
-          if d in gch.cycleRoots and not containsOrIncl(marker, d):
-            forAllChildren(d, waPush)
-  # remove cycles:
-  for c in elements(gch.cycleRoots):
-    if c.refcount <% rcIncrement:
-      gch.tempStack.len = 0
-      forAllChildren(c, waPush)
-      while gch.tempStack.len > 0:
-        dec(gch.tempStack.len)
-        var d = gch.tempStack.d[gch.tempStack.len]
-        if d.refcount <% rcIncrement:
-          if d notin gch.cycleRoots: # d is leaf of c and not part of cycle
-            addZCT(gch.zct, d)
-            when logGC: writeCell("add to ZCT (from cycle collector)", d)
-      prepareDealloc(c)
-      gcTrace(c, csCycFreed)
-      when logGC: writeCell("cycle collector dealloc cell", c)
-      when reallyDealloc: rawDealloc(gch.region, c)
+  if gch.cycleRoots.len == 0: return
+  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, gch.cycleRoots.len)
+
+  when CollectCyclesStats:
+    let l0 = gch.cycleRoots.len
+    let tStart = getTicks()
+
+  var
+    decrefs = 0
+    increfs = 0
+    collected = 0
+    maybedeads = 0
+
+  template ignoreObject(c: PCell): expr =
+    # This controls which objects will be ignored in the mark and scan stages
+    (when MarkingSkipsAcyclicObjects: not canbeCycleRoot(c) else: false)
+    # not canbeCycleRoot(c)
+    # false
+    # c.isBitUp(rcHasStackRef)
+
+  template earlyMarkAliveRec(cell) =
+    let startLen = gch.tempStack.len
+    cell.setColor rcAlive
+    cell.forAllChildren waPush
+    
+    while startLen != gch.tempStack.len:
+      dec gch.tempStack.len
+      var c = gch.tempStack.d[gch.tempStack.len]
+      if c.color != rcAlive:
+        c.setColor rcAlive
+        c.forAllChildren waPush
+  
+  template earlyMarkAlive(stackRoots) =
+    # This marks all objects reachable from the stack as alive before any
+    # of the other stages is executed. Such objects cannot be garbage and
+    # they don't need to participate in the recursive decref/incref.
+    for i in 0 .. <stackRoots.len:
+      var c = stackRoots.d[i]
+      # c.setBit rcHasStackRef
+      earlyMarkAliveRec(c)
+
+  earlyMarkAlive(gch.decStack)
+  
+  when CollectCyclesStats:
+    let tAfterEarlyMarkAlive = getTicks()
+
+  template recursiveDecRef(cell) =
+    let startLen = gch.tempStack.len
+    cell.setColor rcDecRefApplied
+    cell.forAllChildren waPush
+    
+    while startLen != gch.tempStack.len:
+      dec gch.tempStack.len
+      var c = gch.tempStack.d[gch.tempStack.len]
+      if ignoreObject(c): continue
+
+      sysAssert(c.refcount >=% rcIncrement, "recursive dec ref")
+      dec c.refcount, rcIncrement
+      inc decrefs
+      if c.color != rcDecRefApplied:
+        c.setColor rcDecRefApplied
+        c.forAllChildren waPush
+ 
+  template markRoots(roots) =
+    var i = 0
+    while i < roots.len:
+      if roots.d[i].color == rcCycleCandidate:
+        recursiveDecRef(roots.d[i])
+        inc i
       else:
-        sysAssert(c.typ != nil, "collectCycles")
-        zeroMem(c, sizeof(TCell))
+        roots.trimAt i
+  
+  markRoots(gch.cycleRoots)
+  
+  when CollectCyclesStats:
+    let tAfterMark = getTicks()
+    c_printf "COLLECT CYCLES %d: %d/%d\n", gcCollectionIdx, gch.cycleRoots.len, l0
+  
+  template recursiveMarkAlive(cell) =
+    let startLen = gch.tempStack.len
+    cell.setColor rcAlive
+    cell.forAllChildren waPush
+    
+    while startLen != gch.tempStack.len:
+      dec gch.tempStack.len
+      var c = gch.tempStack.d[gch.tempStack.len]
+      if ignoreObject(c): continue
+      inc c.refcount, rcIncrement
+      inc increfs
+      
+      if c.color != rcAlive:
+        c.setColor rcAlive
+        c.forAllChildren waPush
+ 
+  template scanRoots(roots) =
+    for i in 0 .. <roots.len:
+      let startLen = gch.tempStack.len
+      gch.tempStack.add roots.d[i]
+      
+      while startLen != gch.tempStack.len:
+        dec gch.tempStack.len
+        var c = gch.tempStack.d[gch.tempStack.len]
+        if ignoreObject(c): continue
+        if c.color == rcDecRefApplied:
+          if c.refcount >=% rcIncrement:
+            recursiveMarkAlive(c)
+          else:
+            # note that this is not necessarily the ultimate
+            # destiny of the object. we may still mark it alive
+            # later if we encounter another node from where it's
+            # reachable.
+            c.setColor rcMaybeDead
+            inc maybedeads
+            c.forAllChildren waPush
+  
+  scanRoots(gch.cycleRoots)
+  
+  when CollectCyclesStats:
+    let tAfterScan = getTicks()
+
+  template collectDead(roots) =
+    for i in 0 .. <roots.len:
+      var c = roots.d[i]
+      c.clearBit(rcInCycleRoots)
+
+      let startLen = gch.tempStack.len
+      gch.tempStack.add c
+      
+      while startLen != gch.tempStack.len:
+        dec gch.tempStack.len
+        var c = gch.tempStack.d[gch.tempStack.len]
+        when MarkingSkipsAcyclicObjects:
+          if not canbeCycleRoot(c):
+            # This is an acyclic object reachable from a dead cyclic object
+            # We must do a normal decref here that may add the acyclic object
+            # to the ZCT
+            doDecRef(c, LocalHeap, Cyclic)
+            continue
+        if c.color == rcMaybeDead and not c.isBitUp(rcInCycleRoots):
+          c.setColor(rcReallyDead)
+          inc collected
+          c.forAllChildren waPush
+          # we need to postpone the actual deallocation in order to allow
+          # the finalizers to run while the data structures are still intact
+          gch.freeStack.add c
+          prepareDealloc(c)
+
+    for i in 0 .. <gch.freeStack.len:
+      freeCell(gch, gch.freeStack.d[i])
+
+  collectDead(gch.cycleRoots)
+  
+  when CollectCyclesStats:
+    let tFinal = getTicks()
+    cprintf "times:\n  early mark alive: %d ms\n  mark: %d ms\n  scan: %d ms\n  collect: %d ms\n  decrefs: %d\n  increfs: %d\n  marked dead: %d\n  collected: %d\n",
+      (tAfterEarlyMarkAlive - tStart)  div 1_000_000,
+      (tAfterMark - tAfterEarlyMarkAlive) div 1_000_000,
+      (tAfterScan - tAfterMark) div 1_000_000,
+      (tFinal - tAfterScan) div 1_000_000,
+      decrefs,
+      increfs,
+      maybedeads,
+      collected
+
   Deinit(gch.cycleRoots)
   Init(gch.cycleRoots)
 
+  Deinit(gch.freeStack)
+  Init(gch.freeStack)
+
+  when MarkingSkipsAcyclicObjects:
+    # Collect the acyclic objects that became unreachable due to collected
+    # cyclic objects. 
+    discard CollectZCT(gch)
+    # CollectZCT may add new cycle candidates and we may decide to loop here
+    # if gch.cycleRoots.len > 0: repeat
+
+var gcDebugging* = false
+
+var seqdbg* : proc (s: PGenericSeq) {.cdecl.}
+
 proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
   sysAssert(allocInv(gch.region), "gcMark begin")
@@ -607,13 +995,26 @@ proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
     var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
     if objStart != nil:
       # mark the cell:
-      objStart.refcount = objStart.refcount +% rcIncrement
-      add(gch.decStack, objStart)
+      if objStart.color != rcReallyDead:
+        if gcDebugging:
+          # writeCell("marking ", objStart)
+        else:
+          inc objStart.refcount, rcIncrement
+          gch.decStack.add objStart
+      else:
+        # With incremental clean-up, objects spend some time
+        # in various lists before being deallocated.
+        # We just found a reference on the stack to an object,
+        # which we have previously labeled as unreachable.
+        # This is either a bug in the GC or a pure accidental
+        # coincidence due to the conservative stack marking.
+        when debugGC:
+          # writeCell("marking dead object", objStart)
     when false:
       if isAllocatedPtr(gch.region, cell):
         sysAssert false, "allocated pointer but not interior?"
         # mark the cell:
-        cell.refcount = cell.refcount +% rcIncrement
+        inc cell.refcount, rcIncrement
         add(gch.decStack, cell)
   sysAssert(allocInv(gch.region), "gcMark end")
 
@@ -664,6 +1065,11 @@ proc stackSize(): int {.noinline.} =
   var stackTop {.volatile.}: pointer
   result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
 
+var
+  jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
+    # a little hack to get the size of a TJmpBuf in the generated C code
+    # in a platform independant way
+
 when defined(sparc): # For SPARC architecture.
   proc isOnStack(p: pointer): bool =
     var stackTop {.volatile.}: pointer
@@ -703,12 +1109,7 @@ elif stackIncreases:
     var b = cast[TAddress](stackTop)
     var x = cast[TAddress](p)
     result = a <=% x and x <=% b
-
-  var
-    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-      # a little hack to get the size of a TJmpBuf in the generated C code
-      # in a platform independant way
-
+  
   proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
     var registers: C_JmpBuf
     if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
@@ -739,8 +1140,20 @@ else:
     type PStackSlice = ptr array [0..7, pointer]
     var registers: C_JmpBuf
     if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      when MinimumStackMarking:
+        # mark the registers
+        var jmpbufPtr = cast[TAddress](addr(registers))
+        var jmpbufEnd = jmpbufPtr +% jmpbufSize
+      
+        while jmpbufPtr <=% jmpbufEnd:
+          gcMark(gch, cast[ppointer](jmpbufPtr)[])
+          jmpbufPtr = jmpbufPtr +% sizeof(pointer)
+
+        var sp = cast[TAddress](gch.stackTop)
+      else:
+        var sp = cast[TAddress](addr(registers))
+      # mark the user stack
       var max = cast[TAddress](gch.stackBottom)
-      var sp = cast[TAddress](addr(registers))
       # loop unrolled:
       while sp <% max - 8*sizeof(pointer):
         gcMark(gch, cast[PStackSlice](sp)[0])
@@ -761,11 +1174,36 @@ else:
 # end of non-portable code
 # ----------------------------------------------------------------------------
 
+proc releaseCell(gch: var TGcHeap, cell: PCell) =
+  if cell.color != rcReallyDead:
+    prepareDealloc(cell)
+    cell.setColor rcReallyDead
+
+    let l1 = gch.tempStack.len
+    cell.forAllChildren waPush
+    let l2 = gch.tempStack.len
+    for i in l1 .. <l2:
+      var cc = gch.tempStack.d[i]
+      if cc.refcount--(LocalHeap):
+        releaseCell(gch, cc)
+      else:
+        if canbeCycleRoot(cc):
+          addCycleRoot(gch.cycleRoots, cc)
+
+    gch.tempStack.len = l1
+
+  if cell.isBitDown(rcBufferedAnywhere):
+    freeCell(gch, cell)
+  # else:
+  # This object is either buffered in the cycleRoots list and we'll leave
+  # it there to be collected in the next collectCycles or it's pending in
+  # the ZCT:
+  # (e.g. we are now cleaning the 15th object, but this one is 18th in the
+  #  list. Note that this can happen only if we reached this point by the
+  #  recursion).
+  # We can ignore it now as the ZCT cleaner will reach it soon.
+
 proc CollectZCT(gch: var TGcHeap): bool =
-  # Note: Freeing may add child objects to the ZCT! So essentially we do 
-  # deep freeing, which is bad for incremental operation. In order to 
-  # avoid a deep stack, we move objects to keep the ZCT small.
-  # This is performance critical!
   const workPackage = 100
   var L = addr(gch.zct.len)
   
@@ -773,35 +1211,30 @@ proc CollectZCT(gch: var TGcHeap): bool =
     var steps = workPackage
     var t0: TTicks
     if gch.maxPause > 0: t0 = getticks()
+  
   while L[] > 0:
     var c = gch.zct.d[0]
+    sysAssert c.isBitUp(rcZct), "CollectZCT: rcZct missing!"
     sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr")
-    # remove from ZCT:
-    sysAssert((c.refcount and rcZct) == rcZct, "collectZCT")
     
-    c.refcount = c.refcount and not colorMask
+    # remove from ZCT:    
+    c.clearBit(rcZct)
     gch.zct.d[0] = gch.zct.d[L[] - 1]
     dec(L[])
     when withRealtime: dec steps
-    if c.refcount <% rcIncrement: 
+    if c.refcount <% rcIncrement:
       # It may have a RC > 0, if it is in the hardware stack or
       # it has not been removed yet from the ZCT. This is because
       # ``incref`` does not bother to remove the cell from the ZCT 
       # as this might be too slow.
       # In any case, it should be removed from the ZCT. But not
       # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
-      if canBeCycleRoot(c): excl(gch.cycleRoots, c)
-      when logGC: writeCell("zct dealloc cell", c)
-      gcTrace(c, csZctFreed)
-      # We are about to free the object, call the finalizer BEFORE its
-      # children are deleted as well, because otherwise the finalizer may
-      # access invalid memory. This is done by prepareDealloc():
-      prepareDealloc(c)
-      forAllChildren(c, waZctDecRef)
-      when reallyDealloc: rawDealloc(gch.region, c)
+      if c.color == rcRetiredBuffer:
+        if c.isBitDown(rcInCycleRoots):
+          freeCell(gch, c)
       else:
-        sysAssert(c.typ != nil, "collectZCT 2")
-        zeroMem(c, sizeof(TCell))
+        # if c.color == rcReallyDead: writeCell("ReallyDead in ZCT?", c)
+        releaseCell(gch, c)
     when withRealtime:
       if steps == 0:
         steps = workPackage
@@ -813,22 +1246,40 @@ proc CollectZCT(gch: var TGcHeap): bool =
           if duration >= gch.maxPause - 50_000:
             return false
   result = true
+  gch.trimCycleRoots
+  #deInit(gch.zct)
+  #init(gch.zct)
 
-proc unmarkStackAndRegisters(gch: var TGcHeap) = 
+proc unmarkStackAndRegisters(gch: var TGcHeap) =
   var d = gch.decStack.d
-  for i in 0..gch.decStack.len-1:
+  for i in 0 .. <gch.decStack.len:
     sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters"
-    # decRef(d[i]) inlined: cannot create a cycle and must not acquire lock
+    # XXX: just call doDecRef?
     var c = d[i]
+    sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
+    
+    if c.color == rcRetiredBuffer:
+      continue
+
     # XXX no need for an atomic dec here:
-    if --c.refcount:
+    if c.refcount--(LocalHeap):
+      # the object survived only because of a stack reference
+      # it still doesn't have heap refernces
       addZCT(gch.zct, c)
-    sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
+    
+    if canbeCycleRoot(c):
+      # any cyclic object reachable from the stack can be turned into
+      # a leak if it's orphaned through the stack reference
+      # that's because the write-barrier won't be executed for stack
+      # locations
+      addCycleRoot(gch.cycleRoots, c)
+
   gch.decStack.len = 0
 
 proc collectCTBody(gch: var TGcHeap) =
   when withRealtime:
     let t0 = getticks()
+  when debugGC: inc gcCollectionIdx
   sysAssert(allocInv(gch.region), "collectCT: begin")
   
   gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
@@ -842,7 +1293,7 @@ proc collectCTBody(gch: var TGcHeap) =
     when cycleGC:
       if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC:
         collectCycles(gch)
-        discard collectZCT(gch)
+        sysAssert gch.zct.len == 0, "zct is not null after collect cycles"
         inc(gch.stat.cycleCollections)
         gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
                                  cycleIncrease)
@@ -909,6 +1360,7 @@ when not defined(useNimRtl):
     # set to the max value to suppress the cycle detector
 
   proc GC_fullCollect() =
+    setStackTop(gch)
     acquire(gch)
     var oldThreshold = gch.cycleThreshold
     gch.cycleThreshold = 0 # forces cycle collection
@@ -928,7 +1380,7 @@ when not defined(useNimRtl):
              "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
              "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
              "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
-    when traceGC: writeLeakage()
+    when traceGC: writeLeakage(true)
     GC_enable()
 
 {.pop.}
diff --git a/lib/system/hti.nim b/lib/system/hti.nim
index 93dc79e3d..a2d132dbf 100755
--- a/lib/system/hti.nim
+++ b/lib/system/hti.nim
@@ -13,11 +13,19 @@ when defined(NimString):
 else:
   {.pragma: codegenType.}
 
-type # This should be he same as ast.TTypeKind
-     # many enum fields are not used at runtime
+type 
+  # This should be he same as ast.TTypeKind
+  # many enum fields are not used at runtime
   TNimKind = enum
-    tyNone, tyBool, tyChar,
-    tyEmpty, tyArrayConstr, tyNil, tyExpr, tyStmt, tyTypeDesc,
+    tyNone,
+    tyBool,
+    tyChar,
+    tyEmpty,
+    tyArrayConstr,
+    tyNil,
+    tyExpr,
+    tyStmt,
+    tyTypeDesc,
     tyGenericInvokation, # ``T[a, b]`` for types to invoke
     tyGenericBody,       # ``T[a, b, body]`` last parameter is the body
     tyGenericInst,       # ``T[a, b, realInstance]`` instantiated generic type
@@ -30,15 +38,30 @@ type # This should be he same as ast.TTypeKind
     tyTuple,             # WARNING: The compiler uses tyTuple for pure objects!
     tySet,
     tyRange,
-    tyPtr, tyRef,
+    tyPtr,
+    tyRef,
     tyVar,
     tySequence,
     tyProc,
-    tyPointer, tyOpenArray,
-    tyString, tyCString, tyForward,
-    tyInt, tyInt8, tyInt16, tyInt32, tyInt64,
-    tyFloat, tyFloat32, tyFloat64, tyFloat128,
-    tyUInt, tyUInt8, tyUInt16, tyUInt32, tyUInt64,
+    tyPointer,
+    tyOpenArray,
+    tyString,
+    tyCString,
+    tyForward,
+    tyInt,
+    tyInt8,
+    tyInt16,
+    tyInt32,
+    tyInt64,
+    tyFloat,
+    tyFloat32,
+    tyFloat64,
+    tyFloat128,
+    tyUInt,
+    tyUInt8,
+    tyUInt16,
+    tyUInt32,
+    tyUInt64,
     tyBigNum,
 
   TNimNodeKind = enum nkNone, nkSlot, nkList, nkCase
diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim
index 1abf3fbbf..9bf9c1e67 100755
--- a/lib/system/mmdisp.nim
+++ b/lib/system/mmdisp.nim
@@ -21,6 +21,7 @@ const
   alwaysGC = false # collect after every memory allocation (for debugging)
   leakDetector = false
   overwriteFree = false
+  trackAllocationSource = leakDetector
   
   cycleGC = true # (de)activate the cycle GC
   reallyDealloc = true # for debugging purposes this can be set to false
@@ -306,7 +307,10 @@ else:
   include "system/cellsets"
   when not leakDetector:
     sysAssert(sizeof(TCell) == sizeof(TFreeCell), "sizeof TFreeCell")
-  include "system/gc"
+  when true:
+    include "system/gc"
+  else:
+    include "system/oldgc"
   
 {.pop.}
 
diff --git a/lib/system/oldgc.nim b/lib/system/oldgc.nim
new file mode 100644
index 000000000..f3b90e6bd
--- /dev/null
+++ b/lib/system/oldgc.nim
@@ -0,0 +1,1044 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2012 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+#            Garbage Collector
+#
+# The basic algorithm is *Deferrent Reference Counting* with cycle detection.
+# This is achieved by combining a Deutsch-Bobrow garbage collector
+# together with Christoper's partial mark-sweep garbage collector.
+#
+# Special care has been taken to avoid recursion as far as possible to avoid
+# stack overflows when traversing deep datastructures. It is well-suited
+# for soft real time applications (like games).
+{.push profiler:off.}
+
+const
+  CycleIncrease = 2 # is a multiplicative increase
+  InitialCycleThreshold = 4*1024*1024 # X MB because cycle checking is slow
+  ZctThreshold = 500  # we collect garbage if the ZCT's size
+                      # reaches this threshold
+                      # this seems to be a good value
+  withRealTime = defined(useRealtimeGC)
+
+when withRealTime and not defined(getTicks):
+  include "system/timers"
+when defined(memProfiler):
+  proc nimProfile(requestedSize: int)
+
+include "system/timers"
+
+const
+  rcIncrement = 0b1000 # so that lowest 3 bits are not touched
+  # NOTE: Most colors are currently unused
+  rcBlack = 0b000  # cell is colored black; in use or free
+  rcGray = 0b001   # possible member of a cycle
+  rcWhite = 0b010  # member of a garbage cycle
+  rcPurple = 0b011 # possible root of a cycle
+  rcZct = 0b100    # in ZCT
+  rcRed = 0b101    # Candidate cycle undergoing sigma-computation
+  rcOrange = 0b110 # Candidate cycle awaiting epoch boundary
+  rcShift = 3      # shift by rcShift to get the reference counter
+  colorMask = 0b111
+type
+  TWalkOp = enum
+    waZctDecRef, waPush, waCycleDecRef
+
+  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
+    # A ref type can have a finalizer that is called before the object's
+    # storage is freed.
+
+  TGcStat {.final, pure.} = object
+    stackScans: int          # number of performed stack scans (for statistics)
+    cycleCollections: int    # number of performed full collections
+    maxThreshold: int        # max threshold that has been set
+    maxStackSize: int        # max stack size
+    maxStackCells: int       # max stack cells in ``decStack``
+    cycleTableSize: int      # max entries in cycle table  
+    maxPause: int64          # max measured GC pause in nanoseconds
+  
+  TGcHeap {.final, pure.} = object # this contains the zero count and
+                                   # non-zero count table
+    stackBottom: pointer
+    cycleThreshold: int
+    zct: TCellSeq            # the zero count table
+    decStack: TCellSeq       # cells in the stack that are to decref again
+    cycleRoots: TCellSet
+    tempStack: TCellSeq      # temporary stack for recursion elimination
+    recGcLock: int           # prevent recursion via finalizers; no thread lock
+    when withRealTime:
+      maxPause: TNanos       # max allowed pause in nanoseconds; active if > 0
+    region: TMemRegion       # garbage collected region
+    stat: TGcStat
+
+var
+  gch {.rtlThreadVar.}: TGcHeap
+
+when not defined(useNimRtl):
+  InstantiateForRegion(gch.region)
+
+template acquire(gch: TGcHeap) = 
+  when hasThreadSupport and hasSharedHeap:
+    AcquireSys(HeapLock)
+
+template release(gch: TGcHeap) = 
+  when hasThreadSupport and hasSharedHeap:
+    releaseSys(HeapLock)
+
+proc addZCT(s: var TCellSeq, c: PCell) {.noinline.} =
+  if (c.refcount and rcZct) == 0:
+    c.refcount = c.refcount and not colorMask or rcZct
+    add(s, c)
+
+proc cellToUsr(cell: PCell): pointer {.inline.} =
+  # convert object (=pointer to refcount) to pointer to userdata
+  result = cast[pointer](cast[TAddress](cell)+%TAddress(sizeof(TCell)))
+
+proc usrToCell(usr: pointer): PCell {.inline.} =
+  # convert pointer to userdata to object (=pointer to refcount)
+  result = cast[PCell](cast[TAddress](usr)-%TAddress(sizeof(TCell)))
+
+proc canbeCycleRoot(c: PCell): bool {.inline.} =
+  result = ntfAcyclic notin c.typ.flags
+
+proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
+  # used for code generation concerning debugging
+  result = usrToCell(c).typ
+
+proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
+  result = int(usrToCell(p).refcount) shr rcShift
+
+# this that has to equals zero, otherwise we have to round up UnitsPerPage:
+when BitsPerPage mod (sizeof(int)*8) != 0:
+  {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
+
+when debugGC:
+  proc writeCell(msg: CString, c: PCell) =
+    var kind = -1
+    if c.typ != nil: kind = ord(c.typ.kind)
+    when leakDetector:
+      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
+                msg, c, kind, c.refcount shr rcShift, c.filename, c.line)
+    else:
+      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
+                msg, c, kind, c.refcount shr rcShift)
+
+when traceGC:
+  # traceGC is a special switch to enable extensive debugging
+  type
+    TCellState = enum
+      csAllocated, csZctFreed, csCycFreed
+  var
+    states: array[TCellState, TCellSet]
+
+  proc traceCell(c: PCell, state: TCellState) =
+    case state
+    of csAllocated:
+      if c in states[csAllocated]:
+        writeCell("attempt to alloc an already allocated cell", c)
+        sysAssert(false, "traceCell 1")
+      excl(states[csCycFreed], c)
+      excl(states[csZctFreed], c)
+    of csZctFreed:
+      if c in states[csZctFreed]:
+        writeCell("attempt to free zct cell twice", c)
+        sysAssert(false, "traceCell 2")
+      if c in states[csCycFreed]:
+        writeCell("attempt to free with zct, but already freed with cyc", c)
+        sysAssert(false, "traceCell 3")
+      if c notin states[csAllocated]:
+        writeCell("attempt to free not an allocated cell", c)
+        sysAssert(false, "traceCell 4")
+      excl(states[csAllocated], c)
+    of csCycFreed:
+      if c notin states[csAllocated]:
+        writeCell("attempt to free a not allocated cell", c)
+        sysAssert(false, "traceCell 5")
+      if c in states[csCycFreed]:
+        writeCell("attempt to free cyc cell twice", c)
+        sysAssert(false, "traceCell 6")
+      if c in states[csZctFreed]:
+        writeCell("attempt to free with cyc, but already freed with zct", c)
+        sysAssert(false, "traceCell 7")
+      excl(states[csAllocated], c)
+    incl(states[state], c)
+
+  proc writeLeakage() =
+    var z = 0
+    var y = 0
+    var e = 0
+    for c in elements(states[csAllocated]):
+      inc(e)
+      if c in states[csZctFreed]: inc(z)
+      elif c in states[csCycFreed]: inc(y)
+      else: writeCell("leak", c)
+    cfprintf(cstdout, "Allocations: %ld; ZCT freed: %ld; CYC freed: %ld\n",
+             e, z, y)
+
+template gcTrace(cell, state: expr): stmt {.immediate.} =
+  when traceGC: traceCell(cell, state)
+
+# forward declarations:
+proc collectCT(gch: var TGcHeap)
+proc IsOnStack*(p: pointer): bool {.noinline.}
+proc forAllChildren(cell: PCell, op: TWalkOp)
+proc doOperation(p: pointer, op: TWalkOp)
+proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
+# we need the prototype here for debugging purposes
+
+when hasThreadSupport and hasSharedHeap:
+  template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement
+  template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement)
+else:
+  template `--`(x: expr): expr = 
+    Dec(x, rcIncrement)
+    x <% rcIncrement
+  template `++`(x: expr): stmt = Inc(x, rcIncrement)
+
+proc prepareDealloc(cell: PCell) =
+  if cell.typ.finalizer != nil:
+    # the finalizer could invoke something that
+    # allocates memory; this could trigger a garbage
+    # collection. Since we are already collecting we
+    # prevend recursive entering here by a lock.
+    # XXX: we should set the cell's children to nil!
+    inc(gch.recGcLock)
+    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
+    dec(gch.recGcLock)
+
+proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} = 
+  # we MUST access gch as a global here, because this crosses DLL boundaries!
+  when hasThreadSupport and hasSharedHeap:
+    AcquireSys(HeapLock)
+  incl(gch.cycleRoots, c)
+  when hasThreadSupport and hasSharedHeap:
+    ReleaseSys(HeapLock)
+
+proc rtlAddZCT(c: PCell) {.rtl, inl.} =
+  # we MUST access gch as a global here, because this crosses DLL boundaries!
+  when hasThreadSupport and hasSharedHeap:
+    AcquireSys(HeapLock)
+  addZCT(gch.zct, c)
+  when hasThreadSupport and hasSharedHeap:
+    ReleaseSys(HeapLock)
+
+proc decRef(c: PCell) {.inline.} =
+  sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
+  sysAssert(c.refcount >=% rcIncrement, "decRef")
+  if --c.refcount:
+    rtlAddZCT(c)
+  elif canBeCycleRoot(c):
+    # unfortunately this is necessary here too, because a cycle might just
+    # have been broken up and we could recycle it.
+    rtlAddCycleRoot(c) 
+
+proc incRef(c: PCell) {.inline.} = 
+  sysAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
+  ++c.refcount
+  if canBeCycleRoot(c):
+    rtlAddCycleRoot(c)
+
+proc nimGCref(p: pointer) {.compilerProc, inline.} = incRef(usrToCell(p))
+proc nimGCunref(p: pointer) {.compilerProc, inline.} = decRef(usrToCell(p))
+
+proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
+  sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle")
+  var c = usrToCell(p)
+  sysAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
+  if --c.refcount:
+    rtlAddZCT(c)
+    sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2")
+  sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5")
+
+proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+  # the code generator calls this proc!
+  sysAssert(not isOnStack(dest), "asgnRef")
+  # BUGFIX: first incRef then decRef!
+  if src != nil: incRef(usrToCell(src))
+  if dest[] != nil: decRef(usrToCell(dest[]))
+  dest[] = src
+
+proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+  # the code generator calls this proc if it is known at compile time that no 
+  # cycle is possible.
+  if src != nil:
+    var c = usrToCell(src)
+    ++c.refcount
+  if dest[] != nil: 
+    var c = usrToCell(dest[])
+    if --c.refcount:
+      rtlAddZCT(c)
+  dest[] = src
+
+proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerProc.} =
+  # unsureAsgnRef updates the reference counters only if dest is not on the
+  # stack. It is used by the code generator if it cannot decide wether a
+  # reference is in the stack or not (this can happen for var parameters).
+  if not IsOnStack(dest):
+    if src != nil: incRef(usrToCell(src))
+    # XXX finally use assembler for the stack checking instead!
+    # the test for '!= nil' is correct, but I got tired of the segfaults
+    # resulting from the crappy stack checking:
+    if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[]))
+  else:
+    # can't be an interior pointer if it's a stack location!
+    sysAssert(interiorAllocatedPtr(gch.region, dest)==nil, 
+              "stack loc AND interior pointer")
+  dest[] = src
+
+proc initGC() =
+  when not defined(useNimRtl):
+    when traceGC:
+      for i in low(TCellState)..high(TCellState): Init(states[i])
+    gch.cycleThreshold = InitialCycleThreshold
+    gch.stat.stackScans = 0
+    gch.stat.cycleCollections = 0
+    gch.stat.maxThreshold = 0
+    gch.stat.maxStackSize = 0
+    gch.stat.maxStackCells = 0
+    gch.stat.cycleTableSize = 0
+    # init the rt
+    init(gch.zct)
+    init(gch.tempStack)
+    Init(gch.cycleRoots)
+    Init(gch.decStack)
+
+proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
+  var d = cast[TAddress](dest)
+  case n.kind
+  of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
+  of nkList:
+    for i in 0..n.len-1:
+      # inlined for speed
+      if n.sons[i].kind == nkSlot:
+        if n.sons[i].typ.kind in {tyRef, tyString, tySequence}:
+          doOperation(cast[ppointer](d +% n.sons[i].offset)[], op)
+        else:
+          forAllChildrenAux(cast[pointer](d +% n.sons[i].offset), 
+                            n.sons[i].typ, op)
+      else:
+        forAllSlotsAux(dest, n.sons[i], op)
+  of nkCase:
+    var m = selectBranch(dest, n)
+    if m != nil: forAllSlotsAux(dest, m, op)
+  of nkNone: sysAssert(false, "forAllSlotsAux")
+
+proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) =
+  var d = cast[TAddress](dest)
+  if dest == nil: return # nothing to do
+  if ntfNoRefs notin mt.flags:
+    case mt.Kind
+    of tyRef, tyString, tySequence: # leaf:
+      doOperation(cast[ppointer](d)[], op)
+    of tyObject, tyTuple:
+      forAllSlotsAux(dest, mt.node, op)
+    of tyArray, tyArrayConstr, tyOpenArray:
+      for i in 0..(mt.size div mt.base.size)-1:
+        forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
+    else: nil
+
+proc forAllChildren(cell: PCell, op: TWalkOp) =
+  sysAssert(cell != nil, "forAllChildren: 1")
+  sysAssert(cell.typ != nil, "forAllChildren: 2")
+  sysAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
+  let marker = cell.typ.marker
+  if marker != nil:
+    marker(cellToUsr(cell), op.int)
+  else:
+    case cell.typ.Kind
+    of tyRef: # common case
+      forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
+    of tySequence:
+      var d = cast[TAddress](cellToUsr(cell))
+      var s = cast[PGenericSeq](d)
+      if s != nil:
+        let baseAddr = d +% GenericSeqSize
+        for i in 0..s.len-1:
+          forAllChildrenAux(cast[pointer](baseAddr +% i *% cell.typ.base.size),
+                            cell.typ.base, op)
+    else: nil
+
+proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
+  # we check the last 8 entries (cache line) for a slot that could be reused.
+  # In 63% of all cases we succeed here! But we have to optimize the heck
+  # out of this small linear search so that ``newObj`` is not slowed down.
+  # 
+  # Slots to try          cache hit
+  # 1                     32%
+  # 4                     59%
+  # 8                     63%
+  # 16                    66%
+  # all slots             68%
+  var L = gch.zct.len
+  var d = gch.zct.d
+  when true:
+    # loop unrolled for performance:
+    template replaceZctEntry(i: expr) =
+      c = d[i]
+      if c.refcount >=% rcIncrement:
+        c.refcount = c.refcount and not colorMask
+        d[i] = res
+        return
+    if L > 8:
+      var c: PCell
+      replaceZctEntry(L-1)
+      replaceZctEntry(L-2)
+      replaceZctEntry(L-3)
+      replaceZctEntry(L-4)
+      replaceZctEntry(L-5)
+      replaceZctEntry(L-6)
+      replaceZctEntry(L-7)
+      replaceZctEntry(L-8)
+      add(gch.zct, res)
+    else:
+      d[L] = res
+      inc(gch.zct.len)
+  else:
+    for i in countdown(L-1, max(0, L-8)):
+      var c = d[i]
+      if c.refcount >=% rcIncrement:
+        c.refcount = c.refcount and not colorMask
+        d[i] = res
+        return
+    add(gch.zct, res)
+
+proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
+  # generates a new object and sets its reference counter to 0
+  acquire(gch)
+  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  collectCT(gch)
+  sysAssert(allocInv(gch.region), "rawNewObj begin")
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
+  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  # now it is buffered in the ZCT
+  res.typ = typ
+  when trackAllocationSource and not hasThreadSupport:
+    if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil:
+      res.filename = framePtr.prev.prev.filename
+      res.line = framePtr.prev.prev.line
+    else:
+      res.filename = "nofile"
+  res.refcount = rcZct # refcount is zero, but mark it to be in the ZCT  
+  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
+  # its refcount is zero, so add it to the ZCT:
+  addNewObjToZCT(res, gch)
+  when logGC: writeCell("new cell", res)
+  gcTrace(res, csAllocated)
+  release(gch)
+  result = cellToUsr(res)
+  sysAssert(allocInv(gch.region), "rawNewObj end")
+
+{.pop.}
+
+proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  result = rawNewObj(typ, size, gch)
+  zeroMem(result, size)
+  when defined(memProfiler): nimProfile(size)
+
+proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
+  # `newObj` already uses locks, so no need for them here.
+  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
+  result = newObj(typ, size)
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+  when defined(memProfiler): nimProfile(size)
+
+proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  # generates a new object and sets its reference counter to 1
+  sysAssert(allocInv(gch.region), "newObjRC1 begin")
+  acquire(gch)
+  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  collectCT(gch)
+  sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
+  
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
+  sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
+  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  # now it is buffered in the ZCT
+  res.typ = typ
+  when trackAllocationSource and not hasThreadSupport:
+    if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil:
+      res.filename = framePtr.prev.prev.filename
+      res.line = framePtr.prev.prev.line
+    else:
+      res.filename = "nofile"
+  res.refcount = rcIncrement # refcount is 1
+  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
+  when logGC: writeCell("new cell", res)
+  gcTrace(res, csAllocated)
+  release(gch)
+  result = cellToUsr(res)
+  zeroMem(result, size)
+  sysAssert(allocInv(gch.region), "newObjRC1 end")
+  when defined(memProfiler): nimProfile(size)
+
+proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
+  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
+  result = newObjRC1(typ, size)
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+  when defined(memProfiler): nimProfile(size)
+  
+proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
+  acquire(gch)
+  collectCT(gch)
+  var ol = usrToCell(old)
+  sysAssert(ol.typ != nil, "growObj: 1")
+  sysAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
+  sysAssert(allocInv(gch.region), "growObj begin")
+
+  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
+  var elemSize = 1
+  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
+  
+  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
+  copyMem(res, ol, oldsize + sizeof(TCell))
+  zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)),
+          newsize-oldsize)
+  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
+  sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
+  #if res.refcount <% rcIncrement:
+  #  add(gch.zct, res)
+  #else: # XXX: what to do here?
+  #  decRef(ol)
+  if (ol.refcount and colorMask) == rcZct:
+    var j = gch.zct.len-1
+    var d = gch.zct.d
+    while j >= 0: 
+      if d[j] == ol:
+        d[j] = res
+        break
+      dec(j)
+  if canBeCycleRoot(ol): excl(gch.cycleRoots, ol)
+  when logGC:
+    writeCell("growObj old cell", ol)
+    writeCell("growObj new cell", res)
+  gcTrace(ol, csZctFreed)
+  gcTrace(res, csAllocated)
+  when reallyDealloc: rawDealloc(gch.region, ol)
+  else:
+    sysAssert(ol.typ != nil, "growObj: 5")
+    zeroMem(ol, sizeof(TCell))
+  release(gch)
+  result = cellToUsr(res)
+  sysAssert(allocInv(gch.region), "growObj end")
+  when defined(memProfiler): nimProfile(newsize-oldsize)
+
+proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
+  result = growObj(old, newsize, gch)
+
+{.push profiler:off.}
+
+# ---------------- cycle collector -------------------------------------------
+
+var
+  decrefs = 0
+  increfs = 0
+  marked = 0
+  collected = 0
+
+proc doOperation(p: pointer, op: TWalkOp) =
+  if p == nil: return
+  var c: PCell = usrToCell(p)
+  sysAssert(c != nil, "doOperation: 1")
+  case op # faster than function pointers because of easy prediction
+  of waZctDecRef:
+    #if not isAllocatedPtr(gch.region, c):
+    #  return
+    #  c_fprintf(c_stdout, "[GC] decref bug: %p", c) 
+    sysAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
+    sysAssert(c.refcount >=% rcIncrement, "doOperation 2")
+    c.refcount = c.refcount -% rcIncrement
+    when logGC: writeCell("decref (from doOperation)", c)
+    if c.refcount <% rcIncrement: addZCT(gch.zct, c)
+  of waPush:
+    add(gch.tempStack, c)
+  of waCycleDecRef:
+    sysAssert(c.refcount >=% rcIncrement, "doOperation 3")
+    c.refcount = c.refcount -% rcIncrement
+    inc decrefs
+
+proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
+  doOperation(d, TWalkOp(op))
+  
+# we now use a much simpler and non-recursive algorithm for cycle removal
+proc collectCycles(gch: var TGcHeap) =
+  var tabSize = 0
+  let tStart = getTicks()
+  decrefs = 0
+  increfs = 0
+  marked = 0
+  collected = 0
+
+  # XXX: acyclic cutoff (specialized marker procs)
+  # short trim cycle roots
+  # long trim with threshold
+  # don't add new objects to both ztc and cycleroots?
+  # leak detector with hash in rawNew / free
+  #
+  for c in elements(gch.cycleRoots):
+    inc(tabSize)
+    forallChildren(c, waCycleDecRef)
+  if tabSize == 0: return
+  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)
+
+  c_printf "COLLECT CYCLES: %d\n", tabSize
+  let tAfterMark = getTicks()
+
+  # restore reference counts (a depth-first traversal is needed):
+  var marker: TCellSet
+  Init(marker)
+  for c in elements(gch.cycleRoots):
+    if c.refcount >=% rcIncrement:
+      inc marked
+      if not containsOrIncl(marker, c):
+        gch.tempStack.len = 0
+        forAllChildren(c, waPush)
+        while gch.tempStack.len > 0:
+          dec(gch.tempStack.len)
+          var d = gch.tempStack.d[gch.tempStack.len]
+          d.refcount = d.refcount +% rcIncrement
+          inc increfs
+          if d in gch.cycleRoots and not containsOrIncl(marker, d):
+            forAllChildren(d, waPush)
+  
+  let tAfterScan = getTicks()
+
+  # remove cycles:
+  for c in elements(gch.cycleRoots):
+    if c.refcount <% rcIncrement:
+      inc collected
+      gch.tempStack.len = 0
+      forAllChildren(c, waPush)
+      while gch.tempStack.len > 0:
+        dec(gch.tempStack.len)
+        var d = gch.tempStack.d[gch.tempStack.len]
+        if d.refcount <% rcIncrement:
+          if d notin gch.cycleRoots: # d is leaf of c and not part of cycle
+            addZCT(gch.zct, d)
+            when logGC: writeCell("add to ZCT (from cycle collector)", d)
+      prepareDealloc(c)
+      gcTrace(c, csCycFreed)
+      when logGC: writeCell("cycle collector dealloc cell", c)
+      when reallyDealloc: rawDealloc(gch.region, c)
+      else:
+        sysAssert(c.typ != nil, "collectCycles")
+        zeroMem(c, sizeof(TCell))
+
+  let tFinal = getTicks()
+
+  cprintf "times:\n  mark: %d ms\n  scan: %d ms\n  collect: %d ms\n  decrefs: %d\n  increfs: %d\n  marked: %d\n  collected: %d\n",
+    (tAfterMark - tStart) div 1_000_000,
+    (tAfterScan - tAfterMark) div 1_000_000,
+    (tFinal - tAfterScan) div 1_000_000,
+    decrefs,
+    increfs,
+    marked,
+    collected
+
+  Deinit(gch.cycleRoots)
+  Init(gch.cycleRoots)
+
+var gcDebugging* = false
+var vis*: proc (a: pointer, b: PNimType)
+
+proc debugNode(n: ptr TNimNode) =
+  c_fprintf(c_stdout, "node %s\n", n.name)
+  for i in 0..n.len-1:
+    debugNode(n.sons[i])
+
+proc debugTyp(x: PNimType) =
+  c_fprintf(c_stdout, "type %d\n", x.kind)
+  if x.node != nil:
+    debugNode(x.node)
+
+var seqdbg* : proc (s: PGenericSeq) {.cdecl.}
+
+type
+  TCyclicMode = enum
+    Cyclic,
+    Acyclic,
+    MaybeCyclic
+
+  TReleaseType = enum
+    AddToZTC
+    FreeImmediately
+
+  THeapType = enum
+    LocalHeap
+    SharedHeap
+
+template `++` (rc: TRefCount, heapType: THeapType): stmt =
+  when heapType == SharedHeap:
+    discard atomicInc(rc, rcIncrement)
+  else:
+    inc rc, rcIncrement
+
+template `--`(rc: TRefCount): expr =
+  dec rc, rcIncrement
+  rc <% rcIncrement
+
+template `--` (rc: TRefCount, heapType: THeapType): expr =
+  (when heapType == SharedHeap: atomicDec(rc, rcIncrement) <% rcIncrement
+   else: --rc)
+
+template doDecRef(cc: PCell,
+                  heapType = LocalHeap,
+                  cycleFlag = MaybeCyclic): stmt =
+  var c = cc
+  sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
+  # XXX: move this elesewhere
+
+  sysAssert(c.refcount >=% rcIncrement, "decRef")
+  if c.refcount--(heapType):
+    # this is the last reference from the heap
+    # add to a zero-count-table that will be matched against stack pointers
+    rtlAddZCT(c)
+    # writeCell("decref to 0", c)
+  else:
+    when cycleFlag != Acyclic:
+      if cycleFlag == Cyclic or canBeCycleRoot(c):
+        # a cycle may have been broken
+        rtlAddCycleRoot(c)
+
+proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
+  # the addresses are not as cells on the stack, so turn them to cells:
+  sysAssert(allocInv(gch.region), "gcMark begin")
+  var cell = usrToCell(p)
+  var c = cast[TAddress](cell)
+  if c >% PageSize:
+    # fast check: does it look like a cell?
+    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
+    if objStart != nil:
+      # mark the cell:
+      if not gcDebugging:
+        objStart.refcount = objStart.refcount +% rcIncrement
+        add(gch.decStack, objStart)
+    when false:
+      if isAllocatedPtr(gch.region, cell):
+        sysAssert false, "allocated pointer but not interior?"
+        # mark the cell:
+        cell.refcount = cell.refcount +% rcIncrement
+        add(gch.decStack, cell)
+  sysAssert(allocInv(gch.region), "gcMark end")
+
+proc markThreadStacks(gch: var TGcHeap) = 
+  when hasThreadSupport and hasSharedHeap:
+    {.error: "not fully implemented".}
+    var it = threadList
+    while it != nil:
+      # mark registers: 
+      for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
+      var sp = cast[TAddress](it.stackBottom)
+      var max = cast[TAddress](it.stackTop)
+      # XXX stack direction?
+      # XXX unroll this loop:
+      while sp <=% max:
+        gcMark(gch, cast[ppointer](sp)[])
+        sp = sp +% sizeof(pointer)
+      it = it.next
+
+# ----------------- stack management --------------------------------------
+#  inspired from Smart Eiffel
+
+when defined(sparc):
+  const stackIncreases = false
+elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
+     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
+  const stackIncreases = true
+else:
+  const stackIncreases = false
+
+when not defined(useNimRtl):
+  {.push stack_trace: off.}
+  proc setStackBottom(theStackBottom: pointer) =
+    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
+    # the first init must be the one that defines the stack bottom:
+    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
+    else:
+      var a = cast[TAddress](theStackBottom) # and not PageMask - PageSize*2
+      var b = cast[TAddress](gch.stackBottom)
+      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
+      when stackIncreases:
+        gch.stackBottom = cast[pointer](min(a, b))
+      else:
+        gch.stackBottom = cast[pointer](max(a, b))
+  {.pop.}
+
+proc stackSize(): int {.noinline.} =
+  var stackTop {.volatile.}: pointer
+  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
+
+when defined(sparc): # For SPARC architecture.
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var b = cast[TAddress](gch.stackBottom)
+    var a = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+    when defined(sparcv9):
+      asm  """"flushw \n" """
+    else:
+      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
+
+    var
+      max = gch.stackBottom
+      sp: PPointer
+      stackTop: array[0..1, pointer]
+    sp = addr(stackTop[0])
+    # Addresses decrease as the stack grows.
+    while sp <= max:
+      gcMark(gch, sp[])
+      sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer))
+
+elif defined(ELATE):
+  {.error: "stack marking code is to be written for this architecture".}
+
+elif stackIncreases:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses increase as the stack grows.
+  # ---------------------------------------------------------------------------
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var a = cast[TAddress](gch.stackBottom)
+    var b = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  var
+    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
+      # a little hack to get the size of a TJmpBuf in the generated C code
+      # in a platform independant way
+
+  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+    var registers: C_JmpBuf
+    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      var max = cast[TAddress](gch.stackBottom)
+      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
+      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
+      # otherwise sp would be below the registers structure).
+      while sp >=% max:
+        gcMark(gch, cast[ppointer](sp)[])
+        sp = sp -% sizeof(pointer)
+
+else:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses decrease as the stack grows.
+  # ---------------------------------------------------------------------------
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var b = cast[TAddress](gch.stackBottom)
+    var a = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+    # We use a jmp_buf buffer that is in the C stack.
+    # Used to traverse the stack and registers assuming
+    # that 'setjmp' will save registers in the C stack.
+    type PStackSlice = ptr array [0..7, pointer]
+    var registers: C_JmpBuf
+    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      var max = cast[TAddress](gch.stackBottom)
+      var sp = cast[TAddress](addr(registers))
+      # loop unrolled:
+      while sp <% max - 8*sizeof(pointer):
+        gcMark(gch, cast[PStackSlice](sp)[0])
+        gcMark(gch, cast[PStackSlice](sp)[1])
+        gcMark(gch, cast[PStackSlice](sp)[2])
+        gcMark(gch, cast[PStackSlice](sp)[3])
+        gcMark(gch, cast[PStackSlice](sp)[4])
+        gcMark(gch, cast[PStackSlice](sp)[5])
+        gcMark(gch, cast[PStackSlice](sp)[6])
+        gcMark(gch, cast[PStackSlice](sp)[7])
+        sp = sp +% sizeof(pointer)*8
+      # last few entries:
+      while sp <=% max:
+        gcMark(gch, cast[ppointer](sp)[])
+        sp = sp +% sizeof(pointer)
+
+# ----------------------------------------------------------------------------
+# end of non-portable code
+# ----------------------------------------------------------------------------
+
+proc CollectZCT(gch: var TGcHeap): bool =
+  # Note: Freeing may add child objects to the ZCT! So essentially we do 
+  # deep freeing, which is bad for incremental operation. In order to 
+  # avoid a deep stack, we move objects to keep the ZCT small.
+  # This is performance critical!
+  const workPackage = 100
+  var L = addr(gch.zct.len)
+  
+  when withRealtime:
+    var steps = workPackage
+    var t0: TTicks
+    if gch.maxPause > 0: t0 = getticks()
+  while L[] > 0:
+    var c = gch.zct.d[0]
+    sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr")
+    # remove from ZCT:
+    sysAssert((c.refcount and rcZct) == rcZct, "collectZCT")
+    
+    c.refcount = c.refcount and not colorMask
+    gch.zct.d[0] = gch.zct.d[L[] - 1]
+    dec(L[])
+    when withRealtime: dec steps
+    if c.refcount <% rcIncrement: 
+      # It may have a RC > 0, if it is in the hardware stack or
+      # it has not been removed yet from the ZCT. This is because
+      # ``incref`` does not bother to remove the cell from the ZCT 
+      # as this might be too slow.
+      # In any case, it should be removed from the ZCT. But not
+      # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
+      if canBeCycleRoot(c): excl(gch.cycleRoots, c)
+      when logGC: writeCell("zct dealloc cell", c)
+      gcTrace(c, csZctFreed)
+      # We are about to free the object, call the finalizer BEFORE its
+      # children are deleted as well, because otherwise the finalizer may
+      # access invalid memory. This is done by prepareDealloc():
+      prepareDealloc(c)
+      forAllChildren(c, waZctDecRef)
+      when reallyDealloc: rawDealloc(gch.region, c)
+      else:
+        sysAssert(c.typ != nil, "collectZCT 2")
+        zeroMem(c, sizeof(TCell))
+    when withRealtime:
+      if steps == 0:
+        steps = workPackage
+        if gch.maxPause > 0:
+          let duration = getticks() - t0
+          # the GC's measuring is not accurate and needs some cleanup actions 
+          # (stack unmarking), so subtract some short amount of time in to
+          # order to miss deadlines less often:
+          if duration >= gch.maxPause - 50_000:
+            return false
+  result = true
+
+proc unmarkStackAndRegisters(gch: var TGcHeap) = 
+  var d = gch.decStack.d
+  for i in 0..gch.decStack.len-1:
+    sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters"
+    # decRef(d[i]) inlined: cannot create a cycle and must not acquire lock
+    var c = d[i]
+    # XXX no need for an atomic dec here:
+    if --c.refcount:
+      addZCT(gch.zct, c)
+    sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
+  gch.decStack.len = 0
+
+proc collectCTBody(gch: var TGcHeap) =
+  when withRealtime:
+    let t0 = getticks()
+  sysAssert(allocInv(gch.region), "collectCT: begin")
+  
+  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+  sysAssert(gch.decStack.len == 0, "collectCT")
+  prepareForInteriorPointerChecking(gch.region)
+  markStackAndRegisters(gch)
+  markThreadStacks(gch)
+  gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
+  inc(gch.stat.stackScans)
+  if collectZCT(gch):
+    when cycleGC:
+      if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC:
+        collectCycles(gch)
+        discard collectZCT(gch)
+        inc(gch.stat.cycleCollections)
+        gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
+                                 cycleIncrease)
+        gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
+  unmarkStackAndRegisters(gch)
+  sysAssert(allocInv(gch.region), "collectCT: end")
+  
+  when withRealtime:
+    let duration = getticks() - t0
+    gch.stat.maxPause = max(gch.stat.maxPause, duration)
+    when defined(reportMissedDeadlines):
+      if gch.maxPause > 0 and duration > gch.maxPause:
+        c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
+
+proc collectCT(gch: var TGcHeap) =
+  if (gch.zct.len >= ZctThreshold or (cycleGC and
+      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and 
+      gch.recGcLock == 0:
+    collectCTBody(gch)
+
+when withRealtime:
+  proc toNano(x: int): TNanos {.inline.} =
+    result = x * 1000
+
+  proc GC_setMaxPause*(MaxPauseInUs: int) =
+    gch.maxPause = MaxPauseInUs.toNano
+
+  proc GC_step(gch: var TGcHeap, us: int, strongAdvice: bool) =
+    acquire(gch)
+    gch.maxPause = us.toNano
+    if (gch.zct.len >= ZctThreshold or (cycleGC and
+        getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or 
+        strongAdvice:
+      collectCTBody(gch)
+    release(gch)
+
+  proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)
+
+when not defined(useNimRtl):
+  proc GC_disable() = 
+    when hasThreadSupport and hasSharedHeap:
+      discard atomicInc(gch.recGcLock, 1)
+    else:
+      inc(gch.recGcLock)
+  proc GC_enable() =
+    if gch.recGcLock > 0: 
+      when hasThreadSupport and hasSharedHeap:
+        discard atomicDec(gch.recGcLock, 1)
+      else:
+        dec(gch.recGcLock)
+
+  proc GC_setStrategy(strategy: TGC_Strategy) =
+    case strategy
+    of gcThroughput: nil
+    of gcResponsiveness: nil
+    of gcOptimizeSpace: nil
+    of gcOptimizeTime: nil
+
+  proc GC_enableMarkAndSweep() =
+    gch.cycleThreshold = InitialCycleThreshold
+
+  proc GC_disableMarkAndSweep() =
+    gch.cycleThreshold = high(gch.cycleThreshold)-1
+    # set to the max value to suppress the cycle detector
+
+  proc GC_fullCollect() =
+    acquire(gch)
+    var oldThreshold = gch.cycleThreshold
+    gch.cycleThreshold = 0 # forces cycle collection
+    collectCT(gch)
+    gch.cycleThreshold = oldThreshold
+    release(gch)
+
+  proc GC_getStatistics(): string =
+    GC_disable()
+    result = "[GC] total memory: " & $(getTotalMem()) & "\n" &
+             "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" &
+             "[GC] stack scans: " & $gch.stat.stackScans & "\n" &
+             "[GC] stack cells: " & $gch.stat.maxStackCells & "\n" &
+             "[GC] cycle collections: " & $gch.stat.cycleCollections & "\n" &
+             "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
+             "[GC] zct capacity: " & $gch.zct.cap & "\n" &
+             "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
+             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
+             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
+    when traceGC: writeLeakage()
+    GC_enable()
+
+{.pop.}
diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim
index 5d2113439..55223d6c6 100755
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -204,10 +204,21 @@ proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
     # we need to decref here, otherwise the GC leaks!
     when not defined(boehmGC) and not defined(nogc):
       for i in newLen..result.len-1:
+        let len0 = gch.tempStack.len
         forAllChildrenAux(cast[pointer](cast[TAddress](result) +%
                           GenericSeqSize +% (i*%elemSize)),
-                          extGetCellType(result).base, waZctDecRef)
-    # and set the memory to nil:
+                          extGetCellType(result).base, waPush)
+        let len1 = gch.tempStack.len
+        for i in len0 .. <len1:
+          doDecRef(gch.tempStack.d[i], LocalHeap, MaybeCyclic)
+        gch.tempStack.len = len0
+                          
+    # XXX: zeroing out the memory can still result in crashes if a wiped-out
+    # cell is aliased by another pointer (ie proc paramter or a let variable).
+    # This is a tought problem, because even if we don't zeroMem here, in the
+    # presense of user defined destructors, the user will expect the cell to be
+    # "destroyed" thus creating the same problem. We can destoy the cell in the
+    # finalizer of the sequence, but this makes destruction non-deterministic.
     zeroMem(cast[pointer](cast[TAddress](result) +% GenericSeqSize +%
            (newLen*%elemSize)), (result.len-%newLen) *% elemSize)
   result.len = newLen
diff --git a/lib/system/timers.nim b/lib/system/timers.nim
index 0166c1e3f..fa1a13a5f 100644
--- a/lib/system/timers.nim
+++ b/lib/system/timers.nim
@@ -44,10 +44,11 @@ elif defined(macosx):
 
   proc getTicks(): TTicks {.inline.} =
     result = TTicks(mach_absolute_time())
-
+  
+  var timeBaseInfo: TMachTimebaseInfoData
+  mach_timebase_info(timeBaseInfo)
+    
   proc `-`(a, b: TTicks): TNanos =
-    var timeBaseInfo: TMachTimebaseInfoData
-    mach_timebase_info(timeBaseInfo)
     result = (a.int64 - b.int64)  * timeBaseInfo.numer div timeBaseInfo.denom
 
 elif defined(posixRealtime):
diff --git a/lib/wrappers/openssl.nim b/lib/wrappers/openssl.nim
index f692db13a..438774a15 100755
--- a/lib/wrappers/openssl.nim
+++ b/lib/wrappers/openssl.nim
@@ -49,9 +49,9 @@ else:
   const
     versions = "(|.1.0.0|.0.9.9|.0.9.8|.0.9.7|.0.9.6|.0.9.5|.0.9.4)"
   when defined(macosx):
-    const 
-      DLLSSLName = "libssl.dylib" & versions
-      DLLUtilName = "libcrypto.dylib" & versions
+    const
+      DLLSSLName = "libssl" & versions & ".dylib"
+      DLLUtilName = "libcrypto" & versions & ".dylib"
   else:
     const 
       DLLSSLName = "libssl.so" & versions
diff --git a/tests/compile/theaproots.nim b/tests/compile/theaproots.nim
new file mode 100644
index 000000000..aec140f42
--- /dev/null
+++ b/tests/compile/theaproots.nim
@@ -0,0 +1,71 @@
+type 
+  Bar = object
+    x: int
+  
+  Foo = object
+    rheap: ref Bar
+    rmaybe: ref Bar
+    rstack: ref Bar
+    list: seq[ref Bar]
+    listarr: array[0..5, ref Bar]
+    nestedtup: Tup
+    inner: TInner
+    inref: ref TInner
+
+  TInner = object
+    inref: ref Bar
+
+  Tup = tuple
+    tupbar: ref Bar
+    inner: TInner
+
+proc acc(x: var Foo): var ref Bar =
+  result = x.rheap
+
+proc test(maybeFoo: var Foo,
+          maybeSeq: var seq[ref Bar],
+          bars: var openarray[ref Bar],
+          maybeTup: var Tup) =
+  var bb: ref Bar
+  maybeFoo.rmaybe = bb
+  maybeFoo.list[3] = bb
+  maybeFoo.listarr[3] = bb
+  acc(maybeFoo) = bb
+  
+  var localFoo: Foo
+  localFoo.rstack = bb
+  localFoo.list[3] = bb
+  localFoo.listarr[3] = bb
+  acc(localFoo) = bb
+
+  var heapFoo: ref Foo
+  heapFoo.rheap = bb
+  heapFoo.list[3] = bb
+  heapFoo.listarr[3] = bb
+  acc(heapFoo[]) = bb
+
+  heapFoo.nestedtup.tupbar = bb
+  heapFoo.nestedtup.inner.inref = bb
+  heapFoo.inner.inref = bb
+  heapFoo.inref.inref = bb
+
+  var locseq: seq[ref Bar]
+  locseq[3] = bb
+
+  var locarr: array[0..4, ref Bar]
+  locarr[3] = bb
+
+  maybeSeq[3] = bb
+
+  bars[3] = bb
+
+  maybeTup[0] = bb
+
+var
+  ff: ref Foo
+  tt: Tup
+  gseq: seq[ref Bar]
+
+new(ff)
+
+test(ff[], gseq, gseq, tt)
diff --git a/tests/gc/refarrayleak.nim b/tests/gc/refarrayleak.nim
new file mode 100644
index 000000000..12c9145f8
--- /dev/null
+++ b/tests/gc/refarrayleak.nim
@@ -0,0 +1,39 @@
+discard """
+  outputsub: "no leak: "
+"""
+
+type
+  TNode = object
+    data: array[0..300, char]
+
+  PNode = ref TNode
+
+  TNodeArray = array[0..10, PNode]
+
+  TArrayHolder = object
+    sons: TNodeArray
+
+proc nullify(a: var TNodeArray) =
+  for i in 0..high(a):
+    a[i] = nil
+
+proc newArrayHolder: ref TArrayHolder =
+  new result
+
+  for i in 0..high(result.sons):
+    new result.sons[i]
+
+  nullify result.sons
+
+proc loop =
+  for i in 0..10000:
+    discard newArrayHolder()
+    
+  if getOccupiedMem() > 300_000:
+    echo "still a leak! ", getOccupiedMem()
+    quit 1
+  else:
+    echo "no leak: ", getOccupiedMem()
+
+loop()
+
diff --git a/tests/gc/stackrefleak.nim b/tests/gc/stackrefleak.nim
index 2c652d6bf..302ef3599 100644
--- a/tests/gc/stackrefleak.nim
+++ b/tests/gc/stackrefleak.nim
@@ -29,5 +29,3 @@ proc loop =
 
 loop()
 
-
-