57 files changed, 2314 insertions, 228 deletions
diff --git a/compiler/ast.nim b/compiler/ast.nim
index d74818e1f..7ff22e184 100644
--- a/compiler/ast.nim
+++ b/compiler/ast.nim
@@ -561,7 +561,7 @@ type
     mFloat, mFloat32, mFloat64, mFloat128,
     mBool, mChar, mString, mCstring,
     mPointer, mEmptySet, mIntSetBaseType, mNil, mExpr, mStmt, mTypeDesc,
-    mVoidType, mPNimrodNode, mShared, mGuarded, mLock, mSpawn,
+    mVoidType, mPNimrodNode, mShared, mGuarded, mLock, mSpawn, mDeepCopy,
     mIsMainModule, mCompileDate, mCompileTime, mNimrodVersion, mNimrodMajor,
     mNimrodMinor, mNimrodPatch, mCpuEndian, mHostOS, mHostCPU, mAppType,
     mNaN, mInf, mNegInf,
@@ -606,9 +606,9 @@ const
   # thus cannot be overloaded (also documented in the spec!):
   SpecialSemMagics* = {
     mDefined, mDefinedInScope, mCompiles, mLow, mHigh, mSizeOf, mIs, mOf, 
-    mEcho, mShallowCopy, mExpandToAst}
+    mEcho, mShallowCopy, mExpandToAst, mParallel, mSpawn}
 
-type 
+type
   PNode* = ref TNode
   TNodeSeq* = seq[PNode]
   PType* = ref TType
@@ -886,6 +886,8 @@ const
 
   nkCallKinds* = {nkCall, nkInfix, nkPrefix, nkPostfix,
                   nkCommand, nkCallStrLit, nkHiddenCallConv}
+  nkIdentKinds* = {nkIdent, nkSym, nkAccQuoted, nkOpenSymChoice,
+                   nkClosedSymChoice}
 
   nkLiterals* = {nkCharLit..nkTripleStrLit}
   nkLambdaKinds* = {nkLambda, nkDo}
diff --git a/compiler/ccgcalls.nim b/compiler/ccgcalls.nim
index a7840305d..71e23aa1d 100644
--- a/compiler/ccgcalls.nim
+++ b/compiler/ccgcalls.nim
@@ -86,7 +86,7 @@ proc openArrayLoc(p: BProc, n: PNode): PRope =
     initLocExpr(p, q[2], b)
     initLocExpr(p, q[3], c)
     let fmt =
-      case skipTypes(a.t, abstractVar).kind
+      case skipTypes(a.t, abstractVar+{tyPtr}).kind
       of tyOpenArray, tyVarargs, tyArray, tyArrayConstr:
         "($1)+($2), ($3)-($2)+1"
       of tyString, tySequence:
diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim
index 13b3091fc..e6b22819f 100644
--- a/compiler/ccgexprs.nim
+++ b/compiler/ccgexprs.nim
@@ -1652,7 +1652,10 @@ proc genMagicExpr(p: BProc, e: PNode, d: var TLoc, op: TMagic) =
   of mSlurp..mQuoteAst:
     localError(e.info, errXMustBeCompileTime, e.sons[0].sym.name.s)
   of mSpawn:
-    let n = lowerings.wrapProcForSpawn(p.module.module, e.sons[1])
+    let n = lowerings.wrapProcForSpawn(p.module.module, e, e.typ, nil, nil)
+    expr(p, n, d)
+  of mParallel:
+    let n = semparallel.liftParallel(p.module.module, e)
     expr(p, n, d)
   else: internalError(e.info, "genMagicExpr: " & $op)
 
diff --git a/compiler/cgen.nim b/compiler/cgen.nim
index 7094990f7..a5852c735 100644
--- a/compiler/cgen.nim
+++ b/compiler/cgen.nim
@@ -14,7 +14,8 @@ import
   options, intsets,
   nversion, nimsets, msgs, crc, bitsets, idents, lists, types, ccgutils, os,
   times, ropes, math, passes, rodread, wordrecg, treetab, cgmeth,
-  rodutils, renderer, idgen, cgendata, ccgmerge, semfold, aliases, lowerings
+  rodutils, renderer, idgen, cgendata, ccgmerge, semfold, aliases, lowerings,
+  semparallel
 
 when options.hasTinyCBackend:
   import tccgen
diff --git a/compiler/guards.nim b/compiler/guards.nim
index f475f5068..4cf06fe02 100644
--- a/compiler/guards.nim
+++ b/compiler/guards.nim
@@ -1,7 +1,7 @@
 #
 #
 #           The Nimrod Compiler
-#        (c) Copyright 2013 Andreas Rumpf
+#        (c) Copyright 2014 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -9,7 +9,8 @@
 
 ## This module implements the 'implies' relation for guards.
 
-import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents
+import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents,
+  saturate
 
 const
   someEq = {mEqI, mEqI64, mEqF64, mEqEnum, mEqCh, mEqB, mEqRef, mEqProc,
@@ -25,6 +26,17 @@ const
 
   someIn = {mInRange, mInSet}
 
+  someHigh = {mHigh}
+  # we don't list unsigned here because wrap around semantics suck for
+  # proving anything:
+  someAdd = {mAddI, mAddI64, mAddF64, mSucc}
+  someSub = {mSubI, mSubI64, mSubF64, mPred}
+  someMul = {mMulI, mMulI64, mMulF64}
+  someDiv = {mDivI, mDivI64, mDivF64}
+  someMod = {mModI, mModI64}
+  someMax = {mMaxI, mMaxI64, mMaxF64}
+  someMin = {mMinI, mMinI64, mMinF64}
+
 proc isValue(n: PNode): bool = n.kind in {nkCharLit..nkNilLit}
 proc isLocation(n: PNode): bool = not n.isValue
 
@@ -69,19 +81,24 @@ proc isLetLocation(m: PNode, isApprox: bool): bool =
 
 proc interestingCaseExpr*(m: PNode): bool = isLetLocation(m, true)
 
-proc getMagicOp(name: string, m: TMagic): PSym =
+proc createMagic*(name: string, m: TMagic): PSym =
   result = newSym(skProc, getIdent(name), nil, unknownLineInfo())
   result.magic = m
 
 let
-  opLe = getMagicOp("<=", mLeI)
-  opLt = getMagicOp("<", mLtI)
-  opAnd = getMagicOp("and", mAnd)
-  opOr = getMagicOp("or", mOr)
-  opNot = getMagicOp("not", mNot)
-  opIsNil = getMagicOp("isnil", mIsNil)
-  opContains = getMagicOp("contains", mInSet)
-  opEq = getMagicOp("==", mEqI)
+  opLe = createMagic("<=", mLeI)
+  opLt = createMagic("<", mLtI)
+  opAnd = createMagic("and", mAnd)
+  opOr = createMagic("or", mOr)
+  opNot = createMagic("not", mNot)
+  opIsNil = createMagic("isnil", mIsNil)
+  opContains = createMagic("contains", mInSet)
+  opEq = createMagic("==", mEqI)
+  opAdd = createMagic("+", mAddI)
+  opSub = createMagic("-", mSubI)
+  opMul = createMagic("*", mMulI)
+  opDiv = createMagic("div", mDivI)
+  opLen = createMagic("len", mLengthSeq)
 
 proc swapArgs(fact: PNode, newOp: PSym): PNode =
   result = newNodeI(nkCall, fact.info, 3)
@@ -137,17 +154,141 @@ proc neg(n: PNode): PNode =
     result.sons[0] = newSymNode(opNot)
     result.sons[1] = n
 
-proc buildIsNil(arg: PNode): PNode =
-  result = newNodeI(nkCall, arg.info, 2)
-  result.sons[0] = newSymNode(opIsNil)
-  result.sons[1] = arg
+proc buildCall(op: PSym; a: PNode): PNode =
+  result = newNodeI(nkCall, a.info, 2)
+  result.sons[0] = newSymNode(op)
+  result.sons[1] = a
+
+proc buildCall(op: PSym; a, b: PNode): PNode =
+  result = newNodeI(nkInfix, a.info, 3)
+  result.sons[0] = newSymNode(op)
+  result.sons[1] = a
+  result.sons[2] = b
+
+proc `|+|`(a, b: PNode): PNode =
+  result = copyNode(a)
+  if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |+| b.intVal
+  else: result.floatVal = a.floatVal + b.floatVal
+
+proc `|*|`(a, b: PNode): PNode =
+  result = copyNode(a)
+  if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |*| b.intVal
+  else: result.floatVal = a.floatVal * b.floatVal
+
+proc negate(a, b, res: PNode): PNode =
+  if b.kind in {nkCharLit..nkUInt64Lit} and b.intVal != low(BiggestInt):
+    var b = copyNode(b)
+    b.intVal = -b.intVal
+    if a.kind in {nkCharLit..nkUInt64Lit}:
+      b.intVal = b.intVal |+| a.intVal
+      result = b
+    else:
+      result = buildCall(opAdd, a, b)
+  elif b.kind in {nkFloatLit..nkFloat64Lit}:
+    var b = copyNode(b)
+    b.floatVal = -b.floatVal
+    result = buildCall(opAdd, a, b)
+  else:
+    result = res
+
+proc zero(): PNode = nkIntLit.newIntNode(0)
+proc one(): PNode = nkIntLit.newIntNode(1)
+proc minusOne(): PNode = nkIntLit.newIntNode(-1)
+
+proc lowBound*(x: PNode): PNode = 
+  result = nkIntLit.newIntNode(firstOrd(x.typ))
+  result.info = x.info
+
+proc highBound*(x: PNode): PNode =
+  result = if x.typ.skipTypes(abstractInst).kind == tyArray:
+             nkIntLit.newIntNode(lastOrd(x.typ))
+           else:
+             opAdd.buildCall(opLen.buildCall(x), minusOne())
+  result.info = x.info
+
+proc reassociation(n: PNode): PNode =
+  result = n
+  # (foo+5)+5 --> foo+10;  same for '*'
+  case result.getMagic
+  of someAdd:
+    if result[2].isValue and 
+        result[1].getMagic in someAdd and result[1][2].isValue:
+      result = opAdd.buildCall(result[1][1], result[1][2] |+| result[2])
+  of someMul:
+    if result[2].isValue and 
+        result[1].getMagic in someMul and result[1][2].isValue:
+      result = opAdd.buildCall(result[1][1], result[1][2] |*| result[2])
+  else: discard
+
+proc canon*(n: PNode): PNode =
+  # XXX for now only the new code in 'semparallel' uses this
+  if n.safeLen >= 1:
+    result = shallowCopy(n)
+    for i in 0 .. < n.len:
+      result.sons[i] = canon(n.sons[i])
+  else:
+    result = n
+  case result.getMagic
+  of someEq, someAdd, someMul, someMin, someMax:
+    # these are symmetric; put value as last:
+    if result.sons[1].isValue and not result.sons[2].isValue:
+      result = swapArgs(result, result.sons[0].sym)
+      # (4 + foo) + 2 --> (foo + 4) + 2
+  of someHigh:
+    # high == len+(-1)
+    result = opAdd.buildCall(opLen.buildCall(result[1]), minusOne())
+  of mUnaryMinusI, mUnaryMinusI64:
+    result = buildCall(opAdd, result[1], newIntNode(nkIntLit, -1))
+  of someSub:
+    # x - 4  -->  x + (-4)
+    result = negate(result[1], result[2], result)
+  of someLen:
+    result.sons[0] = opLen.newSymNode
+  else: discard
+
+  result = skipConv(result)
+  result = reassociation(result)
+  # most important rule: (x-4) < a.len -->  x < a.len+4
+  case result.getMagic
+  of someLe, someLt:
+    let x = result[1]
+    let y = result[2]
+    if x.kind in nkCallKinds and x.len == 3 and x[2].isValue and 
+        isLetLocation(x[1], true):
+      case x.getMagic
+      of someSub:
+        result = buildCall(result[0].sym, x[1], 
+                           reassociation(opAdd.buildCall(y, x[2])))
+      of someAdd:
+        # Rule A:
+        let plus = negate(y, x[2], nil).reassociation
+        if plus != nil: result = buildCall(result[0].sym, x[1], plus)
+      else: discard
+    elif y.kind in nkCallKinds and y.len == 3 and y[2].isValue and 
+        isLetLocation(y[1], true):
+      # a.len < x-3
+      case y.getMagic
+      of someSub:
+        result = buildCall(result[0].sym, y[1],
+                           reassociation(opAdd.buildCall(x, y[2])))
+      of someAdd:
+        let plus = negate(x, y[2], nil).reassociation
+        # ensure that Rule A will not trigger afterwards with the
+        # additional 'not isLetLocation' constraint:
+        if plus != nil and not isLetLocation(x, true):
+          result = buildCall(result[0].sym, plus, y[1])
+      else: discard
+  else: discard
+
+proc `+@`*(a: PNode; b: BiggestInt): PNode =
+  canon(if b != 0: opAdd.buildCall(a, nkIntLit.newIntNode(b)) else: a)
 
 proc usefulFact(n: PNode): PNode =
   case n.getMagic
   of someEq:
     if skipConv(n.sons[2]).kind == nkNilLit and (
         isLetLocation(n.sons[1], false) or isVar(n.sons[1])):
-      result = buildIsNil(n.sons[1])
+      result = opIsNil.buildCall(n.sons[1])
     else:
       if isLetLocation(n.sons[1], true) or isLetLocation(n.sons[2], true):
         # XXX algebraic simplifications!  'i-1 < a.len' --> 'i < a.len+1'
@@ -217,7 +358,7 @@ proc addFactNeg*(m: var TModel, n: PNode) =
   let n = n.neg
   if n != nil: addFact(m, n)
 
-proc sameTree(a, b: PNode): bool = 
+proc sameTree*(a, b: PNode): bool = 
   result = false
   if a == b:
     result = true
@@ -484,7 +625,7 @@ proc factImplies(fact, prop: PNode): TImplication =
     #  == not a or not b == not (a and b)
     let arg = fact.sons[1]
     case arg.getMagic
-    of mIsNil:
+    of mIsNil, mEqRef:
       return ~factImplies(arg, prop)
     of mAnd:
       # not (a and b)  means  not a or not b:
@@ -519,7 +660,144 @@ proc doesImply*(facts: TModel, prop: PNode): TImplication =
       if result != impUnknown: return
 
 proc impliesNotNil*(facts: TModel, arg: PNode): TImplication =
-  result = doesImply(facts, buildIsNil(arg).neg)
+  result = doesImply(facts, opIsNil.buildCall(arg).neg)
+
+proc simpleSlice*(a, b: PNode): BiggestInt =
+  # returns 'c' if a..b matches (i+c)..(i+c), -1 otherwise. (i)..(i) is matched
+  # as if it is (i+0)..(i+0).
+  if guards.sameTree(a, b):
+    if a.getMagic in someAdd and a[2].kind in {nkCharLit..nkUInt64Lit}:
+      result = a[2].intVal
+    else:
+      result = 0
+  else:
+    result = -1
+
+proc pleViaModel(model: TModel; aa, bb: PNode): TImplication
+
+proc ple(m: TModel; a, b: PNode): TImplication =
+  template `<=?`(a,b): expr = ple(m,a,b) == impYes
+  #   0 <= 3
+  if a.isValue and b.isValue:
+    return if leValue(a, b): impYes else: impNo
+
+  # use type information too:  x <= 4  iff  high(x) <= 4
+  if b.isValue and a.typ != nil and a.typ.isOrdinalType:
+    if lastOrd(a.typ) <= b.intVal: return impYes
+  # 3 <= x   iff  low(x) <= 3
+  if a.isValue and b.typ != nil and b.typ.isOrdinalType:
+    if firstOrd(b.typ) <= a.intVal: return impYes
+
+  # x <= x
+  if sameTree(a, b): return impYes
+
+  # 0 <= x.len
+  if b.getMagic in someLen and a.isValue:
+    if a.intVal <= 0: return impYes
+
+  #   x <= y+c  if 0 <= c and x <= y
+  if b.getMagic in someAdd and zero() <=? b[2] and a <=? b[1]: return impYes
+
+  #   x+c <= y  if c <= 0 and x <= y
+  if a.getMagic in someAdd and a[2] <=? zero() and a[1] <=? b: return impYes
+
+  #   x <= y*c  if  1 <= c and x <= y  and 0 <= y
+  if b.getMagic in someMul:
+    if a <=? b[1] and one() <=? b[2] and zero() <=? b[1]: return impYes
+
+  #   x div c <= y   if   1 <= c  and  0 <= y  and x <= y:
+  if a.getMagic in someDiv:
+    if one() <=? a[2] and zero() <=? b and a[1] <=? b: return impYes
+
+  # slightly subtle:
+  # x <= max(y, z)  iff x <= y or x <= z
+  # note that 'x <= max(x, z)' is a special case of the above rule
+  if b.getMagic in someMax:
+    if a <=? b[1] or a <=? b[2]: return impYes
+
+  # min(x, y) <= z  iff x <= z or y <= z
+  if a.getMagic in someMin:
+    if a[1] <=? b or a[2] <=? b: return impYes
+
+  # use the knowledge base:
+  return pleViaModel(m, a, b)
+  #return doesImply(m, opLe.buildCall(a, b))
+
+type TReplacements = seq[tuple[a,b: PNode]]
+
+proc replaceSubTree(n, x, by: PNode): PNode =
+  if sameTree(n, x):
+    result = by
+  elif hasSubTree(n, x):
+    result = shallowCopy(n)
+    for i in 0 .. safeLen(n)-1:
+      result.sons[i] = replaceSubTree(n.sons[i], x, by)
+  else:
+    result = n
+
+proc applyReplacements(n: PNode; rep: TReplacements): PNode =
+  result = n
+  for x in rep: result = result.replaceSubTree(x.a, x.b)
+
+proc pleViaModelRec(m: var TModel; a, b: PNode): TImplication =
+  # now check for inferrable facts: a <= b and b <= c  implies a <= c
+  for i in 0..m.high:
+    let fact = m[i]
+    if fact != nil and fact.getMagic in someLe:
+      # x <= y implies a <= b  if  a <= x and y <= b
+      let x = fact[1]
+      let y = fact[2]
+      # mark as used:
+      m[i] = nil
+      if ple(m, a, x) == impYes:
+        if ple(m, y, b) == impYes: return impYes
+        #if pleViaModelRec(m, y, b): return impYes
+      # fact:  16 <= i
+      #         x    y
+      # question: i <= 15? no!
+      result = impliesLe(fact, a, b)
+      if result != impUnknown: return result
+      if sameTree(y, a):
+        result = ple(m, x, b)
+        if result != impUnknown: return result
+
+proc pleViaModel(model: TModel; aa, bb: PNode): TImplication =
+  # compute replacements:
+  var replacements: TReplacements = @[]
+  for fact in model:
+    if fact != nil and fact.getMagic in someEq:
+      let a = fact[1]
+      let b = fact[2]
+      if a.kind == nkSym: replacements.add((a,b))
+      else: replacements.add((b,a))
+  var m: TModel
+  var a = aa
+  var b = bb
+  if replacements.len > 0:
+    m = @[]
+    # make the other facts consistent:
+    for fact in model:
+      if fact != nil and fact.getMagic notin someEq:
+        # XXX 'canon' should not be necessary here, but it is
+        m.add applyReplacements(fact, replacements).canon
+    a = applyReplacements(aa, replacements)
+    b = applyReplacements(bb, replacements)
+  else:
+    # we have to make a copy here, because the model will be modified:
+    m = model
+  result = pleViaModelRec(m, a, b)
+
+proc proveLe*(m: TModel; a, b: PNode): TImplication =
+  let x = canon(opLe.buildCall(a, b))
+  #echo "ROOT ", renderTree(x[1]), " <=? ", renderTree(x[2])
+  result = ple(m, x[1], x[2])
+  if result == impUnknown:
+    # try an alternative:  a <= b  iff  not (b < a)  iff  not (b+1 <= a):
+    let y = canon(opLe.buildCall(opAdd.buildCall(b, one()), a))
+    result = ~ple(m, y[1], y[2])
+
+proc addFactLe*(m: var TModel; a, b: PNode) =
+  m.add canon(opLe.buildCall(a, b))
 
 proc settype(n: PNode): PType =
   result = newType(tySet, n.typ.owner)
diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim
index 1b9e5fe0f..e2afa4362 100644
--- a/compiler/lowerings.nim
+++ b/compiler/lowerings.nim
@@ -13,6 +13,8 @@ const
   genPrefix* = ":tmp"         # prefix for generated names
 
 import ast, astalgo, types, idents, magicsys, msgs, options
+from guards import createMagic
+from trees import getMagic
 
 proc newTupleAccess*(tup: PNode, i: int): PNode =
   result = newNodeIT(nkBracketExpr, tup.info, tup.typ.skipTypes(
@@ -68,6 +70,7 @@ proc addField*(obj: PType; s: PSym) =
   var field = newSym(skField, getIdent(s.name.s & $s.id), s.owner, s.info)
   let t = skipIntLit(s.typ)
   field.typ = t
+  assert t.kind != tyStmt
   field.position = sonsLen(obj.n)
   addSon(obj.n, newSymNode(field))
 
@@ -79,19 +82,30 @@ proc newDotExpr(obj, b: PSym): PNode =
   addSon(result, newSymNode(field))
   result.typ = field.typ
 
-proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode = 
+proc indirectAccess*(a: PNode, b: string, info: TLineInfo): PNode = 
   # returns a[].b as a node
   var deref = newNodeI(nkHiddenDeref, info)
-  deref.typ = a.typ.sons[0]
-  assert deref.typ.kind == tyObject
-  let field = getSymFromList(deref.typ.n, getIdent(b.name.s & $b.id))
-  assert field != nil, b.name.s
+  deref.typ = a.typ.skipTypes(abstractInst).sons[0]
+  var t = deref.typ.skipTypes(abstractInst)
+  var field: PSym
+  while true:
+    assert t.kind == tyObject
+    field = getSymFromList(t.n, getIdent(b))
+    if field != nil: break
+    t = t.sons[0]
+    if t == nil: break
+    t = t.skipTypes(abstractInst)
+  assert field != nil, b
   addSon(deref, a)
   result = newNodeI(nkDotExpr, info)
   addSon(result, deref)
   addSon(result, newSymNode(field))
   result.typ = field.typ
 
+proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode = 
+  # returns a[].b as a node
+  result = indirectAccess(a, b.name.s & $b.id, info)
+
 proc indirectAccess*(a, b: PSym, info: TLineInfo): PNode =
   result = indirectAccess(newSymNode(a), b, info)
 
@@ -101,6 +115,11 @@ proc genAddrOf*(n: PNode): PNode =
   result.typ = newType(tyPtr, n.typ.owner)
   result.typ.rawAddSon(n.typ)
 
+proc genDeref*(n: PNode): PNode =
+  result = newNodeIT(nkHiddenDeref, n.info, 
+                     n.typ.skipTypes(abstractInst).sons[0])
+  result.add n
+
 proc callCodegenProc*(name: string, arg1: PNode; 
                       arg2, arg3: PNode = nil): PNode =
   result = newNodeI(nkCall, arg1.info)
@@ -112,13 +131,120 @@ proc callCodegenProc*(name: string, arg1: PNode;
     result.add arg1
     if arg2 != nil: result.add arg2
     if arg3 != nil: result.add arg3
+    result.typ = sym.typ.sons[0]
+
+proc callProc(a: PNode): PNode =
+  result = newNodeI(nkCall, a.info)
+  result.add a
+  result.typ = a.typ.sons[0]
+
+# we have 4 cases to consider:
+# - a void proc --> nothing to do
+# - a proc returning GC'ed memory --> requires a flowVar
+# - a proc returning non GC'ed memory --> pass as hidden 'var' parameter
+# - not in a parallel environment --> requires a flowVar for memory safety
+type
+  TSpawnResult = enum
+    srVoid, srFlowVar, srByVar
+  TFlowVarKind = enum
+    fvInvalid # invalid type T for 'FlowVar[T]'
+    fvGC      # FlowVar of a GC'ed type
+    fvBlob    # FlowVar of a blob type
+
+proc spawnResult(t: PType; inParallel: bool): TSpawnResult =
+  if t.isEmptyType: srVoid
+  elif inParallel and not containsGarbageCollectedRef(t): srByVar
+  else: srFlowVar
+
+proc flowVarKind(t: PType): TFlowVarKind =
+  if t.skipTypes(abstractInst).kind in {tyRef, tyString, tySequence}: fvGC
+  elif containsGarbageCollectedRef(t): fvInvalid
+  else: fvBlob
+
+proc addLocalVar(varSection: PNode; owner: PSym; typ: PType; v: PNode): PSym =
+  result = newSym(skTemp, getIdent(genPrefix), owner, varSection.info)
+  result.typ = typ
+  incl(result.flags, sfFromGeneric)
+
+  var vpart = newNodeI(nkIdentDefs, varSection.info, 3)
+  vpart.sons[0] = newSymNode(result)
+  vpart.sons[1] = ast.emptyNode
+  vpart.sons[2] = v
+  varSection.add vpart
+
+discard """
+We generate roughly this:
+
+proc f_wrapper(thread, args) =
+  barrierEnter(args.barrier)  # for parallel statement
+  var a = args.a # thread transfer; deepCopy or shallowCopy or no copy
+                 # depending on whether we're in a 'parallel' statement
+  var b = args.b
+  var fv = args.fv
+
+  fv.owner = thread # optional
+  nimArgsPassingDone() # signal parent that the work is done
+  # 
+  args.fv.blob = f(a, b, ...)
+  nimFlowVarSignal(args.fv)
+  
+  # - or -
+  f(a, b, ...)
+  barrierLeave(args.barrier)  # for parallel statement
+
+stmtList:
+  var scratchObj
+  scratchObj.a = a
+  scratchObj.b = b
+
+  nimSpawn(f_wrapper, addr scratchObj)
+  scratchObj.fv # optional
+
+"""
 
 proc createWrapperProc(f: PNode; threadParam, argsParam: PSym;
-                       varSection, call: PNode): PSym =
+                       varSection, call, barrier, fv: PNode;
+                       spawnKind: TSpawnResult): PSym =
   var body = newNodeI(nkStmtList, f.info)
+  var threadLocalBarrier: PSym
+  if barrier != nil:
+    var varSection = newNodeI(nkVarSection, barrier.info)
+    threadLocalBarrier = addLocalVar(varSection, argsParam.owner, 
+                                     barrier.typ, barrier)
+    body.add varSection
+    body.add callCodeGenProc("barrierEnter", threadLocalBarrier.newSymNode)
+  var threadLocalProm: PSym
+  if spawnKind == srByVar:
+    threadLocalProm = addLocalVar(varSection, argsParam.owner, fv.typ, fv)
+  elif fv != nil:
+    internalAssert fv.typ.kind == tyGenericInst
+    threadLocalProm = addLocalVar(varSection, argsParam.owner, fv.typ, fv)
+    
   body.add varSection
-  body.add callCodeGenProc("nimArgsPassingDone", newSymNode(threadParam))
-  body.add call
+  if fv != nil and spawnKind != srByVar:
+    # generate:
+    #   fv.owner = threadParam
+    body.add newAsgnStmt(indirectAccess(threadLocalProm.newSymNode,
+      "owner", fv.info), threadParam.newSymNode)
+
+  body.add callCodeGenProc("nimArgsPassingDone", threadParam.newSymNode)
+  if spawnKind == srByVar:
+    body.add newAsgnStmt(genDeref(threadLocalProm.newSymNode), call)
+  elif fv != nil:
+    let fk = fv.typ.sons[1].flowVarKind
+    if fk == fvInvalid:
+      localError(f.info, "cannot create a flowVar of type: " & 
+        typeToString(fv.typ.sons[1]))
+    body.add newAsgnStmt(indirectAccess(threadLocalProm.newSymNode,
+      if fk == fvGC: "data" else: "blob", fv.info), call)
+    if barrier == nil:
+      # by now 'fv' is shared and thus might have beeen overwritten! we need
+      # to use the thread-local view instead:
+      body.add callCodeGenProc("nimFlowVarSignal", threadLocalProm.newSymNode)
+  else:
+    body.add call
+  if barrier != nil:
+    body.add callCodeGenProc("barrierLeave", threadLocalBarrier.newSymNode)
 
   var params = newNodeI(nkFormalParams, f.info)
   params.add emptyNode
@@ -146,10 +272,152 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode =
   result.typ = newType(tyPtr, objType.owner)
   result.typ.rawAddSon(objType)
 
-proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
-  result = newNodeI(nkStmtList, n.info)
-  if n.kind notin nkCallKinds or not n.typ.isEmptyType:
-    localError(n.info, "'spawn' takes a call expression of type void")
+proc setupArgsForConcurrency(n: PNode; objType: PType; scratchObj: PSym, 
+                             castExpr, call, varSection, result: PNode) =
+  let formals = n[0].typ.n
+  let tmpName = getIdent(genPrefix)
+  for i in 1 .. <n.len:
+    # we pick n's type here, which hopefully is 'tyArray' and not
+    # 'tyOpenArray':
+    var argType = n[i].typ.skipTypes(abstractInst)
+    if i < formals.len and formals[i].typ.kind == tyVar:
+      localError(n[i].info, "'spawn'ed function cannot have a 'var' parameter")
+    elif containsTyRef(argType):
+      localError(n[i].info, "'spawn'ed function cannot refer to 'ref'/closure")
+
+    let fieldname = if i < formals.len: formals[i].sym.name else: tmpName
+    var field = newSym(skField, fieldname, objType.owner, n.info)
+    field.typ = argType
+    objType.addField(field)
+    result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n[i])
+
+    let temp = addLocalVar(varSection, objType.owner, argType,
+                           indirectAccess(castExpr, field, n.info))    
+    call.add(newSymNode(temp))
+
+proc getRoot*(n: PNode): PSym =
+  ## ``getRoot`` takes a *path* ``n``. A path is an lvalue expression
+  ## like ``obj.x[i].y``. The *root* of a path is the symbol that can be
+  ## determined as the owner; ``obj`` in the example.
+  case n.kind
+  of nkSym:
+    if n.sym.kind in {skVar, skResult, skTemp, skLet, skForVar}:
+      result = n.sym
+  of nkDotExpr, nkBracketExpr, nkHiddenDeref, nkDerefExpr,
+      nkObjUpConv, nkObjDownConv, nkCheckedFieldExpr:
+    result = getRoot(n.sons[0])
+  of nkHiddenStdConv, nkHiddenSubConv, nkConv:
+    result = getRoot(n.sons[1])
+  of nkCallKinds:
+    if getMagic(n) == mSlice: result = getRoot(n.sons[1])
+  else: discard
+
+proc newIntLit(value: BiggestInt): PNode =
+  result = nkIntLit.newIntNode(value)
+  result.typ = getSysType(tyInt)
+
+proc genHigh(n: PNode): PNode =
+  if skipTypes(n.typ, abstractVar).kind in {tyArrayConstr, tyArray}:
+    result = newIntLit(lastOrd(skipTypes(n.typ, abstractVar)))
+  else:
+    result = newNodeI(nkCall, n.info, 2)
+    result.typ = getSysType(tyInt)
+    result.sons[0] = newSymNode(createMagic("high", mHigh))
+    result.sons[1] = n
+
+proc setupArgsForParallelism(n: PNode; objType: PType; scratchObj: PSym;
+                             castExpr, call, varSection, result: PNode) =
+  let formals = n[0].typ.n
+  let tmpName = getIdent(genPrefix)
+  # we need to copy the foreign scratch object fields into local variables
+  # for correctness: These are called 'threadLocal' here.
+  for i in 1 .. <n.len:
+    let n = n[i]
+    let argType = skipTypes(if i < formals.len: formals[i].typ else: n.typ,
+                            abstractInst)
+    if containsTyRef(argType):
+      localError(n.info, "'spawn'ed function cannot refer to 'ref'/closure")
+
+    let fieldname = if i < formals.len: formals[i].sym.name else: tmpName
+    var field = newSym(skField, fieldname, objType.owner, n.info)
+
+    if argType.kind in {tyVarargs, tyOpenArray}:
+      # important special case: we always create a zero-copy slice:
+      let slice = newNodeI(nkCall, n.info, 4)
+      slice.typ = n.typ
+      slice.sons[0] = newSymNode(createMagic("slice", mSlice))
+      var fieldB = newSym(skField, tmpName, objType.owner, n.info)
+      fieldB.typ = getSysType(tyInt)
+      objType.addField(fieldB)
+      
+      if getMagic(n) == mSlice:
+        let a = genAddrOf(n[1])
+        field.typ = a.typ
+        objType.addField(field)
+        result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a)
+
+        var fieldA = newSym(skField, tmpName, objType.owner, n.info)
+        fieldA.typ = getSysType(tyInt)
+        objType.addField(fieldA)
+        result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldA), n[2])
+        result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldB), n[3])
+
+        let threadLocal = addLocalVar(varSection, objType.owner, fieldA.typ,
+                                      indirectAccess(castExpr, fieldA, n.info))
+        slice.sons[2] = threadLocal.newSymNode
+      else:
+        let a = genAddrOf(n)
+        field.typ = a.typ
+        objType.addField(field)
+        result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a)
+        result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldB), genHigh(n))
+
+        slice.sons[2] = newIntLit(0)
+      # the array itself does not need to go through a thread local variable:
+      slice.sons[1] = genDeref(indirectAccess(castExpr, field, n.info))
+
+      let threadLocal = addLocalVar(varSection, objType.owner, fieldB.typ,
+                                    indirectAccess(castExpr, fieldB, n.info))
+      slice.sons[3] = threadLocal.newSymNode
+      call.add slice
+    elif (let size = computeSize(argType); size < 0 or size > 16) and
+        n.getRoot != nil:
+      # it is more efficient to pass a pointer instead:
+      let a = genAddrOf(n)
+      field.typ = a.typ
+      objType.addField(field)
+      result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a)
+      let threadLocal = addLocalVar(varSection, objType.owner, field.typ,
+                                    indirectAccess(castExpr, field, n.info))
+      call.add(genDeref(threadLocal.newSymNode))
+    else:
+      # boring case
+      field.typ = argType
+      objType.addField(field)
+      result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n)
+      let threadLocal = addLocalVar(varSection, objType.owner, field.typ,
+                                    indirectAccess(castExpr, field, n.info))
+      call.add(threadLocal.newSymNode)
+
+proc wrapProcForSpawn*(owner: PSym; spawnExpr: PNode; retType: PType; 
+                       barrier, dest: PNode = nil): PNode =
+  # if 'barrier' != nil, then it is in a 'parallel' section and we
+  # generate quite different code
+  let n = spawnExpr[1]
+  let spawnKind = spawnResult(retType, barrier!=nil)
+  case spawnKind
+  of srVoid:
+    internalAssert dest == nil
+    result = newNodeI(nkStmtList, n.info)
+  of srFlowVar:
+    internalAssert dest == nil
+    result = newNodeIT(nkStmtListExpr, n.info, retType)
+  of srByVar:
+    if dest == nil: localError(n.info, "'spawn' must not be discarded")
+    result = newNodeI(nkStmtList, n.info)
+
+  if n.kind notin nkCallKinds:
+    localError(n.info, "'spawn' takes a call expression")
     return
   if optThreadAnalysis in gGlobalOptions:
     if {tfThread, tfNoSideEffect} * n[0].typ.flags == {}:
@@ -162,6 +430,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
     threadParam.typ = ptrType
     argsParam.typ = ptrType
     argsParam.position = 1
+
   var objType = createObj(owner, n.info)
   incl(objType.flags, tfFinal)
   let castExpr = createCastExpr(argsParam, objType)
@@ -174,7 +443,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
     varSectionB.addVar(scratchObj.newSymNode)
     result.add varSectionB
 
-  var call = newNodeI(nkCall, n.info)
+  var call = newNodeIT(nkCall, n.info, n.typ)
   var fn = n.sons[0]
   # templates and macros are in fact valid here due to the nature of
   # the transformation:
@@ -194,35 +463,44 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
 
   call.add(fn)
   var varSection = newNodeI(nkVarSection, n.info)
-  let formals = n[0].typ.n
-  let tmpName = getIdent(genPrefix)
-  for i in 1 .. <n.len:
-    # we pick n's type here, which hopefully is 'tyArray' and not
-    # 'tyOpenArray':
-    var argType = n[i].typ.skipTypes(abstractInst)
-    if i < formals.len and formals[i].typ.kind == tyVar:
-      localError(n[i].info, "'spawn'ed function cannot have a 'var' parameter")
-    elif containsTyRef(argType):
-      localError(n[i].info, "'spawn'ed function cannot refer to 'ref'/closure")
+  if barrier.isNil:
+    setupArgsForConcurrency(n, objType, scratchObj, castExpr, call, varSection, result)
+  else: 
+    setupArgsForParallelism(n, objType, scratchObj, castExpr, call, varSection, result)
 
-    let fieldname = if i < formals.len: formals[i].sym.name else: tmpName
-    var field = newSym(skField, fieldname, owner, n.info)
-    field.typ = argType
+  var barrierAsExpr: PNode = nil
+  if barrier != nil:
+    let typ = newType(tyPtr, owner)
+    typ.rawAddSon(magicsys.getCompilerProc("Barrier").typ)
+    var field = newSym(skField, getIdent"barrier", owner, n.info)
+    field.typ = typ
     objType.addField(field)
-    result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n[i])
-
-    var temp = newSym(skTemp, tmpName, owner, n.info)
-    temp.typ = argType
-    incl(temp.flags, sfFromGeneric)
+    result.add newFastAsgnStmt(newDotExpr(scratchObj, field), barrier)
+    barrierAsExpr = indirectAccess(castExpr, field, n.info)
 
-    var vpart = newNodeI(nkIdentDefs, n.info, 3)
-    vpart.sons[0] = newSymNode(temp)
-    vpart.sons[1] = ast.emptyNode
-    vpart.sons[2] = indirectAccess(castExpr, field, n.info)
-    varSection.add vpart
+  var fvField, fvAsExpr: PNode = nil
+  if spawnKind == srFlowVar:
+    var field = newSym(skField, getIdent"fv", owner, n.info)
+    field.typ = retType
+    objType.addField(field)
+    fvField = newDotExpr(scratchObj, field)
+    fvAsExpr = indirectAccess(castExpr, field, n.info)
+    # create flowVar:
+    result.add newFastAsgnStmt(fvField, callProc(spawnExpr[2]))
+    if barrier == nil:
+      result.add callCodeGenProc("nimFlowVarCreateCondVar", fvField)
 
-    call.add(newSymNode(temp))
+  elif spawnKind == srByVar:
+    var field = newSym(skField, getIdent"fv", owner, n.info)
+    field.typ = newType(tyPtr, objType.owner)
+    field.typ.rawAddSon(retType)
+    objType.addField(field)
+    fvAsExpr = indirectAccess(castExpr, field, n.info)
+    result.add newFastAsgnStmt(newDotExpr(scratchObj, field), genAddrOf(dest))
 
-  let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call)
+  let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call,
+                                  barrierAsExpr, fvAsExpr, spawnKind)
   result.add callCodeGenProc("nimSpawn", wrapper.newSymNode,
                              genAddrOf(scratchObj.newSymNode))
+
+  if spawnKind == srFlowVar: result.add fvField
diff --git a/compiler/pragmas.nim b/compiler/pragmas.nim
index f2d8988ea..a17773aa4 100644
--- a/compiler/pragmas.nim
+++ b/compiler/pragmas.nim
@@ -644,12 +644,13 @@ proc singlePragma(c: PContext, sym: PSym, n: PNode, i: int,
           incl(sym.flags, sfNoReturn)
         of wDynlib: 
           processDynLib(c, it, sym)
-        of wCompilerproc: 
+        of wCompilerproc:
           noVal(it)           # compilerproc may not get a string!
-          makeExternExport(sym, "$1", it.info)
-          incl(sym.flags, sfCompilerProc)
-          incl(sym.flags, sfUsed) # suppress all those stupid warnings
-          registerCompilerProc(sym)
+          if sfFromGeneric notin sym.flags:
+            makeExternExport(sym, "$1", it.info)
+            incl(sym.flags, sfCompilerProc)
+            incl(sym.flags, sfUsed) # suppress all those stupid warnings
+            registerCompilerProc(sym)
         of wProcVar: 
           noVal(it)
           incl(sym.flags, sfProcvar)
diff --git a/compiler/sem.nim b/compiler/sem.nim
index 0c793eeba..e4ef6473f 100644
--- a/compiler/sem.nim
+++ b/compiler/sem.nim
@@ -15,7 +15,8 @@ import
   magicsys, parser, nversion, nimsets, semfold, importer,
   procfind, lookups, rodread, pragmas, passes, semdata, semtypinst, sigmatch,
   intsets, transf, vmdef, vm, idgen, aliases, cgmeth, lambdalifting,
-  evaltempl, patterns, parampatterns, sempass2, pretty, semmacrosanity
+  evaltempl, patterns, parampatterns, sempass2, pretty, semmacrosanity,
+  semparallel
 
 # implementation
 
diff --git a/compiler/semdata.nim b/compiler/semdata.nim
index d2b2f90cd..abecc1b6d 100644
--- a/compiler/semdata.nim
+++ b/compiler/semdata.nim
@@ -91,6 +91,7 @@ type
     generics*: seq[TInstantiationPair] # pending list of instantiated generics to compile
     lastGenericIdx*: int      # used for the generics stack
     hloLoopDetector*: int     # used to prevent endless loops in the HLO
+    inParallelStmt*: int
    
 proc makeInstPair*(s: PSym, inst: PInstantiation): TInstantiationPair =
   result.genericSym = s
diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim
index 99ffa9b54..078e95fbe 100644
--- a/compiler/semexprs.nim
+++ b/compiler/semexprs.nim
@@ -1394,11 +1394,6 @@ proc semDefined(c: PContext, n: PNode, onlyCurrentScope: bool): PNode =
   result.info = n.info
   result.typ = getSysType(tyBool)
 
-proc setMs(n: PNode, s: PSym): PNode = 
-  result = n
-  n.sons[0] = newSymNode(s)
-  n.sons[0].info = n.info
-
 proc expectMacroOrTemplateCall(c: PContext, n: PNode): PSym =
   ## The argument to the proc should be nkCall(...) or similar
   ## Returns the macro/template symbol
@@ -1590,6 +1585,27 @@ proc semShallowCopy(c: PContext, n: PNode, flags: TExprFlags): PNode =
   else:
     result = semDirectOp(c, n, flags)
 
+proc createFlowVar(c: PContext; t: PType; info: TLineInfo): PType =
+  result = newType(tyGenericInvokation, c.module)
+  addSonSkipIntLit(result, magicsys.getCompilerProc("FlowVar").typ)
+  addSonSkipIntLit(result, t)
+  result = instGenericContainer(c, info, result, allowMetaTypes = false)
+
+proc instantiateCreateFlowVarCall(c: PContext; t: PType;
+                                  info: TLineInfo): PSym =
+  let sym = magicsys.getCompilerProc("nimCreateFlowVar")
+  if sym == nil:
+    localError(info, errSystemNeeds, "nimCreateFlowVar")
+  var bindings: TIdTable 
+  initIdTable(bindings)
+  bindings.idTablePut(sym.ast[genericParamsPos].sons[0].typ, t)
+  result = c.semGenerateInstance(c, sym, bindings, info)
+
+proc setMs(n: PNode, s: PSym): PNode = 
+  result = n
+  n.sons[0] = newSymNode(s)
+  n.sons[0].info = n.info
+
 proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = 
   # this is a hotspot in the compiler!
   # DON'T forget to update ast.SpecialSemMagics if you add a magic here!
@@ -1611,6 +1627,22 @@ proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode =
     checkSonsLen(n, 2)
     result = newStrNodeT(renderTree(n[1], {renderNoComments}), n)
     result.typ = getSysType(tyString)
+  of mParallel:
+    result = setMs(n, s)
+    var x = n.lastSon
+    if x.kind == nkDo: x = x.sons[bodyPos]
+    inc c.inParallelStmt
+    result.sons[1] = semStmt(c, x)
+    dec c.inParallelStmt
+  of mSpawn:
+    result = setMs(n, s)
+    result.sons[1] = semExpr(c, n.sons[1])
+    if not result[1].typ.isEmptyType:
+      if c.inParallelStmt > 0:
+        result.typ = result[1].typ
+      else:
+        result.typ = createFlowVar(c, result[1].typ, n.info)
+      result.add instantiateCreateFlowVarCall(c, result[1].typ, n.info).newSymNode
   else: result = semDirectOp(c, n, flags)
 
 proc semWhen(c: PContext, n: PNode, semCheck = true): PNode =
diff --git a/compiler/semmagic.nim b/compiler/semmagic.nim
index 4caf1fb8e..f943e7006 100644
--- a/compiler/semmagic.nim
+++ b/compiler/semmagic.nim
@@ -1,7 +1,7 @@
 #
 #
 #           The Nimrod Compiler
-#        (c) Copyright 2013 Andreas Rumpf
+#        (c) Copyright 2014 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -131,4 +131,3 @@ proc magicsAfterOverloadResolution(c: PContext, n: PNode,
   of mNBindSym: result = semBindSym(c, n)
   of mLocals: result = semLocals(c, n)
   else: result = n
-
diff --git a/compiler/semparallel.nim b/compiler/semparallel.nim
new file mode 100644
index 000000000..c594a4788
--- /dev/null
+++ b/compiler/semparallel.nim
@@ -0,0 +1,465 @@
+#
+#
+#           The Nimrod Compiler
+#        (c) Copyright 2014 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Semantic checking for 'parallel'.
+
+# - codegen needs to support mSlice (+)
+# - lowerings must not perform unnecessary copies (+)
+# - slices should become "nocopy" to openArray (+)
+#   - need to perform bound checks (+)
+#
+# - parallel needs to insert a barrier (+)
+# - passed arguments need to be ensured to be "const"
+#   - what about 'f(a)'? --> f shouldn't have side effects anyway
+# - passed arrays need to be ensured not to alias
+# - passed slices need to be ensured to be disjoint (+)
+# - output slices need special logic (+)
+
+import
+  ast, astalgo, idents, lowerings, magicsys, guards, sempass2, msgs,
+  renderer
+from trees import getMagic
+from strutils import `%`
+
+discard """
+
+one major problem:
+  spawn f(a[i])
+  inc i
+  spawn f(a[i])
+is valid, but
+  spawn f(a[i])
+  spawn f(a[i])
+  inc i
+is not! However, 
+  spawn f(a[i])
+  if guard: inc i
+  spawn f(a[i])
+is not valid either! --> We need a flow dependent analysis here.
+
+However:
+  while foo:
+    spawn f(a[i])
+    inc i
+    spawn f(a[i])
+
+Is not valid either! --> We should really restrict 'inc' to loop endings?
+
+The heuristic that we implement here (that has no false positives) is: Usage
+of 'i' in a slice *after* we determined the stride is invalid!
+"""
+
+type
+  TDirection = enum
+    ascending, descending
+  MonotonicVar = object
+    v, alias: PSym        # to support the ordinary 'countup' iterator
+                          # we need to detect aliases
+    lower, upper, stride: PNode
+    dir: TDirection
+    blacklisted: bool     # blacklisted variables that are not monotonic
+  AnalysisCtx = object
+    locals: seq[MonotonicVar]
+    slices: seq[tuple[x,a,b: PNode, spawnId: int, inLoop: bool]]
+    guards: TModel      # nested guards
+    args: seq[PSym]     # args must be deeply immutable
+    spawns: int         # we can check that at last 1 spawn is used in
+                        # the 'parallel' section
+    currentSpawnId: int
+    inLoop: int
+
+let opSlice = createMagic("slice", mSlice)
+
+proc initAnalysisCtx(): AnalysisCtx =
+  result.locals = @[]
+  result.slices = @[]
+  result.args = @[]
+  result.guards = @[]
+
+proc lookupSlot(c: AnalysisCtx; s: PSym): int =
+  for i in 0.. <c.locals.len:
+    if c.locals[i].v == s or c.locals[i].alias == s: return i
+  return -1
+
+proc getSlot(c: var AnalysisCtx; v: PSym): ptr MonotonicVar =
+  let s = lookupSlot(c, v)
+  if s >= 0: return addr(c.locals[s])
+  let L = c.locals.len
+  c.locals.setLen(L+1)
+  c.locals[L].v = v
+  return addr(c.locals[L])
+
+proc gatherArgs(c: var AnalysisCtx; n: PNode) =
+  for i in 0.. <n.safeLen:
+    let root = getRoot n[i]
+    if root != nil:
+      block addRoot:
+        for r in items(c.args):
+          if r == root: break addRoot
+        c.args.add root
+    gatherArgs(c, n[i])
+
+proc isSingleAssignable(n: PNode): bool =
+  n.kind == nkSym and (let s = n.sym;
+    s.kind in {skTemp, skForVar, skLet} and
+          {sfAddrTaken, sfGlobal} * s.flags == {})
+
+proc isLocal(n: PNode): bool =
+  n.kind == nkSym and (let s = n.sym;
+    s.kind in {skResult, skTemp, skForVar, skVar, skLet} and
+          {sfAddrTaken, sfGlobal} * s.flags == {})
+
+proc checkLocal(c: AnalysisCtx; n: PNode) =
+  if isLocal(n):
+    let s = c.lookupSlot(n.sym)
+    if s >= 0 and c.locals[s].stride != nil:
+      localError(n.info, "invalid usage of counter after increment")
+  else:
+    for i in 0 .. <n.safeLen: checkLocal(c, n.sons[i])
+
+template `?`(x): expr = x.renderTree
+
+proc checkLe(c: AnalysisCtx; a, b: PNode) =
+  case proveLe(c.guards, a, b)
+  of impUnknown:
+    localError(a.info, "cannot prove: " & ?a & " <= " & ?b)
+  of impYes: discard
+  of impNo:
+    localError(a.info, "can prove: " & ?a & " > " & ?b)
+
+proc checkBounds(c: AnalysisCtx; arr, idx: PNode) =
+  checkLe(c, arr.lowBound, idx)
+  checkLe(c, idx, arr.highBound)
+
+proc addLowerBoundAsFacts(c: var AnalysisCtx) =
+  for v in c.locals:
+    if not v.blacklisted:
+      c.guards.addFactLe(v.lower, newSymNode(v.v))
+
+proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: PNode) =
+  checkLocal(c, n)
+  let le = le.canon
+  let ri = ri.canon
+  # perform static bounds checking here; and not later!
+  let oldState = c.guards.len
+  addLowerBoundAsFacts(c)
+  c.checkBounds(x, le)
+  c.checkBounds(x, ri)
+  c.guards.setLen(oldState)
+  c.slices.add((x, le, ri, c.currentSpawnId, c.inLoop > 0))
+
+proc overlap(m: TModel; x,y,c,d: PNode) =
+  #  X..Y and C..D overlap iff (X <= D and C <= Y)
+  case proveLe(m, x, d)
+  of impUnknown:
+    localError(x.info,
+      "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" %
+        [?x, ?d, ?x, ?y, ?c, ?d])
+  of impYes:
+    case proveLe(m, c, y)
+    of impUnknown:
+      localError(x.info,
+        "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" %
+          [?c, ?y, ?x, ?y, ?c, ?d])
+    of impYes:
+      localError(x.info, "($#)..($#) not disjoint from ($#)..($#)" % [?x, ?y, ?c, ?d])
+    of impNo: discard
+  of impNo: discard
+
+proc stride(c: AnalysisCtx; n: PNode): BiggestInt =
+  if isLocal(n):
+    let s = c.lookupSlot(n.sym)
+    if s >= 0 and c.locals[s].stride != nil:
+      result = c.locals[s].stride.intVal
+  else:
+    for i in 0 .. <n.safeLen: result += stride(c, n.sons[i])
+
+proc subStride(c: AnalysisCtx; n: PNode): PNode =
+  # substitute with stride:
+  if isLocal(n):
+    let s = c.lookupSlot(n.sym)
+    if s >= 0 and c.locals[s].stride != nil:
+      result = n +@ c.locals[s].stride.intVal
+    else:
+      result = n
+  elif n.safeLen > 0:
+    result = shallowCopy(n)
+    for i in 0 .. <n.len: result.sons[i] = subStride(c, n.sons[i])
+  else:
+    result = n
+
+proc checkSlicesAreDisjoint(c: var AnalysisCtx) =
+  # this is the only thing that we need to perform after we have traversed
+  # the whole tree so that the strides are available.
+  # First we need to add all the computed lower bounds:
+  addLowerBoundAsFacts(c)
+  # Every slice used in a loop needs to be disjoint with itself:
+  for x,a,b,id,inLoop in items(c.slices):
+    if inLoop: overlap(c.guards, a,b, c.subStride(a), c.subStride(b))
+  # Another tricky example is:
+  #   while true:
+  #     spawn f(a[i])
+  #     spawn f(a[i+1])
+  #     inc i  # inc i, 2  would be correct here
+  #
+  # Or even worse:
+  #   while true:
+  #     spawn f(a[i+1 .. i+3])
+  #     spawn f(a[i+4 .. i+5])
+  #     inc i, 4
+  # Prove that i*k*stride + 3 != i*k'*stride + 5
+  # For the correct example this amounts to
+  #   i*k*2 != i*k'*2 + 1
+  # which is true.
+  # For now, we don't try to prove things like that at all, even though it'd
+  # be feasible for many useful examples. Instead we attach the slice to
+  # a spawn and if the attached spawns differ, we bail out:
+  for i in 0 .. high(c.slices):
+    for j in i+1 .. high(c.slices):
+      let x = c.slices[i]
+      let y = c.slices[j]
+      if x.spawnId != y.spawnId and guards.sameTree(x.x, y.x):
+        if not x.inLoop or not y.inLoop:
+          # XXX strictly speaking, 'or' is not correct here and it needs to
+          # be 'and'. However this prevents too many obviously correct programs
+          # like f(a[0..x]); for i in x+1 .. a.high: f(a[i])
+          overlap(c.guards, x.a, x.b, y.a, y.b)
+        elif (let k = simpleSlice(x.a, x.b); let m = simpleSlice(y.a, y.b);
+              k >= 0 and m >= 0):
+          # ah I cannot resist the temptation and add another sweet heuristic:
+          # if both slices have the form (i+k)..(i+k)  and (i+m)..(i+m) we
+          # check they are disjoint and k < stride and m < stride:
+          overlap(c.guards, x.a, x.b, y.a, y.b)
+          let stride = min(c.stride(x.a), c.stride(y.a))
+          if k < stride and m < stride:
+            discard
+          else:
+            localError(x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" %
+              [?x.a, ?x.b, ?y.a, ?y.b])
+        else:
+          localError(x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" %
+            [?x.a, ?x.b, ?y.a, ?y.b])
+
+proc analyse(c: var AnalysisCtx; n: PNode)
+
+proc analyseSons(c: var AnalysisCtx; n: PNode) =
+  for i in 0 .. <safeLen(n): analyse(c, n[i])
+
+proc min(a, b: PNode): PNode =
+  if a.isNil: result = b
+  elif a.intVal < b.intVal: result = a
+  else: result = b
+
+proc fromSystem(op: PSym): bool = sfSystemModule in getModule(op).flags
+
+proc analyseCall(c: var AnalysisCtx; n: PNode; op: PSym) =
+  if op.magic == mSpawn:
+    inc c.spawns
+    let oldSpawnId = c.currentSpawnId
+    c.currentSpawnId = c.spawns
+    gatherArgs(c, n[1])
+    analyseSons(c, n)
+    c.currentSpawnId = oldSpawnId
+  elif op.magic == mInc or (op.name.s == "+=" and op.fromSystem):
+    if n[1].isLocal:
+      let incr = n[2].skipConv
+      if incr.kind in {nkCharLit..nkUInt32Lit} and incr.intVal > 0:
+        let slot = c.getSlot(n[1].sym)
+        slot.stride = min(slot.stride, incr)
+    analyseSons(c, n)
+  elif op.name.s == "[]" and op.fromSystem:
+    c.addSlice(n, n[1], n[2][1], n[2][2])
+    analyseSons(c, n)
+  elif op.name.s == "[]=" and op.fromSystem:
+    c.addSlice(n, n[1], n[2][1], n[2][2])
+    analyseSons(c, n)
+  else:
+    analyseSons(c, n)
+
+proc analyseCase(c: var AnalysisCtx; n: PNode) =
+  analyse(c, n.sons[0])
+  let oldFacts = c.guards.len
+  for i in 1.. <n.len:
+    let branch = n.sons[i]
+    setLen(c.guards, oldFacts)
+    addCaseBranchFacts(c.guards, n, i)
+    for i in 0 .. <branch.len:
+      analyse(c, branch.sons[i])
+  setLen(c.guards, oldFacts)
+
+proc analyseIf(c: var AnalysisCtx; n: PNode) =
+  analyse(c, n.sons[0].sons[0])
+  let oldFacts = c.guards.len
+  addFact(c.guards, canon(n.sons[0].sons[0]))
+
+  analyse(c, n.sons[0].sons[1])
+  for i in 1.. <n.len:
+    let branch = n.sons[i]
+    setLen(c.guards, oldFacts)
+    for j in 0..i-1:
+      addFactNeg(c.guards, canon(n.sons[j].sons[0]))
+    if branch.len > 1:
+      addFact(c.guards, canon(branch.sons[0]))
+    for i in 0 .. <branch.len:
+      analyse(c, branch.sons[i])
+  setLen(c.guards, oldFacts)
+
+proc analyse(c: var AnalysisCtx; n: PNode) =
+  case n.kind
+  of nkAsgn, nkFastAsgn:
+    if n[0].isSingleAssignable and n[1].isLocal:
+      let slot = c.getSlot(n[1].sym)
+      slot.alias = n[0].sym
+    elif n[0].isLocal:
+      # since we already ensure sfAddrTaken is not in s.flags, we only need to
+      # prevent direct assignments to the monotonic variable:
+      let slot = c.getSlot(n[0].sym)
+      slot.blackListed = true
+    invalidateFacts(c.guards, n[0])
+    analyseSons(c, n)
+    addAsgnFact(c.guards, n[0], n[1])
+  of nkCallKinds:
+    # direct call:
+    if n[0].kind == nkSym: analyseCall(c, n, n[0].sym)
+    else: analyseSons(c, n)
+  of nkBracketExpr:
+    c.addSlice(n, n[0], n[1], n[1])
+    analyseSons(c, n)
+  of nkReturnStmt, nkRaiseStmt, nkTryStmt:
+    localError(n.info, "invalid control flow for 'parallel'")
+    # 'break' that leaves the 'parallel' section is not valid either
+    # or maybe we should generate a 'try' XXX
+  of nkVarSection:
+    for it in n:
+      let value = it.lastSon
+      if value.kind != nkEmpty:
+        for j in 0 .. it.len-3:
+          if it[j].isLocal:
+            let slot = c.getSlot(it[j].sym)
+            if slot.lower.isNil: slot.lower = value
+            else: internalError(it.info, "slot already has a lower bound")
+        analyse(c, value)
+  of nkCaseStmt: analyseCase(c, n)
+  of nkIfStmt, nkIfExpr: analyseIf(c, n)
+  of nkWhileStmt:
+    analyse(c, n.sons[0])
+    # 'while true' loop?
+    inc c.inLoop
+    if isTrue(n.sons[0]):
+      analyseSons(c, n.sons[1])
+    else:
+      # loop may never execute:
+      let oldState = c.locals.len
+      let oldFacts = c.guards.len
+      addFact(c.guards, canon(n.sons[0]))
+      analyse(c, n.sons[1])
+      setLen(c.locals, oldState)
+      setLen(c.guards, oldFacts)
+      # we know after the loop the negation holds:
+      if not hasSubnodeWith(n.sons[1], nkBreakStmt):
+        addFactNeg(c.guards, canon(n.sons[0]))
+    dec c.inLoop
+  of nkTypeSection, nkProcDef, nkConverterDef, nkMethodDef, nkIteratorDef,
+      nkMacroDef, nkTemplateDef, nkConstSection, nkPragma:
+    discard
+  else:
+    analyseSons(c, n)
+
+proc transformSlices(n: PNode): PNode =
+  if n.kind in nkCallKinds and n[0].kind == nkSym:
+    let op = n[0].sym
+    if op.name.s == "[]" and op.fromSystem:
+      result = copyNode(n)
+      result.add opSlice.newSymNode
+      result.add n[1]
+      result.add n[2][1]
+      result.add n[2][2]
+      return result
+  if n.safeLen > 0:
+    result = shallowCopy(n)
+    for i in 0 .. < n.len:
+      result.sons[i] = transformSlices(n.sons[i])
+  else:
+    result = n
+
+proc transformSpawn(owner: PSym; n, barrier: PNode): PNode
+proc transformSpawnSons(owner: PSym; n, barrier: PNode): PNode =
+  result = shallowCopy(n)
+  for i in 0 .. < n.len:
+    result.sons[i] = transformSpawn(owner, n.sons[i], barrier)
+
+proc transformSpawn(owner: PSym; n, barrier: PNode): PNode =
+  case n.kind
+  of nkVarSection:
+    result = nil
+    for it in n:
+      let b = it.lastSon
+      if getMagic(b) == mSpawn:
+        if it.len != 3: localError(it.info, "invalid context for 'spawn'")
+        let m = transformSlices(b)
+        if result.isNil:
+          result = newNodeI(nkStmtList, n.info)
+          result.add n
+        result.add wrapProcForSpawn(owner, m, b.typ, barrier, it[0])
+        it.sons[it.len-1] = emptyNode
+    if result.isNil: result = n
+  of nkAsgn, nkFastAsgn:
+    let b = n[1]
+    if getMagic(b) == mSpawn:
+      let m = transformSlices(b)
+      return wrapProcForSpawn(owner, m, b.typ, barrier, n[0])
+    result = transformSpawnSons(owner, n, barrier)
+  of nkCallKinds:
+    if getMagic(n) == mSpawn:
+      result = transformSlices(n)
+      return wrapProcForSpawn(owner, result, n.typ, barrier, nil)
+    result = transformSpawnSons(owner, n, barrier)
+  elif n.safeLen > 0:
+    result = transformSpawnSons(owner, n, barrier)
+  else:
+    result = n
+
+proc checkArgs(a: var AnalysisCtx; n: PNode) =
+  discard "too implement"
+
+proc generateAliasChecks(a: AnalysisCtx; result: PNode) =
+  discard "too implement"
+
+proc liftParallel*(owner: PSym; n: PNode): PNode =
+  # this needs to be called after the 'for' loop elimination
+
+  # first pass:
+  # - detect monotonic local integer variables
+  # - detect used slices
+  # - detect used arguments
+  #echo "PAR ", renderTree(n)
+  
+  var a = initAnalysisCtx()
+  let body = n.lastSon
+  analyse(a, body)
+  if a.spawns == 0:
+    localError(n.info, "'parallel' section without 'spawn'")
+  checkSlicesAreDisjoint(a)
+  checkArgs(a, body)
+
+  var varSection = newNodeI(nkVarSection, n.info)
+  var temp = newSym(skTemp, getIdent"barrier", owner, n.info)
+  temp.typ = magicsys.getCompilerProc("Barrier").typ
+  incl(temp.flags, sfFromGeneric)
+  let tempNode = newSymNode(temp)
+  varSection.addVar tempNode
+
+  let barrier = genAddrOf(tempNode)
+  result = newNodeI(nkStmtList, n.info)
+  generateAliasChecks(a, result)
+  result.add varSection
+  result.add callCodeGenProc("openBarrier", barrier)
+  result.add transformSpawn(owner, body, barrier)
+  result.add callCodeGenProc("closeBarrier", barrier)
+
diff --git a/compiler/sempass2.nim b/compiler/sempass2.nim
index 6afde5f05..c8ce5e787 100644
--- a/compiler/sempass2.nim
+++ b/compiler/sempass2.nim
@@ -89,7 +89,7 @@ proc initVarViaNew(a: PEffects, n: PNode) =
   if n.kind != nkSym: return
   let s = n.sym
   if {tfNeedsInit, tfNotNil} * s.typ.flags <= {tfNotNil}:
-    # 'x' is not nil, but that doesn't mean it's not nil children
+    # 'x' is not nil, but that doesn't mean its "not nil" children
     # are initialized:
     initVar(a, n)
 
@@ -478,7 +478,7 @@ proc trackBlock(tracked: PEffects, n: PNode) =
   else:
     track(tracked, n)
 
-proc isTrue(n: PNode): bool =
+proc isTrue*(n: PNode): bool =
   n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or
     n.kind == nkIntLit and n.intVal != 0
 
diff --git a/compiler/semtypes.nim b/compiler/semtypes.nim
index 2dcca8f1e..9d38c4619 100644
--- a/compiler/semtypes.nim
+++ b/compiler/semtypes.nim
@@ -1085,8 +1085,10 @@ proc semTypeNode(c: PContext, n: PNode, prev: PType): PType =
   of nkCallKinds:
     if isRange(n):
       result = semRangeAux(c, n, prev)
-    elif n[0].kind == nkIdent:
-      let op = n.sons[0].ident
+    elif n[0].kind notin nkIdentKinds:
+      result = semTypeExpr(c, n)
+    else:
+      let op = considerQuotedIdent(n.sons[0])
       if op.id in {ord(wAnd), ord(wOr)} or op.s == "|":
         checkSonsLen(n, 3)
         var
@@ -1121,8 +1123,6 @@ proc semTypeNode(c: PContext, n: PNode, prev: PType): PType =
         result = semAnyRef(c, n, tyRef, prev)
       else:
         result = semTypeExpr(c, n)
-    else:
-      result = semTypeExpr(c, n)
   of nkWhenStmt:
     var whenResult = semWhen(c, n, false)
     if whenResult.kind == nkStmtList: whenResult.kind = nkStmtListType
diff --git a/compiler/suggest.nim b/compiler/suggest.nim
index a46c6a082..db95c480f 100644
--- a/compiler/suggest.nim
+++ b/compiler/suggest.nim
@@ -322,7 +322,7 @@ proc suggestSym*(n: PNode, s: PSym) {.inline.} =
     findUsages(n, s)
   if optDef in gGlobalOptions:
     findDefinition(n, s)
-  if isServing:
+  if isServing and not n.isNil:
     addToSourceMap(s, n.info)
 
 proc markUsed(n: PNode, s: PSym) =
diff --git a/compiler/vm.nim b/compiler/vm.nim
index 218369fa1..0c2c23987 100644
--- a/compiler/vm.nim
+++ b/compiler/vm.nim
@@ -131,8 +131,9 @@ proc createStrKeepNode(x: var TFullReg) =
       nfAllConst in x.node.flags:
     # XXX this is hacky; tests/txmlgen triggers it:
     x.node = newNode(nkStrLit)
-    #  debug x.node
-    #assert x.node.kind in {nkStrLit..nkTripleStrLit}
+    # It not only hackey, it is also wrong for tgentemplate. The primary
+    # cause of bugs like these is that the VM does not properly distinguish
+    # between variable defintions (var foo = e) and variable updates (foo = e).
 
 template createStr(x) =
   x.node = newNode(nkStrLit)
diff --git a/compiler/vmdeps.nim b/compiler/vmdeps.nim
index 0e01f5031..9a213d813 100644
--- a/compiler/vmdeps.nim
+++ b/compiler/vmdeps.nim
@@ -15,6 +15,8 @@ proc readOutput(p: PProcess): string =
   discard p.waitForExit
   while not output.atEnd:
     result.add(output.readLine)
+    result.add("\n")
+  result.setLen(result.len - "\n".len)
 
 proc opGorge*(cmd, input: string): string =
   var p = startCmd(cmd)
diff --git a/compiler/vmgen.nim b/compiler/vmgen.nim
index 186d27ae4..305ea7f9e 100644
--- a/compiler/vmgen.nim
+++ b/compiler/vmgen.nim
@@ -787,10 +787,12 @@ proc genMagic(c: PCtx; n: PNode; dest: var TDest) =
     c.freeTemp(tmp)
   of mSwap: 
     unused(n, dest)
-    var d = c.genx(n.sons[1])
-    var tmp = c.genx(n.sons[2])
-    c.gABC(n, opcSwap, d, tmp)
-    c.freeTemp(tmp)
+    var
+      d1 = c.genx(n.sons[1])
+      d2 = c.genx(n.sons[2])
+    c.gABC(n, opcSwap, d1, d2)
+    c.genAsgnPatch(n.sons[1], d1)
+    c.genAsgnPatch(n.sons[2], d2)
   of mIsNil: genUnaryABC(c, n, dest, opcIsNil)
   of mCopyStr:
     if dest < 0: dest = c.getTemp(n.typ)
diff --git a/config/nimrod.cfg b/config/nimrod.cfg
index 2817eac55..df3835ace 100644
--- a/config/nimrod.cfg
+++ b/config/nimrod.cfg
@@ -16,6 +16,7 @@ arm.linux.gcc.linkerexe = "arm-linux-gcc"
 path="$lib/core"
 path="$lib/pure"
 path="$lib/pure/collections"
+path="$lib/pure/concurrency"
 path="$lib/impure"
 path="$lib/wrappers"
 # path="$lib/wrappers/cairo"
diff --git a/doc/manual.txt b/doc/manual.txt
index 3af41faec..e96e50999 100644
--- a/doc/manual.txt
+++ b/doc/manual.txt
@@ -2823,7 +2823,7 @@ The following builtin procs cannot be overloaded for reasons of implementation
 simplicity (they require specialized semantic checking)::
 
   defined, definedInScope, compiles, low, high, sizeOf, 
-  is, of, echo, shallowCopy, getAst
+  is, of, echo, shallowCopy, getAst, spawn
 
 Thus they act more like keywords than like ordinary identifiers; unlike a 
 keyword however, a redefinition may `shadow`:idx: the definition in 
diff --git a/doc/spawn.txt b/doc/spawn.txt
new file mode 100644
index 000000000..ed500f3a5
--- /dev/null
+++ b/doc/spawn.txt
@@ -0,0 +1,98 @@
+==========================================================
+                  Parallel & Spawn
+==========================================================
+
+Nimrod has two flavors of parallelism: 
+1) `Structured`:idx parallelism via the ``parallel`` statement.
+2) `Unstructured`:idx: parallelism via the standalone ``spawn`` statement.
+
+Both need the `threadpool <threadpool.html>`_ module to work.
+
+Somewhat confusingly, ``spawn`` is also used in the ``parallel`` statement
+with slightly different semantics. ``spawn`` always takes a call expression of
+the form ``f(a, ...)``. Let ``T`` be ``f``'s return type. If ``T`` is ``void``
+then ``spawn``'s return type is also ``void``. Within a ``parallel`` section
+``spawn``'s return type is ``T``, otherwise it is ``FlowVar[T]``.
+
+The compiler can ensure the location in ``location = spawn f(...)`` is not
+read prematurely within a ``parallel`` section and so there is no need for
+the overhead of an indirection via ``FlowVar[T]`` to ensure correctness.
+
+
+Parallel statement
+==================
+
+Example:
+
+.. code-block:: nimrod
+  # Compute PI in an inefficient way
+  import strutils, math, threadpool
+
+  proc term(k: float): float = 4 * math.pow(-1, k) / (2*k + 1)
+
+  proc pi(n: int): float =
+    var ch = newSeq[float](n+1)
+    parallel:
+      for k in 0..ch.high:
+        ch[k] = spawn term(float(k))
+    for k in 0..ch.high:
+      result += ch[k]
+
+  echo formatFloat(pi(5000))
+
+
+The parallel statement is the preferred mechanism to introduce parallelism
+in a Nimrod program. A subset of the Nimrod language is valid within a
+``parallel`` section. This subset is checked to be free of data races at
+compile time. A sophisticated `disjoint checker`:idx: ensures that no data
+races are possible even though shared memory is extensively supported!
+
+The subset is in fact the full language with the following
+restrictions / changes:
+
+* ``spawn`` within a ``parallel`` section has special semantics.
+* Every location of the form ``a[i]`` and ``a[i..j]`` and ``dest`` where
+  ``dest`` is part of the pattern ``dest = spawn f(...)`` has to be
+  provable disjoint. This is called the *disjoint check*.
+* Every other complex location ``loc`` that is used in a spawned
+  proc (``spawn f(loc)``) has to be immutable for the duration of
+  the ``parallel`` section. This is called the *immutability check*. Currently
+  it is not specified what exactly "complex location" means. We need to make
+  this an optimization!
+* Every array access has to be provable within bounds. This is called 
+  the *bounds check*.
+* Slices are optimized so that no copy is performed. This optimization is not
+  yet performed for ordinary slices outside of a ``parallel`` section. Slices
+  are also special in that they currently do not support negative indexes!
+
+
+
+
+Spawn statement
+===============
+
+A standalone ``spawn`` statement is a simple construct. It executes
+the passed expression on the thread pool and returns a `data flow variable`:idx:
+``FlowVar[T]`` that can be read from. The reading with the ``^`` operator is
+**blocking**. However, one can use ``awaitAny`` to wait on multiple flow
+variables at the same time:
+
+.. code-block:: nimrod
+  import threadpool, ...
+  
+  # wait until 2 out of 3 servers received the update:
+  proc main =
+    var responses = newSeq[RawFlowVar](3)
+    for i in 0..2:
+      responses[i] = spawn tellServer(Update, "key", "value")
+    var index = awaitAny(responses)
+    assert index >= 0
+    responses.del(index)
+    discard awaitAny(responses)
+
+Like the ``parallel`` statement data flow variables ensure that no data races
+are possible. Due to technical limitations not every type ``T`` is possible in
+a data flow variable: ``T`` has to be of the type ``ref``, ``string``, ``seq``
+or of a type that doesn't contain a type that is garbage collected. This
+restriction will be removed in the future.
+
diff --git a/doc/tut1.txt b/doc/tut1.txt
index 8c6f140eb..9874f267b 100644
--- a/doc/tut1.txt
+++ b/doc/tut1.txt
@@ -1385,8 +1385,8 @@ Tuples
 A tuple type defines various named *fields* and an *order* of the fields.
 The constructor ``()`` can be used to construct tuples. The order of the
 fields in the constructor must match the order in the tuple's definition.
-Different tuple-types are *equivalent* if they specify the same fields of
-the same type in the same order.
+Different tuple-types are *equivalent* if they specify fields of
+the same type and of the same name in the same order.
 
 The assignment operator for tuples copies each component. The notation
 ``t.field`` is used to access a tuple's field. Another notation is
diff --git a/doc/tut2.txt b/doc/tut2.txt
index 11b485f50..1e23618e0 100644
--- a/doc/tut2.txt
+++ b/doc/tut2.txt
@@ -501,7 +501,7 @@ with the file and line where the uncaught exception is being raised, which may
 help you locate the offending code which has changed.
 
 If you want to add the ``{.raises.}`` pragma to existing code, the compiler can
-also help you. You can add the ``{.effect.}`` pragma statement to your proc and
+also help you. You can add the ``{.effects.}`` pragma statement to your proc and
 the compiler will output all inferred effects up to that point (exception
 tracking is part of Nimrod's effect system). Another more roundabout way to
 find out the list of exceptions raised by a proc is to use the Nimrod ``doc2``
diff --git a/lib/pure/asyncdispatch.nim b/lib/pure/asyncdispatch.nim
index 87ee83ad9..6d9e605f1 100644
--- a/lib/pure/asyncdispatch.nim
+++ b/lib/pure/asyncdispatch.nim
@@ -27,9 +27,11 @@ export TPort
 
 
 # TODO: Discarded void PFutures need to checked for exception.
-# TODO: Exceptions are currently uncatchable due to the limitation that 
-# you cannot have yield in a try stmt. Perhaps I can get the macro to put
-# a user's try except around ``future.read``.
+# TODO: ``except`` statement (without `try`) does not work.
+# TODO: Multiple exception names in a ``except`` don't work.
+# TODO: The effect system (raises: []) has trouble with my try transformation.
+# TODO: Can't await in a 'except' body
+
 
 # -- Futures
 
@@ -922,14 +924,17 @@ proc getName(node: PNimrodNode): string {.compileTime.} =
     return $node[1].ident
   of nnkIdent:
     return $node.ident
+  of nnkEmpty:
+    return "anonymous"
   else:
-    assert false
+    error("Unknown name.")
 
 macro async*(prc: stmt): stmt {.immediate.} =
   ## Macro which processes async procedures into the appropriate
   ## iterators and yield statements.
-
-  expectKind(prc, nnkProcDef)
+  if prc.kind notin {nnkProcDef, nnkLambda}:
+    error("Cannot transform this node kind into an async proc." &
+          " Proc definition or lambda node expected.")
 
   hint("Processing " & prc[0].getName & " as an async proc.")
 
@@ -941,7 +946,9 @@ macro async*(prc: stmt): stmt {.immediate.} =
     if $returnType[0] != "PFuture":
       error("Expected return type of 'PFuture' got '" & $returnType[0] & "'")
 
-  let subtypeIsVoid = returnType.kind == nnkEmpty
+  let subtypeIsVoid = returnType.kind == nnkEmpty or
+        (returnType.kind == nnkBracketExpr and
+         returnType[1].kind == nnkIdent and returnType[1].ident == !"void")
 
   var outerProcBody = newNimNode(nnkStmtList)
 
@@ -990,17 +997,19 @@ macro async*(prc: stmt): stmt {.immediate.} =
 
   # Remove the 'async' pragma.
   for i in 0 .. <result[4].len:
-    if result[4][i].ident == !"async":
+    if result[4][i].kind == nnkIdent and result[4][i].ident == !"async":
       result[4].del(i)
   if subtypeIsVoid:
     # Add discardable pragma.
-    result[4].add(newIdentNode("discardable"))
+    if prc.kind == nnkProcDef: # TODO: This is a workaround for #1287
+      result[4].add(newIdentNode("discardable"))
     if returnType.kind == nnkEmpty:
       # Add PFuture[void]
       result[3][0] = parseExpr("PFuture[void]")
 
   result[6] = outerProcBody
 
+  #echo(treeRepr(result))
   #echo(toStrLit(result))
 
 proc recvLine*(socket: TAsyncFD): PFuture[string] {.async.} =
diff --git a/lib/pure/asynchttpserver.nim b/lib/pure/asynchttpserver.nim
index 6c2414d99..005c56ebc 100644
--- a/lib/pure/asynchttpserver.nim
+++ b/lib/pure/asynchttpserver.nim
@@ -14,12 +14,13 @@
 import strtabs, asyncnet, asyncdispatch, parseutils, parseurl, strutils
 type
   TRequest* = object
-    client: PAsyncSocket # TODO: Separate this into a Response object?
+    client*: PAsyncSocket # TODO: Separate this into a Response object?
     reqMethod*: string
     headers*: PStringTable
     protocol*: tuple[orig: string, major, minor: int]
     url*: TURL
     hostname*: string ## The hostname of the client that made the request.
+    body*: string # TODO
 
   PAsyncHttpServer* = ref object
     socket: PAsyncSocket
@@ -169,6 +170,10 @@ proc serve*(server: PAsyncHttpServer, port: TPort,
     var fut = await server.socket.acceptAddr()
     processClient(fut.client, fut.address, callback)
 
+proc close*(server: PAsyncHttpServer) =
+  ## Terminates the async http server instance.
+  server.socket.close()
+
 when isMainModule:
   var server = newAsyncHttpServer()
   proc cb(req: TRequest) {.async.} =
diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim
index bc249ed63..4ba67cb2e 100644
--- a/lib/pure/collections/sets.nim
+++ b/lib/pure/collections/sets.nim
@@ -112,6 +112,10 @@ proc incl*[A](s: var TSet[A], key: A) =
   ## includes an element `key` in `s`.
   inclImpl()
 
+proc incl*[A](s: var TSet[A], other: TSet[A]) =
+  ## includes everything in `other` in `s`
+  for item in other: incl(s, item)
+
 proc excl*[A](s: var TSet[A], key: A) =
   ## excludes `key` from the set `s`.
   var index = rawGet(s, key)
@@ -119,6 +123,10 @@ proc excl*[A](s: var TSet[A], key: A) =
     s.data[index].slot = seDeleted
     dec(s.counter)
 
+proc excl*[A](s: var TSet[A], other: TSet[A]) =
+  ## excludes everything in `other` from `s`.
+  for item in other: excl(s, item)
+
 proc containsOrIncl*[A](s: var TSet[A], key: A): bool =
   ## returns true if `s` contains `key`, otherwise `key` is included in `s`
   ## and false is returned.
@@ -147,6 +155,43 @@ proc `$`*[A](s: TSet[A]): string =
   ## The `$` operator for hash sets.
   dollarImpl()
 
+proc union*[A](s1, s2: TSet[A]): TSet[A] =
+  ## returns a new set of all items that are contained in at
+  ## least one of `s1` and `s2`
+  result = s1
+  incl(result, s2)
+
+proc intersection*[A](s1, s2: TSet[A]): TSet[A] =
+  ## returns a new set of all items that are contained in both `s1` and `s2`
+  result = initSet[A](min(s1.data.len, s2.data.len))
+  for item in s1:
+    if item in s2: incl(result, item)
+
+proc symmetricDifference*[A](s1, s2: TSet[A]): TSet[A] =
+  ## returns a new set of all items that are contained in either
+  ## `s1` or `s2`, but not both
+  result = s1
+  for item in s2:
+    if containsOrIncl(result, item): excl(result, item)
+
+proc `+`*[A](s1, s2: TSet[A]): TSet[A] {.inline.} =
+  ## alias for `union`
+  result = union(s1, s2)
+
+proc `*`*[A](s1, s2: TSet[A]): TSet[A] {.inline.} =
+  ## alias for `intersection`
+  result = intersection(s1, s2)
+
+proc `-+-`*[A](s1, s2: TSet[A]): TSet[A] {.inline.} =
+  ## alias for `symmetricDifference`
+  result = symmetricDifference(s1, s2)
+
+proc disjoint*[A](s1, s2: TSet[A]): bool =
+  ## returns true iff `s1` and `s2` have no items in common
+  for item in s1:
+    if item in s2: return false
+  return true
+
 # ------------------------------ ordered set ------------------------------
 
 type
@@ -211,6 +256,10 @@ proc incl*[A](s: var TOrderedSet[A], key: A) =
   ## includes an element `key` in `s`.
   inclImpl()
 
+proc incl*[A](s: var TSet[A], other: TOrderedSet[A]) =
+  ## includes everything in `other` in `s`
+  for item in other: incl(s, item)
+
 proc containsOrIncl*[A](s: var TOrderedSet[A], key: A): bool =
   ## returns true if `s` contains `key`, otherwise `key` is included in `s`
   ## and false is returned.
diff --git a/lib/pure/concurrency/cpuinfo.nim b/lib/pure/concurrency/cpuinfo.nim
new file mode 100644
index 000000000..dfa819f64
--- /dev/null
+++ b/lib/pure/concurrency/cpuinfo.nim
@@ -0,0 +1,58 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2014 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module implements procs to determine the number of CPUs / cores.
+
+include "system/inclrtl"
+
+import strutils, os
+
+when not defined(windows):
+  import posix
+
+when defined(linux):
+  import linux
+
+when defined(macosx) or defined(bsd):
+  const
+    CTL_HW = 6
+    HW_AVAILCPU = 25
+    HW_NCPU = 3
+  proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer,
+              a: var csize, b: pointer, c: int): cint {.
+             importc: "sysctl", header: "<sys/sysctl.h>".}
+
+proc countProcessors*(): int {.rtl, extern: "ncpi$1".} =
+  ## returns the numer of the processors/cores the machine has.
+  ## Returns 0 if it cannot be detected.
+  when defined(windows):
+    var x = getEnv("NUMBER_OF_PROCESSORS")
+    if x.len > 0: result = parseInt(x.string)
+  elif defined(macosx) or defined(bsd):
+    var
+      mib: array[0..3, cint]
+      numCPU: int
+      len: csize
+    mib[0] = CTL_HW
+    mib[1] = HW_AVAILCPU
+    len = sizeof(numCPU)
+    discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
+    if numCPU < 1:
+      mib[1] = HW_NCPU
+      discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
+    result = numCPU
+  elif defined(hpux):
+    result = mpctl(MPC_GETNUMSPUS, nil, nil)
+  elif defined(irix):
+    var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint
+    result = sysconf(SC_NPROC_ONLN)
+  else:
+    result = sysconf(SC_NPROCESSORS_ONLN)
+  if result <= 0: result = 1
+
diff --git a/lib/pure/concurrency/cpuload.nim b/lib/pure/concurrency/cpuload.nim
new file mode 100644
index 000000000..3cf6a7392
--- /dev/null
+++ b/lib/pure/concurrency/cpuload.nim
@@ -0,0 +1,96 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2014 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module implements a helper for a thread pool to determine whether
+## creating a thread is a good idea.
+
+when defined(windows):
+  import winlean, os, strutils, math
+
+  proc `-`(a, b: TFILETIME): int64 = a.rdFileTime - b.rdFileTime
+elif defined(linux):
+  from cpuinfo import countProcessors
+
+type
+  ThreadPoolAdvice* = enum
+    doNothing,
+    doCreateThread,  # create additional thread for throughput
+    doShutdownThread # too many threads are busy, shutdown one
+
+  ThreadPoolState* = object
+    when defined(windows):
+      prevSysKernel, prevSysUser, prevProcKernel, prevProcUser: TFILETIME
+    calls*: int
+
+proc advice*(s: var ThreadPoolState): ThreadPoolAdvice =
+  when defined(windows):
+    var
+      sysIdle, sysKernel, sysUser,
+        procCreation, procExit, procKernel, procUser: TFILETIME
+    if getSystemTimes(sysIdle, sysKernel, sysUser) == 0 or
+        getProcessTimes(THandle(-1), procCreation, procExit, 
+                        procKernel, procUser) == 0:
+      return doNothing
+    if s.calls > 0:
+      let
+        sysKernelDiff = sysKernel - s.prevSysKernel
+        sysUserDiff = sysUser - s.prevSysUser
+
+        procKernelDiff = procKernel - s.prevProcKernel
+        procUserDiff = procUser - s.prevProcUser
+
+        sysTotal = int(sysKernelDiff + sysUserDiff)
+        procTotal = int(procKernelDiff + procUserDiff)
+      # total CPU usage < 85% --> create a new worker thread.
+      # Measurements show that 100% and often even 90% is not reached even
+      # if all my cores are busy.
+      if sysTotal == 0 or procTotal / sysTotal < 0.85:
+        result = doCreateThread
+    s.prevSysKernel = sysKernel
+    s.prevSysUser = sysUser
+    s.prevProcKernel = procKernel
+    s.prevProcUser = procUser
+  elif defined(linux):
+    proc fscanf(c: TFile, frmt: cstring) {.varargs, importc, 
+      header: "<stdio.h>".}
+
+    var f = open("/proc/loadavg")
+    var b: float
+    var busy, total: int
+    fscanf(f,"%lf %lf %lf %ld/%ld",
+           addr b, addr b, addr b, addr busy, addr total)
+    f.close()
+    let cpus = countProcessors()
+    if busy-1 < cpus:
+      result = doCreateThread
+    elif busy-1 >= cpus*2:
+      result = doShutdownThread
+    else:
+      result = doNothing
+  else:
+    # XXX implement this for other OSes
+    result = doNothing
+  inc s.calls
+
+when isMainModule:
+  proc busyLoop() =
+    while true:
+      discard random(80)
+      os.sleep(100)
+
+  spawn busyLoop()
+  spawn busyLoop()
+  spawn busyLoop()
+  spawn busyLoop()
+
+  var s: ThreadPoolState
+
+  for i in 1 .. 70:
+    echo advice(s)
+    os.sleep(1000)
diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim
new file mode 100644
index 000000000..fd1041918
--- /dev/null
+++ b/lib/pure/concurrency/threadpool.nim
@@ -0,0 +1,378 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2014 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Implements Nimrod's 'spawn'.
+
+import cpuinfo, cpuload, locks
+
+{.push stackTrace:off.}
+
+type
+  CondVar = object
+    c: TCond
+    L: TLock
+    counter: int
+
+proc createCondVar(): CondVar =
+  initCond(result.c)
+  initLock(result.L)
+
+proc destroyCondVar(cv: var CondVar) {.inline.} =
+  deinitCond(cv.c)
+  deinitLock(cv.L)
+
+proc await(cv: var CondVar) =
+  acquire(cv.L)
+  while cv.counter <= 0:
+    wait(cv.c, cv.L)
+  dec cv.counter
+  release(cv.L)
+
+proc signal(cv: var CondVar) =
+  acquire(cv.L)
+  inc cv.counter
+  release(cv.L)
+  signal(cv.c)
+
+const CacheLineSize = 32 # true for most archs
+
+type
+  Barrier {.compilerProc.} = object
+    entered: int
+    cv: CondVar # condvar takes 3 words at least
+    when sizeof(int) < 8:
+      cacheAlign: array[CacheLineSize-4*sizeof(int), byte]
+    left: int
+    cacheAlign2: array[CacheLineSize-sizeof(int), byte]
+    interest: bool ## wether the master is interested in the "all done" event
+
+proc barrierEnter(b: ptr Barrier) {.compilerProc, inline.} =
+  # due to the signaling between threads, it is ensured we are the only
+  # one with access to 'entered' so we don't need 'atomicInc' here:
+  inc b.entered
+  # also we need no 'fence' instructions here as soon 'nimArgsPassingDone'
+  # will be called which already will perform a fence for us.
+
+proc barrierLeave(b: ptr Barrier) {.compilerProc, inline.} =
+  atomicInc b.left
+  when not defined(x86): fence()
+  if b.interest and b.left == b.entered: signal(b.cv)
+
+proc openBarrier(b: ptr Barrier) {.compilerProc, inline.} =
+  b.entered = 0
+  b.left = 0
+  b.interest = false
+
+proc closeBarrier(b: ptr Barrier) {.compilerProc.} =
+  fence()
+  if b.left != b.entered:
+    b.cv = createCondVar()
+    fence()
+    b.interest = true
+    fence()
+    while b.left != b.entered: await(b.cv)
+    destroyCondVar(b.cv)
+
+{.pop.}
+
+# ----------------------------------------------------------------------------
+
+type
+  foreign* = object ## a region that indicates the pointer comes from a
+                    ## foreign thread heap.
+  AwaitInfo = object
+    cv: CondVar
+    idx: int
+
+  FlowVarBase* = ref FlowVarBaseObj ## untyped base class for 'FlowVar[T]'
+  FlowVarBaseObj = object of TObject
+    ready, usesCondVar: bool
+    cv: CondVar #\
+    # for 'awaitAny' support
+    ai: ptr AwaitInfo
+    idx: int
+    data: pointer  # we incRef and unref it to keep it alive
+    owner: pointer # ptr Worker
+
+  FlowVarObj[T] = object of FlowVarBaseObj
+    blob: T
+
+  FlowVar*{.compilerProc.}[T] = ref FlowVarObj[T] ## a data flow variable
+
+  ToFreeQueue = object
+    len: int
+    lock: TLock
+    empty: TCond
+    data: array[512, pointer]
+
+  WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.}
+  Worker = object
+    taskArrived: CondVar
+    taskStarted: CondVar #\
+    # task data:
+    f: WorkerProc
+    data: pointer
+    ready: bool # put it here for correct alignment!
+    initialized: bool # whether it has even been initialized
+    shutdown: bool # the pool requests to shut down this worker thread
+    q: ToFreeQueue
+
+proc await*(fv: FlowVarBase) =
+  ## waits until the value for the flowVar arrives. Usually it is not necessary
+  ## to call this explicitly.
+  if fv.usesCondVar:
+    fv.usesCondVar = false
+    await(fv.cv)
+    destroyCondVar(fv.cv)
+
+proc finished(fv: FlowVarBase) =
+  doAssert fv.ai.isNil, "flowVar is still attached to an 'awaitAny'"
+  # we have to protect against the rare cases where the owner of the flowVar
+  # simply disregards the flowVar and yet the "flowVarr" has not yet written
+  # anything to it:
+  await(fv)
+  if fv.data.isNil: return
+  let owner = cast[ptr Worker](fv.owner)
+  let q = addr(owner.q)
+  var waited = false
+  while true:
+    acquire(q.lock)
+    if q.len < q.data.len:
+      q.data[q.len] = fv.data
+      inc q.len
+      release(q.lock)
+      break
+    else:
+      # the queue is exhausted! We block until it has been cleaned:
+      release(q.lock)
+      wait(q.empty, q.lock)
+      waited = true
+  fv.data = nil
+  # wakeup other potentially waiting threads:
+  if waited: signal(q.empty)
+
+proc cleanFlowVars(w: ptr Worker) =
+  let q = addr(w.q)
+  acquire(q.lock)
+  for i in 0 .. <q.len:
+    GC_unref(cast[PObject](q.data[i]))
+  q.len = 0
+  release(q.lock)
+  signal(q.empty)
+
+proc fvFinalizer[T](fv: FlowVar[T]) = finished(fv)
+
+proc nimCreateFlowVar[T](): FlowVar[T] {.compilerProc.} =
+  new(result, fvFinalizer)
+
+proc nimFlowVarCreateCondVar(fv: FlowVarBase) {.compilerProc.} =
+  fv.cv = createCondVar()
+  fv.usesCondVar = true
+
+proc nimFlowVarSignal(fv: FlowVarBase) {.compilerProc.} =
+  if fv.ai != nil:
+    acquire(fv.ai.cv.L)
+    fv.ai.idx = fv.idx
+    inc fv.ai.cv.counter
+    release(fv.ai.cv.L)
+    signal(fv.ai.cv.c)
+  if fv.usesCondVar: signal(fv.cv)
+
+proc awaitAndThen*[T](fv: FlowVar[T]; action: proc (x: T) {.closure.}) =
+  ## blocks until the ``fv`` is available and then passes its value
+  ## to ``action``. Note that due to Nimrod's parameter passing semantics this
+  ## means that ``T`` doesn't need to be copied and so ``awaitAndThen`` can
+  ## sometimes be more efficient than ``^``.
+  await(fv)
+  when T is string or T is seq:
+    action(cast[T](fv.data))
+  elif T is ref:
+    {.error: "'awaitAndThen' not available for FlowVar[ref]".}
+  else:
+    action(fv.blob)
+  finished(fv)
+
+proc `^`*[T](fv: FlowVar[ref T]): foreign ptr T =
+  ## blocks until the value is available and then returns this value.
+  await(fv)
+  result = cast[foreign ptr T](fv.data)
+
+proc `^`*[T](fv: FlowVar[T]): T =
+  ## blocks until the value is available and then returns this value.
+  await(fv)
+  when T is string or T is seq:
+    result = cast[T](fv.data)
+  else:
+    result = fv.blob
+
+proc awaitAny*(flowVars: openArray[FlowVarBase]): int =
+  ## awaits any of the given flowVars. Returns the index of one flowVar for
+  ## which a value arrived. A flowVar only supports one call to 'awaitAny' at
+  ## the same time. That means if you await([a,b]) and await([b,c]) the second
+  ## call will only await 'c'. If there is no flowVar left to be able to wait
+  ## on, -1 is returned.
+  ## **Note**: This results in non-deterministic behaviour and so should be
+  ## avoided.
+  var ai: AwaitInfo
+  ai.cv = createCondVar()
+  var conflicts = 0
+  for i in 0 .. flowVars.high:
+    if cas(addr flowVars[i].ai, nil, addr ai):
+      flowVars[i].idx = i
+    else:
+      inc conflicts
+  if conflicts < flowVars.len:
+    await(ai.cv)
+    result = ai.idx
+    for i in 0 .. flowVars.high:
+      discard cas(addr flowVars[i].ai, addr ai, nil)
+  else:
+    result = -1
+  destroyCondVar(ai.cv)
+
+proc nimArgsPassingDone(p: pointer) {.compilerProc.} =
+  let w = cast[ptr Worker](p)
+  signal(w.taskStarted)
+
+const
+  MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 256 threads
+                           ## should be good enough for anybody ;-)
+
+var
+  currentPoolSize: int
+  maxPoolSize = MaxThreadPoolSize
+  minPoolSize = 4
+  gSomeReady = createCondVar()
+  readyWorker: ptr Worker
+
+proc slave(w: ptr Worker) {.thread.} =
+  while true:
+    w.ready = true
+    readyWorker = w
+    signal(gSomeReady)
+    await(w.taskArrived)
+    assert(not w.ready)
+    w.f(w, w.data)
+    if w.q.len != 0: w.cleanFlowVars
+    if w.shutdown:
+      w.shutdown = false
+      atomicDec currentPoolSize
+
+proc setMinPoolSize*(size: range[1..MaxThreadPoolSize]) =
+  ## sets the minimal thread pool size. The default value of this is 4.
+  minPoolSize = size
+
+proc setMaxPoolSize*(size: range[1..MaxThreadPoolSize]) =
+  ## sets the minimal thread pool size. The default value of this
+  ## is ``MaxThreadPoolSize``.
+  maxPoolSize = size
+
+var
+  workers: array[MaxThreadPoolSize, TThread[ptr Worker]]
+  workersData: array[MaxThreadPoolSize, Worker]
+
+proc activateThread(i: int) {.noinline.} =
+  workersData[i].taskArrived = createCondVar()
+  workersData[i].taskStarted = createCondVar()
+  workersData[i].initialized = true
+  initCond(workersData[i].q.empty)
+  initLock(workersData[i].q.lock)
+  createThread(workers[i], slave, addr(workersData[i]))
+
+proc setup() =
+  currentPoolSize = min(countProcessors(), MaxThreadPoolSize)
+  readyWorker = addr(workersData[0])
+  for i in 0.. <currentPoolSize: activateThread(i)
+
+proc preferSpawn*(): bool =
+  ## Use this proc to determine quickly if a 'spawn' or a direct call is
+  ## preferable. If it returns 'true' a 'spawn' may make sense. In general
+  ## it is not necessary to call this directly; use 'spawnX' instead.
+  result = gSomeReady.counter > 0
+
+proc spawn*(call: expr): expr {.magic: "Spawn".}
+  ## always spawns a new task, so that the 'call' is never executed on
+  ## the calling thread. 'call' has to be proc call 'p(...)' where 'p'
+  ## is gcsafe and has a return type that is either 'void' or compatible
+  ## with ``FlowVar[T]``.
+
+template spawnX*(call: expr): expr =
+  ## spawns a new task if a CPU core is ready, otherwise executes the
+  ## call in the calling thread. Usually it is advised to
+  ## use 'spawn' in order to not block the producer for an unknown
+  ## amount of time. 'call' has to be proc call 'p(...)' where 'p'
+  ## is gcsafe and has a return type that is either 'void' or compatible
+  ## with ``FlowVar[T]``.
+  (if preferSpawn(): spawn call else: call)
+
+proc parallel*(body: stmt) {.magic: "Parallel".}
+  ## a parallel section can be used to execute a block in parallel. ``body``
+  ## has to be in a DSL that is a particular subset of the language. Please
+  ## refer to the manual for further information.
+
+var
+  state: ThreadPoolState
+  stateLock: TLock
+
+initLock stateLock
+
+proc selectWorker(w: ptr Worker; fn: WorkerProc; data: pointer): bool =
+  if cas(addr w.ready, true, false):
+    w.data = data
+    w.f = fn
+    signal(w.taskArrived)
+    await(w.taskStarted)
+    result = true
+
+proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} =
+  # implementation of 'spawn' that is used by the code generator.
+  while true:
+    if selectWorker(readyWorker, fn, data): return
+    for i in 0.. <currentPoolSize:
+      if selectWorker(addr(workersData[i]), fn, data): return
+    # determine what to do, but keep in mind this is expensive too:
+    # state.calls < maxPoolSize: warmup phase
+    # (state.calls and 127) == 0: periodic check
+    if state.calls < maxPoolSize or (state.calls and 127) == 0:
+      # ensure the call to 'advice' is atomic:
+      if tryAcquire(stateLock):
+        case advice(state)
+        of doNothing: discard
+        of doCreateThread:
+          if currentPoolSize < maxPoolSize:
+            if not workersData[currentPoolSize].initialized:
+              activateThread(currentPoolSize)
+            let w = addr(workersData[currentPoolSize])
+            atomicInc currentPoolSize
+            if selectWorker(w, fn, data):
+              release(stateLock)
+              return
+            # else we didn't succeed but some other thread, so do nothing.
+        of doShutdownThread:
+          if currentPoolSize > minPoolSize:
+            let w = addr(workersData[currentPoolSize-1])
+            w.shutdown = true
+          # we don't free anything here. Too dangerous.
+        release(stateLock)
+      # else the acquire failed, but this means some
+      # other thread succeeded, so we don't need to do anything here.
+    await(gSomeReady)
+
+proc sync*() =
+  ## a simple barrier to wait for all spawn'ed tasks. If you need more elaborate
+  ## waiting, you have to use an explicit barrier.
+  while true:
+    var allReady = true
+    for i in 0 .. <currentPoolSize:
+      if not allReady: break
+      allReady = allReady and workersData[i].ready
+    if allReady: break
+    await(gSomeReady)
+
+setup()
diff --git a/lib/pure/json.nim b/lib/pure/json.nim
index bd5259f95..508e564c5 100644
--- a/lib/pure/json.nim
+++ b/lib/pure/json.nim
@@ -679,8 +679,8 @@ proc `[]`*(node: PJsonNode, name: string): PJsonNode =
 proc `[]`*(node: PJsonNode, index: int): PJsonNode =
   ## Gets the node at `index` in an Array. Result is undefined if `index`
   ## is out of bounds
+  assert(not isNil(node))
   assert(node.kind == JArray)
-  assert(node != nil)
   return node.elems[index]
 
 proc hasKey*(node: PJsonNode, key: string): bool =
@@ -713,14 +713,12 @@ proc `[]=`*(obj: PJsonNode, key: string, val: PJsonNode) =
       return
   obj.fields.add((key, val))
 
-proc `{}`*(node: PJsonNode, names: varargs[string]): PJsonNode =
+proc `{}`*(node: PJsonNode, key: string): PJsonNode =
   ## Transverses the node and gets the given value. If any of the
   ## names does not exist, returns nil
   result = node
-  for name in names:
-    result = result[name]
-    if isNil(result):
-      return nil
+  if isNil(node): return nil
+  result = result[key]
 
 proc `{}=`*(node: PJsonNode, names: varargs[string], value: PJsonNode) =
   ## Transverses the node and tries to set the value at the given location
@@ -1059,7 +1057,7 @@ when isMainModule:
 
   let testJson = parseJson"""{ "a": [1, 2, 3, 4], "b": "asd" }"""
   # nil passthrough
-  assert(testJson{"doesnt_exist", "anything"} == nil)
+  assert(testJson{"doesnt_exist"}{"anything"}.isNil)
   testJson{["c", "d"]} = %true
   assert(testJson["c"]["d"].bval)
 
diff --git a/lib/pure/net.nim b/lib/pure/net.nim
index 2f1a6fa46..e34c88327 100644
--- a/lib/pure/net.nim
+++ b/lib/pure/net.nim
@@ -11,7 +11,7 @@
 
 {.deadCodeElim: on.}
 import rawsockets, os, strutils, unsigned, parseutils, times
-export TPort
+export TPort, `$`
 
 const useWinVersion = defined(Windows) or defined(nimdoc)
 
diff --git a/lib/pure/osproc.nim b/lib/pure/osproc.nim
index cb9792f2b..04a0c2403 100644
--- a/lib/pure/osproc.nim
+++ b/lib/pure/osproc.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nimrod's Runtime Library
-#        (c) Copyright 2013 Andreas Rumpf
+#        (c) Copyright 2014 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -13,7 +13,7 @@
 include "system/inclrtl"
 
 import
-  strutils, os, strtabs, streams
+  strutils, os, strtabs, streams, cpuinfo
 
 when defined(windows):
   import winlean
@@ -225,42 +225,10 @@ proc errorHandle*(p: PProcess): TFileHandle {.rtl, extern: "nosp$1",
   ## it is closed when closing the PProcess ``p``.
   result = p.errHandle
 
-when defined(macosx) or defined(bsd):
-  const
-    CTL_HW = 6
-    HW_AVAILCPU = 25
-    HW_NCPU = 3
-  proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer,
-              a: var csize, b: pointer, c: int): cint {.
-             importc: "sysctl", header: "<sys/sysctl.h>".}
-
 proc countProcessors*(): int {.rtl, extern: "nosp$1".} =
   ## returns the numer of the processors/cores the machine has.
   ## Returns 0 if it cannot be detected.
-  when defined(windows):
-    var x = getEnv("NUMBER_OF_PROCESSORS")
-    if x.len > 0: result = parseInt(x.string)
-  elif defined(macosx) or defined(bsd):
-    var
-      mib: array[0..3, cint]
-      numCPU: int
-      len: csize
-    mib[0] = CTL_HW
-    mib[1] = HW_AVAILCPU
-    len = sizeof(numCPU)
-    discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
-    if numCPU < 1:
-      mib[1] = HW_NCPU
-      discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
-    result = numCPU
-  elif defined(hpux):
-    result = mpctl(MPC_GETNUMSPUS, nil, nil)
-  elif defined(irix):
-    var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint
-    result = sysconf(SC_NPROC_ONLN)
-  else:
-    result = sysconf(SC_NPROCESSORS_ONLN)
-  if result <= 0: result = 1
+  result = cpuinfo.countProcessors()
 
 proc execProcesses*(cmds: openArray[string],
                     options = {poStdErrToStdOut, poParentStreams},
diff --git a/lib/pure/rawsockets.nim b/lib/pure/rawsockets.nim
index 07b647b68..94189fd89 100644
--- a/lib/pure/rawsockets.nim
+++ b/lib/pure/rawsockets.nim
@@ -39,7 +39,6 @@ export
   MSG_PEEK
 
 type
-  
   TPort* = distinct uint16  ## port type
   
   TDomain* = enum   ## domain, which specifies the protocol family of the
diff --git a/lib/system.nim b/lib/system.nim
index 2f24f68b1..f45707849 100644
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -42,7 +42,6 @@ type
   cstring* {.magic: Cstring.} ## built-in cstring (*compatible string*) type
   pointer* {.magic: Pointer.} ## built-in pointer type, use the ``addr``
                               ## operator to get a pointer to a variable
-
 const
   on* = true    ## alias for ``true``
   off* = false  ## alias for ``false``
@@ -51,6 +50,9 @@ const
 
 type
   Ordinal* {.magic: Ordinal.}[T]
+  `ptr`* {.magic: Pointer.}[T] ## built-in generic untraced pointer type
+  `ref`* {.magic: Pointer.}[T] ## built-in generic traced pointer type
+
   `nil` {.magic: "Nil".}
   expr* {.magic: Expr.} ## meta type to denote an expression (for templates)
   stmt* {.magic: Stmt.} ## meta type to denote a statement (for templates)
@@ -2983,6 +2985,10 @@ proc locals*(): TObject {.magic: "Locals", noSideEffect.} =
   ##   # -> B is 1
   discard
 
+proc deepCopy*[T](x: T): T {.magic: "DeepCopy", noSideEffect.} = discard
+  ## performs a deep copy of `x`. This is also used by the code generator
+  ## for the implementation of ``spawn``.
+
 when not defined(booting):
   type
     semistatic*[T] = static[T] | T
@@ -2991,6 +2997,3 @@ when not defined(booting):
 
   template isStatic*(x): expr = compiles(static(x))
     # checks whether `x` is a value known at compile-time
-
-when hasThreadSupport:
-  when hostOS != "standalone": include "system/sysspawn"
diff --git a/lib/system/assign.nim b/lib/system/assign.nim
index 75c749633..2ae945fb1 100644
--- a/lib/system/assign.nim
+++ b/lib/system/assign.nim
@@ -179,7 +179,8 @@ when not defined(nimmixin):
     # internal proc used for destroying sequences and arrays
     for i in countup(0, r.len - 1): destroy(r[i])
 else:
-  # XXX Why is this exported and no compilerproc?
+  # XXX Why is this exported and no compilerproc? -> compilerprocs cannot be
+  # generic for now
   proc nimDestroyRange*[T](r: T) =
     # internal proc used for destroying sequences and arrays
     mixin destroy
diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim
index b1a96b209..43b3f0438 100644
--- a/lib/system/atomics.nim
+++ b/lib/system/atomics.nim
@@ -1,15 +1,18 @@
 #
 #
 #            Nimrod's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
+#        (c) Copyright 2014 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
 
 ## Atomic operations for Nimrod.
+{.push stackTrace:off.}
 
-when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport:
+const someGcc = defined(gcc) or defined(llvm_gcc) or defined(clang)
+
+when someGcc and hasThreadSupport:
   type 
     AtomMemModel* = enum
       ATOMIC_RELAXED,  ## No barriers or synchronization. 
@@ -152,41 +155,16 @@ when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport:
     ## A value of 0 indicates typical alignment should be used. The compiler may also 
     ## ignore this parameter.
 
+  template fence*() = atomicThreadFence(ATOMIC_SEQ_CST)
 elif defined(vcc) and hasThreadSupport:
   proc addAndFetch*(p: ptr int, val: int): int {.
     importc: "NimXadd", nodecl.}
+
 else:
   proc addAndFetch*(p: ptr int, val: int): int {.inline.} =
     inc(p[], val)
     result = p[]
 
-# atomic compare and swap (CAS) funcitons to implement lock-free algorithms  
-      
-#if defined(windows) and not defined(gcc) and hasThreadSupport:
-#    proc InterlockedCompareExchangePointer(mem: ptr pointer,
-#      newValue: pointer, comparand: pointer) : pointer {.nodecl, 
-#        importc: "InterlockedCompareExchangePointer", header:"windows.h".}
-
-#    proc compareAndSwap*[T](mem: ptr T, 
-#      expected: T, newValue: T): bool {.inline.}=
-#      ## Returns true if successfully set value at mem to newValue when value
-#      ## at mem == expected
-#      return InterlockedCompareExchangePointer(addr(mem), 
-#        addr(newValue), addr(expected))[] == expected
-    
-#elif not hasThreadSupport:
-#  proc compareAndSwap*[T](mem: ptr T, 
-#                          expected: T, newValue: T): bool {.inline.} =
-#      ## Returns true if successfully set value at mem to newValue when value
-#      ## at mem == expected
-#      var oldval = mem[]
-#      if oldval == expected:
-#        mem[] = newValue
-#        return true
-#      return false
-
-
-# Some convenient functions 
 proc atomicInc*(memLoc: var int, x: int = 1): int =
   when defined(gcc) and hasThreadSupport:
     result = atomic_add_fetch(memLoc.addr, x, ATOMIC_RELAXED)
@@ -203,3 +181,37 @@ proc atomicDec*(memLoc: var int, x: int = 1): int =
   else:
     dec(memLoc, x)
     result = memLoc
+
+when defined(windows) and not someGcc:
+  proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32
+    {.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.}
+
+  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
+    interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0
+  # XXX fix for 64 bit build
+else:
+  # this is valid for GCC and Intel C++
+  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool
+    {.importc: "__sync_bool_compare_and_swap", nodecl.}
+  # XXX is this valid for 'int'?
+
+
+when (defined(x86) or defined(amd64)) and (defined(gcc) or defined(llvm_gcc)):
+  proc cpuRelax {.inline.} =
+    {.emit: """asm volatile("pause" ::: "memory");""".}
+elif (defined(x86) or defined(amd64)) and defined(vcc):
+  proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".}
+elif defined(intelc):
+  proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".}
+elif false:
+  from os import sleep
+
+  proc cpuRelax {.inline.} = os.sleep(1)
+
+when not defined(fence) and hasThreadSupport:
+  # XXX fixme
+  proc fence*() {.inline.} =
+    var dummy: bool
+    discard cas(addr dummy, false, true)
+
+{.pop.}
diff --git a/lib/system/sysspawn.nim b/lib/system/sysspawn.nim
index dabf35a3e..95cdba65d 100644
--- a/lib/system/sysspawn.nim
+++ b/lib/system/sysspawn.nim
@@ -14,30 +14,6 @@ when not defined(NimString):
 
 {.push stackTrace:off.}
 
-when (defined(x86) or defined(amd64)) and defined(gcc):
-  proc cpuRelax {.inline.} =
-    {.emit: """asm volatile("pause" ::: "memory");""".}
-elif (defined(x86) or defined(amd64)) and defined(vcc):
-  proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".}
-elif defined(intelc):
-  proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".}
-elif false:
-  from os import sleep
-
-  proc cpuRelax {.inline.} = os.sleep(1)
-
-when defined(windows) and not defined(gcc):
-  proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32
-    {.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.}
-
-  proc cas(p: ptr bool; oldValue, newValue: bool): bool =
-    interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0
-
-else:
-  # this is valid for GCC and Intel C++
-  proc cas(p: ptr bool; oldValue, newValue: bool): bool
-    {.importc: "__sync_bool_compare_and_swap", nodecl.}
-
 # We declare our own condition variables here to get rid of the dummy lock
 # on Windows:
 
@@ -54,6 +30,9 @@ proc createCondVar(): CondVar =
     initSysLock(result.stupidLock)
     #acquireSys(result.stupidLock)
 
+proc destroyCondVar(c: var CondVar) {.inline.} =
+  deinitSysCond(c.c)
+
 proc await(cv: var CondVar) =
   when defined(posix):
     acquireSys(cv.stupidLock)
@@ -100,6 +79,26 @@ proc signal(cv: var FastCondVar) =
   #if cas(addr cv.slowPath, true, false):
   signal(cv.slow)
 
+type
+  Barrier* {.compilerProc.} = object
+    counter: int
+    cv: CondVar
+
+proc barrierEnter*(b: ptr Barrier) {.compilerProc.} =
+  atomicInc b.counter
+
+proc barrierLeave*(b: ptr Barrier) {.compilerProc.} =
+  atomicDec b.counter
+  if b.counter <= 0: signal(b.cv)
+
+proc openBarrier*(b: ptr Barrier) {.compilerProc.} =
+  b.counter = 0
+  b.cv = createCondVar()
+
+proc closeBarrier*(b: ptr Barrier) {.compilerProc.} =
+  await(b.cv)
+  destroyCondVar(b.cv)
+
 {.pop.}
 
 # ----------------------------------------------------------------------------
diff --git a/tests/notnil/tnotnil4.nim b/tests/notnil/tnotnil4.nim
new file mode 100644
index 000000000..23968ee48
--- /dev/null
+++ b/tests/notnil/tnotnil4.nim
@@ -0,0 +1,14 @@
+discard ""
+type
+   TObj = ref object
+
+proc check(a: TObj not nil) =
+  echo repr(a)
+
+proc doit() =
+   var x : array[0..1, TObj]
+
+   if x[0] != nil:
+      check(x[0])
+
+doit()
\ No newline at end of file
diff --git a/tests/parallel/nimrod.cfg b/tests/parallel/nimrod.cfg
new file mode 100644
index 000000000..b81c89721
--- /dev/null
+++ b/tests/parallel/nimrod.cfg
@@ -0,0 +1 @@
+threads:on
diff --git a/tests/parallel/tdisjoint_slice1.nim b/tests/parallel/tdisjoint_slice1.nim
new file mode 100644
index 000000000..c1d0e52f8
--- /dev/null
+++ b/tests/parallel/tdisjoint_slice1.nim
@@ -0,0 +1,21 @@
+discard """
+  outputsub: "EVEN 28"
+"""
+
+import threadpool
+
+proc odd(a: int) =  echo "ODD  ", a
+proc even(a: int) = echo "EVEN ", a
+
+proc main() =
+  var a: array[0..30, int]
+  for i in low(a)..high(a): a[i] = i
+  parallel:
+    var i = 0
+    while i <= 29:
+      spawn even(a[i])
+      spawn odd(a[i+1])
+      inc i, 2
+      # is correct here
+
+main()
diff --git a/tests/parallel/tdisjoint_slice2.nim b/tests/parallel/tdisjoint_slice2.nim
new file mode 100644
index 000000000..1e86ea644
--- /dev/null
+++ b/tests/parallel/tdisjoint_slice2.nim
@@ -0,0 +1,33 @@
+discard """
+  output: '''0
+1
+2
+3
+4
+5
+6
+7
+8'''
+  sortoutput: true
+"""
+
+import threadpool
+
+proc f(a: openArray[int]) =
+  for x in a: echo x
+
+proc f(a: int) = echo a
+
+proc main() =
+  var a: array[0..9, int] = [0,1,2,3,4,5,6,7,8,9]
+  parallel:
+    spawn f(a[0..2])
+    #spawn f(a[16..30])
+    var i = 3
+    while i <= 8:
+      spawn f(a[i])
+      spawn f(a[i+1])
+      inc i, 2
+      # is correct here
+
+main()
diff --git a/tests/parallel/tflowvar.nim b/tests/parallel/tflowvar.nim
new file mode 100644
index 000000000..77fab14b5
--- /dev/null
+++ b/tests/parallel/tflowvar.nim
@@ -0,0 +1,17 @@
+discard """
+  output: '''foobarfoobarbazbearbazbear'''
+  cmd: "nimrod $target --threads:on $options $file"
+"""
+
+import threadpool
+
+proc computeSomething(a, b: string): string = a & b & a & b
+
+proc main =
+  let fvA = spawn computeSomething("foo", "bar")
+  let fvB = spawn computeSomething("baz", "bear")
+
+  echo(^fvA, ^fvB)
+
+main()
+sync()
diff --git a/tests/parallel/tforstmt.nim b/tests/parallel/tforstmt.nim
new file mode 100644
index 000000000..58de833f3
--- /dev/null
+++ b/tests/parallel/tforstmt.nim
@@ -0,0 +1,25 @@
+discard """
+  output: '''3
+4
+5
+6
+7'''
+  sortoutput: true
+"""
+
+import threadpool, os
+
+proc p(x: int) =
+  os.sleep(100 - x*10)
+  echo x
+
+proc testFor(a, b: int; foo: var openArray[int]) =
+  parallel:
+    for i in max(a, 0) .. min(b, foo.high):
+      spawn p(foo[i])
+
+var arr = [0, 1, 2, 3, 4, 5, 6, 7]
+
+testFor(3, 10, arr)
+
+
diff --git a/tests/parallel/tinvalid_array_bounds.nim b/tests/parallel/tinvalid_array_bounds.nim
new file mode 100644
index 000000000..4c6065fd6
--- /dev/null
+++ b/tests/parallel/tinvalid_array_bounds.nim
@@ -0,0 +1,25 @@
+discard """
+  errormsg: "can prove: i + 1 > 30"
+  line: 21
+"""
+
+import threadpool
+
+proc f(a: openArray[int]) =
+  for x in a: echo x
+
+proc f(a: int) = echo a
+
+proc main() =
+  var a: array[0..30, int]
+  parallel:
+    spawn f(a[0..15])
+    spawn f(a[16..30])
+    var i = 0
+    while i <= 30:
+      spawn f(a[i])
+      spawn f(a[i+1])
+      inc i
+      #inc i  # inc i, 2  would be correct here
+
+main()
diff --git a/tests/parallel/tinvalid_counter_usage.nim b/tests/parallel/tinvalid_counter_usage.nim
new file mode 100644
index 000000000..c6303c651
--- /dev/null
+++ b/tests/parallel/tinvalid_counter_usage.nim
@@ -0,0 +1,26 @@
+discard """
+  errormsg: "invalid usage of counter after increment"
+  line: 21
+"""
+
+import threadpool
+
+proc f(a: openArray[int]) =
+  for x in a: echo x
+
+proc f(a: int) = echo a
+
+proc main() =
+  var a: array[0..30, int]
+  parallel:
+    spawn f(a[0..15])
+    spawn f(a[16..30])
+    var i = 0
+    while i <= 30:
+      inc i
+      spawn f(a[i])
+      inc i
+      #spawn f(a[i+1])
+      #inc i  # inc i, 2  would be correct here
+
+main()
diff --git a/tests/parallel/tnon_disjoint_slice1.nim b/tests/parallel/tnon_disjoint_slice1.nim
new file mode 100644
index 000000000..72d008bbd
--- /dev/null
+++ b/tests/parallel/tnon_disjoint_slice1.nim
@@ -0,0 +1,25 @@
+discard """
+  errormsg: "cannot prove (i)..(i) disjoint from (i + 1)..(i + 1)"
+  line: 20
+"""
+
+import threadpool
+
+proc f(a: openArray[int]) =
+  for x in a: echo x
+
+proc f(a: int) = echo a
+
+proc main() =
+  var a: array[0..30, int]
+  parallel:
+    #spawn f(a[0..15])
+    #spawn f(a[16..30])
+    var i = 0
+    while i <= 29:
+      spawn f(a[i])
+      spawn f(a[i+1])
+      inc i
+      #inc i  # inc i, 2  would be correct here
+
+main()
diff --git a/tests/parallel/tpi.nim b/tests/parallel/tpi.nim
new file mode 100644
index 000000000..dcb9b8fc5
--- /dev/null
+++ b/tests/parallel/tpi.nim
@@ -0,0 +1,26 @@
+discard """
+  output: '''3.141792613595791
+3.141792613595791'''
+"""
+
+import strutils, math, threadpool
+
+proc term(k: float): float = 4 * math.pow(-1, k) / (2*k + 1)
+
+proc piU(n: int): float =
+  var ch = newSeq[FlowVar[float]](n+1)
+  for k in 0..n:
+    ch[k] = spawn term(float(k))
+  for k in 0..n:
+    result += ^ch[k]
+
+proc piS(n: int): float =
+  var ch = newSeq[float](n+1)
+  parallel:
+    for k in 0..ch.high:
+      ch[k] = spawn term(float(k))
+  for k in 0..ch.high:
+    result += ch[k]
+
+echo formatFloat(piU(5000))
+echo formatFloat(piS(5000))
diff --git a/tests/system/tsysspawn.nim b/tests/parallel/tsysspawn.nim
index 0388918aa..fc7921b0e 100644
--- a/tests/system/tsysspawn.nim
+++ b/tests/parallel/tsysspawn.nim
@@ -4,20 +4,22 @@ discard """
   cmd: "nimrod $target --threads:on $options $file"
 """
 
+import threadpool
+
 var
   x, y = 0
 
 proc p1 =
-  for i in 0 .. 1_000_000:
+  for i in 0 .. 10_000:
     discard
 
-  inc x
+  atomicInc x
 
 proc p2 =
-  for i in 0 .. 1_000_000:
+  for i in 0 .. 10_000:
     discard
 
-  inc y, 2
+  atomicInc y, 2
 
 for i in 0.. 3:
   spawn(p1())
diff --git a/tests/parallel/tsysspawnbadarg.nim b/tests/parallel/tsysspawnbadarg.nim
new file mode 100644
index 000000000..ad798a7d3
--- /dev/null
+++ b/tests/parallel/tsysspawnbadarg.nim
@@ -0,0 +1,9 @@
+discard """
+  line: 9
+  errormsg: "'spawn' takes a call expression"
+  cmd: "nimrod $target --threads:on $options $file"
+"""
+
+import threadpool
+
+let foo = spawn(1)
diff --git a/tests/sets/tsets3.nim b/tests/sets/tsets3.nim
new file mode 100644
index 000000000..d2b15d72d
--- /dev/null
+++ b/tests/sets/tsets3.nim
@@ -0,0 +1,81 @@
+include sets
+
+let
+  s1: TSet[int] = toSet([1, 2, 4, 8, 16])
+  s2: TSet[int] = toSet([1, 2, 3, 5, 8])
+  s3: TSet[int] = toSet([3, 5, 7])
+
+block union:
+  let
+    s1_s2 = union(s1, s2)
+    s1_s3 = s1 + s3
+    s2_s3 = s2 + s3
+
+  assert s1_s2.len == 7
+  assert s1_s3.len == 8
+  assert s2_s3.len == 6
+
+  for i in s1:
+    assert i in s1_s2
+    assert i in s1_s3
+  for i in s2:
+    assert i in s1_s2
+    assert i in s2_s3
+  for i in s3:
+    assert i in s1_s3
+    assert i in s2_s3
+
+  assert((s1 + s1) == s1)
+  assert((s2 + s1) == s1_s2)
+
+block intersection:
+  let
+    s1_s2 = intersection(s1, s2)
+    s1_s3 = intersection(s1, s3)
+    s2_s3 = s2 * s3
+
+  assert s1_s2.len == 3
+  assert s1_s3.len == 0
+  assert s2_s3.len == 2
+
+  for i in s1_s2:
+    assert i in s1
+    assert i in s2
+  for i in s1_s3:
+    assert i in s1
+    assert i in s3
+  for i in s2_s3:
+    assert i in s2
+    assert i in s3
+
+  assert((s2 * s2) == s2)
+  assert((s3 * s2) == s2_s3)
+
+block symmetricDifference:
+  let
+    s1_s2 = symmetricDifference(s1, s2)
+    s1_s3 = s1 -+- s3
+    s2_s3 = s2 -+- s3
+
+  assert s1_s2.len == 4
+  assert s1_s3.len == 8
+  assert s2_s3.len == 4
+
+  for i in s1:
+    assert i in s1_s2 xor i in s2
+    assert i in s1_s3 xor i in s3
+  for i in s2:
+    assert i in s1_s2 xor i in s1
+    assert i in s2_s3 xor i in s3
+  for i in s3:
+    assert i in s1_s3 xor i in s1
+    assert i in s2_s3 xor i in s2
+
+  assert((s3 -+- s3) == initSet[int]())
+  assert((s3 -+- s1) == s1_s3)
+
+block disjoint:
+  assert(not disjoint(s1, s2))
+  assert disjoint(s1, s3)
+  assert(not disjoint(s2, s3))
+  assert(not disjoint(s2, s2))
\ No newline at end of file
diff --git a/tests/system/tsysspawnbadarg.nim b/tests/system/tsysspawnbadarg.nim
deleted file mode 100644
index ace074602..000000000
--- a/tests/system/tsysspawnbadarg.nim
+++ /dev/null
@@ -1,7 +0,0 @@
-discard """
-  line: 7
-  errormsg: "'spawn' takes a call expression of type void"
-  cmd: "nimrod $target --threads:on $options $file"
-"""
-
-spawn(1)
diff --git a/tests/testament/caasdriver.nim b/tests/testament/caasdriver.nim
index 8804f3ed7..ddfe88273 100644
--- a/tests/testament/caasdriver.nim
+++ b/tests/testament/caasdriver.nim
@@ -106,7 +106,7 @@ proc close(session: var TNimrodSession) {.destructor.} =
   if session.mode == CaasRun:
     session.nim.close
 
-proc doScenario(script: string, output: PStream, mode: TRunMode): bool =
+proc doScenario(script: string, output: PStream, mode: TRunMode, verbose: bool): bool =
   result = true
 
   var f = open(script)
@@ -138,7 +138,7 @@ proc doScenario(script: string, output: PStream, mode: TRunMode): bool =
         continue
       elif line.startsWith(">"):
         s.doCommand(line.substr(1).strip)
-        output.writeln line, "\n", s.lastOutput
+        output.writeln line, "\n", if verbose: s.lastOutput else: ""
       else:
         var expectMatch = true
         var pattern = s.replaceVars(line)
@@ -155,13 +155,14 @@ proc doScenario(script: string, output: PStream, mode: TRunMode): bool =
           output.writeln "FAILURE ", line
           result = false
 
-iterator caasTestsRunner*(filter = ""): tuple[test, output: string,
-                                              status: bool, mode: TRunMode] =
+iterator caasTestsRunner*(filter = "", verbose = false): tuple[test,
+                                              output: string, status: bool,
+                                              mode: TRunMode] =
   for scenario in os.walkFiles(TesterDir / "caas/*.txt"):
     if filter.len > 0 and find(scenario, filter) == -1: continue
     for mode in modes:
       var outStream = newStringStream()
-      let r = doScenario(scenario, outStream, mode)
+      let r = doScenario(scenario, outStream, mode, verbose)
       yield (scenario, outStream.data, r, mode)
 
 when isMainModule:
@@ -179,9 +180,12 @@ when isMainModule:
   if verbose and len(filter) > 0:
     echo "Running only test cases matching filter '$1'" % [filter]
 
-  for test, output, result, mode in caasTestsRunner(filter):
+  for test, output, result, mode in caasTestsRunner(filter, verbose):
     if not result or verbose:
-      echo test, "\n", output, "-> ", $mode, ":", $result, "\n-----"
+      echo "Mode ", $mode, " (", if result: "succeeded)" else: "failed)"
+      echo test
+      echo output
+      echo "---------\n"
     if not result:
       failures += 1
 
diff --git a/tests/testament/specs.nim b/tests/testament/specs.nim
index 225ea1891..6e72f4b5e 100644
--- a/tests/testament/specs.nim
+++ b/tests/testament/specs.nim
@@ -46,7 +46,7 @@ type
     msg*: string
     ccodeCheck*: string
     err*: TResultEnum
-    substr*: bool
+    substr*, sortoutput*: bool
     targets*: set[TTarget]
 
 const
@@ -113,6 +113,8 @@ proc parseSpec*(filename: string): TSpec =
       result.action = actionRun
       result.outp = e.value
       result.substr = true
+    of "sortoutput":
+      result.sortoutput = parseCfgBool(e.value)
     of "exitcode": 
       discard parseInt(e.value, result.exitCode)
     of "msg":
diff --git a/tests/testament/tester.nim b/tests/testament/tester.nim
index 50d0e6eac..fc6b4ff95 100644
--- a/tests/testament/tester.nim
+++ b/tests/testament/tester.nim
@@ -11,7 +11,8 @@
 
 import
   parseutils, strutils, pegs, os, osproc, streams, parsecfg, json,
-  marshal, backend, parseopt, specs, htmlgen, browsers, terminal
+  marshal, backend, parseopt, specs, htmlgen, browsers, terminal,
+  algorithm
 
 const
   resultsFile = "testresults.html"
@@ -150,6 +151,11 @@ proc codegenCheck(test: TTest, check: string, given: var TSpec) =
     except EIO:
       given.err = reCodeNotFound
 
+proc makeDeterministic(s: string): string =
+  var x = splitLines(s)
+  sort(x, system.cmp)
+  result = join(x, "\n")
+
 proc testSpec(r: var TResults, test: TTest) =
   # major entry point for a single test
   let tname = test.name.addFileExt(".nim")
@@ -191,8 +197,10 @@ proc testSpec(r: var TResults, test: TTest) =
             r.addResult(test, "exitcode: " & $expected.exitCode,
                               "exitcode: " & $exitCode, reExitCodesDiffer)
           else:
-            if strip(buf.string) != strip(expected.outp):
-              if not (expected.substr and expected.outp in buf.string):
+            var bufB = strip(buf.string)
+            if expected.sortoutput: bufB = makeDeterministic(bufB)
+            if bufB != strip(expected.outp):
+              if not (expected.substr and expected.outp in bufB):
                 given.err = reOutputsDiffer
             if given.err == reSuccess:
               codeGenCheck(test, expected.ccodeCheck, given)
diff --git a/todo.txt b/todo.txt
index 539089281..df18ff7e9 100644
--- a/todo.txt
+++ b/todo.txt
@@ -1,6 +1,23 @@
 version 0.9.6
 =============
 
+Concurrency
+-----------
+
+- the disjoint checker needs to deal with 'a = spawn f(); g = spawn f()'
+- implement 'deepCopy' builtin
+- implement 'foo[1..4] = spawn(f[4..7])'
+- support for exception propagation
+- Minor: The copying of the 'ref Promise' into the thead local storage only
+  happens to work due to the write barrier's implementation
+- 'gcsafe' inferrence needs to be fixed
+- implement lock levels --> first without the more complex race avoidance
+- document the new 'spawn' and 'parallel' statements
+
+
+Misc
+----
+
 - fix the bug that keeps 'defer' template from working
 - make '--implicitStatic:on' the default
 - fix the tuple unpacking in lambda bug
diff --git a/web/assets/style.css b/web/assets/style.css
index 5cee279fc..91bb4bd8a 100644
--- a/web/assets/style.css
+++ b/web/assets/style.css
@@ -93,7 +93,7 @@ html, body {
         border-left:10px solid #8f9698;
         background:#f3f6f8;
         font-size:15px;
-        font-family:courier;
+        font-family:courier, monospace;
         letter-spacing:0;
         line-height:17px;
       }
diff --git a/web/news.txt b/web/news.txt
index 0bbae7b7b..b7403a3c7 100644
--- a/web/news.txt
+++ b/web/news.txt
@@ -2,6 +2,23 @@
 News
 ====
 
+..
+  2014-06-29 Version 0.9.6 released
+  =================================
+
+  Changes affecting backwards compatibility
+  -----------------------------------------
+
+  - ``spawn`` now uses an elaborate self-adapting thread pool and as such
+    has been moved into its own module. So to use it, you now have to import
+    ``threadpool``.
+
+
+  Library Additions
+  -----------------
+
+  - Added module ``cpuinfo``.
+  - Added module ``threadpool``.
 
 
 2014-04-21 Version 0.9.4 released