-rw-r--r--  compiler/ast.nim | 3
-rw-r--r--  compiler/astalgo.nim | 2
-rw-r--r--  compiler/ccgstmts.nim | 19
-rw-r--r--  compiler/commands.nim | 6
-rw-r--r--  compiler/evalffi.nim | 80
-rw-r--r--  compiler/evaltempl.nim | 45
-rw-r--r--  compiler/lambdalifting.nim | 6
-rw-r--r--  compiler/lexer.nim | 10
-rw-r--r--  compiler/main.nim | 2
-rw-r--r--  compiler/msgs.nim | 4
-rw-r--r--  compiler/nimrod.nimrod.cfg | 3
-rw-r--r--  compiler/renderer.nim | 10
-rw-r--r--  compiler/sem.nim | 60
-rw-r--r--  compiler/semdata.nim | 2
-rw-r--r--  compiler/semexprs.nim | 15
-rw-r--r--  compiler/semfold.nim | 46
-rw-r--r--  compiler/semmagic.nim | 28
-rw-r--r--  compiler/semstmts.nim | 25
-rw-r--r--  compiler/transf.nim | 5
-rw-r--r--  compiler/vm.nim | 568
-rw-r--r--  compiler/vmdef.nim | 38
-rw-r--r--  compiler/vmdeps.nim | 59
-rw-r--r--  compiler/vmgen.nim | 318
-rw-r--r--  compiler/wordrecg.nim | 10
-rw-r--r--  config/nimrod.cfg | 1
-rw-r--r--  doc/keywords.txt | 3
-rw-r--r--  koch.nim | 9
-rw-r--r--  lib/core/macros.nim | 8
-rw-r--r--  lib/packages/docutils/highlite.nim | 2
-rw-r--r--  lib/pure/encodings.nim | 10
-rw-r--r--  lib/pure/os.nim | 79
-rw-r--r--  lib/pure/sockets.nim | 2
-rw-r--r--  lib/pure/strutils.nim | 6
-rw-r--r--  lib/system.nim | 8
-rw-r--r--  lib/system/ansi_c.nim | 61
-rw-r--r--  lib/system/sysio.nim | 28
-rw-r--r--  lib/system/widestrs.nim | 6
-rw-r--r--  lib/wrappers/libffi/common/ffi.h | 331
-rw-r--r--  lib/wrappers/libffi/common/ffi_common.h | 77
-rw-r--r--  lib/wrappers/libffi/common/fficonfig.h | 96
-rw-r--r--  lib/wrappers/libffi/common/ffitarget.h | 150
-rw-r--r--  lib/wrappers/libffi/common/malloc_closure.c | 110
-rw-r--r--  lib/wrappers/libffi/common/raw_api.c | 254
-rw-r--r--  lib/wrappers/libffi/gcc/closures.c | 627
-rw-r--r--  lib/wrappers/libffi/gcc/ffi.c | 841
-rw-r--r--  lib/wrappers/libffi/gcc/ffi64.c | 673
-rw-r--r--  lib/wrappers/libffi/gcc/prep_cif.c | 237
-rw-r--r--  lib/wrappers/libffi/gcc/types.c | 77
-rw-r--r--  lib/wrappers/libffi/gcc/win32_asm.asm | 759
-rw-r--r--  lib/wrappers/libffi/gcc/win32_asm.s | 736
-rw-r--r--  lib/wrappers/libffi/gcc/win64_asm.asm | 467
-rw-r--r--  lib/wrappers/libffi/gcc/win64_asm.s | 227
-rw-r--r--  lib/wrappers/libffi/libffi.nim (renamed from lib/wrappers/libffi.nim) | 80
-rw-r--r--  lib/wrappers/libffi/msvc/ffi.c | 457
-rw-r--r--  lib/wrappers/libffi/msvc/prep_cif.c | 175
-rw-r--r--  lib/wrappers/libffi/msvc/types.c | 104
-rw-r--r--  lib/wrappers/libffi/msvc/win32.c | 162
-rw-r--r--  lib/wrappers/libffi/msvc/win32_asm.asm | 470
-rw-r--r--  lib/wrappers/libffi/msvc/win64_asm.asm | 156
-rw-r--r--  tests/compile/tbindtypedesc.nim | 4
-rw-r--r--  tests/compile/tcomputedgoto.nim | 31
-rw-r--r--  tests/compile/tircbot.nim | 6
-rw-r--r--  tests/compile/tmacro2.nim | 2
-rw-r--r--  tests/patterns/tor.nim | 2
-rw-r--r--  tests/reject/t99bott.nim | 2
-rw-r--r--  tests/reject/tbind2.nim | 2
-rw-r--r--  tests/reject/tdisallowif.nim | 3
-rw-r--r--  tests/reject/teffects1.nim | 2
-rw-r--r--  tests/reject/tenummix.nim | 4
-rw-r--r--  tests/reject/tnot.nim | 3
-rw-r--r--  tests/reject/twrongconst.nim | 4
-rw-r--r--  tests/reject/twrongiter.nim | 2
-rw-r--r--  todo.txt | 6
-rw-r--r--  tools/detect/detect.nim | 23
-rw-r--r--  tools/detect/macosx_consts.nim | 629
-rw-r--r--  tools/detect/windows_amd64_consts.nim | 152
-rw-r--r--  tools/detect/windows_i386_consts.nim | 96
-rw-r--r--  web/news.txt | 10
78 files changed, 9241 insertions, 595 deletions
diff --git a/compiler/ast.nim b/compiler/ast.nim
index 0790df0c4..7199aa72d 100644
--- a/compiler/ast.nim
+++ b/compiler/ast.nim
@@ -364,6 +364,7 @@ type
     nfSem       # node has been checked for semantics
     nfDelegate  # the call can use a delegator
     nfExprCall  # this is an attempt to call a regular expression
+    nfIsRef     # this node is a 'ref' node; used for the VM
 
   TNodeFlags* = set[TNodeFlag]
   TTypeFlag* = enum   # keep below 32 for efficiency reasons (now: 23)
@@ -792,7 +793,7 @@ const
   ExportableSymKinds* = {skVar, skConst, skProc, skMethod, skType, skIterator, 
     skMacro, skTemplate, skConverter, skEnumField, skLet, skStub}
   PersistentNodeFlags*: TNodeFlags = {nfBase2, nfBase8, nfBase16,
-                                      nfAllConst, nfDelegate}
+                                      nfAllConst, nfDelegate, nfIsRef}
   namePos* = 0
   patternPos* = 1    # empty except for term rewriting macros
   genericParamsPos* = 2
diff --git a/compiler/astalgo.nim b/compiler/astalgo.nim
index 6c48dd00f..4f869cfca 100644
--- a/compiler/astalgo.nim
+++ b/compiler/astalgo.nim
@@ -216,7 +216,7 @@ proc makeYamlString*(s: string): PRope =
   const MaxLineLength = 64
   result = nil
   var res = "\""
-  for i in countup(0, len(s) - 1): 
+  for i in countup(0, if s.isNil: -1 else: (len(s)-1)): 
     if (i + 1) mod MaxLineLength == 0: 
       add(res, '\"')
       add(res, "\n")
diff --git a/compiler/ccgstmts.nim b/compiler/ccgstmts.nim
index ac4bbb79f..d9e6d83d0 100644
--- a/compiler/ccgstmts.nim
+++ b/compiler/ccgstmts.nim
@@ -322,8 +322,20 @@ proc genComputedGoto(p: BProc; n: PNode) =
     gotoArray.appf("&&TMP$#, ", (id+i).toRope)
   gotoArray.appf("&&TMP$#};$n", (id+arraySize).toRope)
   line(p, cpsLocals, gotoArray)
+
+  let topBlock = p.blocks.len-1
+  let oldBody = p.blocks[topBlock].sections[cpsStmts]
+  p.blocks[topBlock].sections[cpsStmts] = nil
   
+  for j in casePos+1 .. <n.len: genStmts(p, n.sons[j])
+  let tailB = p.blocks[topBlock].sections[cpsStmts]
+
+  p.blocks[topBlock].sections[cpsStmts] = nil
   for j in 0 .. casePos-1: genStmts(p, n.sons[j])
+  let tailA = p.blocks[topBlock].sections[cpsStmts]
+
+  p.blocks[topBlock].sections[cpsStmts] = oldBody.con(tailA)
+
   let caseStmt = n.sons[casePos]
   var a: TLoc
   initLocExpr(p, caseStmt.sons[0], a)
@@ -340,8 +352,11 @@ proc genComputedGoto(p: BProc; n: PNode) =
       let val = getOrdValue(it.sons[j])
       lineF(p, cpsStmts, "TMP$#:$n", intLiteral(val+id+1))
     genStmts(p, it.lastSon)
-    for j in casePos+1 .. <n.len: genStmts(p, n.sons[j])
-    for j in 0 .. casePos-1: genStmts(p, n.sons[j])
+    #for j in casePos+1 .. <n.len: genStmts(p, n.sons[j]) # tailB
+    #for j in 0 .. casePos-1: genStmts(p, n.sons[j])  # tailA
+    app(p.s(cpsStmts), tailB)
+    app(p.s(cpsStmts), tailA)
+
     var a: TLoc
     initLocExpr(p, caseStmt.sons[0], a)
     lineF(p, cpsStmts, "goto *$#[$#];$n", tmp, a.rdLoc)
diff --git a/compiler/commands.nim b/compiler/commands.nim
index d3266930b..a02728dac 100644
--- a/compiler/commands.nim
+++ b/compiler/commands.nim
@@ -398,13 +398,13 @@ proc processSwitch(switch, arg: string, pass: TCmdlinePass, info: TLineInfo) =
     if pass in {passCmd2, passPP}: extccomp.addLinkOption(arg)
   of "cincludes":
     expectArg(switch, arg, pass, info)
-    if pass in {passCmd2, passPP}: cIncludes.add arg
+    if pass in {passCmd2, passPP}: cIncludes.add arg.processPath
   of "clibdir":
     expectArg(switch, arg, pass, info)
-    if pass in {passCmd2, passPP}: cLibs.add arg
+    if pass in {passCmd2, passPP}: cLibs.add arg.processPath
   of "clib":
     expectArg(switch, arg, pass, info)
-    if pass in {passCmd2, passPP}: cLinkedLibs.add arg
+    if pass in {passCmd2, passPP}: cLinkedLibs.add arg.processPath
   of "header":
     headerFile = arg
     incl(gGlobalOptions, optGenIndex)
diff --git a/compiler/evalffi.nim b/compiler/evalffi.nim
index 21a131996..3b8ce0505 100644
--- a/compiler/evalffi.nim
+++ b/compiler/evalffi.nim
@@ -9,7 +9,7 @@
 
 ## This file implements the FFI part of the evaluator for Nimrod code.
 
-import ast, astalgo, ropes, types, options, tables, dynlib, libffi, msgs
+import ast, astalgo, ropes, types, options, tables, dynlib, libffi, msgs, os
 
 when defined(windows):
   const libcDll = "msvcrt.dll"
@@ -20,7 +20,11 @@ type
   TDllCache = tables.TTable[string, TLibHandle]
 var
   gDllCache = initTable[string, TLibHandle]()
-  gExeHandle = LoadLib()
+
+when defined(windows):
+  var gExeHandle = loadLib(os.getAppFilename())
+else:
+  var gExeHandle = loadLib()
 
 proc getDll(cache: var TDllCache; dll: string; info: TLineInfo): pointer =
   result = cache[dll]
@@ -28,15 +32,17 @@ proc getDll(cache: var TDllCache; dll: string; info: TLineInfo): pointer =
     var libs: seq[string] = @[]
     libCandidates(dll, libs)
     for c in libs:
-      result = LoadLib(c)
+      result = loadLib(c)
       if not result.isNil: break
     if result.isNil:
-      GlobalError(info, "cannot load: " & dll)
+      globalError(info, "cannot load: " & dll)
     cache[dll] = result
 
 const
   nkPtrLit = nkIntLit # hopefully we can get rid of this hack soon
 
+var myerrno {.importc: "errno", header: "<errno.h>".}: cint ## error variable
+
 proc importcSymbol*(sym: PSym): PNode =
   let name = ropeToStr(sym.loc.r)
   
@@ -47,10 +53,11 @@ proc importcSymbol*(sym: PSym): PNode =
   of "stdin":  result.intVal = cast[TAddress](system.stdin)
   of "stdout": result.intVal = cast[TAddress](system.stdout)
   of "stderr": result.intVal = cast[TAddress](system.stderr)
+  of "vmErrnoWrapper": result.intVal = cast[TAddress](myerrno)
   else:
     let lib = sym.annex
     if lib != nil and lib.path.kind notin {nkStrLit..nkTripleStrLit}:
-      GlobalError(sym.info, "dynlib needs to be a string lit for the REPL")
+      globalError(sym.info, "dynlib needs to be a string lit for the REPL")
     var theAddr: pointer
     if lib.isNil and not gExehandle.isNil:
       # first try this exe itself:
@@ -58,10 +65,12 @@ proc importcSymbol*(sym: PSym): PNode =
       # then try libc:
       if theAddr.isNil:
         let dllhandle = gDllCache.getDll(libcDll, sym.info)
-        theAddr = dllhandle.checkedSymAddr(name)
-    else:
-      let dllhandle = gDllCache.getDll(lib.path.strVal, sym.info)
-      theAddr = dllhandle.checkedSymAddr(name)
+        theAddr = dllhandle.symAddr(name)
+    elif not lib.isNil:
+      let dllhandle = gDllCache.getDll(if lib.kind == libHeader: libcDll 
+                                       else: lib.path.strVal, sym.info)
+      theAddr = dllhandle.symAddr(name)
+    if theAddr.isNil: globalError(sym.info, "cannot import: " & sym.name.s)
     result.intVal = cast[TAddress](theAddr)
 
 proc mapType(t: ast.PType): ptr libffi.TType =
@@ -139,7 +148,7 @@ proc getField(n: PNode; position: int): PSym =
       else: internalError(n.info, "getField(record case branch)")
   of nkSym:
     if n.sym.position == position: result = n.sym
-  else: nil
+  else: discard
 
 proc packObject(x: PNode, typ: PType, res: pointer) =
   InternalAssert x.kind in {nkObjConstr, nkPar}
@@ -192,7 +201,7 @@ proc pack(v: PNode, typ: PType, res: pointer) =
   of tyPointer, tyProc,  tyCString, tyString:
     if v.kind == nkNilLit:
       # nothing to do since the memory is 0 initialized anyway
-      nil
+      discard
     elif v.kind == nkPtrLit:
       awr(pointer, cast[pointer](v.intVal))
     elif v.kind in {nkStrLit..nkTripleStrLit}:
@@ -202,7 +211,7 @@ proc pack(v: PNode, typ: PType, res: pointer) =
   of tyPtr, tyRef, tyVar:
     if v.kind == nkNilLit:
       # nothing to do since the memory is 0 initialized anyway
-      nil
+      discard
     elif v.kind == nkPtrLit:
       awr(pointer, cast[pointer](v.intVal))
     else:
@@ -220,7 +229,7 @@ proc pack(v: PNode, typ: PType, res: pointer) =
   of tyObject, tyTuple:
     packObject(v, typ, res)
   of tyNil:
-    nil
+    discard
   of tyDistinct, tyGenericInst:
     pack(v, typ.sons[0], res)
   else:
@@ -241,7 +250,7 @@ proc unpackObjectAdd(x: pointer, n, result: PNode) =
     pair.sons[1] = unpack(x +! n.sym.offset, n.sym.typ, nil)
     #echo "offset: ", n.sym.name.s, " ", n.sym.offset
     result.add pair
-  else: nil
+  else: discard
 
 proc unpackObject(x: pointer, typ: PType, n: PNode): PNode =
   # compute the field's offsets:
@@ -441,3 +450,46 @@ proc callForeignFunction*(call: PNode): PNode =
   for i in 1 .. call.len-1:
     call.sons[i] = unpack(args[i-1], typ.sons[i], call[i])
     dealloc args[i-1]
+
+proc callForeignFunction*(fn: PNode, fntyp: PType,
+                          args: var TNodeSeq, start, len: int,
+                          info: TLineInfo): PNode =
+  internalAssert fn.kind == nkPtrLit
+  
+  var cif: TCif
+  var sig: TParamList
+  for i in 0..len-1:
+    var aTyp = args[i+start].typ
+    if aTyp.isNil:
+      internalAssert i+1 < fntyp.len
+      aTyp = fntyp.sons[i+1]
+      args[i+start].typ = aTyp
+    sig[i] = mapType(aTyp)
+    if sig[i].isNil: globalError(info, "cannot map FFI type")
+  
+  if prep_cif(cif, mapCallConv(fntyp.callConv, info), cuint(len),
+              mapType(fntyp.sons[0]), sig) != OK:
+    globalError(info, "error in FFI call")
+  
+  var cargs: TArgList
+  let fn = cast[pointer](fn.intVal)
+  for i in 0 .. len-1:
+    let t = args[i+start].typ
+    cargs[i] = alloc0(packSize(args[i+start], t))
+    pack(args[i+start], t, cargs[i])
+  let retVal = if isEmptyType(fntyp.sons[0]): pointer(nil)
+               else: alloc(fntyp.sons[0].getSize.int)
+
+  libffi.call(cif, fn, retVal, cargs)
+  
+  if retVal.isNil: 
+    result = emptyNode
+  else:
+    result = unpack(retVal, fntyp.sons[0], nil)
+    result.info = info
+
+  if retVal != nil: dealloc retVal
+  for i in 0 .. len-1:
+    let t = args[i+start].typ
+    args[i+start] = unpack(cargs[i], t, args[i+start])
+    dealloc cargs[i]
diff --git a/compiler/evaltempl.nim b/compiler/evaltempl.nim
index 05be0e9d3..4bff9ae5e 100644
--- a/compiler/evaltempl.nim
+++ b/compiler/evaltempl.nim
@@ -1,7 +1,7 @@
 #
 #
 #           The Nimrod Compiler
-#        (c) Copyright 2012 Andreas Rumpf
+#        (c) Copyright 2013 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -16,9 +16,14 @@ import
 type
   TemplCtx {.pure, final.} = object
     owner, genSymOwner: PSym
+    instLines: bool   # use the instantiation lines numbers
     mapping: TIdTable # every gensym'ed symbol needs to be mapped to some
                       # new symbol
 
+proc copyNode(ctx: TemplCtx, a, b: PNode): PNode =
+  result = copyNode(a)
+  if ctx.instLines: result.info = b.info
+
 proc evalTemplateAux(templ, actual: PNode, c: var TemplCtx, result: PNode) =
   case templ.kind
   of nkSym:
@@ -37,43 +42,17 @@ proc evalTemplateAux(templ, actual: PNode, c: var TemplCtx, result: PNode) =
           x = copySym(s, false)
           x.owner = c.genSymOwner
           IdTablePut(c.mapping, s, x)
-        result.add newSymNode(x, templ.info)
+        result.add newSymNode(x, if c.instLines: actual.info else: templ.info)
     else:
-      result.add copyNode(templ)
+      result.add copyNode(c, templ, actual)
   of nkNone..nkIdent, nkType..nkNilLit: # atom
-    result.add copyNode(templ)
+    result.add copyNode(c, templ, actual)
   else:
-    var res = copyNode(templ)
+    var res = copyNode(c, templ, actual)
     for i in countup(0, sonsLen(templ) - 1): 
       evalTemplateAux(templ.sons[i], actual, c, res)
     result.add res
 
-when false:
-  proc evalTemplateAux(templ, actual: PNode, c: var TemplCtx): PNode =
-    case templ.kind
-    of nkSym:
-      var s = templ.sym
-      if s.owner.id == c.owner.id:
-        if s.kind == skParam:
-          result = copyTree(actual.sons[s.position])
-        else:
-          InternalAssert sfGenSym in s.flags
-          var x = PSym(IdTableGet(c.mapping, s))
-          if x == nil:
-            x = copySym(s, false)
-            x.owner = c.genSymOwner
-            IdTablePut(c.mapping, s, x)
-          result = newSymNode(x, templ.info)
-      else:
-        result = copyNode(templ)
-    of nkNone..nkIdent, nkType..nkNilLit: # atom
-      result = copyNode(templ)
-    else:
-      result = copyNode(templ)
-      newSons(result, sonsLen(templ))
-      for i in countup(0, sonsLen(templ) - 1): 
-        result.sons[i] = evalTemplateAux(templ.sons[i], actual, c)
-
 proc evalTemplateArgs(n: PNode, s: PSym): PNode =
   # if the template has zero arguments, it can be called without ``()``
   # `n` is then a nkSym or something similar
@@ -118,7 +97,9 @@ proc evalTemplate*(n: PNode, tmpl, genSymOwner: PSym): PNode =
                   renderTree(result, {renderNoComments}))
   else:
     result = copyNode(body)
-    #evalTemplateAux(body, args, ctx, result)
+    ctx.instLines = body.kind notin {nkStmtList, nkStmtListExpr,
+                                     nkBlockStmt, nkBlockExpr}
+    if ctx.instLines: result.info = n.info
     for i in countup(0, safeLen(body) - 1):
       evalTemplateAux(body.sons[i], args, ctx, result)
   
diff --git a/compiler/lambdalifting.nim b/compiler/lambdalifting.nim
index 96eb3a5f4..dd48a0bc3 100644
--- a/compiler/lambdalifting.nim
+++ b/compiler/lambdalifting.nim
@@ -207,7 +207,9 @@ proc newCall(a, b: PSym): PNode =
 
 proc addHiddenParam(routine: PSym, param: PSym) =
   var params = routine.ast.sons[paramsPos]
-  param.position = params.len
+  # -1 is correct here as param.position is 0 based but we have at position 0
+  # some nkEffect node:
+  param.position = params.len-1
   addSon(params, newSymNode(param))
   incl(routine.typ.flags, tfCapturesEnv)
   #echo "produced environment: ", param.id, " for ", routine.name.s
@@ -549,6 +551,8 @@ proc transformOuterProc(o: POuterContext, n: PNode): PNode =
       if x != nil: n.sons[i] = x
 
 proc liftLambdas*(fn: PSym, body: PNode): PNode =
+  # XXX gCmd == cmdCompileToJS does not suffice! The compiletime stuff needs
+  # the transformation even when compiling to JS ...
   if body.kind == nkEmpty or gCmd == cmdCompileToJS:
     # ignore forward declaration:
     result = body
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index 82bfa0ad4..eb9287dfe 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -41,11 +41,12 @@ type
     tkGeneric, tkIf, tkImport, tkIn, tkInclude, tkInterface, 
     tkIs, tkIsnot, tkIterator,
     tkLambda, tkLet,
-    tkMacro, tkMethod, tkMixin, tkUsing, tkMod, tkNil, tkNot, tkNotin, 
+    tkMacro, tkMethod, tkMixin, tkMod, tkNil, tkNot, tkNotin, 
     tkObject, tkOf, tkOr, tkOut, 
     tkProc, tkPtr, tkRaise, tkRef, tkReturn, tkShared, tkShl, tkShr, tkStatic,
     tkTemplate, 
-    tkTry, tkTuple, tkType, tkVar, tkWhen, tkWhile, tkWith, tkWithout, tkXor,
+    tkTry, tkTuple, tkType, tkUsing, 
+    tkVar, tkWhen, tkWhile, tkWith, tkWithout, tkXor,
     tkYield, # end of keywords
     tkIntLit, tkInt8Lit, tkInt16Lit, tkInt32Lit, tkInt64Lit,
     tkUIntLit, tkUInt8Lit, tkUInt16Lit, tkUInt32Lit, tkUInt64Lit,
@@ -75,12 +76,13 @@ const
     "finally", "for", "from", "generic", "if", 
     "import", "in", "include", "interface", "is", "isnot", "iterator",
     "lambda", "let", 
-    "macro", "method", "mixin", "using", "mod", 
+    "macro", "method", "mixin", "mod", 
     "nil", "not", "notin", "object", "of", "or", 
     "out", "proc", "ptr", "raise", "ref", "return", 
     "shared", "shl", "shr", "static",
     "template", 
-    "try", "tuple", "type", "var", "when", "while", "with", "without", "xor",
+    "try", "tuple", "type", "using",
+    "var", "when", "while", "with", "without", "xor",
     "yield",
     "tkIntLit", "tkInt8Lit", "tkInt16Lit", "tkInt32Lit", "tkInt64Lit",
     "tkUIntLit", "tkUInt8Lit", "tkUInt16Lit", "tkUInt32Lit", "tkUInt64Lit",
diff --git a/compiler/main.nim b/compiler/main.nim
index 9ffe99454..b91b596b0 100644
--- a/compiler/main.nim
+++ b/compiler/main.nim
@@ -14,7 +14,7 @@ import
   os, condsyms, rodread, rodwrite, times,
   wordrecg, sem, semdata, idents, passes, docgen, extccomp,
   cgen, jsgen, json, nversion,
-  platform, nimconf, importer, passaux, depends, evals, types, idgen,
+  platform, nimconf, importer, passaux, depends, vm, vmdef, types, idgen,
   tables, docgen2, service, parser, modules, ccgutils, sigmatch, ropes, lists,
   pretty
 
diff --git a/compiler/msgs.nim b/compiler/msgs.nim
index 895ba71f3..2a7d54d4e 100644
--- a/compiler/msgs.nim
+++ b/compiler/msgs.nim
@@ -700,11 +700,9 @@ type
 
 proc handleError(msg: TMsgKind, eh: TErrorHandling, s: string) =
   template maybeTrace =
-    if defined(debug) or gVerbosity >= 3:
+    if defined(debug) or gVerbosity >= 3 or msg == errInternal:
       writeStackTrace()
 
-  if msg == errInternal:
-    writeStackTrace() # we always want a stack trace here
   if msg >= fatalMin and msg <= fatalMax: 
     maybeTrace()
     quit(1)
diff --git a/compiler/nimrod.nimrod.cfg b/compiler/nimrod.nimrod.cfg
index 9fa1b8cba..b2ae97686 100644
--- a/compiler/nimrod.nimrod.cfg
+++ b/compiler/nimrod.nimrod.cfg
@@ -13,3 +13,6 @@ path:"$lib/packages/docutils"
 define:booting
 import:testability
 
+@if windows:
+  cincludes: "$lib/wrappers/libffi/common"
+@end
diff --git a/compiler/renderer.nim b/compiler/renderer.nim
index f6fb0f8c0..c8fe70e02 100644
--- a/compiler/renderer.nim
+++ b/compiler/renderer.nim
@@ -557,7 +557,7 @@ proc longMode(n: PNode, start: int = 0, theEnd: int = - 1): bool =
 
 proc gstmts(g: var TSrcGen, n: PNode, c: TContext) = 
   if n.kind == nkEmpty: return 
-  if (n.kind == nkStmtList) or (n.kind == nkStmtListExpr): 
+  if n.kind in {nkStmtList, nkStmtListExpr, nkStmtListType}:
     indentNL(g)
     for i in countup(0, sonsLen(n) - 1): 
       optNL(g)
@@ -1069,7 +1069,7 @@ proc gsub(g: var TSrcGen, n: PNode, c: TContext) =
     put(g, tkSpaces, Space)
     putWithSpace(g, tkEquals, "=")
     gsub(g, n.sons[1])
-  of nkStmtList, nkStmtListExpr: gstmts(g, n, emptyContext)
+  of nkStmtList, nkStmtListExpr, nkStmtListType: gstmts(g, n, emptyContext)
   of nkIfStmt: 
     putWithSpace(g, tkIf, "if")
     gif(g, n)
@@ -1246,8 +1246,12 @@ proc gsub(g: var TSrcGen, n: PNode, c: TContext) =
       put(g, tkBracketLe, "[")
       gcomma(g, n)
       put(g, tkBracketRi, "]")
+  of nkMetaNode:
+    put(g, tkParLe, "(META|")
+    gsub(g, n.sons[0])
+    put(g, tkParRi, ")")
   else: 
-    #nkNone, nkMetaNode, nkExplicitTypeListCall: 
+    #nkNone, nkExplicitTypeListCall: 
     InternalError(n.info, "rnimsyn.gsub(" & $n.kind & ')')
 
 proc renderTree(n: PNode, renderFlags: TRenderFlags = {}): string = 
diff --git a/compiler/sem.nim b/compiler/sem.nim
index 3ace623bc..ed3c0e045 100644
--- a/compiler/sem.nim
+++ b/compiler/sem.nim
@@ -14,7 +14,7 @@ import
   wordrecg, ropes, msgs, os, condsyms, idents, renderer, types, platform, math,
   magicsys, parser, nversion, nimsets, semfold, importer,
   procfind, lookups, rodread, pragmas, passes, semdata, semtypinst, sigmatch,
-  semthreads, intsets, transf, evals, idgen, aliases, cgmeth, lambdalifting,
+  semthreads, intsets, transf, vmdef, vm, idgen, aliases, cgmeth, lambdalifting,
   evaltempl, patterns, parampatterns, sempass2
 
 # implementation
@@ -43,7 +43,7 @@ proc activate(c: PContext, n: PNode)
 proc semQuoteAst(c: PContext, n: PNode): PNode
 proc finishMethod(c: PContext, s: PSym)
 
-proc IndexTypesMatch(c: PContext, f, a: PType, arg: PNode): PNode
+proc indexTypesMatch(c: PContext, f, a: PType, arg: PNode): PNode
 
 proc typeMismatch(n: PNode, formal, actual: PType) = 
   if formal.kind != tyError and actual.kind != tyError: 
@@ -63,7 +63,7 @@ proc fitNode(c: PContext, formal: PType, arg: PNode): PNode =
     if result == nil:
       typeMismatch(arg, formal, arg.typ)
       # error correction:
-      result = copyNode(arg)
+      result = copyTree(arg)
       result.typ = formal
 
 var CommonTypeBegin = PType(kind: tyExpr)
@@ -169,25 +169,26 @@ when false:
     result = newSymNode(symFromType(t, info), info)
     result.typ = makeTypeDesc(c, t)
 
-proc createEvalContext(c: PContext, mode: TEvalMode): PEvalContext =
-  result = newEvalContext(c.module, mode)
-  result.getType = proc (n: PNode): PNode =
-    result = tryExpr(c, n)
-    if result == nil:
-      result = newSymNode(errorSym(c, n))
-    elif result.typ == nil:
-      result = newSymNode(getSysSym"void")
-    else:
-      result.typ = makeTypeDesc(c, result.typ)
+when false:
+  proc createEvalContext(c: PContext, mode: TEvalMode): PEvalContext =
+    result = newEvalContext(c.module, mode)
+    result.getType = proc (n: PNode): PNode =
+      result = tryExpr(c, n)
+      if result == nil:
+        result = newSymNode(errorSym(c, n))
+      elif result.typ == nil:
+        result = newSymNode(getSysSym"void")
+      else:
+        result.typ = makeTypeDesc(c, result.typ)
 
-  result.handleIsOperator = proc (n: PNode): PNode =
-    result = IsOpImpl(c, n)
+    result.handleIsOperator = proc (n: PNode): PNode =
+      result = IsOpImpl(c, n)
 
-proc evalConstExpr(c: PContext, module: PSym, e: PNode): PNode = 
-  result = evalConstExprAux(c.createEvalContext(emConst), module, nil, e)
+  proc evalConstExpr(c: PContext, module: PSym, e: PNode): PNode = 
+    result = evalConstExprAux(c.createEvalContext(emConst), module, nil, e)
 
-proc evalStaticExpr(c: PContext, module: PSym, e: PNode, prc: PSym): PNode = 
-  result = evalConstExprAux(c.createEvalContext(emStatic), module, prc, e)
+  proc evalStaticExpr(c: PContext, module: PSym, e: PNode, prc: PSym): PNode = 
+    result = evalConstExprAux(c.createEvalContext(emStatic), module, prc, e)
 
 proc semConstExpr(c: PContext, n: PNode): PNode =
   var e = semExprWithType(c, n)
@@ -196,7 +197,7 @@ proc semConstExpr(c: PContext, n: PNode): PNode =
     return n
   result = getConstExpr(c.module, e)
   if result == nil:
-    result = evalConstExpr(c, c.module, e)
+    result = evalConstExpr(c.module, e)
     if result == nil or result.kind == nkEmpty:
       if e.info != n.info:
         pushInfoContext(n.info)
@@ -206,6 +207,19 @@ proc semConstExpr(c: PContext, n: PNode): PNode =
         LocalError(e.info, errConstExprExpected)
       # error correction:
       result = e
+    else:
+      # recompute the types as 'eval' isn't guaranteed to construct types nor
+      # that the types are sound:
+      result = semExprWithType(c, result)
+      #result = fitNode(c, e.typ, result) inlined with special case:
+      let arg = result
+      result = indexTypesMatch(c, e.typ, arg.typ, arg)
+      if result == nil:
+        result = arg
+        # for 'tcnstseq' we support [] to become 'seq'
+        if e.typ.skipTypes(abstractInst).kind == tySequence and 
+           arg.typ.skipTypes(abstractInst).kind == tyArrayConstr:
+          arg.typ = e.typ
 
 include hlo, seminst, semcall
 
@@ -243,10 +257,10 @@ proc semMacroExpr(c: PContext, n, nOrig: PNode, sym: PSym,
   if sym == c.p.owner:
     GlobalError(n.info, errRecursiveDependencyX, sym.name.s)
 
-  if c.evalContext == nil:
-    c.evalContext = c.createEvalContext(emStatic)
+  #if c.evalContext == nil:
+  #  c.evalContext = c.createEvalContext(emStatic)
 
-  result = evalMacroCall(c.evalContext, n, nOrig, sym)
+  result = evalMacroCall(c.module, n, nOrig, sym)
   if semCheck: result = semAfterMacroCall(c, result, sym)
 
 proc forceBool(c: PContext, n: PNode): PNode = 
diff --git a/compiler/semdata.nim b/compiler/semdata.nim
index d02359d4c..650a399f7 100644
--- a/compiler/semdata.nim
+++ b/compiler/semdata.nim
@@ -13,7 +13,7 @@ import
   strutils, lists, intsets, options, lexer, ast, astalgo, trees, treetab,
   wordrecg, 
   ropes, msgs, platform, os, condsyms, idents, renderer, types, extccomp, math, 
-  magicsys, nversion, nimsets, parser, times, passes, rodread, evals
+  magicsys, nversion, nimsets, parser, times, passes, rodread, vmdef
 
 type 
   TOptionEntry* = object of lists.TListEntry # entries to put on a
diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim
index 310aabc32..c45b83095 100644
--- a/compiler/semexprs.nim
+++ b/compiler/semexprs.nim
@@ -300,11 +300,11 @@ proc semOf(c: PContext, n: PNode): PNode =
   result = n
 
 proc isOpImpl(c: PContext, n: PNode): PNode =
-  InternalAssert n.sonsLen == 3 and
-    n[1].kind == nkSym and n[1].sym.kind == skType and
+  internalAssert n.sonsLen == 3 and
+    n[1].typ != nil and
     n[2].kind in {nkStrLit..nkTripleStrLit, nkType}
   
-  let t1 = n[1].sym.typ.skipTypes({tyTypeDesc})
+  let t1 = n[1].typ.skipTypes({tyTypeDesc})
 
   if n[2].kind in {nkStrLit..nkTripleStrLit}:
     case n[2].strVal.normalize
@@ -640,18 +640,18 @@ proc evalAtCompileTime(c: PContext, n: PNode): PNode =
       call.add(a)
     #echo "NOW evaluating at compile time: ", call.renderTree
     if sfCompileTime in callee.flags:
-      result = evalStaticExpr(c, c.module, call, c.p.owner)
+      result = evalStaticExpr(c.module, call, c.p.owner)
       if result.isNil: 
         LocalError(n.info, errCannotInterpretNodeX, renderTree(call))
     else:
-      result = evalConstExpr(c, c.module, call)
+      result = evalConstExpr(c.module, call)
       if result.isNil: result = n
     #if result != n:
     #  echo "SUCCESS evaluated at compile time: ", call.renderTree
 
 proc semStaticExpr(c: PContext, n: PNode): PNode =
   let a = semExpr(c, n.sons[0])
-  result = evalStaticExpr(c, c.module, a, c.p.owner)
+  result = evalStaticExpr(c.module, a, c.p.owner)
   if result.isNil:
     LocalError(n.info, errCannotInterpretNodeX, renderTree(n))
     result = emptyNode
@@ -780,6 +780,7 @@ proc semDirectOp(c: PContext, n: PNode, flags: TExprFlags): PNode =
   #semLazyOpAux(c, n)
   result = semOverloadedCallAnalyseEffects(c, n, nOrig, flags)
   if result != nil: result = afterCallActions(c, result, nOrig, flags)
+  else: result = errorNode(c, n)
 
 proc buildStringify(c: PContext, arg: PNode): PNode = 
   if arg.typ != nil and 
@@ -1839,7 +1840,7 @@ proc semExpr(c: PContext, n: PNode, flags: TExprFlags = {}): PNode =
     # don't have to check the symbol for semantics here again!
     result = semSym(c, n, n.sym, flags)
   of nkEmpty, nkNone, nkCommentStmt: 
-    nil
+    discard
   of nkNilLit: 
     result.typ = getSysType(tyNil)
   of nkIntLit:
diff --git a/compiler/semfold.nim b/compiler/semfold.nim
index ca06ea1b6..fb1816f9c 100644
--- a/compiler/semfold.nim
+++ b/compiler/semfold.nim
@@ -229,6 +229,33 @@ discard """
   mShrI, mShrI64, mAddF64, mSubF64, mMulF64, mDivF64, mMaxF64, mMinF64
 """
 
+proc evalIs(n, a: PNode): PNode =
+  internalAssert a.kind == nkSym and a.sym.kind == skType
+  internalAssert n.sonsLen == 3 and
+    n[2].kind in {nkStrLit..nkTripleStrLit, nkType}
+  
+  let t1 = a.sym.typ
+
+  if n[2].kind in {nkStrLit..nkTripleStrLit}:
+    case n[2].strVal.normalize
+    of "closure":
+      let t = skipTypes(t1, abstractRange)
+      result = newIntNode(nkIntLit, ord(t.kind == tyProc and
+                                        t.callConv == ccClosure and 
+                                        tfIterator notin t.flags))
+    of "iterator":
+      let t = skipTypes(t1, abstractRange)
+      result = newIntNode(nkIntLit, ord(t.kind == tyProc and
+                                        t.callConv == ccClosure and 
+                                        tfIterator in t.flags))
+  else:
+    # XXX semexprs.isOpImpl is slightly different and requires a context. yay.
+    let t2 = n[2].typ
+    var match = if t2.kind == tyTypeClass: matchTypeClass(t2, t1)
+                else: sameType(t1, t2)
+    result = newIntNode(nkIntLit, ord(match))
+  result.typ = n.typ
+
 proc evalOp(m: TMagic, n, a, b, c: PNode): PNode = 
   # b and c may be nil
   result = nil
@@ -372,7 +399,7 @@ proc evalOp(m: TMagic, n, a, b, c: PNode): PNode =
      mAppendStrStr, mAppendSeqElem, mSetLengthStr, mSetLengthSeq, 
      mParseExprToAst, mParseStmtToAst, mExpandToAst, mTypeTrait,
      mNLen..mNError, mEqRef, mSlurp, mStaticExec, mNGenSym: 
-    nil
+    discard
   of mRand:
     result = newIntNodeT(math.random(a.getInt.int), n)
   else: InternalError(a.info, "evalOp(" & $m & ')')
@@ -446,8 +473,6 @@ proc magicCall(m: PSym, n: PNode): PNode =
     if sonsLen(n) > 3: 
       c = getConstExpr(m, n.sons[3])
       if c == nil: return 
-  else: 
-    b = nil
   result = evalOp(s.magic, n, a, b, c)
   
 proc getAppType(n: PNode): PNode =
@@ -485,7 +510,7 @@ proc foldConv*(n, a: PNode; check = false): PNode =
       result = a
       result.typ = n.typ
   of tyOpenArray, tyVarargs, tyProc: 
-    nil
+    discard
   else: 
     result = a
     result.typ = n.typ
@@ -523,7 +548,7 @@ proc foldArrayAccess(m: PSym, n: PNode): PNode =
       nil
     else: 
       LocalError(n.info, errIndexOutOfBounds)
-  else: nil
+  else: discard
   
 proc foldFieldAccess(m: PSym, n: PNode): PNode =
   # a real field access; proc calls have already been transformed
@@ -592,7 +617,7 @@ proc getConstExpr(m: PSym, n: PNode): PNode =
         result.typ = s.typ.sons[0]
       else:
         result = newSymNodeTypeDesc(s, n.info)
-    else: nil
+    else: discard
   of nkCharLit..nkNilLit: 
     result = copyNode(n)
   of nkIfExpr: 
@@ -604,7 +629,8 @@ proc getConstExpr(m: PSym, n: PNode): PNode =
     try:
       case s.magic
       of mNone:
-        return # XXX: if it has no sideEffect, it should be evaluated
+        # If it has no sideEffect, it should be evaluated. But not here.
+        return
       of mSizeOf:
         var a = n.sons[1]
         if computeSize(a.typ) < 0: 
@@ -644,6 +670,10 @@ proc getConstExpr(m: PSym, n: PNode): PNode =
         result = newStrNodeT(renderTree(n[1], {renderNoComments}), n)
       of mConStrStr:
         result = foldConStrStr(m, n)
+      of mIs:
+        let a = getConstExpr(m, n[1])
+        if a != nil and a.kind == nkSym and a.sym.kind == skType:
+          result = evalIs(n, a)
       else:
         result = magicCall(m, n)
     except EOverflow: 
@@ -727,4 +757,4 @@ proc getConstExpr(m: PSym, n: PNode): PNode =
   of nkBracketExpr: result = foldArrayAccess(m, n)
   of nkDotExpr: result = foldFieldAccess(m, n)
   else:
-    nil
+    discard
diff --git a/compiler/semmagic.nim b/compiler/semmagic.nim
index 88567b10a..aab4c82f5 100644
--- a/compiler/semmagic.nim
+++ b/compiler/semmagic.nim
@@ -32,15 +32,29 @@ proc semInstantiationInfo(c: PContext, n: PNode): PNode =
   result.add(filename)
   result.add(line)
 
+ 
+proc evalTypeTrait(trait: PNode, operand: PType, context: PSym): PNode =
+  let typ = operand.skipTypes({tyTypeDesc})
+  case trait.sym.name.s.normalize
+  of "name":
+    result = newStrNode(nkStrLit, typ.typeToString(preferName))
+    result.typ = newType(tyString, context)
+    result.info = trait.info
+  of "arity":    
+    result = newIntNode(nkIntLit, typ.n.len-1)
+    result.typ = newType(tyInt, context)
+    result.info = trait.info
+  else:
+    internalAssert false
+
 proc semTypeTraits(c: PContext, n: PNode): PNode =
   checkMinSonsLen(n, 2)
-  internalAssert n.sons[1].kind == nkSym
-  let typArg = n.sons[1].sym
-  if typArg.kind == skType or
-    (typArg.kind == skParam and typArg.typ.sonsLen > 0):
-    # This is either a type known to sem or a typedesc
-    # param to a regular proc (again, known at instantiation)
-    result = evalTypeTrait(n[0], n[1], GetCurrOwner())
+  let t = n.sons[1].typ
+  internalAssert t != nil
+  if t.kind == tyTypeDesc and t.len == 0:
+    result = n
+  elif not containsGenericType(t):
+    result = evalTypeTrait(n[0], t, GetCurrOwner())
   else:
     # a typedesc variable, pass unmodified to evals
     result = n
diff --git a/compiler/semstmts.nim b/compiler/semstmts.nim
index a1805fdec..6f0cc3c8b 100644
--- a/compiler/semstmts.nim
+++ b/compiler/semstmts.nim
@@ -425,7 +425,9 @@ proc semConst(c: PContext, n: PNode): PNode =
       def = fitRemoveHiddenConv(c, typ, def)
     else:
       typ = def.typ
-    if typ == nil: continue
+    if typ == nil:
+      LocalError(a.sons[2].info, errConstExprExpected)
+      continue
     if not typeAllowed(typ, skConst):
       LocalError(a.info, errXisNoType, typeToString(typ))
       continue
@@ -1159,20 +1161,25 @@ proc setLine(n: PNode, info: TLineInfo) =
 proc semPragmaBlock(c: PContext, n: PNode): PNode =
   let pragmaList = n.sons[0]
   pragma(c, nil, pragmaList, exprPragmas)
-  result = semStmt(c, n.sons[1])
+  result = semExpr(c, n.sons[1])
   for i in 0 .. <pragmaList.len:
     if whichPragma(pragmaList.sons[i]) == wLine:
       setLine(result, pragmaList.sons[i].info)
 
 proc semStaticStmt(c: PContext, n: PNode): PNode =
   let a = semStmt(c, n.sons[0])
-  result = evalStaticExpr(c, c.module, a, c.p.owner)
-  if result.isNil:
-    LocalError(n.info, errCannotInterpretNodeX, renderTree(n))
-    result = emptyNode
-  elif result.kind == nkEmpty:
-    result = newNodeI(nkDiscardStmt, n.info, 1)
-    result.sons[0] = emptyNode
+  n.sons[0] = a
+  evalStaticStmt(c.module, a, c.p.owner)
+  result = newNodeI(nkDiscardStmt, n.info, 1)
+  result.sons[0] = emptyNode
+  when false:
+    result = evalStaticStmt(c.module, a, c.p.owner)
+    if result.isNil:
+      LocalError(n.info, errCannotInterpretNodeX, renderTree(n))
+      result = emptyNode
+    elif result.kind == nkEmpty:
+      result = newNodeI(nkDiscardStmt, n.info, 1)
+      result.sons[0] = emptyNode
 
 proc usesResult(n: PNode): bool =
   # nkStmtList(expr) properly propagates the void context,
diff --git a/compiler/transf.nim b/compiler/transf.nim
index 206c21c3d..77642a3b8 100644
--- a/compiler/transf.nim
+++ b/compiler/transf.nim
@@ -735,12 +735,9 @@ proc transformBody*(module: PSym, n: PNode, prc: PSym): PNode =
   if nfTransf in n.flags or prc.kind in {skTemplate}:
     result = n
   else:
-    #when useEffectSystem: trackProc(prc, n)
     var c = openTransf(module, "")
     result = processTransf(c, n, prc)
-    if prc.kind != skMacro:
-      # XXX no closures yet for macros:
-      result = liftLambdas(prc, result)
+    result = liftLambdas(prc, result)
     if prc.kind == skIterator and prc.typ.callConv == ccClosure:
       result = lambdalifting.liftIterator(prc, result)
     incl(result.flags, nfTransf)
diff --git a/compiler/vm.nim b/compiler/vm.nim
index 7705746de..709baf7b2 100644
--- a/compiler/vm.nim
+++ b/compiler/vm.nim
@@ -10,11 +10,17 @@
 ## This file implements the new evaluation engine for Nimrod code.
 ## An instruction is 1-2 int32s in memory, it is a register based VM.
 
+import ast except getstr
+
 import
-  strutils, ast, astalgo, msgs, vmdef, vmgen, nimsets, types, passes, unsigned,
-  parser, vmdeps, idents
+  strutils, astalgo, msgs, vmdef, vmgen, nimsets, types, passes, unsigned,
+  parser, vmdeps, idents, trees, renderer, options
 
 from semfold import leValueConv, ordinalValToString
+from evaltempl import evalTemplate
+
+when hasFFI:
+  import evalffi
 
 type
   PStackFrame* = ref TStackFrame
@@ -46,9 +52,9 @@ proc stackTraceAux(c: PCtx; x: PStackFrame; pc: int) =
 
 proc stackTrace(c: PCtx, tos: PStackFrame, pc: int,
                 msg: TMsgKind, arg = "") =
-  MsgWriteln("stack trace: (most recent call last)")
+  msgWriteln("stack trace: (most recent call last)")
   stackTraceAux(c, tos, pc)
-  LocalError(c.debug[pc], msg, arg)
+  localError(c.debug[pc], msg, arg)
 
 proc bailOut(c: PCtx; tos: PStackFrame) =
   stackTrace(c, tos, c.exceptionInstr, errUnhandledExceptionX,
@@ -57,15 +63,25 @@ proc bailOut(c: PCtx; tos: PStackFrame) =
 when not defined(nimComputedGoto):
   {.pragma: computedGoto.}
 
-template inc(pc: ptr TInstr, diff = 1) =
-  inc cast[TAddress](pc), TInstr.sizeof * diff
-
 proc myreset(n: PNode) =
   when defined(system.reset): 
     var oldInfo = n.info
     reset(n[])
     n.info = oldInfo
 
+proc skipMeta(n: PNode): PNode = (if n.kind != nkMetaNode: n else: n.sons[0])
+
+proc setMeta(n, child: PNode) =
+  assert n.kind == nkMetaNode
+  let child = child.skipMeta
+  if n.sons.isNil: n.sons = @[child]
+  else: n.sons[0] = child
+
+proc uast(n: PNode): PNode {.inline.} =
+  # "underlying ast"
+  assert n.kind == nkMetaNode
+  n.sons[0]
+
 template ensureKind(k: expr) {.immediate, dirty.} =
   if regs[ra].kind != k:
     myreset(regs[ra])
@@ -96,23 +112,53 @@ template decodeBx(k: expr) {.immediate, dirty.} =
 template move(a, b: expr) = system.shallowCopy(a, b)
 # XXX fix minor 'shallowCopy' overloading bug in compiler
 
-proc asgnRef(x, y: PNode) =
-  myreset(x)
-  x.kind = y.kind
+proc moveConst(x, y: PNode) =
+  if x.kind != y.kind:
+    myreset(x)
+    x.kind = y.kind
   x.typ = y.typ
   case x.kind
   of nkCharLit..nkInt64Lit: x.intVal = y.intVal
   of nkFloatLit..nkFloat64Lit: x.floatVal = y.floatVal
-  of nkStrLit..nkTripleStrLit: x.strVal = y.strVal
+  of nkStrLit..nkTripleStrLit: move(x.strVal, y.strVal)
   of nkIdent: x.ident = y.ident
   of nkSym: x.sym = y.sym
+  of nkMetaNode:
+    if x.sons.isNil: x.sons = @[y.sons[0]]
+    else: x.sons[0] = y.sons[0]
   else:
     if x.kind notin {nkEmpty..nkNilLit}:
       move(x.sons, y.sons)
 
+# this seems to be the best way to model the reference semantics
+# of PNimrodNode:
+template asgnRef(x, y: expr) = moveConst(x, y)
+
+proc copyValue(src: PNode): PNode =
+  if src == nil or nfIsRef in src.flags:
+    return src
+  result = newNode(src.kind)
+  result.info = src.info
+  result.typ = src.typ
+  result.flags = src.flags * PersistentNodeFlags
+  when defined(useNodeIds):
+    if result.id == nodeIdToDebug:
+      echo "COMES FROM ", src.id
+  case src.Kind
+  of nkCharLit..nkUInt64Lit: result.intVal = src.intVal
+  of nkFloatLit..nkFloat128Lit: result.floatVal = src.floatVal
+  of nkSym: result.sym = src.sym
+  of nkIdent: result.ident = src.ident
+  of nkStrLit..nkTripleStrLit: result.strVal = src.strVal
+  else:
+    newSeq(result.sons, sonsLen(src))
+    for i in countup(0, sonsLen(src) - 1):
+      result.sons[i] = copyValue(src.sons[i])
+
 proc asgnComplex(x, y: PNode) =
-  myreset(x)
-  x.kind = y.kind
+  if x.kind != y.kind:
+    myreset(x)
+    x.kind = y.kind
   x.typ = y.typ
   case x.kind
   of nkCharLit..nkInt64Lit: x.intVal = y.intVal
@@ -120,13 +166,16 @@ proc asgnComplex(x, y: PNode) =
   of nkStrLit..nkTripleStrLit: x.strVal = y.strVal
   of nkIdent: x.ident = y.ident
   of nkSym: x.sym = y.sym
+  of nkMetaNode:
+    if x.sons.isNil: x.sons = @[y.sons[0]]
+    else: x.sons[0] = y.sons[0]
   else:
     if x.kind notin {nkEmpty..nkNilLit}:
-      let y = y.copyTree
+      let y = y.copyValue
       for i in countup(0, sonsLen(y) - 1): addSon(x, y.sons[i])
 
 template getstr(a: expr): expr =
-  (if a.kind == nkStrLit: a.strVal else: $chr(int(a.intVal)))
+  (if a.kind in {nkStrLit..nkTripleStrLit}: a.strVal else: $chr(int(a.intVal)))
 
 proc pushSafePoint(f: PStackFrame; pc: int) =
   if f.safePoints.isNil: f.safePoints = @[]
@@ -231,16 +280,23 @@ proc compile(c: PCtx, s: PSym): int =
   result = vmgen.genProc(c, s)
   #c.echoCode
 
-proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
+proc regsContents(regs: TNodeSeq) =
+  for i in 0.. <regs.len:
+    echo "Register ", i
+    #debug regs[i]
+
+proc rawExecute(c: PCtx, start: int, tos: PStackFrame): PNode =
   var pc = start
   var tos = tos
   var regs: TNodeSeq # alias to tos.slots for performance
   move(regs, tos.slots)
+  #echo "NEW RUN ------------------------"
   while true:
-    {.computedGoto.}
+    #{.computedGoto.}
     let instr = c.code[pc]
     let ra = instr.regA
     #echo "PC ", pc, " ", c.code[pc].opcode, " ra ", ra
+    #message(c.debug[pc], warnUser, "gah")
     case instr.opcode
     of opcEof: return regs[ra]
     of opcRet:
@@ -248,12 +304,15 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
       pc = tos.comesFrom
       tos = tos.next
       let retVal = regs[0]
-      if tos.isNil: return retVal
+      if tos.isNil: 
+        #echo "RET ", retVal.rendertree
+        return retVal
       
       move(regs, tos.slots)
       assert c.code[pc].opcode in {opcIndCall, opcIndCallAsgn}
       if c.code[pc].opcode == opcIndCallAsgn:
         regs[c.code[pc].regA] = retVal
+        #echo "RET2 ", retVal.rendertree, " ", c.code[pc].regA
     of opcYldYoid: assert false
     of opcYldVal: assert false
     of opcAsgnInt:
@@ -270,31 +329,45 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
     of opcAsgnRef:
       asgnRef(regs[ra], regs[instr.regB])
     of opcWrGlobalRef:
-      asgnRef(c.globals[instr.regBx-wordExcess-1], regs[ra])
+      asgnRef(c.globals.sons[instr.regBx-wordExcess-1], regs[ra])
     of opcWrGlobal:
       asgnComplex(c.globals.sons[instr.regBx-wordExcess-1], regs[ra])
     of opcLdArr:
       # a = b[c]
       let rb = instr.regB
       let rc = instr.regC
-      let idx = regs[rc].intVal
+      let idx = regs[rc].intVal.int
       # XXX what if the array is not 0-based? -> codegen should insert a sub
-      regs[ra] = regs[rb].sons[idx.int]
+      assert regs[rb].kind != nkMetaNode
+      let src = regs[rb]
+      if src.kind notin {nkEmpty..nkNilLit} and idx <% src.len:
+        asgnComplex(regs[ra], src.sons[idx])
+      else:
+        stackTrace(c, tos, pc, errIndexOutOfBounds)
     of opcLdStrIdx:
       decodeBC(nkIntLit)
-      let idx = regs[rc].intVal
-      regs[ra].intVal = regs[rb].strVal[idx.int].ord
+      let idx = regs[rc].intVal.int
+      if idx <=% regs[rb].strVal.len:
+        regs[ra].intVal = regs[rb].strVal[idx].ord
+      else:
+        stackTrace(c, tos, pc, errIndexOutOfBounds)
     of opcWrArr:
       # a[b] = c
       let rb = instr.regB
       let rc = instr.regC
-      let idx = regs[rb].intVal
-      asgnComplex(regs[ra].sons[idx.int], regs[rc])
+      let idx = regs[rb].intVal.int
+      if idx <% regs[ra].len:
+        asgnComplex(regs[ra].sons[idx], regs[rc])
+      else:
+        stackTrace(c, tos, pc, errIndexOutOfBounds)
     of opcWrArrRef:
       let rb = instr.regB
       let rc = instr.regC
-      let idx = regs[rb].intVal
-      asgnRef(regs[ra].sons[idx.int], regs[rc])
+      let idx = regs[rb].intVal.int
+      if idx <% regs[ra].len:
+        asgnRef(regs[ra].sons[idx], regs[rc])
+      else:
+        stackTrace(c, tos, pc, errIndexOutOfBounds)
     of opcLdObj:
       # a = b.c
       let rb = instr.regB
@@ -306,6 +379,11 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
       # a.b = c
       let rb = instr.regB
       let rc = instr.regC
+      #if regs[ra].isNil or regs[ra].sons.isNil or rb >= len(regs[ra]):
+      #  debug regs[ra]
+      #  debug regs[rc]
+      #  echo "RB ", rb
+      #  internalError(c.debug[pc], "argl")
       asgnComplex(regs[ra].sons[rb], regs[rc])
     of opcWrObjRef:
       let rb = instr.regB
@@ -314,7 +392,10 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
     of opcWrStrIdx:
       decodeBC(nkStrLit)
       let idx = regs[rb].intVal.int
-      regs[ra].strVal[idx] = chr(regs[rc].intVal)
+      if idx <% regs[ra].strVal.len:
+        regs[ra].strVal[idx] = chr(regs[rc].intVal)
+      else:
+        stackTrace(c, tos, pc, errIndexOutOfBounds)
     of opcAddr:
       decodeB(nkRefTy)
       if regs[ra].len == 0: regs[ra].add regs[rb]
@@ -325,6 +406,7 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
       if regs[rb].kind == nkNilLit:
         stackTrace(c, tos, pc, errNilAccess)
       assert regs[rb].kind == nkRefTy
+      # XXX this is not correct
       regs[ra] = regs[rb].sons[0]
     of opcAddInt:
       decodeBC(nkIntLit)
@@ -341,8 +423,8 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
     of opcLenSeq:
       decodeBImm(nkIntLit)
       #assert regs[rb].kind == nkBracket
-      # also used by mNLen
-      regs[ra].intVal = regs[rb].len - imm
+      # also used by mNLen:
+      regs[ra].intVal = regs[rb].skipMeta.len - imm
     of opcLenStr:
       decodeBImm(nkIntLit)
       assert regs[rb].kind == nkStrLit
@@ -350,6 +432,12 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
     of opcIncl:
       decodeB(nkCurly)
       if not inSet(regs[ra], regs[rb]): addSon(regs[ra], copyTree(regs[rb]))
+    of opcInclRange:
+      decodeBC(nkCurly)
+      var r = newNode(nkRange)
+      r.add regs[rb]
+      r.add regs[rc]
+      addSon(regs[ra], r.copyTree)
     of opcExcl:
       decodeB(nkCurly)
       var b = newNodeIT(nkCurly, regs[rb].info, regs[rb].typ)
@@ -440,6 +528,9 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
       regs[ra].intVal = ord((regs[rb].kind == nkNilLit and
                              regs[rc].kind == nkNilLit) or
                              regs[rb].sons == regs[rc].sons)
+    of opcEqNimrodNode:
+      decodeBC(nkIntLit)
+      regs[ra].intVal = ord(regs[rb].skipMeta == regs[rc].skipMeta)
     of opcXor:
       decodeBC(nkIntLit)
       regs[ra].intVal = ord(regs[rb].intVal != regs[rc].intVal)
@@ -461,24 +552,24 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
       regs[ra].intVal = not regs[rb].intVal
     of opcEqStr:
       decodeBC(nkIntLit)
-      regs[ra].intVal = Ord(regs[rb].strVal == regs[rc].strVal)
+      regs[ra].intVal = ord(regs[rb].strVal == regs[rc].strVal)
     of opcLeStr:
       decodeBC(nkIntLit)
-      regs[ra].intVal = Ord(regs[rb].strVal <= regs[rc].strVal)
+      regs[ra].intVal = ord(regs[rb].strVal <= regs[rc].strVal)
     of opcLtStr:
       decodeBC(nkIntLit)
-      regs[ra].intVal = Ord(regs[rb].strVal < regs[rc].strVal)
+      regs[ra].intVal = ord(regs[rb].strVal < regs[rc].strVal)
     of opcLeSet:
       decodeBC(nkIntLit)
-      regs[ra].intVal = Ord(containsSets(regs[rb], regs[rc]))
+      regs[ra].intVal = ord(containsSets(regs[rb], regs[rc]))
     of opcEqSet: 
       decodeBC(nkIntLit)
-      regs[ra].intVal = Ord(equalSets(regs[rb], regs[rc]))
+      regs[ra].intVal = ord(equalSets(regs[rb], regs[rc]))
     of opcLtSet:
       decodeBC(nkIntLit)
       let a = regs[rb]
       let b = regs[rc]
-      regs[ra].intVal = Ord(containsSets(a, b) and not equalSets(a, b))
+      regs[ra].intVal = ord(containsSets(a, b) and not equalSets(a, b))
     of opcMulSet:
       decodeBC(nkCurly)
       move(regs[ra].sons, nimsets.intersectSets(regs[rb], regs[rc]).sons)
@@ -508,12 +599,12 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
     of opcEcho:
       let rb = instr.regB
       for i in ra..ra+rb-1:
-        if regs[i].kind != nkStrLit: debug regs[i]
+        #if regs[i].kind != nkStrLit: debug regs[i]
         write(stdout, regs[i].strVal)
       writeln(stdout, "")
     of opcContainsSet:
       decodeBC(nkIntLit)
-      regs[ra].intVal = Ord(inSet(regs[rb], regs[rc]))
+      regs[ra].intVal = ord(inSet(regs[rb], regs[rc]))
     of opcSubStr:
       decodeBC(nkStrLit)
       inc pc
@@ -533,22 +624,55 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
       # dest = call regStart, n; where regStart = fn, arg1, ...
       let rb = instr.regB
       let rc = instr.regC
-      let prc = regs[rb].sym
-      let newPc = compile(c, prc)
-      var newFrame = PStackFrame(prc: prc, comesFrom: pc, next: tos)
-      newSeq(newFrame.slots, prc.position)
-      if not isEmptyType(prc.typ.sons[0]):
-        newFrame.slots[0] = getNullValue(prc.typ.sons[0], prc.info)
-      # pass every parameter by var (the language definition allows this):
-      for i in 1 .. rc-1:
-        newFrame.slots[i] = regs[rb+i]
-      # allocate the temporaries:
-      for i in rc .. <prc.position:
-        newFrame.slots[i] = newNode(nkEmpty)
-      tos = newFrame
-      move(regs, newFrame.slots)
-      # -1 for the following 'inc pc'
-      pc = newPc-1
+      let isClosure = regs[rb].kind == nkPar
+      let prc = if not isClosure: regs[rb].sym else: regs[rb].sons[0].sym
+      if sfImportc in prc.flags:
+        if allowFFI notin c.features:
+          globalError(c.debug[pc], errGenerated, "VM not allowed to do FFI")
+        # we pass 'tos.slots' instead of 'regs' so that the compiler can keep
+        # 'regs' in a register:
+        when hasFFI:
+          let prcValue = c.globals.sons[prc.position-1]
+          if prcValue.kind == nkEmpty:
+            globalError(c.debug[pc], errGenerated, "canot run " & prc.name.s)
+          let newValue = callForeignFunction(prcValue, prc.typ, tos.slots,
+                                             rb+1, rc-1, c.debug[pc])
+          if newValue.kind != nkEmpty:
+            assert instr.opcode == opcIndCallAsgn
+            asgnRef(regs[ra], newValue)
+        else:
+          globalError(c.debug[pc], errGenerated, "VM not built with FFI support")
+      elif prc.kind != skTemplate:
+        let newPc = compile(c, prc)
+        #echo "new pc ", newPc, " calling: ", prc.name.s
+        var newFrame = PStackFrame(prc: prc, comesFrom: pc, next: tos)
+        newSeq(newFrame.slots, prc.offset)
+        if not isEmptyType(prc.typ.sons[0]) or prc.kind == skMacro:
+          newFrame.slots[0] = getNullValue(prc.typ.sons[0], prc.info)
+        # pass every parameter by var (the language definition allows this):
+        for i in 1 .. rc-1:
+          newFrame.slots[i] = regs[rb+i]
+        if isClosure:
+          newFrame.slots[rc] = regs[rb].sons[1]
+        # allocate the temporaries:
+        for i in rc+ord(isClosure) .. <prc.offset:
+          newFrame.slots[i] = newNode(nkEmpty)
+        tos = newFrame
+        move(regs, newFrame.slots)
+        # -1 for the following 'inc pc'
+        pc = newPc-1
+      else:
+        # for 'getAst' support we need to support template expansion here:
+        let genSymOwner = if tos.next != nil and tos.next.prc != nil:
+                            tos.next.prc
+                          else:
+                            c.module
+        var macroCall = newNodeI(nkCall, c.debug[pc])
+        macroCall.add(newSymNode(prc))
+        for i in 1 .. rc-1: macroCall.add(regs[rb+i].skipMeta)
+        let a = evalTemplate(macroCall, prc, genSymOwner)
+        ensureKind(nkMetaNode)
+        setMeta(regs[ra], a)
     of opcTJmp:
       # jump Bx if A != 0
       let rbx = instr.regBx - wordExcess - 1 # -1 for the following 'inc pc'
@@ -564,18 +688,18 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
       let rbx = instr.regBx - wordExcess - 1 # -1 for the following 'inc pc'
       inc pc, rbx
     of opcBranch:
-      # we know the next instruction is a 'jmp':
+      # we know the next instruction is a 'fjmp':
       let branch = c.constants[instr.regBx-wordExcess]
       var cond = false
       for j in countup(0, sonsLen(branch) - 2): 
         if overlap(regs[ra], branch.sons[j]): 
           cond = true
           break
-      assert c.code[pc+1].opcode == opcJmp
+      assert c.code[pc+1].opcode == opcFJmp
       inc pc 
       # we skip this instruction so that the final 'inc(pc)' skips
       # the following jump
-      if cond:
+      if not cond:
         let instr2 = c.code[pc]
         let rbx = instr2.regBx - wordExcess - 1 # -1 for the following 'inc pc'
         inc pc, rbx
@@ -608,6 +732,7 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
     of opcNew:
       let typ = c.types[instr.regBx - wordExcess]
       regs[ra] = getNullValue(typ, regs[ra].info)
+      regs[ra].flags.incl nfIsRef
     of opcNewSeq:
       let typ = c.types[instr.regBx - wordExcess]
       inc pc
@@ -629,7 +754,11 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
       let typ = c.types[instr.regBx - wordExcess]
       regs[ra] = getNullValue(typ, c.debug[pc])
     of opcLdConst:
-      regs[ra] = c.constants.sons[instr.regBx - wordExcess]
+      let rb = instr.regBx - wordExcess
+      if regs[ra].isNil:
+        regs[ra] = copyTree(c.constants.sons[rb])
+      else:
+        moveConst(regs[ra], c.constants.sons[rb])
     of opcAsgnConst:
       let rb = instr.regBx - wordExcess
       if regs[ra].isNil:
@@ -637,67 +766,110 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
       else:
         asgnComplex(regs[ra], c.constants.sons[rb])
     of opcLdGlobal:
-      let rb = instr.regBx - wordExcess
+      let rb = instr.regBx - wordExcess - 1
       if regs[ra].isNil:
         regs[ra] = copyTree(c.globals.sons[rb])
       else:
         asgnComplex(regs[ra], c.globals.sons[rb])
-    of opcRepr, opcSetLenStr, opcSetLenSeq,
-        opcSwap, opcIsNil, opcOf,
-        opcCast, opcQuit, opcReset:
+    of opcRepr:
+      decodeB(nkStrLit)
+      regs[ra].strVal = renderTree(regs[rb].skipMeta, {renderNoComments})
+    of opcQuit:
+      if c.mode in {emRepl, emStaticExpr, emStaticStmt}:
+        Message(c.debug[pc], hintQuitCalled)
+        quit(int(getOrdValue(regs[ra])))
+      else:
+        return nil
+    of opcSetLenStr:
+      decodeB(nkStrLit)
+      regs[ra].strVal.setLen(regs[rb].getOrdValue.int)
+    of opcOf:
+      decodeBC(nkIntLit)
+      let typ = c.types[regs[rc].intVal.int]
+      regs[ra].intVal = ord(inheritanceDiff(regs[rb].typ, typ) >= 0)
+    of opcIs:
+      decodeBC(nkIntLit)
+      let t1 = regs[rb].typ.skipTypes({tyTypeDesc})
+      let t2 = c.types[regs[rc].intVal.int]
+      let match = if t2.kind == tyTypeClass: matchTypeClass(t2, t1)
+                  else: sameType(t1, t2)
+      regs[ra].intVal = ord(match)
+    of opcSetLenSeq:
+      decodeB(nkBracket)
+      let newLen = regs[rb].getOrdValue.int
+      setLen(regs[ra].sons, newLen)
+    of opcSwap, opcReset:
       internalError(c.debug[pc], "too implement")
+    of opcIsNil:
+      decodeB(nkIntLit)
+      regs[ra].intVal = ord(regs[rb].skipMeta.kind == nkNilLit)
     of opcNBindSym:
-      # trivial implementation:
-      let rb = instr.regB
-      regs[ra] = regs[rb].sons[1]
+      decodeBx(nkMetaNode)
+      setMeta(regs[ra], copyTree(c.constants.sons[rbx]))
     of opcNChild:
-      let rb = instr.regB
-      let rc = instr.regC
-      regs[ra] = regs[rb].sons[regs[rc].intVal.int]
+      decodeBC(nkMetaNode)
+      if regs[rb].kind != nkMetaNode:
+        internalError(c.debug[pc], "no MetaNode")
+      let idx = regs[rc].intVal.int
+      let src = regs[rb].uast
+      if src.kind notin {nkEmpty..nkNilLit} and idx <% src.len:
+        setMeta(regs[ra], src.sons[idx])
+      else:
+        stackTrace(c, tos, pc, errIndexOutOfBounds)
     of opcNSetChild:
-      let rb = instr.regB
-      let rc = instr.regC
-      regs[ra].sons[regs[rb].intVal.int] = regs[rc]
+      decodeBC(nkMetaNode)
+      let idx = regs[rb].intVal.int
+      var dest = regs[ra].uast
+      if dest.kind notin {nkEmpty..nkNilLit} and idx <% dest.len:
+        dest.sons[idx] = regs[rc].uast
+      else:
+        stackTrace(c, tos, pc, errIndexOutOfBounds)
     of opcNAdd:
-      declBC()
-      regs[rb].add(regs[rb])
-      regs[ra] = regs[rb]
+      decodeBC(nkMetaNode)
+      var u = regs[rb].uast
+      u.add(regs[rc].uast)
+      setMeta(regs[ra], u)
     of opcNAddMultiple:
-      declBC()
+      decodeBC(nkMetaNode)
       let x = regs[rc]
+      var u = regs[rb].uast
       # XXX can be optimized:
-      for i in 0.. <x.len: regs[rb].add(x.sons[i])
-      regs[ra] = regs[rb]
+      for i in 0.. <x.len: u.add(x.sons[i].skipMeta)
+      setMeta(regs[ra], u)
     of opcNKind:
       decodeB(nkIntLit)
-      regs[ra].intVal = ord(regs[rb].kind)
+      regs[ra].intVal = ord(regs[rb].uast.kind)
     of opcNIntVal:
       decodeB(nkIntLit)
-      let a = regs[rb]
+      let a = regs[rb].uast
       case a.kind
       of nkCharLit..nkInt64Lit: regs[ra].intVal = a.intVal
       else: stackTrace(c, tos, pc, errFieldXNotFound, "intVal")
     of opcNFloatVal:
       decodeB(nkFloatLit)
-      let a = regs[rb]
+      let a = regs[rb].uast
       case a.kind
       of nkFloatLit..nkFloat64Lit: regs[ra].floatVal = a.floatVal
       else: stackTrace(c, tos, pc, errFieldXNotFound, "floatVal")
     of opcNSymbol:
-      let rb = instr.regB
-      if regs[rb].kind != nkSym: 
+      decodeB(nkSym)
+      let a = regs[rb].uast
+      if a.kind == nkSym:
+        regs[ra].sym = a.sym
+      else:
         stackTrace(c, tos, pc, errFieldXNotFound, "symbol")
-      regs[ra] = regs[rb]
     of opcNIdent:
-      let rb = instr.regB
-      if regs[rb].kind != nkIdent: 
+      decodeB(nkIdent)
+      let a = regs[rb].uast
+      if a.kind == nkIdent:
+        regs[ra].ident = a.ident
+      else:
         stackTrace(c, tos, pc, errFieldXNotFound, "ident")
-      regs[ra] = regs[rb]
     of opcNGetType:
-      InternalError(c.debug[pc], "unknown opcode " & $instr.opcode)      
+      InternalError(c.debug[pc], "unknown opcode " & $instr.opcode)
     of opcNStrVal:
       decodeB(nkStrLit)
-      let a = regs[rb]
+      let a = regs[rb].uast
       case a.kind
       of nkStrLit..nkTripleStrLit: regs[ra].strVal = a.strVal
       else: stackTrace(c, tos, pc, errFieldXNotFound, "strVal")
@@ -714,25 +886,26 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
     of opcNHint:
       Message(c.debug[pc], hintUser, regs[ra].strVal)
     of opcParseExprToAst:
-      let rb = instr.regB
+      decodeB(nkMetaNode)
       # c.debug[pc].line.int - countLines(regs[rb].strVal) ?
       let ast = parseString(regs[rb].strVal, c.debug[pc].toFilename,
                             c.debug[pc].line.int)
       if sonsLen(ast) != 1:
         GlobalError(c.debug[pc], errExprExpected, "multiple statements")
-      regs[ra] = ast.sons[0]
+      setMeta(regs[ra], ast.sons[0])
     of opcParseStmtToAst:
-      let rb = instr.regB
+      decodeB(nkMetaNode)
       let ast = parseString(regs[rb].strVal, c.debug[pc].toFilename,
                             c.debug[pc].line.int)
-      regs[ra] = ast
+      setMeta(regs[ra], ast)
     of opcCallSite:
-      if c.callsite != nil: regs[ra] = c.callsite
+      ensureKind(nkMetaNode)
+      if c.callsite != nil: setMeta(regs[ra], c.callsite)
       else: stackTrace(c, tos, pc, errFieldXNotFound, "callsite")
     of opcNLineInfo:
-      let rb = instr.regB
+      decodeB(nkStrLit)
       let n = regs[rb]
-      regs[ra] = newStrNode(nkStrLit, n.info.toFileLineCol)
+      regs[ra].strVal = n.info.toFileLineCol
       regs[ra].info = c.debug[pc]
     of opcEqIdent:
       decodeBC(nkIntLit)
@@ -741,16 +914,16 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
       else:
         regs[ra].intVal = 0
     of opcStrToIdent:
-      let rb = instr.regB
+      decodeB(nkIdent)
       if regs[rb].kind notin {nkStrLit..nkTripleStrLit}:
         stackTrace(c, tos, pc, errFieldXNotFound, "strVal")
       else:
-        regs[ra] = newNodeI(nkIdent, c.debug[pc])
+        regs[ra].info = c.debug[pc]
         regs[ra].ident = getIdent(regs[rb].strVal)
     of opcIdentToStr:
-      let rb = instr.regB
+      decodeB(nkStrLit)
       let a = regs[rb]
-      regs[ra] = newNodeI(nkStrLit, c.debug[pc])
+      regs[ra].info = c.debug[pc]
       if a.kind == nkSym:
         regs[ra].strVal = a.sym.name.s
       elif a.kind == nkIdent:
@@ -767,84 +940,117 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame) =
         stackTrace(c, tos, pc, errGenerated,
           msgKindToString(errIllegalConvFromXtoY) % [
           "unknown type" , "unknown type"])
-    of opcNSetIntVal:
+    of opcCast:
       let rb = instr.regB
-      if regs[ra].kind in {nkCharLit..nkInt64Lit} and 
+      inc pc
+      let typ = c.types[c.code[pc].regBx - wordExcess]
+      when hasFFI:
+        let dest = fficast(regs[rb], typ)
+        asgnRef(regs[ra], dest)
+      else:
+        globalError(c.debug[pc], "cannot evaluate cast")
+    of opcNSetIntVal:
+      decodeB(nkMetaNode)
+      var dest = regs[ra].uast
+      if dest.kind in {nkCharLit..nkInt64Lit} and 
          regs[rb].kind in {nkCharLit..nkInt64Lit}:
-        regs[ra].intVal = regs[rb].intVal
-      else: 
+        dest.intVal = regs[rb].intVal
+      else:
         stackTrace(c, tos, pc, errFieldXNotFound, "intVal")
     of opcNSetFloatVal:
-      let rb = instr.regB
-      if regs[ra].kind in {nkFloatLit..nkFloat64Lit} and 
+      decodeB(nkMetaNode)
+      var dest = regs[ra].uast
+      if dest.kind in {nkFloatLit..nkFloat64Lit} and 
          regs[rb].kind in {nkFloatLit..nkFloat64Lit}:
-        regs[ra].floatVal = regs[rb].floatVal
+        dest.floatVal = regs[rb].floatVal
       else: 
         stackTrace(c, tos, pc, errFieldXNotFound, "floatVal")
     of opcNSetSymbol:
-      let rb = instr.regB
-      if regs[ra].kind == nkSym and regs[rb].kind == nkSym:
-        regs[ra].sym = regs[rb].sym
+      decodeB(nkMetaNode)
+      var dest = regs[ra].uast
+      if dest.kind == nkSym and regs[rb].kind == nkSym:
+        dest.sym = regs[rb].sym
       else: 
         stackTrace(c, tos, pc, errFieldXNotFound, "symbol")
     of opcNSetIdent:
-      let rb = instr.regB
-      if regs[ra].kind == nkIdent and regs[rb].kind == nkIdent:
-        regs[ra].ident = regs[rb].ident
+      decodeB(nkMetaNode)
+      var dest = regs[ra].uast
+      if dest.kind == nkIdent and regs[rb].kind == nkIdent:
+        dest.ident = regs[rb].ident
       else: 
         stackTrace(c, tos, pc, errFieldXNotFound, "ident")
     of opcNSetType:
-      let b = regs[instr.regB]
+      decodeB(nkMetaNode)
+      let b = regs[rb].skipMeta
       InternalAssert b.kind == nkSym and b.sym.kind == skType
-      regs[ra].typ = b.sym.typ
+      regs[ra].uast.typ = b.sym.typ
     of opcNSetStrVal:
-      let rb = instr.regB
-      if regs[ra].kind in {nkStrLit..nkTripleStrLit} and 
+      decodeB(nkMetaNode)
+      var dest = regs[ra].uast
+      if dest.kind in {nkStrLit..nkTripleStrLit} and 
          regs[rb].kind in {nkStrLit..nkTripleStrLit}:
-        regs[ra].strVal = regs[rb].strVal
+        dest.strVal = regs[rb].strVal
       else:
         stackTrace(c, tos, pc, errFieldXNotFound, "strVal")
     of opcNNewNimNode:
-      let rb = instr.regB
-      let rc = instr.regC
+      decodeBC(nkMetaNode)
       var k = regs[rb].intVal
-      if k < 0 or k > ord(high(TNodeKind)): 
+      if k < 0 or k > ord(high(TNodeKind)) or k == ord(nkMetaNode):
         internalError(c.debug[pc],
-          "request to create a NimNode with invalid kind")
-      regs[ra] = newNodeI(TNodeKind(int(k)), 
-        if regs[rc].kind == nkNilLit: c.debug[pc] else: regs[rc].info)
+          "request to create a NimNode of invalid kind")
+      let cc = regs[rc].skipMeta
+      setMeta(regs[ra], newNodeI(TNodeKind(int(k)), 
+        if cc.kind == nkNilLit: c.debug[pc] else: cc.info))
+      regs[ra].sons[0].flags.incl nfIsRef
     of opcNCopyNimNode:
-      let rb = instr.regB
-      regs[ra] = copyNode(regs[rb])
+      decodeB(nkMetaNode)
+      setMeta(regs[ra], copyNode(regs[rb]))
     of opcNCopyNimTree:
-      let rb = instr.regB
-      regs[ra] = copyTree(regs[rb])
+      decodeB(nkMetaNode)
+      setMeta(regs[ra], copyTree(regs[rb]))
     of opcNDel:
-      let rb = instr.regB
-      let rc = instr.regC
+      decodeBC(nkMetaNode)
+      let bb = regs[rb].intVal.int
       for i in countup(0, regs[rc].intVal.int-1):
-        delSon(regs[ra], regs[rb].intVal.int)
+        delSon(regs[ra].uast, bb)
     of opcGenSym:
-      let k = regs[instr.regB].intVal
-      let b = regs[instr.regC]
-      let name = if b.strVal.len == 0: ":tmp" else: b.strVal
+      decodeBC(nkMetaNode)
+      let k = regs[rb].intVal
+      let name = if regs[rc].strVal.len == 0: ":tmp" else: regs[rc].strVal
       if k < 0 or k > ord(high(TSymKind)):
         internalError(c.debug[pc], "request to create symbol of invalid kind")
-      regs[ra] = newSymNode(newSym(k.TSymKind, name.getIdent, c.module,
-                            c.debug[pc]))
-      incl(regs[ra].sym.flags, sfGenSym)
+      var sym = newSym(k.TSymKind, name.getIdent, c.module, c.debug[pc])
+      incl(sym.flags, sfGenSym)
+      setMeta(regs[ra], newSymNode(sym))
     of opcTypeTrait:
       # XXX only supports 'name' for now; we can use regC to encode the
       # type trait operation
       decodeB(nkStrLit)
       let typ = regs[rb].sym.typ.skipTypes({tyTypeDesc})
       regs[ra].strVal = typ.typeToString(preferExported)
+    of opcGlobalOnce:
+      let rb = instr.regBx
+      if c.globals.sons[rb - wordExcess - 1].kind != nkEmpty:
+        # skip initialization instructions:
+        while true:
+          inc pc
+          if c.code[pc].opcode in {opcWrGlobal, opcWrGlobalRef} and
+             c.code[pc].regBx == rb:
+            break
+    of opcGlobalAlias:
+      let rb = instr.regBx - wordExcess - 1
+      regs[ra] = c.globals.sons[rb]
     inc pc
 
-proc execute(c: PCtx, start: int) =
+proc fixType(result, n: PNode) {.inline.} =
+  # XXX do it deeply for complex values
+  #if result.typ.isNil: result.typ = n.typ
+
+proc execute(c: PCtx, start: int): PNode =
   var tos = PStackFrame(prc: nil, comesFrom: 0, next: nil)
   newSeq(tos.slots, c.prc.maxSlots)
-  rawExecute(c, start, tos)
+  for i in 0 .. <c.prc.maxSlots: tos.slots[i] = newNode(nkEmpty)
+  result = rawExecute(c, start, tos)
 
 proc evalStmt*(c: PCtx, n: PNode) =
   let start = genStmt(c, n)
@@ -857,12 +1063,30 @@ proc evalExpr*(c: PCtx, n: PNode): PNode =
   let start = genExpr(c, n)
   assert c.code[start].opcode != opcEof
   result = execute(c, start)
+  if not result.isNil:
+    result = result.skipMeta
+    fixType(result, n)
+
+# for now we share the 'globals' environment. XXX Coming soon: An API for
+# storing&loading the 'globals' environment to get what a component system
+# requires.
+var
+  globalCtx: PCtx
+
+proc setupGlobalCtx(module: PSym) =
+  if globalCtx.isNil: globalCtx = newCtx(module)
+  else: refresh(globalCtx, module)
 
 proc myOpen(module: PSym): PPassContext =
   #var c = newEvalContext(module, emRepl)
   #c.features = {allowCast, allowFFI, allowInfiniteLoops}
   #pushStackFrame(c, newStackFrame())
-  result = newCtx(module)
+
+  # XXX produce a new 'globals' environment here:
+  setupGlobalCtx(module)
+  result = globalCtx
+  when hasFFI:
+    globalCtx.features = {allowFFI, allowCast}
 
 var oldErrorCount: int
 
@@ -875,50 +1099,70 @@ proc myProcess(c: PPassContext, n: PNode): PNode =
     result = n
   oldErrorCount = msgs.gErrorCounter
 
-const vmPass* = makePass(myOpen, nil, myProcess, myProcess)
+const evalPass* = makePass(myOpen, nil, myProcess, myProcess)
 
-proc evalConstExprAux(module, prc: PSym, e: PNode, mode: TEvalMode): PNode = 
-  var p = newCtx(module)
-  var s = newStackFrame()
-  s.call = e
-  s.prc = prc
-  pushStackFrame(p, s)
-  result = tryEval(p, e)
-  if result != nil and result.kind == nkExceptBranch: result = nil
-  popStackFrame(p)
+proc evalConstExprAux(module, prc: PSym, n: PNode, mode: TEvalMode): PNode =
+  setupGlobalCtx(module)
+  var c = globalCtx
+  c.mode = mode
+  let start = genExpr(c, n, requiresValue = mode!=emStaticStmt)
+  assert c.code[start].opcode != opcEof
+  var tos = PStackFrame(prc: prc, comesFrom: 0, next: nil)
+  newSeq(tos.slots, c.prc.maxSlots)
+  for i in 0 .. <c.prc.maxSlots: tos.slots[i] = newNode(nkEmpty)
+  result = rawExecute(c, start, tos)
+  fixType(result, n)
 
 proc evalConstExpr*(module: PSym, e: PNode): PNode = 
   result = evalConstExprAux(module, nil, e, emConst)
 
-proc evalStaticExpr*(module: PSym, e: PNode, prc: PSym): PNode = 
-  result = evalConstExprAux(module, prc, e, emStatic)
+proc evalStaticExpr*(module: PSym, e: PNode, prc: PSym): PNode =
+  result = evalConstExprAux(module, prc, e, emStaticExpr)
+
+proc evalStaticStmt*(module: PSym, e: PNode, prc: PSym) =
+  discard evalConstExprAux(module, prc, e, emStaticStmt)
 
 proc setupMacroParam(x: PNode): PNode =
   result = x
   if result.kind in {nkHiddenSubConv, nkHiddenStdConv}: result = result.sons[1]
+  let y = result
+  y.flags.incl nfIsRef
+  result = newNode(nkMetaNode)
+  result.add y
+  result.typ = x.typ
 
-proc evalMacroCall(c: PEvalContext, n, nOrig: PNode, sym: PSym): PNode =
+var evalMacroCounter: int
+
+proc evalMacroCall*(module: PSym, n, nOrig: PNode, sym: PSym): PNode =
   # XXX GlobalError() is ugly here, but I don't know a better solution for now
-  inc(evalTemplateCounter)
-  if evalTemplateCounter > 100:
+  inc(evalMacroCounter)
+  if evalMacroCounter > 100:
     GlobalError(n.info, errTemplateInstantiationTooNested)
+  setupGlobalCtx(module)
+  var c = globalCtx
 
   c.callsite = nOrig
-  let body = optBody(c, sym)
-  let start = genStmt(c, body)
+  let start = genProc(c, sym)
 
   var tos = PStackFrame(prc: sym, comesFrom: 0, next: nil)
-  newSeq(tos.slots, c.prc.maxSlots)
+  let maxSlots = sym.offset
+  newSeq(tos.slots, maxSlots)
   # setup arguments:
   var L = n.safeLen
   if L == 0: L = 1
-  InternalAssert tos.slots.len >= L
+  # This is wrong for tests/reject/tind1.nim where the passed 'else' part
+  # doesn't end up in the parameter:
+  #InternalAssert tos.slots.len >= L
   # return value:
   tos.slots[0] = newNodeIT(nkNilLit, n.info, sym.typ.sons[0])
   # setup parameters:
-  for i in 1 .. < L: tos.slots[i] = setupMacroParam(n.sons[i])
-  rawExecute(c, start, tos)
-  result = tos.slots[0]
+  for i in 1 .. < min(tos.slots.len, L):
+    tos.slots[i] = setupMacroParam(n.sons[i])
+  # temporary storage:
+  for i in L .. <maxSlots: tos.slots[i] = newNode(nkEmpty)
+  result = rawExecute(c, start, tos)
   if cyclicTree(result): GlobalError(n.info, errCyclicTree)
-  dec(evalTemplateCounter)
+  dec(evalMacroCounter)
+  if result != nil:
+    result = result.skipMeta
   c.callsite = nil
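
With evalStaticStmt and the reworked evalConstExprAux in place, 'static' statements and macro bodies are compiled by vmgen and executed through rawExecute instead of the old tree-walking evaluator. A small user-level sketch of the kind of code this path now serves (illustrative only, not part of the patch; assumes echo is usable during compile-time evaluation):

  proc triangular(n: int): int =
    for i in 1..n: inc(result, i)

  static:
    # executed by the register-based VM while compiling
    echo "triangular(10) = ", triangular(10)
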
diff --git a/compiler/vmdef.nim b/compiler/vmdef.nim
index d4b3d891d..b4b787798 100644
--- a/compiler/vmdef.nim
+++ b/compiler/vmdef.nim
@@ -51,16 +51,16 @@ type
     opcLenSeq,
     opcLenStr,
 
-    opcIncl, opcExcl, opcCard, opcMulInt, opcDivInt, opcModInt,
+    opcIncl, opcInclRange, opcExcl, opcCard, opcMulInt, opcDivInt, opcModInt,
     opcAddFloat, opcSubFloat, opcMulFloat, opcDivFloat, opcShrInt, opcShlInt,
     opcBitandInt, opcBitorInt, opcBitxorInt, opcAddu, opcSubu, opcMulu, 
     opcDivu, opcModu, opcEqInt, opcLeInt, opcLtInt, opcEqFloat, 
-    opcLeFloat, opcLtFloat, opcLeu, opcLtu, opcEqRef, opcXor, 
+    opcLeFloat, opcLtFloat, opcLeu, opcLtu, opcEqRef, opcEqNimrodNode, opcXor, 
     opcNot, opcUnaryMinusInt, opcUnaryMinusFloat, opcBitnotInt, 
     opcEqStr, opcLeStr, opcLtStr, opcEqSet, opcLeSet, opcLtSet,
     opcMulSet, opcPlusSet, opcMinusSet, opcSymdiffSet, opcConcatStr,
     opcContainsSet, opcRepr, opcSetLenStr, opcSetLenSeq,
-    opcSwap, opcIsNil, opcOf,
+    opcSwap, opcIsNil, opcOf, opcIs,
     opcSubStr, opcConv, opcCast, opcQuit, opcReset,
     
     opcAddStrCh,
@@ -101,7 +101,6 @@ type
     opcRaise,
     opcNChild,
     opcNSetChild,
-    opcNBindSym, # opcodes for the AST manipulation following
     opcCallSite,
     opcNewStr,
   
@@ -120,8 +119,11 @@ type
     opcAsgnConst, # dest = copy(constants[Bx])
     opcLdGlobal,  # dest = globals[Bx]
     opcLdImmInt,  # dest = immediate value
+    opcNBindSym,
     opcWrGlobal,
     opcWrGlobalRef,
+    opcGlobalAlias, # load an alias to a global into a register
+    opcGlobalOnce,  # used to introduce an assignment to a global once
     opcSetType,   # dest.typ = types[Bx]
     opcTypeTrait
 
@@ -129,6 +131,21 @@ type
     label*: PSym
     fixups*: seq[TPosition]
 
+  TEvalMode* = enum           ## reason for evaluation
+    emRepl,                   ## evaluate because in REPL mode
+    emConst,                  ## evaluate for 'const' according to spec
+    emOptimize,               ## evaluate for optimization purposes (same as
+                              ## emConst?)
+    emStaticExpr,             ## evaluate for enforced compile time eval
+                              ## ('static' context)
+    emStaticStmt              ## 'static' as a statement
+
+  TSandboxFlag* = enum        ## what the evaluation engine should allow
+    allowCast,                ## allow unsafe language feature: 'cast'
+    allowFFI,                 ## allow the FFI
+    allowInfiniteLoops        ## allow endless loops
+  TSandboxFlags* = set[TSandboxFlag]
+
   TSlotKind* = enum   # We try to re-use slots in a smart way to
                       # minimize allocations; however the VM supports arbitrary
                       # temporary slot usage. This is required for the parameter
@@ -146,6 +163,8 @@ type
     blocks*: seq[TBlock]    # blocks; temp data structure
     slots*: array[TRegister, tuple[inUse: bool, kind: TSlotKind]]
     maxSlots*: int
+    globals*: array[TRegister, int] # hack: to support passing globals byref
+                                    # we map a slot persistently to a global
     
   PCtx* = ref TCtx
   TCtx* = object of passes.TPassContext # code gen context
@@ -160,17 +179,22 @@ type
     prc*: PProc
     module*: PSym
     callsite*: PNode
+    mode*: TEvalMode
+    features*: TSandboxFlags
 
   TPosition* = distinct int
 
   PEvalContext* = PCtx
-
   
 proc newCtx*(module: PSym): PCtx =
   PCtx(code: @[], debug: @[],
-    globals: newNode(nkStmtList), constants: newNode(nkStmtList), types: @[],
+    globals: newNode(nkStmtListExpr), constants: newNode(nkStmtList), types: @[],
     prc: PProc(blocks: @[]), module: module)
 
+proc refresh*(c: PCtx, module: PSym) =
+  c.module = module
+  c.prc = PProc(blocks: @[])
+
 const
   firstABxInstr* = opcTJmp
   largeInstrs* = { # instructions which use 2 int32s instead of 1:
@@ -183,3 +207,5 @@ template regA*(x: TInstr): TRegister {.immediate.} = TRegister(x.uint32 shr 8'u3
 template regB*(x: TInstr): TRegister {.immediate.} = TRegister(x.uint32 shr 16'u32 and 0xff'u32)
 template regC*(x: TInstr): TRegister {.immediate.} = TRegister(x.uint32 shr 24'u32)
 template regBx*(x: TInstr): int {.immediate.} = (x.uint32 shr 16'u32).int
+
+template jmpDiff*(x: TInstr): int {.immediate.} = regBx(x) - wordExcess
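
The new mode and features fields are what the code generator consults before permitting 'cast', 'importc' symbols or FFI calls. A minimal sketch of configuring a context (newReplCtx is a hypothetical helper, not part of this patch; it mirrors how myOpen in vm.nim sets up globalCtx):

  proc newReplCtx(module: PSym): PCtx =
    result = newCtx(module)
    result.mode = emRepl
    when hasFFI:
      # only an FFI-enabled build may 'cast' or call foreign code
      result.features = {allowFFI, allowCast}
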
diff --git a/compiler/vmdeps.nim b/compiler/vmdeps.nim
index 2a40276d1..07100897b 100644
--- a/compiler/vmdeps.nim
+++ b/compiler/vmdeps.nim
@@ -32,62 +32,5 @@ proc opSlurp*(file: string, info: TLineInfo, module: PSym): string =
     appendToModule(module, newNode(nkIncludeStmt, info, @[
       newStrNode(nkStrLit, filename)]))
   except EIO:
-    result = ""
     LocalError(info, errCannotOpenFile, file)
-
-when false:
-  proc opExpandToAst*(c: PEvalContext, original: PNode): PNode =
-    var
-      n = original.copyTree
-      macroCall = n.sons[1]
-      expandedSym = macroCall.sons[0].sym
-
-    for i in countup(1, macroCall.sonsLen - 1):
-      macroCall.sons[i] = evalAux(c, macroCall.sons[i], {})
-
-    case expandedSym.kind
-    of skTemplate:
-      let genSymOwner = if c.tos != nil and c.tos.prc != nil:
-                          c.tos.prc 
-                        else:
-                          c.module
-      result = evalTemplate(macroCall, expandedSym, genSymOwner)
-    of skMacro:
-      # At this point macroCall.sons[0] is nkSym node.
-      # To be completely compatible with normal macro invocation,
-      # we want to replace it with nkIdent node featuring
-      # the original unmangled macro name.
-      macroCall.sons[0] = newIdentNode(expandedSym.name, expandedSym.info)
-      result = evalMacroCall(c, macroCall, original, expandedSym)
-    else:
-      InternalError(macroCall.info,
-        "ExpandToAst: expanded symbol is no macro or template")
-      result = emptyNode
-
-  proc opIs*(n: PNode): PNode =
-    InternalAssert n.sonsLen == 3 and
-      n[1].kind == nkSym and n[1].sym.kind == skType and
-      n[2].kind in {nkStrLit..nkTripleStrLit, nkType}
-    
-    let t1 = n[1].sym.typ
-
-    if n[2].kind in {nkStrLit..nkTripleStrLit}:
-      case n[2].strVal.normalize
-      of "closure":
-        let t = skipTypes(t1, abstractRange)
-        result = newIntNode(nkIntLit, ord(t.kind == tyProc and
-                                          t.callConv == ccClosure and 
-                                          tfIterator notin t.flags))
-      of "iterator":
-        let t = skipTypes(t1, abstractRange)
-        result = newIntNode(nkIntLit, ord(t.kind == tyProc and
-                                          t.callConv == ccClosure and 
-                                          tfIterator in t.flags))
-    else:
-      let t2 = n[2].typ
-      var match = if t2.kind == tyTypeClass: matchTypeClass(t2, t1)
-                  else: sameType(t1, t2)
-      result = newIntNode(nkIntLit, ord(match))
-
-    result.typ = n.typ
-
+    result = ""
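
The helpers deleted above under 'when false' are superseded: expandToAst is now handled as a magic in vmgen.nim below, and the 'is' semantics (exact sameType comparison plus type-class matching via matchTypeClass) live in the new opcIs instruction in vm.nim. A user-level illustration of the two branches of that check (illustrative only, not taken from the patch):

  type TPerson = object
    name: string

  const exactMatch = TPerson is TPerson   # exact comparison, sameType branch
  const classMatch = TPerson is object    # type-class branch, matchTypeClass
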
diff --git a/compiler/vmgen.nim b/compiler/vmgen.nim
index 84d82e117..ab120f008 100644
--- a/compiler/vmgen.nim
+++ b/compiler/vmgen.nim
@@ -11,19 +11,22 @@
 
 import
   unsigned, strutils, ast, astalgo, types, msgs, renderer, vmdef, 
-  trees, intsets, rodread, magicsys
+  trees, intsets, rodread, magicsys, options
 
-proc codeListing(c: PCtx, result: var string) =
+when hasFFI:
+  import evalffi
+
+proc codeListing(c: PCtx, result: var string, start=0) =
   # first iteration: compute all necessary labels:
   var jumpTargets = initIntSet()
   
-  for i in 0.. < c.code.len:
+  for i in start.. < c.code.len:
     let x = c.code[i]
     if x.opcode in relativeJumps:
       jumpTargets.incl(i+x.regBx-wordExcess)
 
   # for debugging purposes
-  var i = 0
+  var i = start
   while i < c.code.len:
     if i in jumpTargets: result.addf("L$1:\n", i)
     let x = c.code[i]
@@ -45,9 +48,9 @@ proc codeListing(c: PCtx, result: var string) =
     result.add("\n")
     inc i
 
-proc echoCode*(c: PCtx) =
+proc echoCode*(c: PCtx, start=0) {.deprecated.} =
   var buf = ""
-  codeListing(c, buf)
+  codeListing(c, buf, start)
   echo buf
 
 proc gABC(ctx: PCtx; n: PNode; opc: TOpcode; a, b, c: TRegister = 0) =
@@ -112,8 +115,10 @@ const
 
 proc getTemp(c: PCtx; typ: PType): TRegister =
   let c = c.prc
-  # we prefer the same slot kind here for efficiency:
-  let k = typ.getSlotKind
+  # we prefer the same slot kind here for efficiency. Unfortunately for
+  # discardable return types we may not know the desired type. This can happen
+  # for e.g. mNAdd[Multiple]:
+  let k = if typ.isNil: slotTempComplex else: typ.getSlotKind
   for i in 0 .. c.maxSlots-1:
     if c.slots[i].kind == k and not c.slots[i].inUse:
       c.slots[i].inUse = true
@@ -129,6 +134,21 @@ proc getTemp(c: PCtx; typ: PType): TRegister =
   c.slots[c.maxSlots] = (inUse: true, kind: k)
   inc c.maxSlots
 
+proc getGlobalSlot(c: PCtx; n: PNode; s: PSym): TRegister =
+  let p = c.prc
+  for i in 0 .. p.maxSlots-1:
+    if p.globals[i] == s.id: return TRegister(i)
+
+  result = TRegister(p.maxSlots)
+  p.slots[p.maxSlots] = (inUse: true, kind: slotFixedVar)
+  p.globals[p.maxSlots] = s.id
+  inc p.maxSlots
+  # XXX this is still not correct! We need to load the global in a proc init
+  # section, otherwise control flow could lead to a usage before it's been
+  # loaded.
+  c.gABx(n, opcGlobalAlias, result, s.position)
+  # XXX add some internal asserts here
+
 proc freeTemp(c: PCtx; r: TRegister) =
   let c = c.prc
   if c.slots[r].kind >= slotSomeTemp: c.slots[r].inUse = false
@@ -179,13 +199,16 @@ proc gen(c: PCtx; n: PNode; dest: TRegister) =
 proc gen(c: PCtx; n: PNode) =
   var tmp: TDest = -1
   gen(c, n, tmp)
-  InternalAssert tmp < 0
+  #if n.typ.isEmptyType: InternalAssert tmp < 0
 
 proc genx(c: PCtx; n: PNode): TRegister =
   var tmp: TDest = -1
   gen(c, n, tmp)
   result = TRegister(tmp)
 
+proc clearDest(n: PNode; dest: var TDest) {.inline.} =
+  if isEmptyType(n.typ): dest = -1
+
 proc isNotOpr(n: PNode): bool =
   n.kind in nkCallKinds and n.sons[0].kind == nkSym and
     n.sons[0].sym.magic == mNot
@@ -224,6 +247,7 @@ proc genWhile(c: PCtx; n: PNode) =
 proc genBlock(c: PCtx; n: PNode; dest: var TDest) =
   withBlock(n.sons[0].sym):
     c.gen(n.sons[1], dest)
+  clearDest(n, dest)
 
 proc genBreak(c: PCtx; n: PNode) =
   let L1 = c.xjmp(n, opcJmp)
@@ -268,6 +292,7 @@ proc genIf(c: PCtx, n: PNode; dest: var TDest) =
     else:
       c.gen(it.sons[0], dest)
   for endPos in endings: c.patch(endPos)
+  clearDest(n, dest)
 
 proc genAndOr(c: PCtx; n: PNode; opc: TOpcode; dest: var TDest) =
   #   asgn dest, a
@@ -275,9 +300,9 @@ proc genAndOr(c: PCtx; n: PNode; opc: TOpcode; dest: var TDest) =
   #   asgn dest, b
   # L1:
   if dest < 0: dest = getTemp(c, n.typ)
-  c.gen(n.sons[0], dest)
-  let L1 = c.xjmp(n, opc)
   c.gen(n.sons[1], dest)
+  let L1 = c.xjmp(n, opc, dest)
+  c.gen(n.sons[2], dest)
   c.patch(L1)
 
 proc rawGenLiteral(c: PCtx; n: PNode): int =
@@ -296,7 +321,8 @@ proc sameConstant*(a, b: PNode): bool =
     of nkCharLit..nkInt64Lit: result = a.intVal == b.intVal
     of nkFloatLit..nkFloat64Lit: result = a.floatVal == b.floatVal
     of nkStrLit..nkTripleStrLit: result = a.strVal == b.strVal
-    of nkEmpty, nkNilLit, nkType: result = true
+    of nkType: result = a.typ == b.typ
+    of nkEmpty, nkNilLit: result = true
     else: 
       if sonsLen(a) == sonsLen(b): 
         for i in countup(0, sonsLen(a) - 1): 
@@ -309,6 +335,11 @@ proc genLiteral(c: PCtx; n: PNode): int =
     if sameConstant(c.constants[i], n): return i
   result = rawGenLiteral(c, n)
 
+proc unused(n: PNode; x: TDest) {.inline.} =
+  if x >= 0: 
+    #debug(n)
+    InternalError(n.info, "not unused")
+
 proc genCase(c: PCtx; n: PNode; dest: var TDest) =
   #  if (!expr1) goto L1;
   #    thenPart
@@ -320,7 +351,10 @@ proc genCase(c: PCtx; n: PNode; dest: var TDest) =
   #  L2:
   #    elsePart
   #  Lend:
-  if dest < 0 and not isEmptyType(n.typ): dest = getTemp(c, n.typ)
+  if not isEmptyType(n.typ):
+    if dest < 0: dest = getTemp(c, n.typ)
+  else:
+    unused(n, dest)
   var endings: seq[TPosition] = @[]
   withTemp(tmp, n.sons[0].typ):
     c.gen(n.sons[0], tmp)
@@ -340,6 +374,7 @@ proc genCase(c: PCtx; n: PNode; dest: var TDest) =
           endings.add(c.xjmp(it.lastSon, opcJmp, 0))
         c.patch(elsePos)
   for endPos in endings: c.patch(endPos)
+  clearDest(n, dest)
 
 proc genType(c: PCtx; typ: PType): int =
   for i, t in c.types:
@@ -379,6 +414,7 @@ proc genTry(c: PCtx; n: PNode; dest: var TDest) =
   if fin.kind == nkFinally:
     c.gen(fin.sons[0], dest)
   c.gABx(fin, opcFinallyEnd, 0, 0)
+  clearDest(n, dest)
 
 proc genRaise(c: PCtx; n: PNode) =
   let dest = genx(c, n.sons[0])
@@ -393,14 +429,20 @@ proc genReturn(c: PCtx; n: PNode) =
 proc genCall(c: PCtx; n: PNode; dest: var TDest) =
   if dest < 0 and not isEmptyType(n.typ): dest = getTemp(c, n.typ)
   let x = c.getTempRange(n.len, slotTempUnknown)
-  for i in 0.. <n.len: 
+  # varargs need 'opcSetType' for the FFI support:
+  let fntyp = n.sons[0].typ
+  for i in 0.. <n.len:
     var r: TRegister = x+i
     c.gen(n.sons[i], r)
+    if i >= fntyp.len:
+      internalAssert tfVarargs in fntyp.flags
+      c.gABx(n, opcSetType, r, c.genType(n.sons[i].typ))
   if dest < 0:
     c.gABC(n, opcIndCall, 0, x, n.len)
   else:
     c.gABC(n, opcIndCallAsgn, dest, x, n.len)
   c.freeTempRange(x, n.len)
+  clearDest(n, dest)
 
 proc genNew(c: PCtx; n: PNode) =
   let dest = c.genx(n.sons[1])
@@ -463,7 +505,7 @@ proc genBinaryStmt(c: PCtx; n: PNode; opc: TOpcode) =
   c.freeTemp(tmp)
 
 proc genUnaryStmt(c: PCtx; n: PNode; opc: TOpcode) =
-  let tmp = c.genx(n.sons[2])
+  let tmp = c.genx(n.sons[1])
   c.gABC(n, opc, tmp, 0, 0)
   c.freeTemp(tmp)
 
@@ -493,9 +535,6 @@ proc genAddSubInt(c: PCtx; n: PNode; dest: var TDest; opc: TOpcode) =
   else:
     genBinaryABC(c, n, dest, opc)
 
-proc unused(n: PNode; x: TDest) {.inline.} =
-  if x >= 0: InternalError(n.info, "not unused")
-
 proc genConv(c: PCtx; n, arg: PNode; dest: var TDest; opc=opcConv) =  
   let tmp = c.genx(arg)
   c.gABx(n, opcSetType, tmp, genType(c, arg.typ))
@@ -508,7 +547,7 @@ proc genCard(c: PCtx; n: PNode; dest: var TDest) =
   let tmp = c.genx(n.sons[1])
   if dest < 0: dest = c.getTemp(n.typ)
   c.genSetType(n.sons[1], tmp)
-  c.gABC(n, opc, dest, tmp)
+  c.gABC(n, opcCard, dest, tmp)
   c.freeTemp(tmp)
 
 proc genMagic(c: PCtx; n: PNode; dest: var TDest) =
@@ -659,12 +698,16 @@ proc genMagic(c: PCtx; n: PNode; dest: var TDest) =
     unused(n, dest)
     var d = c.genx(n.sons[1])
     c.gABC(n, opcReset, d)
-  of mOf: 
+  of mOf, mIs:
     if dest < 0: dest = c.getTemp(n.typ)
     var tmp = c.genx(n.sons[1])
-    c.gABC(n, opcOf, dest, tmp)
-    c.gABx(n, opcOf, 0, c.genType(n.sons[2].typ.skipTypes(abstractPtrs)))
+    var idx = c.getTemp(getSysType(tyInt))
+    var typ = n.sons[2].typ
+    if m == mOf: typ = typ.skipTypes(abstractPtrs)
+    c.gABx(n, opcLdImmInt, idx, c.genType(typ))
+    c.gABC(n, if m == mOf: opcOf else: opcIs, dest, tmp, idx)
     c.freeTemp(tmp)
+    c.freeTemp(idx)
   of mSizeOf:
     GlobalError(n.info, errCannotInterpretNodeX, renderTree(n))
   of mHigh:
@@ -696,16 +739,12 @@ proc genMagic(c: PCtx; n: PNode; dest: var TDest) =
     genUnaryABC(c, n, dest, opcParseExprToAst)
   of mParseStmtToAst:
     genUnaryABC(c, n, dest, opcParseStmtToAst)
-  of mExpandToAst:
-    InternalError(n.info, "cannot generate code for: " & $m)
   of mTypeTrait: 
     let tmp = c.genx(n.sons[1])
     if dest < 0: dest = c.getTemp(n.typ)
-    c.gABx(n, opcSetType, tmp, c.genType(n.sons[1]))
+    c.gABx(n, opcSetType, tmp, c.genType(n.sons[1].typ))
     c.gABC(n, opcTypeTrait, dest, tmp)
     c.freeTemp(tmp)
-  of mIs:
-    InternalError(n.info, "cannot generate code for: " & $m)
   of mSlurp: genUnaryABC(c, n, dest, opcSlurp)
   of mStaticExec: genBinaryABC(c, n, dest, opcGorge)
   of mNLen: genUnaryABI(c, n, dest, opcLenSeq)
@@ -750,11 +789,17 @@ proc genMagic(c: PCtx; n: PNode; dest: var TDest) =
   of mNNewNimNode: genBinaryABC(c, n, dest, opcNNewNimNode)
   of mNCopyNimNode: genUnaryABC(c, n, dest, opcNCopyNimNode)
   of mNCopyNimTree: genUnaryABC(c, n, dest, opcNCopyNimTree)
-  of mNBindSym: genUnaryABC(c, n, dest, opcNBindSym)
+  of mNBindSym:
+    if n[1].kind in {nkClosedSymChoice, nkOpenSymChoice, nkSym}:
+      let idx = c.genLiteral(n[1])
+      if dest < 0: dest = c.getTemp(n.typ)
+      c.gABx(n, opcNBindSym, dest, idx)
+    else:
+      internalError(n.info, "invalid bindSym usage")
   of mStrToIdent: genUnaryABC(c, n, dest, opcStrToIdent)
   of mIdentToStr: genUnaryABC(c, n, dest, opcIdentToStr)
   of mEqIdent: genBinaryABC(c, n, dest, opcEqIdent)
-  of mEqNimrodNode: genBinaryABC(c, n, dest, opcEqRef)
+  of mEqNimrodNode: genBinaryABC(c, n, dest, opcEqNimrodNode)
   of mNLineInfo: genUnaryABC(c, n, dest, opcNLineInfo)
   of mNHint: 
     unused(n, dest)
@@ -771,6 +816,16 @@ proc genMagic(c: PCtx; n: PNode; dest: var TDest) =
   of mNGenSym: genBinaryABC(c, n, dest, opcGenSym)
   of mMinI, mMaxI, mMinI64, mMaxI64, mAbsF64, mMinF64, mMaxF64, mAbsI, mAbsI64:
     c.genCall(n, dest)
+  of mExpandToAst:
+    if n.len != 2:
+      globalError(n.info, errGenerated, "expandToAst requires 1 argument")
+    let arg = n.sons[1]
+    if arg.kind in nkCallKinds:
+      #if arg[0].kind != nkSym or arg[0].sym.kind notin {skTemplate, skMacro}:
+      #      "ExpandToAst: expanded symbol is no macro or template"
+      c.genCall(arg, dest)
+    else:
+      globalError(n.info, "expandToAst requires a call expression")
   else:
     # mGCref, mGCunref, 
     InternalError(n.info, "cannot generate code for: " & $m)
@@ -823,7 +878,7 @@ proc whichAsgnOpc(n: PNode): TOpcode =
     opcAsgnStr
   of tyFloat..tyFloat128:
     opcAsgnFloat
-  of tyRef, tyNil:
+  of tyRef, tyNil, tyVar:
     opcAsgnRef
   else:
     opcAsgnComplex
@@ -839,6 +894,8 @@ proc genAsgn(c: PCtx; dest: TDest; ri: PNode; requiresCopy: bool) =
   gABC(c, ri, whichAsgnOpc(ri), dest, tmp)
   c.freeTemp(tmp)
 
+template isGlobal(s: PSym): bool = sfGlobal in s.flags and s.kind != skForVar
+
 proc genAsgn(c: PCtx; le, ri: PNode; requiresCopy: bool) =
   case le.kind
   of nkBracketExpr:
@@ -850,15 +907,17 @@ proc genAsgn(c: PCtx; le, ri: PNode; requiresCopy: bool) =
     else:
       c.gABC(le, whichAsgnOpc(le, opcWrArr), dest, idx, tmp)
     c.freeTemp(tmp)
-  of nkDotExpr:
-    let dest = c.genx(le.sons[0])
-    let idx = c.genx(le.sons[1])
+  of nkDotExpr, nkCheckedFieldExpr:
+    # XXX field checks here
+    let left = if le.kind == nkDotExpr: le else: le.sons[0]
+    let dest = c.genx(left.sons[0])
+    let idx = c.genx(left.sons[1])
     let tmp = c.genx(ri)
-    c.gABC(le, whichAsgnOpc(le, opcWrObj), dest, idx, tmp)
+    c.gABC(left, whichAsgnOpc(left, opcWrObj), dest, idx, tmp)
     c.freeTemp(tmp)
   of nkSym:
     let s = le.sym
-    if sfGlobal in s.flags:
+    if s.isGlobal:
       withTemp(tmp, le.typ):
         gen(c, ri, tmp)
         c.gABx(le, whichAsgnOpc(le, opcWrGlobal), tmp, s.position)
@@ -878,15 +937,52 @@ proc genLit(c: PCtx; n: PNode; dest: var TDest) =
   let lit = genLiteral(c, n)
   c.gABx(n, opc, dest, lit)
 
+proc genTypeLit(c: PCtx; t: PType; dest: var TDest) =
+  var n = newNode(nkType)
+  n.typ = t
+  genLit(c, n, dest)
+
+proc importcSym(c: PCtx; info: TLineInfo; s: PSym) =
+  when hasFFI:
+    if allowFFI in c.features:
+      c.globals.add(importcSymbol(s))
+      s.position = c.globals.len
+    else:
+      localError(info, errGenerated, "VM is not allowed to 'importc'")
+  else:
+    localError(info, errGenerated,
+               "cannot 'importc' variable at compile time")
+
+proc cannotEval(n: PNode) {.noinline.} =
+  globalError(n.info, errGenerated, "cannot evaluate at compile time: " &
+    n.renderTree)
+
+proc genGlobalInit(c: PCtx; n: PNode; s: PSym) =
+  c.globals.add(emptyNode.copyNode)
+  s.position = c.globals.len
+  # This is rather hard to support, due to the laziness of the VM code
+  # generator. See tests/compile/tmacro2 for why this is necessary:
+  #   var decls{.compileTime.}: seq[PNimrodNode] = @[]
+  c.gABx(n, opcGlobalOnce, 0, s.position)
+  let tmp = c.genx(s.ast)
+  c.gABx(n, whichAsgnOpc(n, opcWrGlobal), tmp, s.position)
+  c.freeTemp(tmp)
+
 proc genRdVar(c: PCtx; n: PNode; dest: var TDest) =
   let s = n.sym
-  if sfGlobal in s.flags:
-    if dest < 0: dest = c.getTemp(s.typ)
+  if s.isGlobal:
+    if sfCompileTime in s.flags or c.mode == emRepl:
+      discard
+    else:
+      cannotEval(n)
     if s.position == 0:
-      c.globals.add(s.ast)
-      s.position = c.globals.len
-      # XXX var g = codeHere() ?
-    c.gABx(n, opcLdGlobal, dest, s.position)
+      if sfImportc in s.flags: c.importcSym(n.info, s)
+      else: genGlobalInit(c, n, s)
+    if dest < 0:
+      dest = c.getGlobalSlot(n, s)
+      #c.gABx(n, opcAliasGlobal, dest, s.position)
+    else:
+      c.gABx(n, opcLdGlobal, dest, s.position)
   else:
     if s.position > 0 or (s.position == 0 and s.kind in {skParam, skResult}):
       if dest < 0:
@@ -895,7 +991,9 @@ proc genRdVar(c: PCtx; n: PNode; dest: var TDest) =
         # we need to generate an assignment:
         genAsgn(c, dest, n, c.prc.slots[dest].kind >= slotSomeTemp)
     else:
-      InternalError(n.info, s.name.s & " " & $s.position)
+      # see tests/t99bott for an example that triggers it:
+      cannotEval(n)
+      #InternalError(n.info, s.name.s & " " & $s.position)
 
 proc genAccess(c: PCtx; n: PNode; dest: var TDest; opc: TOpcode) =
   let a = c.genx(n.sons[0])
@@ -908,6 +1006,10 @@ proc genAccess(c: PCtx; n: PNode; dest: var TDest; opc: TOpcode) =
 proc genObjAccess(c: PCtx; n: PNode; dest: var TDest) =
   genAccess(c, n, dest, opcLdObj)
 
+proc genCheckedObjAccess(c: PCtx; n: PNode; dest: var TDest) =
+  # XXX implement field checks!
+  genAccess(c, n.sons[0], dest, opcLdObj)
+
 proc genArrAccess(c: PCtx; n: PNode; dest: var TDest) =
   if n.sons[0].typ.skipTypes(abstractVarRange).kind in {tyString, tyCString}:
     genAccess(c, n, dest, opcLdStrIdx)
@@ -938,8 +1040,15 @@ proc getNullValue(typ: PType, info: TLineInfo): PNode =
   of tyFloat..tyFloat128: 
     result = newNodeIt(nkFloatLit, info, t)
   of tyVar, tyPointer, tyPtr, tyCString, tySequence, tyString, tyExpr, 
-     tyStmt, tyTypeDesc, tyProc, tyRef:
+     tyStmt, tyTypeDesc, tyRef:
     result = newNodeIT(nkNilLit, info, t)
+  of tyProc:
+    if t.callConv != ccClosure:
+      result = newNodeIT(nkNilLit, info, t)
+    else:
+      result = newNodeIT(nkPar, info, t)
+      result.add(newNodeIT(nkNilLit, info, t))
+      result.add(newNodeIT(nkNilLit, info, t))
   of tyObject: 
     result = newNodeIT(nkPar, info, t)
     getNullValueAux(t.n, result)
@@ -984,11 +1093,14 @@ proc genVarSection(c: PCtx; n: PNode) =
       c.freeTemp(tmp)
     elif a.sons[0].kind == nkSym:
       let s = a.sons[0].sym
-      if sfGlobal in s.flags:
+      if s.isGlobal:
         if s.position == 0:
-          let sa = if s.ast.isNil: getNullValue(s.typ, a.info) else: s.ast
-          c.globals.add(sa)
-          s.position = c.globals.len
+          if sfImportc in s.flags: c.importcSym(a.info, s)
+          else:
+            let sa = if s.ast.isNil: getNullValue(s.typ, a.info) else: s.ast
+            c.globals.add(sa)
+            s.position = c.globals.len
+            # "Once support" is unnecessary here
         if a.sons[2].kind == nkEmpty:
           when false:
             withTemp(tmp, s.typ):
@@ -1016,23 +1128,31 @@ proc genVarSection(c: PCtx; n: PNode) =
 proc genArrayConstr(c: PCtx, n: PNode, dest: var TDest) =
   if dest < 0: dest = c.getTemp(n.typ)
   c.gABx(n, opcLdNull, dest, c.genType(n.typ))
-  let intType = getSysType(tyInt)
-  var tmp = getTemp(c, intType)
-  c.gABx(n, opcLdNull, tmp, c.genType(intType))
-  for x in n:
-    let a = c.genx(x)
-    c.gABC(n, opcWrArr, dest, a, tmp)
-    c.gABI(n, opcAddImmInt, tmp, tmp, 1)
-    c.freeTemp(a)
-  c.freeTemp(tmp)
+  if n.len > 0:
+    let intType = getSysType(tyInt)
+    var tmp = getTemp(c, intType)
+    c.gABx(n, opcLdNull, tmp, c.genType(intType))
+    for x in n:
+      let a = c.genx(x)
+      c.gABC(n, whichAsgnOpc(x, opcWrArr), dest, tmp, a)
+      c.gABI(n, opcAddImmInt, tmp, tmp, 1)
+      c.freeTemp(a)
+    c.freeTemp(tmp)
 
 proc genSetConstr(c: PCtx, n: PNode, dest: var TDest) =
   if dest < 0: dest = c.getTemp(n.typ)
   c.gABx(n, opcLdNull, dest, c.genType(n.typ))
   for x in n:
-    let a = c.genx(x)
-    c.gABC(n, opcIncl, dest, a)
-    c.freeTemp(a)
+    if x.kind == nkRange:
+      let a = c.genx(x.sons[0])
+      let b = c.genx(x.sons[1])
+      c.gABC(n, opcInclRange, dest, a, b)
+      c.freeTemp(b)
+      c.freeTemp(a)
+    else:
+      let a = c.genx(x)
+      c.gABC(n, opcIncl, dest, a)
+      c.freeTemp(a)
 
 proc genObjConstr(c: PCtx, n: PNode, dest: var TDest) =
   if dest < 0: dest = c.getTemp(n.typ)
@@ -1054,7 +1174,8 @@ proc genObjConstr(c: PCtx, n: PNode, dest: var TDest) =
 
 proc genTupleConstr(c: PCtx, n: PNode, dest: var TDest) =
   if dest < 0: dest = c.getTemp(n.typ)
-  var idx = getTemp(c, getSysType(tyInt))
+  c.gABx(n, opcLdNull, dest, c.genType(n.typ))
+  # XXX x = (x.old, 22)  produces wrong code ... stupid self assignments
   for i in 0.. <n.len:
     let it = n.sons[i]
     if it.kind == nkExprColonExpr:
@@ -1065,10 +1186,8 @@ proc genTupleConstr(c: PCtx, n: PNode, dest: var TDest) =
       c.freeTemp(idx)
     else:
       let tmp = c.genx(it)
-      c.gABx(it, opcLdImmInt, idx, i)
-      c.gABC(it, whichAsgnOpc(it, opcWrObj), dest, idx, tmp)
+      c.gABC(it, whichAsgnOpc(it, opcWrObj), dest, i.TRegister, tmp)
       c.freeTemp(tmp)
-  c.freeTemp(idx)
 
 proc genProc*(c: PCtx; s: PSym): int
 
@@ -1079,7 +1198,9 @@ proc gen(c: PCtx; n: PNode; dest: var TDest) =
     case s.kind
     of skVar, skForVar, skTemp, skLet, skParam, skResult:
       genRdVar(c, n, dest)
-    of skProc, skConverter, skMacro, skMethod, skIterator:
+    of skProc, skConverter, skMacro, skTemplate, skMethod, skIterator:
+      # 'skTemplate' is only allowed for 'getAst' support:
+      if sfImportc in s.flags: c.importcSym(n.info, s)
       genLit(c, n, dest)
     of skConst:
       gen(c, s.ast, dest)
@@ -1096,6 +1217,8 @@ proc gen(c: PCtx; n: PNode; dest: var TDest) =
         InternalError(n.info, 
           "too large offset! cannot generate code for: " & s.name.s)
       dest = s.position
+    of skType:
+      genTypeLit(c, s.typ, dest)
     else:
       InternalError(n.info, "cannot generate code for: " & s.name.s)
   of nkCallKinds:
@@ -1109,11 +1232,15 @@ proc gen(c: PCtx; n: PNode; dest: var TDest) =
       c.gABx(n, opcLdImmInt, dest, n.intVal.int)
     else:
       genLit(c, n, dest)
-  of nkUIntLit..nkNilLit: genLit(c, n, dest)
+  of nkUIntLit..pred(nkNilLit): genLit(c, n, dest)
+  of nkNilLit:
+    if not n.typ.isEmptyType: genLit(c, n, dest)
+    else: unused(n, dest)
   of nkAsgn, nkFastAsgn: 
     unused(n, dest)
     genAsgn(c, n.sons[0], n.sons[1], n.kind == nkAsgn)
   of nkDotExpr: genObjAccess(c, n, dest)
+  of nkCheckedFieldExpr: genCheckedObjAccess(c, n, dest)
   of nkBracketExpr: genArrAccess(c, n, dest)
   of nkDerefExpr, nkHiddenDeref: genAddrDeref(c, n, dest, opcDeref)
   of nkAddr, nkHiddenAddr: genAddrDeref(c, n, dest, opcAddr)
@@ -1176,6 +1303,11 @@ proc gen(c: PCtx; n: PNode; dest: var TDest) =
   of nkCurly: genSetConstr(c, n, dest)
   of nkObjConstr: genObjConstr(c, n, dest)
   of nkPar, nkClosure: genTupleConstr(c, n, dest)
+  of nkCast:
+    if allowCast in c.features:
+      genConv(c, n, n.sons[1], dest, opcCast)
+    else:
+      localError(n.info, errGenerated, "VM is not allowed to 'cast'")
   else:
     InternalError n.info, "too implement " & $n.kind
 
@@ -1193,14 +1325,16 @@ proc genStmt*(c: PCtx; n: PNode): int =
   var d: TDest = -1
   c.gen(n, d)
   c.gABC(n, opcEof)
-  InternalAssert d < 0
+  if d >= 0: internalError(n.info, "some destination set")
 
-proc genExpr*(c: PCtx; n: PNode): int =
+proc genExpr*(c: PCtx; n: PNode, requiresValue = true): int =
   c.removeLastEof
   result = c.code.len
   var d: TDest = -1
   c.gen(n, d)
-  InternalAssert d >= 0
+  if d < 0:
+    if requiresValue: internalError(n.info, "no destination set")
+    d = 0
   c.gABC(n, opcEof, d)
 
 proc genParams(c: PCtx; params: PNode) =
@@ -1225,12 +1359,11 @@ proc optimizeJumps(c: PCtx; start: int) =
     case opc
     of opcTJmp, opcFJmp:
       var reg = c.code[i].regA
-      var d = i + c.code[i].regBx
-      var iters = maxIterations
-      while iters > 0:
+      var d = i + c.code[i].jmpDiff
+      for iters in countdown(maxIterations, 0):
         case c.code[d].opcode
         of opcJmp:
-          d = d + c.code[d].regBx
+          d = d + c.code[d].jmpDiff
         of opcTJmp, opcFJmp:
           if c.code[d].regA != reg: break
           # tjmp x, 23
@@ -1238,28 +1371,40 @@ proc optimizeJumps(c: PCtx; start: int) =
           # tjmp x, 12
           # -- we know 'x' is true, and so can jump to 12+13:
           if c.code[d].opcode == opc:
-            d = d + c.code[d].regBx
+            d = d + c.code[d].jmpDiff
           else:
             # tjmp x, 23
             # fjmp x, 22
             # We know 'x' is true so skip to the next instruction:
             d = d + 1
         else: break
-        dec iters
-      c.finalJumpTarget(i, d - i)
+      if d != i + c.code[i].jmpDiff:
+        c.finalJumpTarget(i, d - i)
     of opcJmp:
-      var d = i + c.code[i].regBx
+      var d = i + c.code[i].jmpDiff
       var iters = maxIterations
       while c.code[d].opcode == opcJmp and iters > 0:
-        d = d + c.code[d].regBx
+        d = d + c.code[d].jmpDiff
         dec iters
-      c.finalJumpTarget(i, d - i)
+      if c.code[d].opcode == opcRet:
+        # optimize 'jmp to ret' to 'ret' here
+        c.code[i] = c.code[d]
+      elif d != i + c.code[i].jmpDiff:
+        c.finalJumpTarget(i, d - i)
     else: discard
 
 proc genProc(c: PCtx; s: PSym): int =
   let x = s.ast.sons[optimizedCodePos]
   if x.kind == nkEmpty:
-    c.removeLastEof
+    #if s.name.s == "outterMacro" or s.name.s == "innerProc":
+    #  echo "GENERATING CODE FOR ", s.name.s
+    let last = c.code.len-1
+    var eofInstr: TInstr
+    if last >= 0 and c.code[last].opcode == opcEof:
+      eofInstr = c.code[last]
+      c.code.setLen(last)
+      c.debug.setLen(last)
+    #c.removeLastEof
     result = c.code.len+1 # skip the jump instruction
     s.ast.sons[optimizedCodePos] = newIntNode(nkIntLit, result)
     # thanks to the jmp we can add top level statements easily and also nest
@@ -1271,13 +1416,22 @@ proc genProc(c: PCtx; s: PSym): int =
     c.prc = p
     # iterate over the parameters and allocate space for them:
     genParams(c, s.typ.n)
+    if tfCapturesEnv in s.typ.flags:
+      #let env = s.ast.sons[paramsPos].lastSon.sym
+      #assert env.position == 2
+      c.prc.slots[c.prc.maxSlots] = (inUse: true, kind: slotFixedLet)
+      inc c.prc.maxSlots
     gen(c, body)
     # generate final 'return' statement:
     c.gABC(body, opcRet)
     c.patch(procStart)
-    c.gABC(body, opcEof)
-    s.position = c.prc.maxSlots
+    c.gABC(body, opcEof, eofInstr.regA)
+    c.optimizeJumps(result)
+    s.offset = c.prc.maxSlots
+    #if s.name.s == "importImpl_forward" or s.name.s == "importImpl":
+    #  c.echoCode(result)
+    #  echo renderTree(body)
     c.prc = oldPrc
-    #c.echoCode
   else:
+    c.prc.maxSlots = s.offset
     result = x.intVal.int
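
Among the vmgen.nim changes above, genSetConstr now emits opcInclRange for nkRange elements, so set constructors containing ranges can be built inside the VM. A compile-time sketch of such a constructor (illustrative only; assumes echo and card are usable during evaluation):

  static:
    var lo = '0'
    var hi = '9'
    let digits = {lo..hi}      # the range element becomes an opcInclRange
    echo "card(digits) = ", card(digits)
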
diff --git a/compiler/wordrecg.nim b/compiler/wordrecg.nim
index 39b19646e..5f0e5be94 100644
--- a/compiler/wordrecg.nim
+++ b/compiler/wordrecg.nim
@@ -28,9 +28,9 @@ type
     wElif, wElse, wEnd, wEnum, wExcept, wExport,
     wFinally, wFor, wFrom, wGeneric, wIf, wImport, wIn, 
     wInclude, wInterface, wIs, wIsnot, wIterator, wLambda, wLet,
-    wMacro, wMethod, wMixin, wUsing, wMod, wNil, 
+    wMacro, wMethod, wMixin, wMod, wNil, 
     wNot, wNotin, wObject, wOf, wOr, wOut, wProc, wPtr, wRaise, wRef, wReturn, 
-    wShared, wShl, wShr, wStatic, wTemplate, wTry, wTuple, wType, wVar, 
+    wShared, wShl, wShr, wStatic, wTemplate, wTry, wTuple, wType, wUsing, wVar, 
     wWhen, wWhile, wWith, wWithout, wXor, wYield,
     
     wColon, wColonColon, wEquals, wDot, wDotDot,
@@ -95,7 +95,7 @@ const
   
   cppNimSharedKeywords* = {
     wAsm, wBreak, wCase, wConst, wContinue, wDo, wElse, wEnum, wExport,
-    wFor, wIf, wReturn, wStatic, wTemplate, wTry, wWhile, wUsing }
+    wFor, wIf, wReturn, wStatic, wTemplate, wTry, wWhile, wUsing}
 
   specialWords*: array[low(TSpecialWord)..high(TSpecialWord), string] = ["", 
     
@@ -107,11 +107,11 @@ const
     "finally", "for", "from", "generic", "if", 
     "import", "in", "include", "interface", "is", "isnot", "iterator",
     "lambda", "let",
-    "macro", "method", "mixin", "using", "mod", "nil", "not", "notin",
+    "macro", "method", "mixin", "mod", "nil", "not", "notin",
     "object", "of", "or", 
     "out", "proc", "ptr", "raise", "ref", "return",
     "shared", "shl", "shr", "static",
-    "template", "try", "tuple", "type", "var", 
+    "template", "try", "tuple", "type", "using", "var", 
     "when", "while", "with", "without", "xor",
     "yield",
 
diff --git a/config/nimrod.cfg b/config/nimrod.cfg
index 37024f3de..2817eac55 100644
--- a/config/nimrod.cfg
+++ b/config/nimrod.cfg
@@ -27,6 +27,7 @@ path="$lib/wrappers/readline"
 path="$lib/wrappers/sdl"
 # path="$lib/wrappers/x11"
 path="$lib/wrappers/zip"
+path="$lib/wrappers/libffi"
 path="$lib/windows"
 path="$lib/posix"
 path="$lib/js"
diff --git a/doc/keywords.txt b/doc/keywords.txt
index fa3ce4786..2d18d7969 100644
--- a/doc/keywords.txt
+++ b/doc/keywords.txt
@@ -7,13 +7,14 @@ finally for from
 generic
 if import in include interface is isnot iterator
 lambda let
-macro method mixin using mod
+macro method mixin mod
 nil not notin
 object of or out
 proc ptr
 raise ref return
 shared shl shr static
 template try tuple type
+using
 var
 when while with without
 xor
diff --git a/koch.nim b/koch.nim
index 97fcf5b2c..6e0d5bdf2 100644
--- a/koch.nim
+++ b/koch.nim
@@ -45,6 +45,7 @@ Possible Commands:
   tests                    run the testsuite
   update                   updates nimrod to the latest version from github
                            (compile koch with -d:withUpdate to enable)
+  temp options             creates a temporary compiler for testing
 Boot options:
   -d:release               produce a release version of the compiler
   -d:tinyc                 include the Tiny C backend (not supported on Windows)
@@ -268,6 +269,13 @@ proc tests(args: string) =
   exec(getCurrentDir() / "tests/tester".exe & " run")
   exec(getCurrentDir() / "tests/tester".exe & " merge")
 
+proc temp(args: string) =
+  var output = "compiler" / "nimrod".exe
+  var finalDest = "bin" / "nimrod_temp".exe
+  exec("nimrod c compiler" / "nimrod")
+  copyExe(output, finalDest)
+  if args.len > 0: exec(finalDest & " " & args)
+
 proc showHelp() = 
   quit(HelpText % [NimrodVersion & repeatChar(44-len(NimrodVersion)), 
                    CompileDate, CompileTime])
@@ -291,6 +299,7 @@ of cmdArgument:
       update(op.cmdLineRest)
     else:
       quit "this Koch has not been compiled with -d:withUpdate"
+  of "temp": temp(op.cmdLineRest)
   else: showHelp()
 of cmdEnd: showHelp()
 
diff --git a/lib/core/macros.nim b/lib/core/macros.nim
index 52ee9326f..8ffd268ff 100644
--- a/lib/core/macros.nim
+++ b/lib/core/macros.nim
@@ -158,7 +158,13 @@ proc `intVal=`*(n: PNimrodNode, val: biggestInt) {.magic: "NSetIntVal".}
 proc `floatVal=`*(n: PNimrodNode, val: biggestFloat) {.magic: "NSetFloatVal".}
 proc `symbol=`*(n: PNimrodNode, val: PNimrodSymbol) {.magic: "NSetSymbol".}
 proc `ident=`*(n: PNimrodNode, val: TNimrodIdent) {.magic: "NSetIdent".}
-proc `typ=`*(n: PNimrodNode, typ: typedesc) {.magic: "NSetType".}
+#proc `typ=`*(n: PNimrodNode, typ: typedesc) {.magic: "NSetType".}
+# this is not sound! Unfortunately forbidding 'typ=' is not enough, as you
+# can easily do:
+#   let bracket = semCheck([1, 2])
+#   let fake = semCheck(2.0)
+#   bracket[0] = fake  # constructs a mixed array with ints and floats!
+
 proc `strVal=`*(n: PNimrodNode, val: string) {.magic: "NSetStrVal".}
 
 proc newNimNode*(kind: TNimrodNodeKind,
diff --git a/lib/packages/docutils/highlite.nim b/lib/packages/docutils/highlite.nim
index 69da2bba8..4bfdf5e58 100644
--- a/lib/packages/docutils/highlite.nim
+++ b/lib/packages/docutils/highlite.nim
@@ -53,7 +53,7 @@ const
     "interface", "is", "isnot", "iterator", "lambda", "let", "macro", "method",
     "mixin", "mod", "nil", "not", "notin", "object", "of", "or", "out", "proc",
     "ptr", "raise", "ref", "return", "shared", "shl", "shr", "static",
-    "template", "try", "tuple", "type", "var", "when", "while", "with",
+    "template", "try", "tuple", "type", "using", "var", "when", "while", "with",
     "without", "xor", "yield"]
 
 proc getSourceLanguage*(name: string): TSourceLanguage = 
diff --git a/lib/pure/encodings.nim b/lib/pure/encodings.nim
index ce4238409..bc849bfe8 100644
--- a/lib/pure/encodings.nim
+++ b/lib/pure/encodings.nim
@@ -14,15 +14,15 @@ import os, parseutils, strutils
 
 when not defined(windows):
   type
-    TConverter = object {.pure, final.}
+    TConverter = object
     PConverter* = ptr TConverter ## can convert between two character sets
-    
+
 else:
   type
     TCodePage = distinct int32
-    PConverter* = object {.pure.}
+    PConverter* = object
       dest, src: TCodePage
-    
+
 type
   EInvalidEncoding* = object of EInvalidValue ## exception that is raised
                                               ## for encoding errors
@@ -425,7 +425,7 @@ else:
           dst = cast[cstring](cast[int](cstring(result)) + offset)
           outLen = len(result) - offset
         else:
-          OSError()
+          OSError(lerr.TOSErrorCode)
     # iconv has a buffer that needs flushing, specially if the last char is 
     # not '\0'
     discard iconv(c, nil, nil, dst, outlen)
diff --git a/lib/pure/os.nim b/lib/pure/os.nim
index d74cb1fb9..2e15587f4 100644
--- a/lib/pure/os.nim
+++ b/lib/pure/os.nim
@@ -165,8 +165,12 @@ else: # UNIX-like operating system
     DynlibFormat* = when defined(macosx): "lib$1.dylib" else: "lib$1.so"
 
 when defined(posix):
-  var
-    pathMax {.importc: "PATH_MAX", header: "<stdlib.h>".}: cint
+  when NoFakeVars:
+    const pathMax = 5000 # doesn't matter really. The concept of PATH_MAX
+                         # doesn't work anymore on modern OSes.
+  else:
+    var
+      pathMax {.importc: "PATH_MAX", header: "<stdlib.h>".}: cint
 
 const
   ExtSep* = '.'
@@ -341,16 +345,6 @@ when defined(windows):
     template FindNextFile(a, b: expr): expr = FindNextFileW(a, b)
     template getCommandLine(): expr = getCommandLineW()
 
-    proc skipFindData(f: TWIN32_FIND_DATA): bool {.inline.} =
-      let 
-        nul = 0
-        dot = ord('.')
-      result = (f.cFilename[0].int == dot)
-      if result:
-        result = (f.cFilename[1].int in {dot, nul})
-        if result:
-          result = (f.cFilename[2].int == nul)
-
     template getFilename(f: expr): expr =
       $cast[WideCString](addr(f.cFilename[0]))
   else:
@@ -358,18 +352,13 @@ when defined(windows):
     template FindNextFile(a, b: expr): expr = FindNextFileA(a, b)
     template getCommandLine(): expr = getCommandLineA()
 
-    proc skipFindData(f: TWIN32_FIND_DATA): bool {.inline.} =
-      let 
-        nul = '\0'
-        dot = '.'
-      result = (f.cFilename[0] == dot)
-      if result:
-        result = (f.cFilename[1] in {dot, nul})
-        if result:
-          result = (f.cFilename[2] == nul)
-
     template getFilename(f: expr): expr = $f.cFilename
 
+  proc skipFindData(f: TWIN32_FIND_DATA): bool {.inline.} =
+    const dot = ord('.')
+    result = f.cFilename[0].int == dot and (f.cFilename[1].int == 0 or
+             f.cFilename[1].int == dot and f.cFilename[2].int == 0)
+
 proc existsFile*(filename: string): bool {.rtl, extern: "nos$1",
                                           tags: [FReadDir].} =
   ## Returns true if the file exists, false otherwise.
@@ -468,20 +457,21 @@ proc setCurrentDir*(newDir: string) {.inline, tags: [].} =
   ## `newDir` cannot be set.
   when defined(Windows):
     when useWinUnicode:
-      if SetCurrentDirectoryW(newWideCString(newDir)) == 0'i32: OSError(OSLastError())
+      if SetCurrentDirectoryW(newWideCString(newDir)) == 0'i32:
+        OSError(OSLastError())
     else:
       if SetCurrentDirectoryA(newDir) == 0'i32: OSError(OSLastError())
   else:
     if chdir(newDir) != 0'i32: OSError(OSLastError())
 
-proc JoinPath*(head, tail: string): string {.
+proc joinPath*(head, tail: string): string {.
   noSideEffect, rtl, extern: "nos$1".} =
   ## Joins two directory names to one.
   ##
   ## For example on Unix:
   ##
   ## .. code-block:: nimrod
-  ##   JoinPath("usr", "lib")
+  ##   joinPath("usr", "lib")
   ##
   ## results in:
   ##
@@ -495,10 +485,10 @@ proc JoinPath*(head, tail: string): string {.
   ## examples on Unix:
   ##
   ## .. code-block:: nimrod
-  ##   assert JoinPath("usr", "") == "usr/"
-  ##   assert JoinPath("", "lib") == "lib"
-  ##   assert JoinPath("", "/lib") == "/lib"
-  ##   assert JoinPath("usr/", "/lib") == "usr/lib"
+  ##   assert joinPath("usr", "") == "usr/"
+  ##   assert joinPath("", "lib") == "lib"
+  ##   assert joinPath("", "/lib") == "/lib"
+  ##   assert joinPath("usr/", "/lib") == "usr/lib"
   if len(head) == 0:
     result = tail
   elif head[len(head)-1] in {DirSep, AltSep}:
@@ -512,14 +502,14 @@ proc JoinPath*(head, tail: string): string {.
     else:
       result = head & DirSep & tail
 
-proc JoinPath*(parts: varargs[string]): string {.noSideEffect,
+proc joinPath*(parts: varargs[string]): string {.noSideEffect,
   rtl, extern: "nos$1OpenArray".} =
-  ## The same as `JoinPath(head, tail)`, but works with any number of directory
+  ## The same as `joinPath(head, tail)`, but works with any number of directory
   ## parts. You need to pass at least one element or the proc will assert in
   ## debug builds and crash on release builds.
   result = parts[0]
   for i in 1..high(parts):
-    result = JoinPath(result, parts[i])
+    result = joinPath(result, parts[i])
 
 proc `/` * (head, tail: string): string {.noSideEffect.} =
   ## The same as ``JoinPath(head, tail)``
@@ -531,9 +521,9 @@ proc `/` * (head, tail: string): string {.noSideEffect.} =
   ##   assert "" / "lib" == "lib"
   ##   assert "" / "/lib" == "/lib"
   ##   assert "usr/" / "/lib" == "usr/lib"
-  return JoinPath(head, tail)
+  return joinPath(head, tail)
 
-proc SplitPath*(path: string): tuple[head, tail: string] {.
+proc splitPath*(path: string): tuple[head, tail: string] {.
   noSideEffect, rtl, extern: "nos$1".} =
   ## Splits a directory into (head, tail), so that
   ## ``JoinPath(head, tail) == path``.
@@ -541,11 +531,11 @@ proc SplitPath*(path: string): tuple[head, tail: string] {.
   ## Examples:
   ##
   ## .. code-block:: nimrod
-  ##   SplitPath("usr/local/bin") -> ("usr/local", "bin")
-  ##   SplitPath("usr/local/bin/") -> ("usr/local/bin", "")
-  ##   SplitPath("bin") -> ("", "bin")
-  ##   SplitPath("/bin") -> ("", "bin")
-  ##   SplitPath("") -> ("", "")
+  ##   splitPath("usr/local/bin") -> ("usr/local", "bin")
+  ##   splitPath("usr/local/bin/") -> ("usr/local/bin", "")
+  ##   splitPath("bin") -> ("", "bin")
+  ##   splitPath("/bin") -> ("", "bin")
+  ##   splitPath("") -> ("", "")
   var sepPos = -1
   for i in countdown(len(path)-1, 0):
     if path[i] in {dirsep, altsep}:
@@ -699,7 +689,7 @@ proc expandFilename*(filename: string): string {.rtl, extern: "nos$1",
     if r.isNil: OSError(OSLastError())
     setlen(result, c_strlen(result))
 
-proc ChangeFileExt*(filename, ext: string): string {.
+proc changeFileExt*(filename, ext: string): string {.
   noSideEffect, rtl, extern: "nos$1".} =
   ## Changes the file extension to `ext`.
   ##
@@ -971,7 +961,10 @@ proc moveFile*(source, dest: string) {.rtl, extern: "nos$1",
     raise newException(EOS, $strerror(errno))
 
 when not defined(ENOENT) and not defined(Windows):
-  var ENOENT {.importc, header: "<errno.h>".}: cint
+  when NoFakeVars:
+    const ENOENT = cint(2) # 2 on most systems including Solaris
+  else:
+    var ENOENT {.importc, header: "<errno.h>".}: cint
 
 when defined(Windows):
   when useWinUnicode:
@@ -1028,7 +1021,7 @@ when defined(windows):
     proc strEnd(cstr: wideCString, c = 0'i32): wideCString {.
       importc: "wcschr", header: "<string.h>".}
   else:
-    proc strEnd(cstr: CString, c = 0'i32): CString {.
+    proc strEnd(cstr: cstring, c = 0'i32): cstring {.
       importc: "strchr", header: "<string.h>".}
 
   proc getEnvVarsC() =
@@ -1333,7 +1326,7 @@ proc copyDir*(source, dest: string) {.rtl, extern: "nos$1",
       copyFile(path, dest / noSource)
     of pcDir:
       copyDir(path, dest / noSource)
-    else: nil
+    else: discard
 
 proc parseCmdLine*(c: string): seq[string] {.
   noSideEffect, rtl, extern: "nos$1".} =
diff --git a/lib/pure/sockets.nim b/lib/pure/sockets.nim
index 66bb1e6a9..684def978 100644
--- a/lib/pure/sockets.nim
+++ b/lib/pure/sockets.nim
@@ -131,7 +131,7 @@ type
 
   ETimeout* = object of ESynch
 
-let
+const
   InvalidSocket*: TSocket = nil ## invalid socket
 
 when defined(windows):
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index fe71cb77b..e290226d2 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -93,7 +93,7 @@ proc normalize*(s: string): string {.noSideEffect, procvar,
   var j = 0
   for i in 0..len(s) - 1:
     if s[i] in {'A'..'Z'}:
-      result[j] = Chr(Ord(s[i]) + (Ord('a') - Ord('A')))
+      result[j] = chr(ord(s[i]) + (ord('a') - ord('A')))
       inc j
     elif s[i] != '_':
       result[j] = s[i]
@@ -1022,8 +1022,8 @@ proc editDistance*(a, b: string): int {.noSideEffect,
 
 # floating point formating:
 
-proc c_sprintf(buf, frmt: CString) {.nodecl, importc: "sprintf", varargs,
-                                     noSideEffect.}
+proc c_sprintf(buf, frmt: CString) {.header: "<stdio.h>", importc: "sprintf",
+                                     varargs, noSideEffect.}
 
 type
   TFloatFormat* = enum ## the different modes of floating point formating
diff --git a/lib/system.nim b/lib/system.nim
index 106eb04a3..14be9cc21 100644
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -186,7 +186,10 @@ proc `..`*[T](b: T): TSlice[T] {.noSideEffect, inline.} =
 when not defined(niminheritable):
   {.pragma: inheritable.}
 
-when not defined(JS) and not defined(NimrodVM):
+const NoFakeVars* = defined(NimrodVM) ## true if the backend doesn't support \
+  ## "fake variables" like 'var EBADF {.importc.}: cint'.
+
+when not defined(JS):
   type
     TGenericSeq {.compilerproc, pure, inheritable.} = object
       len, reserved: int
@@ -195,7 +198,8 @@ when not defined(JS) and not defined(NimrodVM):
     NimStringDesc {.compilerproc, final.} = object of TGenericSeq
       data: array[0..100_000_000, char]
     NimString = ptr NimStringDesc
-    
+
+when not defined(JS) and not defined(NimrodVM):
   template space(s: PGenericSeq): int {.dirty.} =
     s.reserved and not seqShallowFlag
 
diff --git a/lib/system/ansi_c.nim b/lib/system/ansi_c.nim
index 13e8496d2..398656d0a 100644
--- a/lib/system/ansi_c.nim
+++ b/lib/system/ansi_c.nim
@@ -40,21 +40,40 @@ var
 
 # constants faked as variables:
 when not defined(SIGINT):
-  var 
-    SIGINT {.importc: "SIGINT", nodecl.}: cint
-    SIGSEGV {.importc: "SIGSEGV", nodecl.}: cint
-    SIGABRT {.importc: "SIGABRT", nodecl.}: cint
-    SIGFPE {.importc: "SIGFPE", nodecl.}: cint
-    SIGILL {.importc: "SIGILL", nodecl.}: cint
+  when NoFakeVars:
+    when defined(windows):
+      const
+        SIGABRT = cint(22)
+        SIGFPE = cint(8)
+        SIGILL = cint(4)
+        SIGINT = cint(2)
+        SIGSEGV = cint(11)
+        SIGTERM = cint(15)
+    elif defined(macosx):
+      const
+        SIGABRT = cint(6)
+        SIGFPE = cint(8)
+        SIGILL = cint(4)
+        SIGINT = cint(2)
+        SIGSEGV = cint(11)
+        SIGTERM = cint(15)
+    else:
+      {.error: "SIGABRT not ported to your platform".}
+  else:
+    var
+      SIGINT {.importc: "SIGINT", nodecl.}: cint
+      SIGSEGV {.importc: "SIGSEGV", nodecl.}: cint
+      SIGABRT {.importc: "SIGABRT", nodecl.}: cint
+      SIGFPE {.importc: "SIGFPE", nodecl.}: cint
+      SIGILL {.importc: "SIGILL", nodecl.}: cint
 
 when defined(macosx):
-  var
-    SIGBUS {.importc: "SIGBUS", nodecl.}: cint
-      # hopefully this does not lead to new bugs
+  when NoFakeVars:
+    const SIGBUS = cint(10)
+  else:
+    var SIGBUS {.importc: "SIGBUS", nodecl.}: cint
 else:
-  var
-    SIGBUS {.importc: "SIGSEGV", nodecl.}: cint
-      # only Mac OS X has this shit
+  template SIGBUS: expr = SIGSEGV
 
 proc c_longjmp(jmpb: C_JmpBuf, retval: cint) {.
   header: "<setjmp.h>", importc: "longjmp".}
@@ -111,16 +130,22 @@ proc c_realloc(p: pointer, newsize: int): pointer {.
 
 when hostOS != "standalone":
   when not defined(errno):
-    var errno {.importc, header: "<errno.h>".}: cint ## error variable
+    when defined(NimrodVM):
+      var vmErrnoWrapper {.importc.}: ptr cint
+      template errno: expr = 
+        bind vmErrnoWrapper
+        vmErrnoWrapper[]
+    else:
+      var errno {.importc, header: "<errno.h>".}: cint ## error variable
 proc strerror(errnum: cint): cstring {.importc, header: "<string.h>".}
 
-proc c_remove(filename: CString): cint {.
+proc c_remove(filename: cstring): cint {.
   importc: "remove", header: "<stdio.h>".}
-proc c_rename(oldname, newname: CString): cint {.
+proc c_rename(oldname, newname: cstring): cint {.
   importc: "rename", header: "<stdio.h>".}
 
-proc c_system(cmd: CString): cint {.importc: "system", header: "<stdlib.h>".}
-proc c_getenv(env: CString): CString {.importc: "getenv", header: "<stdlib.h>".}
-proc c_putenv(env: CString): cint {.importc: "putenv", header: "<stdlib.h>".}
+proc c_system(cmd: cstring): cint {.importc: "system", header: "<stdlib.h>".}
+proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
+proc c_putenv(env: cstring): cint {.importc: "putenv", header: "<stdlib.h>".}
 
 {.pop}
diff --git a/lib/system/sysio.nim b/lib/system/sysio.nim
index a877f8b28..8d9400a7d 100644
--- a/lib/system/sysio.nim
+++ b/lib/system/sysio.nim
@@ -45,9 +45,21 @@ proc setvbuf(stream: TFile, buf: pointer, typ, size: cint): cint {.
 proc write(f: TFile, c: cstring) = fputs(c, f)
 {.pop.}
 
-var
-  IOFBF {.importc: "_IOFBF", nodecl.}: cint
-  IONBF {.importc: "_IONBF", nodecl.}: cint
+when NoFakeVars:
+  when defined(windows):
+    const
+      IOFBF = cint(0)
+      IONBF = cint(4)
+  elif defined(macosx):
+    const
+      IOFBF = cint(0)
+      IONBF = cint(2)
+  else:
+    {.error: "IOFBF not ported to your platform".}
+else:
+  var
+    IOFBF {.importc: "_IOFBF", nodecl.}: cint
+    IONBF {.importc: "_IONBF", nodecl.}: cint
 
 const
   buf_size = 4000
@@ -149,7 +161,7 @@ proc writeFile(filename, content: string) =
   finally:
     close(f)
 
-proc EndOfFile(f: TFile): bool =
+proc endOfFile(f: TFile): bool =
   # do not blame me; blame the ANSI C standard this is so brain-damaged
   var c = fgetc(f)
   ungetc(c, f)
@@ -194,9 +206,9 @@ const
     # should not be translated.
 
 
-proc Open(f: var TFile, filename: string,
+proc open(f: var TFile, filename: string,
           mode: TFileMode = fmRead,
-          bufSize: int = -1): Bool =
+          bufSize: int = -1): bool =
   var p: pointer = fopen(filename, FormatOpen[mode])
   result = (p != nil)
   f = cast[TFile](p)
@@ -223,10 +235,10 @@ proc fwrite(buf: Pointer, size, n: int, f: TFile): int {.
 proc readBuffer(f: TFile, buffer: pointer, len: int): int =
   result = fread(buffer, 1, len, f)
 
-proc ReadBytes(f: TFile, a: var openarray[int8], start, len: int): int =
+proc readBytes(f: TFile, a: var openarray[int8], start, len: int): int =
   result = readBuffer(f, addr(a[start]), len)
 
-proc ReadChars(f: TFile, a: var openarray[char], start, len: int): int =
+proc readChars(f: TFile, a: var openarray[char], start, len: int): int =
   result = readBuffer(f, addr(a[start]), len)
 
 {.push stackTrace:off, profiler:off.}
diff --git a/lib/system/widestrs.nim b/lib/system/widestrs.nim
index cf1f0910c..d856cc830 100644
--- a/lib/system/widestrs.nim
+++ b/lib/system/widestrs.nim
@@ -9,6 +9,9 @@
 
 ## Nimrod support for C/C++'s `wide strings`:idx:. This is part of the system
 ## module! Do not import it directly!
+
+when not defined(NimString):
+  {.error: "You must not import this module explicitly".}
 
 type
   TUtf16Char* = distinct int16
@@ -101,7 +104,8 @@ proc newWideCString*(s: cstring): WideCString =
   if s.isNil: return nil
 
   when not defined(c_strlen):
-    proc c_strlen(a: CString): int {.nodecl, noSideEffect, importc: "strlen".}
+    proc c_strlen(a: cstring): int {.
+      header: "<string.h>", noSideEffect, importc: "strlen".}
 
   let L = cstrlen(s)
   result = newWideCString(s, L)
diff --git a/lib/wrappers/libffi/common/ffi.h b/lib/wrappers/libffi/common/ffi.h
new file mode 100644
index 000000000..07d650eac
--- /dev/null
+++ b/lib/wrappers/libffi/common/ffi.h
@@ -0,0 +1,331 @@
+/* -----------------------------------------------------------------*-C-*-
+   libffi 2.00-beta - Copyright (c) 1996-2003  Red Hat, Inc.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+
+   ----------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------
+   The basic API is described in the README file.
+
+   The raw API is designed to bypass some of the argument packing
+   and unpacking on architectures for which it can be avoided.
+
+   The closure API allows interpreted functions to be packaged up
+   inside a C function pointer, so that they can be called as C functions,
+   with no understanding on the client side that they are interpreted.
+   It can also be used in other cases in which it is necessary to package
+   up a user specified parameter and a function pointer as a single
+   function pointer.
+
+   The closure API must be implemented in order to get its functionality,
+   e.g. for use by gij.  Routines are provided to emulate the raw API
+   if the underlying platform doesn't allow faster implementation.
+
+   More details on the raw and closure API can be found in:
+
+   http://gcc.gnu.org/ml/java/1999-q3/msg00138.html
+
+   and
+
+   http://gcc.gnu.org/ml/java/1999-q3/msg00174.html
+   -------------------------------------------------------------------- */
+
+#ifndef LIBFFI_H
+#define LIBFFI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Specify which architecture libffi is configured for. */
+//XXX #define X86
+
+/* ---- System configuration information --------------------------------- */
+
+#include <ffitarget.h>
+
+#ifndef LIBFFI_ASM
+
+#include <stddef.h>
+#include <limits.h>
+
+/* LONG_LONG_MAX is not always defined (not if STRICT_ANSI, for example).
+   But we can find it either under the correct ANSI name, or under GNU
+   C's internal name.  */
+#ifdef LONG_LONG_MAX
+# define FFI_LONG_LONG_MAX LONG_LONG_MAX
+#else
+# ifdef LLONG_MAX
+#  define FFI_LONG_LONG_MAX LLONG_MAX
+# else
+#  ifdef __GNUC__
+#   define FFI_LONG_LONG_MAX __LONG_LONG_MAX__
+#  endif
+#  ifdef _MSC_VER
+#   define FFI_LONG_LONG_MAX _I64_MAX
+#  endif
+# endif
+#endif
+
+#if SCHAR_MAX == 127
+# define ffi_type_uchar                ffi_type_uint8
+# define ffi_type_schar                ffi_type_sint8
+#else
+ #error "char size not supported"
+#endif
+
+#if SHRT_MAX == 32767
+# define ffi_type_ushort       ffi_type_uint16
+# define ffi_type_sshort       ffi_type_sint16
+#elif SHRT_MAX == 2147483647
+# define ffi_type_ushort       ffi_type_uint32
+# define ffi_type_sshort       ffi_type_sint32
+#else
+ #error "short size not supported"
+#endif
+
+#if INT_MAX == 32767
+# define ffi_type_uint         ffi_type_uint16
+# define ffi_type_sint         ffi_type_sint16
+#elif INT_MAX == 2147483647
+# define ffi_type_uint         ffi_type_uint32
+# define ffi_type_sint         ffi_type_sint32
+#elif INT_MAX == 9223372036854775807
+# define ffi_type_uint         ffi_type_uint64
+# define ffi_type_sint         ffi_type_sint64
+#else
+ #error "int size not supported"
+#endif
+
+#define ffi_type_ulong         ffi_type_uint64
+#define ffi_type_slong         ffi_type_sint64
+#if LONG_MAX == 2147483647
+# if FFI_LONG_LONG_MAX != 9223372036854775807
+  #error "no 64-bit data type supported"
+# endif
+#elif LONG_MAX != 9223372036854775807
+ #error "long size not supported"
+#endif
+
+/* The closure code assumes that this works on pointers, i.e. a size_t	*/
+/* can hold a pointer.							*/
+
+typedef struct _ffi_type
+{
+  size_t size;
+  unsigned short alignment;
+  unsigned short type;
+  /*@null@*/ struct _ffi_type **elements;
+} ffi_type;
+
+/* These are defined in types.c */
+extern const ffi_type ffi_type_void;
+extern const ffi_type ffi_type_uint8;
+extern const ffi_type ffi_type_sint8;
+extern const ffi_type ffi_type_uint16;
+extern const ffi_type ffi_type_sint16;
+extern const ffi_type ffi_type_uint32;
+extern const ffi_type ffi_type_sint32;
+extern const ffi_type ffi_type_uint64;
+extern const ffi_type ffi_type_sint64;
+extern const ffi_type ffi_type_float;
+extern const ffi_type ffi_type_double;
+extern const ffi_type ffi_type_longdouble;
+extern const ffi_type ffi_type_pointer;
+
+
+typedef enum {
+  FFI_OK = 0,
+  FFI_BAD_TYPEDEF,
+  FFI_BAD_ABI 
+} ffi_status;
+
+typedef unsigned FFI_TYPE;
+
+typedef struct {
+  ffi_abi abi;
+  unsigned nargs;
+  /*@dependent@*/ ffi_type **arg_types;
+  /*@dependent@*/ ffi_type *rtype;
+  unsigned bytes;
+  unsigned flags;
+#ifdef FFI_EXTRA_CIF_FIELDS
+  FFI_EXTRA_CIF_FIELDS;
+#endif
+} ffi_cif;
+
+/* ---- Definitions for the raw API -------------------------------------- */
+
+#ifdef _WIN64
+#define FFI_SIZEOF_ARG 8
+#else
+#define FFI_SIZEOF_ARG 4
+#endif
+
+typedef union {
+  ffi_sarg  sint;
+  ffi_arg   uint;
+  float	    flt;
+  char      data[FFI_SIZEOF_ARG];
+  void*     ptr;
+} ffi_raw;
+
+void ffi_raw_call (/*@dependent@*/ ffi_cif *cif, 
+		   void (*fn)(), 
+		   /*@out@*/ void *rvalue, 
+		   /*@dependent@*/ ffi_raw *avalue);
+
+void ffi_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_raw *raw);
+void ffi_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args);
+size_t ffi_raw_size (ffi_cif *cif);
+
+/* This is analogous to the raw API, except it uses Java parameter	*/
+/* packing, even on 64-bit machines.  I.e. on 64-bit machines		*/
+/* longs and doubles are followed by an empty 64-bit word.		*/
+
+void ffi_java_raw_call (/*@dependent@*/ ffi_cif *cif, 
+		        void (*fn)(), 
+		        /*@out@*/ void *rvalue, 
+		        /*@dependent@*/ ffi_raw *avalue);
+
+void ffi_java_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_raw *raw);
+void ffi_java_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args);
+size_t ffi_java_raw_size (ffi_cif *cif);
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#if FFI_CLOSURES
+
+typedef struct {
+  char tramp[FFI_TRAMPOLINE_SIZE];
+  ffi_cif   *cif;
+  void     (*fun)(ffi_cif*,void*,void**,void*);
+  void      *user_data;
+} ffi_closure;
+
+void ffi_closure_free(void *);
+void *ffi_closure_alloc (size_t size, void **code);
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure*,
+		  ffi_cif *,
+		  void (*fun)(ffi_cif*,void*,void**,void*),
+		  void *user_data,
+		  void *codeloc);
+
+typedef struct {
+  char tramp[FFI_TRAMPOLINE_SIZE];
+
+  ffi_cif   *cif;
+
+#if !FFI_NATIVE_RAW_API
+
+  /* if this is enabled, then a raw closure has the same layout 
+     as a regular closure.  We use this to install an intermediate 
+     handler to do the translation, void** -> ffi_raw*. */
+
+  void     (*translate_args)(ffi_cif*,void*,void**,void*);
+  void      *this_closure;
+
+#endif
+
+  void     (*fun)(ffi_cif*,void*,ffi_raw*,void*);
+  void      *user_data;
+
+} ffi_raw_closure;
+
+ffi_status
+ffi_prep_raw_closure (ffi_raw_closure*,
+		      ffi_cif *cif,
+		      void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+		      void *user_data);
+
+ffi_status
+ffi_prep_java_raw_closure (ffi_raw_closure*,
+		           ffi_cif *cif,
+		           void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+		           void *user_data);
+
+#endif /* FFI_CLOSURES */
+
+/* ---- Public interface definition -------------------------------------- */
+
+ffi_status ffi_prep_cif(/*@out@*/ /*@partial@*/ ffi_cif *cif, 
+			ffi_abi abi,
+			unsigned int nargs, 
+			/*@dependent@*/ /*@out@*/ /*@partial@*/ ffi_type *rtype, 
+			/*@dependent@*/ ffi_type **atypes);
+
+void
+ffi_call(/*@dependent@*/ ffi_cif *cif, 
+	 void (*fn)(), 
+	 /*@out@*/ void *rvalue, 
+	 /*@dependent@*/ void **avalue);
+
+/* Useful for eliminating compiler warnings */
+#define FFI_FN(f) ((void (*)())f)
+
+/* ---- Definitions shared with assembly code ---------------------------- */
+
+#endif
+
+/* If these change, update src/mips/ffitarget.h. */
+#define FFI_TYPE_VOID       0    
+#define FFI_TYPE_INT        1
+#define FFI_TYPE_FLOAT      2    
+#define FFI_TYPE_DOUBLE     3
+#if 1
+#define FFI_TYPE_LONGDOUBLE 4
+#else
+#define FFI_TYPE_LONGDOUBLE FFI_TYPE_DOUBLE
+#endif
+#define FFI_TYPE_UINT8      5   
+#define FFI_TYPE_SINT8      6
+#define FFI_TYPE_UINT16     7 
+#define FFI_TYPE_SINT16     8
+#define FFI_TYPE_UINT32     9
+#define FFI_TYPE_SINT32     10
+#define FFI_TYPE_UINT64     11
+#define FFI_TYPE_SINT64     12
+#define FFI_TYPE_STRUCT     13
+#define FFI_TYPE_POINTER    14
+
+/* This should always refer to the last type code (for sanity checks) */
+#define FFI_TYPE_LAST       FFI_TYPE_POINTER
+
+#define FFI_HIDDEN /* no idea what the original definition looks like ... */
+
+#ifdef __GNUC__
+#  define LIKELY(x) __builtin_expect(x, 1)
+#  define UNLIKELY(x) __builtin_expect(x, 0)
+#else
+#  define LIKELY(x) (x)
+#  define UNLIKELY(x) (x)
+#endif
+
+#define MAYBE_UNUSED
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
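The header above is the whole basic API surface: describe a call signature once with ffi_prep_cif, then push any matching function pointer through ffi_call. A minimal sketch of that flow, calling the C library's puts (illustration only, not part of the wrapped sources; the casts are needed only because this header declares the built-in ffi_type objects const):

#include <ffi.h>
#include <stdio.h>

int main(void)
{
  ffi_cif cif;
  ffi_type *argtypes[1] = { (ffi_type *) &ffi_type_pointer };
  const char *msg = "hello from libffi";
  void *argvalues[1] = { &msg };   /* ffi_call wants pointers to the argument values */
  ffi_arg rc;                      /* integral results are widened to ffi_arg */

  if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1,
                   (ffi_type *) &ffi_type_sint, argtypes) == FFI_OK)
    ffi_call(&cif, FFI_FN(puts), &rc, argvalues);
  return 0;
}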
diff --git a/lib/wrappers/libffi/common/ffi_common.h b/lib/wrappers/libffi/common/ffi_common.h
new file mode 100644
index 000000000..43fb83b48
--- /dev/null
+++ b/lib/wrappers/libffi/common/ffi_common.h
@@ -0,0 +1,77 @@
+/* -----------------------------------------------------------------------
+   ffi_common.h - Copyright (c) 1996  Red Hat, Inc.
+
+   Common internal definitions and macros. Only necessary for building
+   libffi.
+   ----------------------------------------------------------------------- */
+
+#ifndef FFI_COMMON_H
+#define FFI_COMMON_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <fficonfig.h>
+#include <malloc.h>
+
+/* Check for the existence of memcpy. */
+#if STDC_HEADERS
+# include <string.h>
+#else
+# ifndef HAVE_MEMCPY
+#  define memcpy(d, s, n) bcopy ((s), (d), (n))
+# endif
+#endif
+
+#if defined(FFI_DEBUG) 
+#include <stdio.h>
+#endif
+
+#ifdef FFI_DEBUG
+/*@exits@*/ void ffi_assert(/*@temp@*/ char *expr, /*@temp@*/ char *file, int line);
+void ffi_stop_here(void);
+void ffi_type_test(/*@temp@*/ /*@out@*/ ffi_type *a, /*@temp@*/ char *file, int line);
+
+#define FFI_ASSERT(x) ((x) ? (void)0 : ffi_assert(#x, __FILE__,__LINE__))
+#define FFI_ASSERT_AT(x, f, l) ((x) ? 0 : ffi_assert(#x, (f), (l)))
+#define FFI_ASSERT_VALID_TYPE(x) ffi_type_test (x, __FILE__, __LINE__)
+#else
+#define FFI_ASSERT(x) 
+#define FFI_ASSERT_AT(x, f, l)
+#define FFI_ASSERT_VALID_TYPE(x)
+#endif
+
+#define ALIGN(v, a)  (((((size_t) (v))-1) | ((a)-1))+1)
+
+/* Perform machine dependent cif processing */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif);
+
+/* Extended cif, used in callback from assembly routine */
+typedef struct
+{
+  /*@dependent@*/ ffi_cif *cif;
+  /*@dependent@*/ void *rvalue;
+  /*@dependent@*/ void **avalue;
+} extended_cif;
+
+/* Terse sized type definitions.  */
+typedef unsigned int UINT8  __attribute__((__mode__(__QI__)));
+typedef signed int   SINT8  __attribute__((__mode__(__QI__)));
+typedef unsigned int UINT16 __attribute__((__mode__(__HI__)));
+typedef signed int   SINT16 __attribute__((__mode__(__HI__)));
+typedef unsigned int UINT32 __attribute__((__mode__(__SI__)));
+typedef signed int   SINT32 __attribute__((__mode__(__SI__)));
+typedef unsigned int UINT64 __attribute__((__mode__(__DI__)));
+typedef signed int   SINT64 __attribute__((__mode__(__DI__)));
+
+typedef float FLOAT32;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+
diff --git a/lib/wrappers/libffi/common/fficonfig.h b/lib/wrappers/libffi/common/fficonfig.h
new file mode 100644
index 000000000..c14f653ec
--- /dev/null
+++ b/lib/wrappers/libffi/common/fficonfig.h
@@ -0,0 +1,96 @@
+/* fficonfig.h.  Originally created by configure, now hand_maintained for MSVC. */
+
+/* fficonfig.h.  Generated automatically by configure.  */
+/* fficonfig.h.in.  Generated automatically from configure.in by autoheader.  */
+
+/* Define this for MSVC, but not for mingw32! */
+#ifdef _MSC_VER
+#define __attribute__(x) /* */
+#endif
+#define alloca _alloca
+
+/*----------------------------------------------------------------*/
+
+/* Define if using alloca.c.  */
+/* #undef C_ALLOCA */
+
+/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems.
+   This function is required for alloca.c support on those systems.  */
+/* #undef CRAY_STACKSEG_END */
+
+/* Define if you have alloca, as a function or macro.  */
+#define HAVE_ALLOCA 1
+
+/* Define if you have <alloca.h> and it should be used (not on Ultrix).  */
+/* #define HAVE_ALLOCA_H 1 */
+
+/* If using the C implementation of alloca, define if you know the
+   direction of stack growth for your system; otherwise it will be
+   automatically deduced at run-time.
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown
+ */
+/* #undef STACK_DIRECTION */
+
+/* Define if you have the ANSI C header files.  */
+#define STDC_HEADERS 1
+
+/* Define if you have the memcpy function.  */
+#define HAVE_MEMCPY 1
+
+/* Define if read-only mmap of a plain file works. */
+//#define HAVE_MMAP_FILE 1
+
+/* Define if mmap of /dev/zero works. */
+//#define HAVE_MMAP_DEV_ZERO 1
+
+/* Define if mmap with MAP_ANON(YMOUS) works. */
+//#define HAVE_MMAP_ANON 1
+
+/* The number of bytes in type double */
+#define SIZEOF_DOUBLE 8
+
+/* The number of bytes in type long double */
+#define SIZEOF_LONG_DOUBLE 12
+
+/* Define if you have the long double type and it is bigger than a double */
+#define HAVE_LONG_DOUBLE 1
+
+/* whether byteorder is bigendian */
+/* #undef WORDS_BIGENDIAN */
+
+/* Define if the host machine stores words of multi-word integers in
+   big-endian order. */
+/* #undef HOST_WORDS_BIG_ENDIAN */
+
+/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */
+#define BYTEORDER 1234
+
+/* Define if your assembler and linker support unaligned PC relative relocs. */
+/* #undef HAVE_AS_SPARC_UA_PCREL */
+
+/* Define if your assembler supports .register. */
+/* #undef HAVE_AS_REGISTER_PSEUDO_OP */
+
+/* Define if .eh_frame sections should be read-only. */
+/* #undef HAVE_RO_EH_FRAME */
+
+/* Define to the flags needed for the .section .eh_frame directive. */
+/* #define EH_FRAME_FLAGS "aw" */
+
+/* Define to the flags needed for the .section .eh_frame directive. */
+/* #define EH_FRAME_FLAGS "aw" */
+
+/* Define this if you want extra debugging. */
+/* #undef FFI_DEBUG */
+
+/* Define this if you do not want support for aggregate types. */
+/* #undef FFI_NO_STRUCTS */
+
+/* Define this if you do not want support for the raw API. */
+/* #undef FFI_NO_RAW_API */
+
+/* Define this if you are using Purify and want to suppress spurious messages. */
+/* #undef USING_PURIFY */
+
diff --git a/lib/wrappers/libffi/common/ffitarget.h b/lib/wrappers/libffi/common/ffitarget.h
new file mode 100644
index 000000000..d8d60f2e7
--- /dev/null
+++ b/lib/wrappers/libffi/common/ffitarget.h
@@ -0,0 +1,150 @@
+/* -----------------------------------------------------------------*-C-*-
+   ffitarget.h - Copyright (c) 2012  Anthony Green
+                 Copyright (c) 1996-2003, 2010  Red Hat, Inc.
+                 Copyright (C) 2008  Free Software Foundation, Inc.
+
+   Target configuration macros for x86 and x86-64.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+
+   ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source.  Use ffi.h instead."
+#endif
+
+/* ---- System specific configurations ----------------------------------- */
+
+/* For code common to all platforms on x86 and x86_64. */
+#define X86_ANY
+
+#if (defined(WIN32) || defined(_WIN32) || defined(__WIN32__))
+#  if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64)
+#    define X86_64
+#    define X86_WIN64
+#  else
+#    define X86_32
+#    define X86_WIN32
+#  endif
+#endif
+
+#if defined (X86_64) && defined (__i386__)
+#undef X86_64
+#define X86
+#endif
+
+#ifdef X86_WIN64
+#define FFI_SIZEOF_ARG 8
+#define USE_BUILTIN_FFS 0 /* not yet implemented in mingw-64 */
+#endif
+
+/* ---- Generic type definitions ----------------------------------------- */
+
+#ifndef LIBFFI_ASM
+#ifdef X86_WIN64
+#ifdef _MSC_VER
+typedef unsigned __int64       ffi_arg;
+typedef __int64                ffi_sarg;
+#else
+typedef unsigned long long     ffi_arg;
+typedef long long              ffi_sarg;
+#endif
+#else
+#if defined __x86_64__ && defined __ILP32__
+#define FFI_SIZEOF_ARG 8
+#define FFI_SIZEOF_JAVA_RAW  4
+typedef unsigned long long     ffi_arg;
+typedef long long              ffi_sarg;
+#else
+typedef unsigned long          ffi_arg;
+typedef signed long            ffi_sarg;
+#endif
+#endif
+
+typedef enum ffi_abi {
+  FFI_FIRST_ABI = 0,
+
+  /* ---- Intel x86 Win32 ---------- */
+#ifdef X86_WIN32
+  FFI_SYSV,
+  FFI_STDCALL,
+  FFI_THISCALL,
+  FFI_FASTCALL,
+  FFI_MS_CDECL,
+  FFI_LAST_ABI,
+#ifdef _MSC_VER
+  FFI_DEFAULT_ABI = FFI_MS_CDECL
+#else
+  FFI_DEFAULT_ABI = FFI_SYSV
+#endif
+
+#elif defined(X86_WIN64)
+  FFI_WIN64,
+  FFI_LAST_ABI,
+  FFI_DEFAULT_ABI = FFI_WIN64
+
+#else
+  /* ---- Intel x86 and AMD x86-64 - */
+  FFI_SYSV,
+  FFI_UNIX64,   /* Unix variants all use the same ABI for x86-64  */
+  FFI_LAST_ABI,
+#if defined(__i386__) || defined(__i386)
+  FFI_DEFAULT_ABI = FFI_SYSV
+#else
+  FFI_DEFAULT_ABI = FFI_UNIX64
+#endif
+#endif
+} ffi_abi;
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)
+#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
+#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
+#define FFI_TYPE_MS_STRUCT       (FFI_TYPE_LAST + 4)
+
+#if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
+#define FFI_TRAMPOLINE_SIZE 24
+#define FFI_NATIVE_RAW_API 0
+#else
+#ifdef X86_WIN32
+#define FFI_TRAMPOLINE_SIZE 52
+#else
+#ifdef X86_WIN64
+#define FFI_TRAMPOLINE_SIZE 29
+#define FFI_NATIVE_RAW_API 0
+#define FFI_NO_RAW_API 1
+#else
+#define FFI_TRAMPOLINE_SIZE 10
+#endif
+#endif
+#ifndef X86_WIN64
+#define FFI_NATIVE_RAW_API 1	/* x86 has native raw api support */
+#endif
+#endif
+
+#endif
+
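The ffi_abi enum above is how a caller names a non-default calling convention; on X86_WIN32, for example, a __stdcall routine is described with FFI_STDCALL instead of FFI_DEFAULT_ABI. A hedged sketch (add2 is a made-up helper for illustration):

#include <ffi.h>

#ifdef X86_WIN32
static int __stdcall add2(int a, int b) { return a + b; }

int call_add2(void)
{
  ffi_cif cif;
  ffi_type *argtypes[2] = { (ffi_type *) &ffi_type_sint,
                            (ffi_type *) &ffi_type_sint };
  int a = 40, b = 2;
  void *argvalues[2] = { &a, &b };
  ffi_arg rc = 0;

  /* FFI_STDCALL only exists in the X86_WIN32 branch of the enum above;
     other targets would pass FFI_DEFAULT_ABI here. */
  if (ffi_prep_cif(&cif, FFI_STDCALL, 2,
                   (ffi_type *) &ffi_type_sint, argtypes) == FFI_OK)
    ffi_call(&cif, FFI_FN(add2), &rc, argvalues);
  return (int) rc;
}
#endif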
diff --git a/lib/wrappers/libffi/common/malloc_closure.c b/lib/wrappers/libffi/common/malloc_closure.c
new file mode 100644
index 000000000..5b33aa4ca
--- /dev/null
+++ b/lib/wrappers/libffi/common/malloc_closure.c
@@ -0,0 +1,110 @@
+#include <ffi.h>
+#ifdef MS_WIN32
+#include <windows.h>
+#else
+#include <sys/mman.h>
+#include <unistd.h>
+# if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#  define MAP_ANONYMOUS MAP_ANON
+# endif
+#endif
+#include "ctypes.h"
+
+/* BLOCKSIZE can be adjusted.  Larger blocksize will take a larger memory
+   overhead, but allocate fewer blocks from the system.  It may be that some
+   systems have a limit of how many mmap'd blocks can be open.
+*/
+
+#define BLOCKSIZE _pagesize
+
+/* #define MALLOC_CLOSURE_DEBUG */ /* enable for some debugging output */
+
+/******************************************************************/
+
+typedef union _tagITEM {
+    ffi_closure closure;
+    union _tagITEM *next;
+} ITEM;
+
+static ITEM *free_list;
+static int _pagesize;
+
+static void more_core(void)
+{
+    ITEM *item;
+    int count, i;
+
+/* determine the pagesize */
+#ifdef MS_WIN32
+    if (!_pagesize) {
+        SYSTEM_INFO systeminfo;
+        GetSystemInfo(&systeminfo);
+        _pagesize = systeminfo.dwPageSize;
+    }
+#else
+    if (!_pagesize) {
+#ifdef _SC_PAGESIZE
+        _pagesize = sysconf(_SC_PAGESIZE);
+#else
+        _pagesize = getpagesize();
+#endif
+    }
+#endif
+
+    /* calculate the number of nodes to allocate */
+    count = BLOCKSIZE / sizeof(ITEM);
+
+    /* allocate a memory block */
+#ifdef MS_WIN32
+    item = (ITEM *)VirtualAlloc(NULL,
+                                           count * sizeof(ITEM),
+                                           MEM_COMMIT,
+                                           PAGE_EXECUTE_READWRITE);
+    if (item == NULL)
+        return;
+#else
+    item = (ITEM *)mmap(NULL,
+                        count * sizeof(ITEM),
+                        PROT_READ | PROT_WRITE | PROT_EXEC,
+                        MAP_PRIVATE | MAP_ANONYMOUS,
+                        -1,
+                        0);
+    if (item == (void *)MAP_FAILED)
+        return;
+#endif
+
+#ifdef MALLOC_CLOSURE_DEBUG
+    printf("block at %p allocated (%d bytes), %d ITEMs\n",
+           item, count * sizeof(ITEM), count);
+#endif
+    /* put them into the free list */
+    for (i = 0; i < count; ++i) {
+        item->next = free_list;
+        free_list = item;
+        ++item;
+    }
+}
+
+/******************************************************************/
+
+/* put the item back into the free list */
+void ffi_closure_free(void *p)
+{
+    ITEM *item = (ITEM *)p;
+    item->next = free_list;
+    free_list = item;
+}
+
+/* return one item from the free list, allocating more if needed */
+void *ffi_closure_alloc(size_t ignored, void** codeloc)
+{
+    ITEM *item;
+    if (!free_list)
+        more_core();
+    if (!free_list)
+        return NULL;
+    item = free_list;
+    free_list = item->next;
+    *codeloc = (void *)item;
+    return (void *)item;
+}
diff --git a/lib/wrappers/libffi/common/raw_api.c b/lib/wrappers/libffi/common/raw_api.c
new file mode 100644
index 000000000..ce21372e2
--- /dev/null
+++ b/lib/wrappers/libffi/common/raw_api.c
@@ -0,0 +1,254 @@
+/* -----------------------------------------------------------------------
+   raw_api.c - Copyright (c) 1999, 2008  Red Hat, Inc.
+
+   Author: Kresten Krab Thorup <krab@gnu.org>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+/* This file defines generic functions for use with the raw api. */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#if !FFI_NO_RAW_API
+
+size_t
+ffi_raw_size (ffi_cif *cif)
+{
+  size_t result = 0;
+  int i;
+
+  ffi_type **at = cif->arg_types;
+
+  for (i = cif->nargs-1; i >= 0; i--, at++)
+    {
+#if !FFI_NO_STRUCTS
+      if ((*at)->type == FFI_TYPE_STRUCT)
+	result += ALIGN (sizeof (void*), FFI_SIZEOF_ARG);
+      else
+#endif
+	result += ALIGN ((*at)->size, FFI_SIZEOF_ARG);
+    }
+
+  return result;
+}
+
+
+void
+ffi_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args)
+{
+  unsigned i;
+  ffi_type **tp = cif->arg_types;
+
+#if WORDS_BIGENDIAN
+
+  for (i = 0; i < cif->nargs; i++, tp++, args++)
+    {	  
+      switch ((*tp)->type)
+	{
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT8:
+	  *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 1);
+	  break;
+	  
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT16:
+	  *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 2);
+	  break;
+
+#if FFI_SIZEOF_ARG >= 4	  
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT32:
+	  *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 4);
+	  break;
+#endif
+	
+#if !FFI_NO_STRUCTS  
+	case FFI_TYPE_STRUCT:
+	  *args = (raw++)->ptr;
+	  break;
+#endif
+
+	case FFI_TYPE_POINTER:
+	  *args = (void*) &(raw++)->ptr;
+	  break;
+	  
+	default:
+	  *args = raw;
+	  raw += ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
+	}
+    }
+
+#else /* WORDS_BIGENDIAN */
+
+#if !PDP
+
+  /* then assume little endian */
+  for (i = 0; i < cif->nargs; i++, tp++, args++)
+    {	  
+#if !FFI_NO_STRUCTS
+      if ((*tp)->type == FFI_TYPE_STRUCT)
+	{
+	  *args = (raw++)->ptr;
+	}
+      else
+#endif
+	{
+	  *args = (void*) raw;
+	  raw += ALIGN ((*tp)->size, sizeof (void*)) / sizeof (void*);
+	}
+    }
+
+#else
+#error "pdp endian not supported"
+#endif /* ! PDP */
+
+#endif /* WORDS_BIGENDIAN */
+}
+
+void
+ffi_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_raw *raw)
+{
+  unsigned i;
+  ffi_type **tp = cif->arg_types;
+
+  for (i = 0; i < cif->nargs; i++, tp++, args++)
+    {	  
+      switch ((*tp)->type)
+	{
+	case FFI_TYPE_UINT8:
+	  (raw++)->uint = *(UINT8*) (*args);
+	  break;
+
+	case FFI_TYPE_SINT8:
+	  (raw++)->sint = *(SINT8*) (*args);
+	  break;
+
+	case FFI_TYPE_UINT16:
+	  (raw++)->uint = *(UINT16*) (*args);
+	  break;
+
+	case FFI_TYPE_SINT16:
+	  (raw++)->sint = *(SINT16*) (*args);
+	  break;
+
+#if FFI_SIZEOF_ARG >= 4
+	case FFI_TYPE_UINT32:
+	  (raw++)->uint = *(UINT32*) (*args);
+	  break;
+
+	case FFI_TYPE_SINT32:
+	  (raw++)->sint = *(SINT32*) (*args);
+	  break;
+#endif
+
+#if !FFI_NO_STRUCTS
+	case FFI_TYPE_STRUCT:
+	  (raw++)->ptr = *args;
+	  break;
+#endif
+
+	case FFI_TYPE_POINTER:
+	  (raw++)->ptr = **(void***) args;
+	  break;
+
+	default:
+	  memcpy ((void*) raw->data, (void*)*args, (*tp)->size);
+	  raw += ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
+	}
+    }
+}
+
+#if !FFI_NATIVE_RAW_API
+
+
+/* This is a generic definition of ffi_raw_call, to be used if the
+ * native system does not provide a machine-specific implementation.
+ * Having this, allows code to be written for the raw API, without
+ * the need for system-specific code to handle input in that format;
+ * these following couple of functions will handle the translation forth
+ * and back automatically. */
+
+void ffi_raw_call (ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *raw)
+{
+  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
+  ffi_raw_to_ptrarray (cif, raw, avalue);
+  ffi_call (cif, fn, rvalue, avalue);
+}
+
+#if FFI_CLOSURES		/* base system provides closures */
+
+static void
+ffi_translate_args (ffi_cif *cif, void *rvalue,
+		    void **avalue, void *user_data)
+{
+  ffi_raw *raw = (ffi_raw*)alloca (ffi_raw_size (cif));
+  ffi_raw_closure *cl = (ffi_raw_closure*)user_data;
+
+  ffi_ptrarray_to_raw (cif, avalue, raw);
+  (*cl->fun) (cif, rvalue, raw, cl->user_data);
+}
+
+ffi_status
+ffi_prep_raw_closure_loc (ffi_raw_closure* cl,
+			  ffi_cif *cif,
+			  void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+			  void *user_data,
+			  void *codeloc)
+{
+  ffi_status status;
+
+  status = ffi_prep_closure_loc ((ffi_closure*) cl,
+				 cif,
+				 &ffi_translate_args,
+				 codeloc,
+				 codeloc);
+  if (status == FFI_OK)
+    {
+      cl->fun       = fun;
+      cl->user_data = user_data;
+    }
+
+  return status;
+}
+
+#endif /* FFI_CLOSURES */
+#endif /* !FFI_NATIVE_RAW_API */
+
+#if FFI_CLOSURES
+
+/* Again, here is the generic version of ffi_prep_raw_closure, which
+ * will install an intermediate "hub" for translation of arguments from
+ * the pointer-array format, to the raw format */
+
+ffi_status
+ffi_prep_raw_closure (ffi_raw_closure* cl,
+		      ffi_cif *cif,
+		      void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+		      void *user_data)
+{
+  return ffi_prep_raw_closure_loc (cl, cif, fun, user_data, cl);
+}
+
+#endif /* FFI_CLOSURES */
+
+#endif /* !FFI_NO_RAW_API */
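With the raw API above, the arguments are packed value-by-value into an ffi_raw array rather than passed as an array of pointers, which is what ffi_ptrarray_to_raw/ffi_raw_to_ptrarray translate between. A rough sketch (add is a made-up helper for illustration):

#include <ffi.h>
#include <stdio.h>

static int add(int a, int b) { return a + b; }

int main(void)
{
  ffi_cif cif;
  ffi_type *argtypes[2] = { (ffi_type *) &ffi_type_sint,
                            (ffi_type *) &ffi_type_sint };
  ffi_raw raw[2];
  ffi_arg rc = 0;

  if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2,
                   (ffi_type *) &ffi_type_sint, argtypes) != FFI_OK)
    return 1;
  raw[0].sint = 40;   /* argument values, not pointers to them */
  raw[1].sint = 2;
  ffi_raw_call(&cif, FFI_FN(add), &rc, raw);
  printf("%ld\n", (long) rc);
  return 0;
}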
diff --git a/lib/wrappers/libffi/gcc/closures.c b/lib/wrappers/libffi/gcc/closures.c
new file mode 100644
index 000000000..c0ee06891
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/closures.c
@@ -0,0 +1,627 @@
+/* -----------------------------------------------------------------------
+   closures.c - Copyright (c) 2007, 2009, 2010  Red Hat, Inc.
+                Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc
+                Copyright (c) 2011 Plausible Labs Cooperative, Inc.
+
+   Code to allocate and deallocate memory for closures.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#if defined __linux__ && !defined _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#if !FFI_MMAP_EXEC_WRIT && !FFI_EXEC_TRAMPOLINE_TABLE
+# if __gnu_linux__
+/* This macro indicates it may be forbidden to map anonymous memory
+   with both write and execute permission.  Code compiled when this
+   option is defined will attempt to map such pages once, but if it
+   fails, it falls back to creating a temporary file in a writable and
+   executable filesystem and mapping pages from it into separate
+   locations in the virtual memory space, one location writable and
+   another executable.  */
+#  define FFI_MMAP_EXEC_WRIT 1
+#  define HAVE_MNTENT 1
+# endif
+# if defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)
+/* Windows systems may have Data Execution Protection (DEP) enabled, 
+   which requires the use of VirtualMalloc/VirtualFree to alloc/free
+   executable memory. */
+#  define FFI_MMAP_EXEC_WRIT 1
+# endif
+#endif
+
+#if FFI_MMAP_EXEC_WRIT && !defined FFI_MMAP_EXEC_SELINUX
+# ifdef __linux__
+/* When defined to 1 check for SELinux and if SELinux is active,
+   don't attempt PROT_EXEC|PROT_WRITE mapping at all, as that
+   might cause audit messages.  */
+#  define FFI_MMAP_EXEC_SELINUX 1
+# endif
+#endif
+
+#if FFI_CLOSURES
+
+# if FFI_EXEC_TRAMPOLINE_TABLE
+
+// Per-target implementation; it's unclear what can reasonably be shared
+// between two OS/architecture implementations.
+
+# elif FFI_MMAP_EXEC_WRIT /* !FFI_EXEC_TRAMPOLINE_TABLE */
+
+#define USE_LOCKS 1
+#define USE_DL_PREFIX 1
+#ifdef __GNUC__
+#ifndef USE_BUILTIN_FFS
+#define USE_BUILTIN_FFS 1
+#endif
+#endif
+
+/* We need to use mmap, not sbrk.  */
+#define HAVE_MORECORE 0
+
+/* We could, in theory, support mremap, but it wouldn't buy us anything.  */
+#define HAVE_MREMAP 0
+
+/* We have no use for this, so save some code and data.  */
+#define NO_MALLINFO 1
+
+/* We need all allocations to be in regular segments, otherwise we
+   lose track of the corresponding code address.  */
+#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
+
+/* Don't allocate more than a page unless needed.  */
+#define DEFAULT_GRANULARITY ((size_t)malloc_getpagesize)
+
+#if FFI_CLOSURE_TEST
+/* Don't release single pages, to avoid a worst-case scenario of
+   continuously allocating and releasing single pages, but release
+   pairs of pages, which should do just as well given that allocations
+   are likely to be small.  */
+#define DEFAULT_TRIM_THRESHOLD ((size_t)malloc_getpagesize)
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#ifndef _MSC_VER
+#include <unistd.h>
+#endif
+#include <string.h>
+#include <stdio.h>
+#if !defined(X86_WIN32) && !defined(X86_WIN64)
+#ifdef HAVE_MNTENT
+#include <mntent.h>
+#endif /* HAVE_MNTENT */
+#include <sys/param.h>
+#include <pthread.h>
+
+/* We don't want sys/mman.h to be included after we redefine mmap and
+   dlmunmap.  */
+#include <sys/mman.h>
+#define LACKS_SYS_MMAN_H 1
+
+#if FFI_MMAP_EXEC_SELINUX
+#include <sys/statfs.h>
+#include <stdlib.h>
+
+static int selinux_enabled = -1;
+
+static int
+selinux_enabled_check (void)
+{
+  struct statfs sfs;
+  FILE *f;
+  char *buf = NULL;
+  size_t len = 0;
+
+  if (statfs ("/selinux", &sfs) >= 0
+      && (unsigned int) sfs.f_type == 0xf97cff8cU)
+    return 1;
+  f = fopen ("/proc/mounts", "r");
+  if (f == NULL)
+    return 0;
+  while (getline (&buf, &len, f) >= 0)
+    {
+      char *p = strchr (buf, ' ');
+      if (p == NULL)
+        break;
+      p = strchr (p + 1, ' ');
+      if (p == NULL)
+        break;
+      if (strncmp (p + 1, "selinuxfs ", 10) == 0)
+        {
+          free (buf);
+          fclose (f);
+          return 1;
+        }
+    }
+  free (buf);
+  fclose (f);
+  return 0;
+}
+
+#define is_selinux_enabled() (selinux_enabled >= 0 ? selinux_enabled \
+			      : (selinux_enabled = selinux_enabled_check ()))
+
+#else
+
+#define is_selinux_enabled() 0
+
+#endif /* !FFI_MMAP_EXEC_SELINUX */
+
+/* On PaX-enabled kernels that have MPROTECT enabled we can't use PROT_EXEC. */
+#ifdef FFI_MMAP_EXEC_EMUTRAMP_PAX
+#include <stdlib.h>
+
+static int emutramp_enabled = -1;
+
+static int
+emutramp_enabled_check (void)
+{
+  if (getenv ("FFI_DISABLE_EMUTRAMP") == NULL)
+    return 1;
+  else
+    return 0;
+}
+
+#define is_emutramp_enabled() (emutramp_enabled >= 0 ? emutramp_enabled \
+                               : (emutramp_enabled = emutramp_enabled_check ()))
+#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */
+
+#elif defined (__CYGWIN__) || defined(__INTERIX)
+
+#include <sys/mman.h>
+
+/* Cygwin is Linux-like, but not quite that Linux-like.  */
+#define is_selinux_enabled() 0
+
+#endif /* !defined(X86_WIN32) && !defined(X86_WIN64) */
+
+#ifndef FFI_MMAP_EXEC_EMUTRAMP_PAX
+#define is_emutramp_enabled() 0
+#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */
+
+#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX)
+/* Use these for mmap and munmap within dlmalloc.c.  */
+static void *dlmmap(void *, size_t, int, int, int, off_t);
+static int dlmunmap(void *, size_t);
+#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */
+
+#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX)
+
+/* A mutex used to synchronize access to *exec* variables in this file.  */
+static pthread_mutex_t open_temp_exec_file_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* A file descriptor of a temporary file from which we'll map
+   executable pages.  */
+static int execfd = -1;
+
+/* The amount of space already allocated from the temporary file.  */
+static size_t execsize = 0;
+
+/* Open a temporary file name, and immediately unlink it.  */
+static int
+open_temp_exec_file_name (char *name)
+{
+  int fd = mkstemp (name);
+
+  if (fd != -1)
+    unlink (name);
+
+  return fd;
+}
+
+/* Open a temporary file in the named directory.  */
+static int
+open_temp_exec_file_dir (const char *dir)
+{
+  static const char suffix[] = "/ffiXXXXXX";
+  int lendir = strlen (dir);
+  char *tempname = __builtin_alloca (lendir + sizeof (suffix));
+
+  if (!tempname)
+    return -1;
+
+  memcpy (tempname, dir, lendir);
+  memcpy (tempname + lendir, suffix, sizeof (suffix));
+
+  return open_temp_exec_file_name (tempname);
+}
+
+/* Open a temporary file in the directory in the named environment
+   variable.  */
+static int
+open_temp_exec_file_env (const char *envvar)
+{
+  const char *value = getenv (envvar);
+
+  if (!value)
+    return -1;
+
+  return open_temp_exec_file_dir (value);
+}
+
+#ifdef HAVE_MNTENT
+/* Open a temporary file in an executable and writable mount point
+   listed in the mounts file.  Subsequent calls with the same mounts
+   keep searching for mount points in the same file.  Providing NULL
+   as the mounts file closes the file.  */
+static int
+open_temp_exec_file_mnt (const char *mounts)
+{
+  static const char *last_mounts;
+  static FILE *last_mntent;
+
+  if (mounts != last_mounts)
+    {
+      if (last_mntent)
+	endmntent (last_mntent);
+
+      last_mounts = mounts;
+
+      if (mounts)
+	last_mntent = setmntent (mounts, "r");
+      else
+	last_mntent = NULL;
+    }
+
+  if (!last_mntent)
+    return -1;
+
+  for (;;)
+    {
+      int fd;
+      struct mntent mnt;
+      char buf[MAXPATHLEN * 3];
+
+      if (getmntent_r (last_mntent, &mnt, buf, sizeof (buf)) == NULL)
+	return -1;
+
+      if (hasmntopt (&mnt, "ro")
+	  || hasmntopt (&mnt, "noexec")
+	  || access (mnt.mnt_dir, W_OK))
+	continue;
+
+      fd = open_temp_exec_file_dir (mnt.mnt_dir);
+
+      if (fd != -1)
+	return fd;
+    }
+}
+#endif /* HAVE_MNTENT */
+
+/* Instructions to look for a location to hold a temporary file that
+   can be mapped in for execution.  */
+static struct
+{
+  int (*func)(const char *);
+  const char *arg;
+  int repeat;
+} open_temp_exec_file_opts[] = {
+  { open_temp_exec_file_env, "TMPDIR", 0 },
+  { open_temp_exec_file_dir, "/tmp", 0 },
+  { open_temp_exec_file_dir, "/var/tmp", 0 },
+  { open_temp_exec_file_dir, "/dev/shm", 0 },
+  { open_temp_exec_file_env, "HOME", 0 },
+#ifdef HAVE_MNTENT
+  { open_temp_exec_file_mnt, "/etc/mtab", 1 },
+  { open_temp_exec_file_mnt, "/proc/mounts", 1 },
+#endif /* HAVE_MNTENT */
+};
+
+/* Current index into open_temp_exec_file_opts.  */
+static int open_temp_exec_file_opts_idx = 0;
+
+/* Reset the current multi-call func, then advance to the next entry.
+   If we're at the last entry, go back to the first and return nonzero;
+   otherwise return zero.  */
+static int
+open_temp_exec_file_opts_next (void)
+{
+  if (open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat)
+    open_temp_exec_file_opts[open_temp_exec_file_opts_idx].func (NULL);
+
+  open_temp_exec_file_opts_idx++;
+  if (open_temp_exec_file_opts_idx
+      == (sizeof (open_temp_exec_file_opts)
+	  / sizeof (*open_temp_exec_file_opts)))
+    {
+      open_temp_exec_file_opts_idx = 0;
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Return a file descriptor of a temporary zero-sized file in a
+   writable and executable filesystem.  */
+static int
+open_temp_exec_file (void)
+{
+  int fd;
+
+  do
+    {
+      fd = open_temp_exec_file_opts[open_temp_exec_file_opts_idx].func
+	(open_temp_exec_file_opts[open_temp_exec_file_opts_idx].arg);
+
+      if (!open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat
+	  || fd == -1)
+	{
+	  if (open_temp_exec_file_opts_next ())
+	    break;
+	}
+    }
+  while (fd == -1);
+
+  return fd;
+}
+
+/* Map in a chunk of memory from the temporary exec file into separate
+   locations in the virtual memory address space, one writable and one
+   executable.  Returns the address of the writable portion, after
+   storing an offset to the corresponding executable portion at the
+   last word of the requested chunk.  */
+static void *
+dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset)
+{
+  void *ptr;
+
+  if (execfd == -1)
+    {
+      open_temp_exec_file_opts_idx = 0;
+    retry_open:
+      execfd = open_temp_exec_file ();
+      if (execfd == -1)
+	return MFAIL;
+    }
+
+  offset = execsize;
+
+  if (ftruncate (execfd, offset + length))
+    return MFAIL;
+
+  flags &= ~(MAP_PRIVATE | MAP_ANONYMOUS);
+  flags |= MAP_SHARED;
+
+  ptr = mmap (NULL, length, (prot & ~PROT_WRITE) | PROT_EXEC,
+	      flags, execfd, offset);
+  if (ptr == MFAIL)
+    {
+      if (!offset)
+	{
+	  close (execfd);
+	  goto retry_open;
+	}
+      ftruncate (execfd, offset);
+      return MFAIL;
+    }
+  else if (!offset
+	   && open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat)
+    open_temp_exec_file_opts_next ();
+
+  start = mmap (start, length, prot, flags, execfd, offset);
+
+  if (start == MFAIL)
+    {
+      munmap (ptr, length);
+      ftruncate (execfd, offset);
+      return start;
+    }
+
+  mmap_exec_offset ((char *)start, length) = (char*)ptr - (char*)start;
+
+  execsize += length;
+
+  return start;
+}
+
+/* Map in a writable and executable chunk of memory if possible.
+   Failing that, fall back to dlmmap_locked.  */
+static void *
+dlmmap (void *start, size_t length, int prot,
+	int flags, int fd, off_t offset)
+{
+  void *ptr;
+
+  assert (start == NULL && length % malloc_getpagesize == 0
+	  && prot == (PROT_READ | PROT_WRITE)
+	  && flags == (MAP_PRIVATE | MAP_ANONYMOUS)
+	  && fd == -1 && offset == 0);
+
+#if FFI_CLOSURE_TEST
+  printf ("mapping in %zi\n", length);
+#endif
+
+  if (execfd == -1 && is_emutramp_enabled ())
+    {
+      ptr = mmap (start, length, prot & ~PROT_EXEC, flags, fd, offset);
+      return ptr;
+    }
+
+  if (execfd == -1 && !is_selinux_enabled ())
+    {
+      ptr = mmap (start, length, prot | PROT_EXEC, flags, fd, offset);
+
+      if (ptr != MFAIL || (errno != EPERM && errno != EACCES))
+	/* Cool, no need to mess with separate segments.  */
+	return ptr;
+
+      /* If MREMAP_DUP is ever introduced and implemented, try mmap
+	 with ((prot & ~PROT_WRITE) | PROT_EXEC) and mremap with
+	 MREMAP_DUP and prot at this point.  */
+    }
+
+  if (execsize == 0 || execfd == -1)
+    {
+      pthread_mutex_lock (&open_temp_exec_file_mutex);
+      ptr = dlmmap_locked (start, length, prot, flags, offset);
+      pthread_mutex_unlock (&open_temp_exec_file_mutex);
+
+      return ptr;
+    }
+
+  return dlmmap_locked (start, length, prot, flags, offset);
+}
+
+/* Release memory at the given address, as well as the corresponding
+   executable page if it's separate.  */
+static int
+dlmunmap (void *start, size_t length)
+{
+  /* We don't bother decreasing execsize or truncating the file, since
+     we can't quite tell whether we're unmapping the end of the file.
+     We don't expect frequent deallocation anyway.  If we did, we
+     could locate pages in the file by writing to the pages being
+     deallocated and checking that the file contents change.
+     Yuck.  */
+  msegmentptr seg = segment_holding (gm, start);
+  void *code;
+
+#if FFI_CLOSURE_TEST
+  printf ("unmapping %zi\n", length);
+#endif
+
+  if (seg && (code = add_segment_exec_offset (start, seg)) != start)
+    {
+      int ret = munmap (code, length);
+      if (ret)
+	return ret;
+    }
+
+  return munmap (start, length);
+}
+
+#if FFI_CLOSURE_FREE_CODE
+/* Return segment holding given code address.  */
+static msegmentptr
+segment_holding_code (mstate m, char* addr)
+{
+  msegmentptr sp = &m->seg;
+  for (;;) {
+    if (addr >= add_segment_exec_offset (sp->base, sp)
+	&& addr < add_segment_exec_offset (sp->base, sp) + sp->size)
+      return sp;
+    if ((sp = sp->next) == 0)
+      return 0;
+  }
+}
+#endif
+
+#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */
+
+/* Allocate a chunk of memory with the given size.  Returns a pointer
+   to the writable address, and sets *CODE to the corresponding
+   executable virtual address.  */
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
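+  /* In this vendored copy the dlmalloc/mmap machinery below is disabled
+     (kept under "#if 0"); plain malloc is used instead, so the writable
+     and the executable address are one and the same.  */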
+  *code = malloc(size);
+  return *code;
+#if 0
+  void *ptr;
+
+  if (!code)
+    return NULL;
+
+  ptr = dlmalloc (size);
+
+  if (ptr)
+    {
+      msegmentptr seg = segment_holding (gm, ptr);
+
+      *code = add_segment_exec_offset (ptr, seg);
+    }
+
+  return ptr;
+#endif
+}
+
+/* Release a chunk of memory allocated with ffi_closure_alloc.  If
+   FFI_CLOSURE_FREE_CODE is nonzero, the given address can be either
+   the writable or the executable address.  Otherwise, only the
+   writable address can be provided here.  */
+void
+ffi_closure_free (void *ptr)
+{
+#if 0
+#if FFI_CLOSURE_FREE_CODE
+  msegmentptr seg = segment_holding_code(gm, ptr);
+
+  if (seg)
+    ptr = sub_segment_exec_offset(ptr, seg);
+#endif
+
+  dlfree(ptr);
+#endif
+  free(ptr);
+}
+
+
+#if FFI_CLOSURE_TEST
+/* Do some internal sanity testing to make sure allocation and
+   deallocation of pages are working as intended.  */
+int main ()
+{
+  void *p[3];
+#define GET(idx, len) do { p[idx] = dlmalloc (len); printf ("allocated %zi for p[%i]\n", (len), (idx)); } while (0)
+#define PUT(idx) do { printf ("freeing p[%i]\n", (idx)); dlfree (p[idx]); } while (0)
+  GET (0, malloc_getpagesize / 2);
+  GET (1, 2 * malloc_getpagesize - 64 * sizeof (void*));
+  PUT (1);
+  GET (1, 2 * malloc_getpagesize);
+  GET (2, malloc_getpagesize / 2);
+  PUT (1);
+  PUT (0);
+  PUT (2);
+  return 0;
+}
+#endif /* FFI_CLOSURE_TEST */
+# else /* ! FFI_MMAP_EXEC_WRIT */
+
+/* On many systems, memory returned by malloc is writable and
+   executable, so just use it.  */
+
+#include <stdlib.h>
+
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+  if (!code)
+    return NULL;
+
+  return *code = malloc (size);
+}
+
+void
+ffi_closure_free (void *ptr)
+{
+  free (ptr);
+}
+
+# endif /* ! FFI_MMAP_EXEC_WRIT */
+#endif /* FFI_CLOSURES */
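For orientation, a minimal usage sketch of this allocator together with the closure API defined below in ffi.c/ffi64.c. The handler and the calling code are hypothetical and not part of this commit; in this vendored copy ffi_closure_alloc simply mallocs, so the writable and executable addresses coincide.

#include <ffi.h>
#include <stdio.h>

/* Hypothetical closure handler: returns its single int argument plus one. */
static void handler(ffi_cif *cif, void *ret, void **args, void *user_data)
{
  (void)cif; (void)user_data;
  *(ffi_arg *)ret = *(int *)args[0] + 1;
}

static int closure_sketch(void)
{
  ffi_cif cif;
  ffi_type *argtypes[1] = { &ffi_type_sint32 };
  void *code;  /* executable entry point filled in by ffi_closure_alloc */
  ffi_closure *cl = ffi_closure_alloc(sizeof(ffi_closure), &code);

  if (cl == NULL)
    return -1;
  if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, &ffi_type_sint32, argtypes) != FFI_OK
      || ffi_prep_closure_loc(cl, &cif, handler, NULL, code) != FFI_OK)
    {
      ffi_closure_free(cl);
      return -1;
    }

  /* Call through the generated trampoline. */
  printf("%d\n", ((int (*)(int))code)(41));   /* prints 42 */
  ffi_closure_free(cl);
  return 0;
}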
diff --git a/lib/wrappers/libffi/gcc/ffi.c b/lib/wrappers/libffi/gcc/ffi.c
new file mode 100644
index 000000000..0600414d4
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/ffi.c
@@ -0,0 +1,841 @@
+/* -----------------------------------------------------------------------
+   ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008  Red Hat, Inc.
+           Copyright (c) 2002  Ranjit Mathew
+           Copyright (c) 2002  Bo Thorsen
+           Copyright (c) 2002  Roger Sayle
+           Copyright (C) 2008, 2010  Free Software Foundation, Inc.
+
+   x86 Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#if !defined(__x86_64__) || defined(_WIN64)
+
+#ifdef _WIN64
+#include <windows.h>
+#endif
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+/* ffi_prep_args is called by the assembly routine once stack space
+   has been allocated for the function's arguments */
+
+void ffi_prep_args(char *stack, extended_cif *ecif)
+{
+  register unsigned int i;
+  register void **p_argv;
+  register char *argp;
+  register ffi_type **p_arg;
+#ifdef X86_WIN32
+  size_t p_stack_args[2];
+  void *p_stack_data[2];
+  char *argp2 = stack;
+  int stack_args_count = 0;
+  int cabi = ecif->cif->abi;
+#endif
+
+  argp = stack;
+
+  if ((ecif->cif->flags == FFI_TYPE_STRUCT
+       || ecif->cif->flags == FFI_TYPE_MS_STRUCT)
+#ifdef X86_WIN64
+      && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2
+          && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8)
+#endif
+      )
+    {
+      *(void **) argp = ecif->rvalue;
+#ifdef X86_WIN32
+      /* For fastcall/thiscall this is the first register-passed
+         argument.  */
+      if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL)
+	{
+	  p_stack_args[stack_args_count] = sizeof (void*);
+	  p_stack_data[stack_args_count] = argp;
+	  ++stack_args_count;
+	}
+#endif
+      argp += sizeof(void*);
+    }
+
+  p_argv = ecif->avalue;
+
+  for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
+       i != 0;
+       i--, p_arg++)
+    {
+      size_t z;
+
+      /* Align if necessary */
+      if ((sizeof(void*) - 1) & (size_t) argp)
+        argp = (char *) ALIGN(argp, sizeof(void*));
+
+      z = (*p_arg)->size;
+#ifdef X86_WIN64
+      if (z > sizeof(ffi_arg)
+          || ((*p_arg)->type == FFI_TYPE_STRUCT
+              && (z != 1 && z != 2 && z != 4 && z != 8))
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+          || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
+#endif
+          )
+        {
+          z = sizeof(ffi_arg);
+          *(void **)argp = *p_argv;
+        }
+      else if ((*p_arg)->type == FFI_TYPE_FLOAT)
+        {
+          memcpy(argp, *p_argv, z);
+        }
+      else
+#endif
+      if (z < sizeof(ffi_arg))
+        {
+          z = sizeof(ffi_arg);
+          switch ((*p_arg)->type)
+            {
+            case FFI_TYPE_SINT8:
+              *(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_UINT8:
+              *(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_SINT16:
+              *(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_UINT16:
+              *(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_SINT32:
+              *(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_UINT32:
+              *(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_STRUCT:
+              *(ffi_arg *) argp = *(ffi_arg *)(* p_argv);
+              break;
+
+            default:
+              FFI_ASSERT(0);
+            }
+        }
+      else
+        {
+          memcpy(argp, *p_argv, z);
+        }
+
+#ifdef X86_WIN32
+    /* For the thiscall/fastcall conventions the register-passed arguments
+       are the first two non-floating-point arguments with a size
+       smaller than or equal to sizeof (void*).  */
+    if ((cabi == FFI_THISCALL && stack_args_count < 1)
+        || (cabi == FFI_FASTCALL && stack_args_count < 2))
+      {
+	if (z <= 4
+	    && ((*p_arg)->type != FFI_TYPE_FLOAT
+	        && (*p_arg)->type != FFI_TYPE_STRUCT))
+	  {
+	    p_stack_args[stack_args_count] = z;
+	    p_stack_data[stack_args_count] = argp;
+	    ++stack_args_count;
+	  }
+      }
+#endif
+      p_argv++;
+#ifdef X86_WIN64
+      argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+#else
+      argp += z;
+#endif
+    }
+
+#ifdef X86_WIN32
+  /* We need to move the register-passed arguments for thiscall/fastcall
+     on top of stack, so that those can be moved to registers ecx/edx by
+     call-handler.  */
+  if (stack_args_count > 0)
+    {
+      size_t zz = (p_stack_args[0] + 3) & ~3;
+      char *h;
+
+      /* Move first argument to top-stack position.  */
+      if (p_stack_data[0] != argp2)
+	{
+	  h = alloca (zz + 1);
+	  memcpy (h, p_stack_data[0], zz);
+	  memmove (argp2 + zz, argp2,
+	           (size_t) ((char *) p_stack_data[0] - (char*)argp2));
+	  memcpy (argp2, h, zz);
+	}
+
+      argp2 += zz;
+      --stack_args_count;
+      if (zz > 4)
+	stack_args_count = 0;
+
+      /* If we have a second argument, then move it on top
+         after the first one.  */
+      if (stack_args_count > 0 && p_stack_data[1] != argp2)
+	{
+	  zz = p_stack_args[1];
+	  zz = (zz + 3) & ~3;
+	  h = alloca (zz + 1);
+	  memcpy (h, p_stack_data[1], zz);
+	  memmove (argp2 + zz, argp2, (size_t) ((char*) p_stack_data[1] - (char*)argp2));
+	  memcpy (argp2, h, zz);
+	}
+    }
+#endif
+  return;
+}
+
+/* Perform machine dependent cif processing */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+  unsigned int i;
+  ffi_type **ptr;
+
+  /* Set the return type flag */
+  switch (cif->rtype->type)
+    {
+    case FFI_TYPE_VOID:
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_SINT16:
+#ifdef X86_WIN64
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_SINT32:
+#endif
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_DOUBLE:
+#ifndef X86_WIN64
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+    case FFI_TYPE_LONGDOUBLE:
+#endif
+#endif
+      cif->flags = (unsigned) cif->rtype->type;
+      break;
+
+    case FFI_TYPE_UINT64:
+#ifdef X86_WIN64
+    case FFI_TYPE_POINTER:
+#endif
+      cif->flags = FFI_TYPE_SINT64;
+      break;
+
+    case FFI_TYPE_STRUCT:
+#ifndef X86
+      if (cif->rtype->size == 1)
+        {
+          cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */
+        }
+      else if (cif->rtype->size == 2)
+        {
+          cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */
+        }
+      else if (cif->rtype->size == 4)
+        {
+#ifdef X86_WIN64
+          cif->flags = FFI_TYPE_SMALL_STRUCT_4B;
+#else
+          cif->flags = FFI_TYPE_INT; /* same as int type */
+#endif
+        }
+      else if (cif->rtype->size == 8)
+        {
+          cif->flags = FFI_TYPE_SINT64; /* same as int64 type */
+        }
+      else
+#endif
+        {
+#ifdef X86_WIN32
+          if (cif->abi == FFI_MS_CDECL)
+            cif->flags = FFI_TYPE_MS_STRUCT;
+          else
+#endif
+            cif->flags = FFI_TYPE_STRUCT;
+          /* allocate space for return value pointer */
+          cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG);
+        }
+      break;
+
+    default:
+#ifdef X86_WIN64
+      cif->flags = FFI_TYPE_SINT64;
+      break;
+    case FFI_TYPE_INT:
+      cif->flags = FFI_TYPE_SINT32;
+#else
+      cif->flags = FFI_TYPE_INT;
+#endif
+      break;
+    }
+
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+    {
+      if (((*ptr)->alignment - 1) & cif->bytes)
+        cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment);
+      cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG);
+    }
+
+#ifdef X86_WIN64
+  /* ensure space for storing four registers */
+  cif->bytes += 4 * sizeof(ffi_arg);
+#endif
+
+  cif->bytes = (cif->bytes + 15) & ~0xF;
+
+  return FFI_OK;
+}
+
+#ifdef X86_WIN64
+extern int
+ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *,
+               unsigned, unsigned, unsigned *, void (*fn)(void));
+#elif defined(X86_WIN32)
+extern void
+ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *,
+               unsigned, unsigned, unsigned, unsigned *, void (*fn)(void));
+#else
+extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
+                          unsigned, unsigned, unsigned *, void (*fn)(void));
+#endif
+
+void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  extended_cif ecif;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+  
+  /* If the return value is a struct and we don't have a return */
+  /* value address then we need to make one                     */
+
+#ifdef X86_WIN64
+  if (rvalue == NULL
+      && cif->flags == FFI_TYPE_STRUCT
+      && cif->rtype->size != 1 && cif->rtype->size != 2
+      && cif->rtype->size != 4 && cif->rtype->size != 8)
+    {
+      ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF);
+    }
+#else
+  if (rvalue == NULL
+      && (cif->flags == FFI_TYPE_STRUCT
+          || cif->flags == FFI_TYPE_MS_STRUCT))
+    {
+      ecif.rvalue = alloca(cif->rtype->size);
+    }
+#endif
+  else
+    ecif.rvalue = rvalue;
+    
+  
+  switch (cif->abi) 
+    {
+#ifdef X86_WIN64
+    case FFI_WIN64:
+      ffi_call_win64(ffi_prep_args, &ecif, cif->bytes,
+                     cif->flags, ecif.rvalue, fn);
+      break;
+#elif defined(X86_WIN32)
+    case FFI_SYSV:
+    case FFI_STDCALL:
+    case FFI_MS_CDECL:
+      ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
+		     ecif.rvalue, fn);
+      break;
+    case FFI_THISCALL:
+    case FFI_FASTCALL:
+      {
+	unsigned int abi = cif->abi;
+	unsigned int i, passed_regs = 0;
+
+	if (cif->flags == FFI_TYPE_STRUCT)
+	  ++passed_regs;
+
+	for (i=0; i < cif->nargs && passed_regs < 2;i++)
+	  {
+	    size_t sz;
+
+	    if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
+	        || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
+	      continue;
+	    sz = (cif->arg_types[i]->size + 3) & ~3;
+	    if (sz == 0 || sz > 4)
+	      continue;
+	    ++passed_regs;
+	  }
+	if (passed_regs < 2 && abi == FFI_FASTCALL)
+	  abi = FFI_THISCALL;
+	if (passed_regs < 1 && abi == FFI_THISCALL)
+	  abi = FFI_STDCALL;
+        ffi_call_win32(ffi_prep_args, &ecif, abi, cif->bytes, cif->flags,
+                       ecif.rvalue, fn);
+      }
+      break;
+#else
+    case FFI_SYSV:
+      ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue,
+                    fn);
+      break;
+#endif
+    default:
+      FFI_ASSERT(0);
+      break;
+    }
+}
+
+
+/** private members **/
+
+/* The following __attribute__((regparm(1))) decorations will have no effect
+   on MSVC or SUNPRO_C -- standard conventions apply. */
+static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
+                                         void** args, ffi_cif* cif);
+void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
+     __attribute__ ((regparm(1)));
+unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
+     __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
+     __attribute__ ((regparm(1)));
+#ifdef X86_WIN32
+void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *)
+     __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *)
+     __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *)
+     __attribute__ ((regparm(1)));
+#endif
+#ifdef X86_WIN64
+void FFI_HIDDEN ffi_closure_win64 (ffi_closure *);
+#endif
+
+/* This function is jumped to by the trampoline */
+
+#ifdef X86_WIN64
+void * FFI_HIDDEN
+ffi_closure_win64_inner (ffi_closure *closure, void *args) {
+  ffi_cif       *cif;
+  void         **arg_area;
+  void          *result;
+  void          *resp = &result;
+
+  cif         = closure->cif;
+  arg_area    = (void**) alloca (cif->nargs * sizeof (void*));  
+
+  /* this call will initialize ARG_AREA, such that each
+   * element in that array points to the corresponding 
+   * value on the stack; and if the function returns
+   * a structure, it will change RESP to point to the
+   * structure return address.  */
+
+  ffi_prep_incoming_args_SYSV(args, &resp, arg_area, cif);
+  
+  (closure->fun) (cif, resp, arg_area, closure->user_data);
+
+  /* The result is returned in rax.  This does the right thing for
+     result types except for floats; we have to 'mov xmm0, rax' in the
+     caller to correct this.
+     TODO: structure sizes of 3 5 6 7 are returned by reference, too!!!
+  */
+  return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp;
+}
+
+#else
+unsigned int FFI_HIDDEN __attribute__ ((regparm(1)))
+ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args)
+{
+  /* our various things...  */
+  ffi_cif       *cif;
+  void         **arg_area;
+
+  cif         = closure->cif;
+  arg_area    = (void**) alloca (cif->nargs * sizeof (void*));  
+
+  /* this call will initialize ARG_AREA, such that each
+   * element in that array points to the corresponding 
+   * value on the stack; and if the function returns
+   * a structure, it will change RESP to point to the
+   * structure return address.  */
+
+  ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif);
+
+  (closure->fun) (cif, *respp, arg_area, closure->user_data);
+
+  return cif->flags;
+}
+#endif /* !X86_WIN64 */
+
+static void
+ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
+                            ffi_cif *cif)
+{
+  register unsigned int i;
+  register void **p_argv;
+  register char *argp;
+  register ffi_type **p_arg;
+
+  argp = stack;
+
+#ifdef X86_WIN64
+  if (cif->rtype->size > sizeof(ffi_arg)
+      || (cif->flags == FFI_TYPE_STRUCT
+          && (cif->rtype->size != 1 && cif->rtype->size != 2
+              && cif->rtype->size != 4 && cif->rtype->size != 8))) {
+    *rvalue = *(void **) argp;
+    argp += sizeof(void *);
+  }
+#else
+  if ( cif->flags == FFI_TYPE_STRUCT
+       || cif->flags == FFI_TYPE_MS_STRUCT ) {
+    *rvalue = *(void **) argp;
+    argp += sizeof(void *);
+  }
+#endif
+
+  p_argv = avalue;
+
+  for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
+    {
+      size_t z;
+
+      /* Align if necessary */
+      if ((sizeof(void*) - 1) & (size_t) argp) {
+        argp = (char *) ALIGN(argp, sizeof(void*));
+      }
+
+#ifdef X86_WIN64
+      if ((*p_arg)->size > sizeof(ffi_arg)
+          || ((*p_arg)->type == FFI_TYPE_STRUCT
+              && ((*p_arg)->size != 1 && (*p_arg)->size != 2
+                  && (*p_arg)->size != 4 && (*p_arg)->size != 8)))
+        {
+          z = sizeof(void *);
+          *p_argv = *(void **)argp;
+        }
+      else
+#endif
+        {
+          z = (*p_arg)->size;
+          
+          /* because we're little endian, this is what it turns into.   */
+          
+          *p_argv = (void*) argp;
+        }
+          
+      p_argv++;
+#ifdef X86_WIN64
+      argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+#else
+      argp += z;
+#endif
+    }
+  
+  return;
+}
+
+#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   void*  __fun = (void*)(FUN); \
+   void*  __ctx = (void*)(CTX); \
+   *(unsigned char*) &__tramp[0] = 0x41; \
+   *(unsigned char*) &__tramp[1] = 0xbb; \
+   *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 */ \
+   *(unsigned char*) &__tramp[6] = 0x48; \
+   *(unsigned char*) &__tramp[7] = 0xb8; \
+   *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax */ \
+   *(unsigned char *)  &__tramp[16] = 0x49; \
+   *(unsigned char *)  &__tramp[17] = 0xba; \
+   *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 */ \
+   *(unsigned char *)  &__tramp[26] = 0x41; \
+   *(unsigned char *)  &__tramp[27] = 0xff; \
+   *(unsigned char *)  &__tramp[28] = 0xe2; /* jmp %r10 */ \
+ }
+
+/* How to make a trampoline.  Derived from gcc/config/i386/i386.c. */
+
+#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 10);  \
+   *(unsigned char*) &__tramp[0] = 0xb8; \
+   *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+   *(unsigned char *)  &__tramp[5] = 0xe9; \
+   *(unsigned int*)  &__tramp[6] = __dis; /* jmp __fun  */ \
+ }
+
+#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 49);  \
+   unsigned short __size = (unsigned short)(SIZE); \
+   *(unsigned int *) &__tramp[0] = 0x8324048b;	/* mov (%esp), %eax */ \
+   *(unsigned int *) &__tramp[4] = 0x4c890cec;	/* sub $12, %esp */ \
+   *(unsigned int *) &__tramp[8] = 0x04890424;	/* mov %ecx, 4(%esp) */ \
+   *(unsigned char*) &__tramp[12] = 0x24;	/* mov %eax, (%esp) */ \
+   *(unsigned char*) &__tramp[13] = 0xb8; \
+   *(unsigned int *) &__tramp[14] = __size;	/* mov __size, %eax */ \
+   *(unsigned int *) &__tramp[18] = 0x08244c8d;	/* lea 8(%esp), %ecx */ \
+   *(unsigned int *) &__tramp[22] = 0x4802e8c1; /* shr $2, %eax ; dec %eax */ \
+   *(unsigned short*) &__tramp[26] = 0x0b74;	/* jz 1f */ \
+   *(unsigned int *) &__tramp[28] = 0x8908518b;	/* 2b: mov 8(%ecx), %edx */ \
+   *(unsigned int *) &__tramp[32] = 0x04c18311; /* mov %edx, (%ecx) ; add $4, %ecx */ \
+   *(unsigned char*) &__tramp[36] = 0x48;	/* dec %eax */ \
+   *(unsigned short*) &__tramp[37] = 0xf575;	/* jnz 2b ; 1f: */ \
+   *(unsigned char*) &__tramp[39] = 0xb8; \
+   *(unsigned int*)  &__tramp[40] = __ctx; /* movl __ctx, %eax */ \
+   *(unsigned char *)  &__tramp[44] = 0xe8; \
+   *(unsigned int*)  &__tramp[45] = __dis; /* call __fun  */ \
+   *(unsigned char*)  &__tramp[49] = 0xc2; /* ret  */ \
+   *(unsigned short*)  &__tramp[50] = (__size + 8); /* ret (__size + 8)  */ \
+ }
+
+#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE)  \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 10); \
+   unsigned short __size = (unsigned short)(SIZE); \
+   *(unsigned char*) &__tramp[0] = 0xb8; \
+   *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+   *(unsigned char *)  &__tramp[5] = 0xe8; \
+   *(unsigned int*)  &__tramp[6] = __dis; /* call __fun  */ \
+   *(unsigned char *)  &__tramp[10] = 0xc2; \
+   *(unsigned short*)  &__tramp[11] = __size; /* ret __size  */ \
+ }
+
+/* the cif must already be prep'ed */
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+                      ffi_cif* cif,
+                      void (*fun)(ffi_cif*,void*,void**,void*),
+                      void *user_data,
+                      void *codeloc)
+{
+#ifdef X86_WIN64
+#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE)
+#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0)
+  if (cif->abi == FFI_WIN64) 
+    {
+      int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3);
+      FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0],
+                                 &ffi_closure_win64,
+                                 codeloc, mask);
+      /* make sure we can execute here */
+    }
+#else
+  if (cif->abi == FFI_SYSV)
+    {
+      FFI_INIT_TRAMPOLINE (&closure->tramp[0],
+                           &ffi_closure_SYSV,
+                           (void*)codeloc);
+    }
+#ifdef X86_WIN32
+  else if (cif->abi == FFI_THISCALL)
+    {
+      FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0],
+				    &ffi_closure_THISCALL,
+				    (void*)codeloc,
+				    cif->bytes);
+    }
+  else if (cif->abi == FFI_STDCALL)
+    {
+      FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
+                                   &ffi_closure_STDCALL,
+                                   (void*)codeloc, cif->bytes);
+    }
+  else if (cif->abi == FFI_MS_CDECL)
+    {
+      FFI_INIT_TRAMPOLINE (&closure->tramp[0],
+                           &ffi_closure_SYSV,
+                           (void*)codeloc);
+    }
+#endif /* X86_WIN32 */
+#endif /* !X86_WIN64 */
+  else
+    {
+      return FFI_BAD_ABI;
+    }
+    
+  closure->cif  = cif;
+  closure->user_data = user_data;
+  closure->fun  = fun;
+
+  return FFI_OK;
+}
+
+/* ------- Native raw API support -------------------------------- */
+
+#if !FFI_NO_RAW_API
+
+ffi_status
+ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
+                          ffi_cif* cif,
+                          void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+                          void *user_data,
+                          void *codeloc)
+{
+  int i;
+
+  if (cif->abi != FFI_SYSV) {
+#ifdef X86_WIN32
+    if (cif->abi != FFI_THISCALL)
+#endif
+    return FFI_BAD_ABI;
+  }
+
+  /* we currently don't support certain kinds of arguments for raw
+     closures.  This should be implemented by a separate assembly
+     language routine, since it would require argument processing,
+     something we don't do now for performance.  */
+
+  for (i = cif->nargs-1; i >= 0; i--)
+    {
+      FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_STRUCT);
+      FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE);
+    }
+  
+#ifdef X86_WIN32
+  if (cif->abi == FFI_SYSV)
+    {
+#endif
+  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV,
+                       codeloc);
+#ifdef X86_WIN32
+    }
+  else if (cif->abi == FFI_THISCALL)
+    {
+      FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL,
+				    codeloc, cif->bytes);
+    }
+#endif
+  closure->cif  = cif;
+  closure->user_data = user_data;
+  closure->fun  = fun;
+
+  return FFI_OK;
+}
+
+static void 
+ffi_prep_args_raw(char *stack, extended_cif *ecif)
+{
+  memcpy (stack, ecif->avalue, ecif->cif->bytes);
+}
+
+/* we borrow this routine from libffi (it must be changed, though, to
+ * actually call the function passed in the first argument.  as of
+ * libffi-1.20, this is not the case.)
+ */
+
+void
+ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
+{
+  extended_cif ecif;
+  void **avalue = (void **)fake_avalue;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+  
+  /* If the return value is a struct and we don't have a return */
+  /* value address then we need to make one                     */
+
+  if (rvalue == NULL
+      && (cif->flags == FFI_TYPE_STRUCT
+          || cif->flags == FFI_TYPE_MS_STRUCT))
+    {
+      ecif.rvalue = alloca(cif->rtype->size);
+    }
+  else
+    ecif.rvalue = rvalue;
+    
+  
+  switch (cif->abi) 
+    {
+#ifdef X86_WIN32
+    case FFI_SYSV:
+    case FFI_STDCALL:
+    case FFI_MS_CDECL:
+      ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags,
+		     ecif.rvalue, fn);
+      break;
+    case FFI_THISCALL:
+    case FFI_FASTCALL:
+      {
+	unsigned int abi = cif->abi;
+	unsigned int i, passed_regs = 0;
+
+	if (cif->flags == FFI_TYPE_STRUCT)
+	  ++passed_regs;
+
+	for (i=0; i < cif->nargs && passed_regs < 2;i++)
+	  {
+	    size_t sz;
+
+	    if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
+	        || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
+	      continue;
+	    sz = (cif->arg_types[i]->size + 3) & ~3;
+	    if (sz == 0 || sz > 4)
+	      continue;
+	    ++passed_regs;
+	  }
+	if (passed_regs < 2 && abi == FFI_FASTCALL)
+	  cif->abi = abi = FFI_THISCALL;
+	if (passed_regs < 1 && abi == FFI_THISCALL)
+	  cif->abi = abi = FFI_STDCALL;
+        ffi_call_win32(ffi_prep_args_raw, &ecif, abi, cif->bytes, cif->flags,
+                       ecif.rvalue, fn);
+      }
+      break;
+#else
+    case FFI_SYSV:
+      ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
+                    ecif.rvalue, fn);
+      break;
+#endif
+    default:
+      FFI_ASSERT(0);
+      break;
+    }
+}
+
+#endif
+
+#endif /* !__x86_64__  || X86_WIN64 */
+
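For context, a minimal sketch of how the ffi_prep_cif/ffi_call entry points defined in this file are typically driven. This is hypothetical code, not part of this commit; `add` is an arbitrary target function.

#include <ffi.h>
#include <stdio.h>

static int add(int a, int b) { return a + b; }   /* hypothetical target */

static int call_sketch(void)
{
  ffi_cif cif;
  ffi_type *argtypes[2] = { &ffi_type_sint32, &ffi_type_sint32 };
  int a = 2, b = 3;
  void *argvalues[2] = { &a, &b };
  ffi_arg result;   /* integer results narrower than ffi_arg are widened */

  if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint32, argtypes) != FFI_OK)
    return -1;

  /* ffi_prep_args (above) lays the values out on the call stack, then the
     assembly stub (ffi_call_SYSV / ffi_call_win32) transfers control.  */
  ffi_call(&cif, (void (*)(void))add, &result, argvalues);
  printf("%d\n", (int)result);   /* prints 5 */
  return 0;
}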
diff --git a/lib/wrappers/libffi/gcc/ffi64.c b/lib/wrappers/libffi/gcc/ffi64.c
new file mode 100644
index 000000000..2014af24c
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/ffi64.c
@@ -0,0 +1,673 @@
+/* -----------------------------------------------------------------------
+   ffi64.c - Copyright (c) 2013  The Written Word, Inc.
+             Copyright (c) 2011  Anthony Green
+             Copyright (c) 2008, 2010  Red Hat, Inc.
+             Copyright (c) 2002, 2007  Bo Thorsen <bo@suse.de>
+             
+   x86-64 Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+#include <stdarg.h>
+
+#ifdef __x86_64__
+
+#define MAX_GPR_REGS 6
+#define MAX_SSE_REGS 8
+
+#if defined(__INTEL_COMPILER)
+#define UINT128 __m128
+#else
+#if defined(__SUNPRO_C)
+#include <sunmedia_types.h>
+#define UINT128 __m128i
+#else
+#define UINT128 __int128_t
+#endif
+#endif
+
+union big_int_union
+{
+  UINT32 i32;
+  UINT64 i64;
+  UINT128 i128;
+};
+
+struct register_args
+{
+  /* Registers for argument passing.  */
+  UINT64 gpr[MAX_GPR_REGS];
+  union big_int_union sse[MAX_SSE_REGS]; 
+};
+
+extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+			     void *raddr, void (*fnaddr)(void), unsigned ssecount);
+
+/* All references to register classes here are identical to the code in
+   gcc/config/i386/i386.c. Do *not* change one without the other.  */
+
+/* Register class used for passing given 64bit part of the argument.
+   These represent classes as documented by the PS ABI, with the
+   exception of SSESF, SSEDF classes, that are basically SSE class,
+   just gcc will use SF or DFmode move instead of DImode to avoid
+   reformatting penalties.
+
+   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
+   whenever possible (upper half does contain padding).  */
+enum x86_64_reg_class
+  {
+    X86_64_NO_CLASS,
+    X86_64_INTEGER_CLASS,
+    X86_64_INTEGERSI_CLASS,
+    X86_64_SSE_CLASS,
+    X86_64_SSESF_CLASS,
+    X86_64_SSEDF_CLASS,
+    X86_64_SSEUP_CLASS,
+    X86_64_X87_CLASS,
+    X86_64_X87UP_CLASS,
+    X86_64_COMPLEX_X87_CLASS,
+    X86_64_MEMORY_CLASS
+  };
+
+#define MAX_CLASSES 4
+
+#define SSE_CLASS_P(X)	((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)
+
+/* x86-64 register passing implementation.  See the x86-64 ABI for details.  The
+   goal of this code is to classify each eightbyte of an incoming argument by
+   register class and assign registers accordingly.  */
+
+/* Return the union class of CLASS1 and CLASS2.
+   See the x86-64 PS ABI for details.  */
+
+static enum x86_64_reg_class
+merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
+{
+  /* Rule #1: If both classes are equal, this is the resulting class.  */
+  if (class1 == class2)
+    return class1;
+
+  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
+     the other class.  */
+  if (class1 == X86_64_NO_CLASS)
+    return class2;
+  if (class2 == X86_64_NO_CLASS)
+    return class1;
+
+  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
+  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
+    return X86_64_MEMORY_CLASS;
+
+  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
+  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
+      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
+    return X86_64_INTEGERSI_CLASS;
+  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
+      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
+    return X86_64_INTEGER_CLASS;
+
+  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
+     MEMORY is used.  */
+  if (class1 == X86_64_X87_CLASS
+      || class1 == X86_64_X87UP_CLASS
+      || class1 == X86_64_COMPLEX_X87_CLASS
+      || class2 == X86_64_X87_CLASS
+      || class2 == X86_64_X87UP_CLASS
+      || class2 == X86_64_COMPLEX_X87_CLASS)
+    return X86_64_MEMORY_CLASS;
+
+  /* Rule #6: Otherwise class SSE is used.  */
+  return X86_64_SSE_CLASS;
+}
+
+/* Classify the argument of type TYPE.
+   CLASSES will be filled with the register class used to pass each word
+   of the operand.  The number of words is returned.  In case the parameter
+   should be passed in memory, 0 is returned. As a special case for zero
+   sized containers, classes[0] will be NO_CLASS and 1 is returned.
+
+   See the x86-64 PS ABI for details.
+*/
+static int
+classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
+		   size_t byte_offset)
+{
+  switch (type->type)
+    {
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_SINT16:
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_POINTER:
+      {
+	int size = byte_offset + type->size;
+
+	if (size <= 4)
+	  {
+	    classes[0] = X86_64_INTEGERSI_CLASS;
+	    return 1;
+	  }
+	else if (size <= 8)
+	  {
+	    classes[0] = X86_64_INTEGER_CLASS;
+	    return 1;
+	  }
+	else if (size <= 12)
+	  {
+	    classes[0] = X86_64_INTEGER_CLASS;
+	    classes[1] = X86_64_INTEGERSI_CLASS;
+	    return 2;
+	  }
+	else if (size <= 16)
+	  {
+	    classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
+	    return 2;
+	  }
+	else
+	  FFI_ASSERT (0);
+      }
+    case FFI_TYPE_FLOAT:
+      if (!(byte_offset % 8))
+	classes[0] = X86_64_SSESF_CLASS;
+      else
+	classes[0] = X86_64_SSE_CLASS;
+      return 1;
+    case FFI_TYPE_DOUBLE:
+      classes[0] = X86_64_SSEDF_CLASS;
+      return 1;
+    case FFI_TYPE_LONGDOUBLE:
+      classes[0] = X86_64_X87_CLASS;
+      classes[1] = X86_64_X87UP_CLASS;
+      return 2;
+    case FFI_TYPE_STRUCT:
+      {
+	const int UNITS_PER_WORD = 8;
+	int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+	ffi_type **ptr; 
+	int i;
+	enum x86_64_reg_class subclasses[MAX_CLASSES];
+
+	/* If the struct is larger than 32 bytes, pass it on the stack.  */
+	if (type->size > 32)
+	  return 0;
+
+	for (i = 0; i < words; i++)
+	  classes[i] = X86_64_NO_CLASS;
+
+	/* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
+	   signal the memory class, so handle it as a special case.  */
+	if (!words)
+	  {
+	    classes[0] = X86_64_NO_CLASS;
+	    return 1;
+	  }
+
+	/* Merge the fields of structure.  */
+	for (ptr = type->elements; *ptr != NULL; ptr++)
+	  {
+	    int num;
+
+	    byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
+
+	    num = classify_argument (*ptr, subclasses, byte_offset % 8);
+	    if (num == 0)
+	      return 0;
+	    for (i = 0; i < num; i++)
+	      {
+		int pos = byte_offset / 8;
+		classes[i + pos] =
+		  merge_classes (subclasses[i], classes[i + pos]);
+	      }
+
+	    byte_offset += (*ptr)->size;
+	  }
+
+	if (words > 2)
+	  {
+	    /* When size > 16 bytes, if the first one isn't
+	       X86_64_SSE_CLASS or any other ones aren't
+	       X86_64_SSEUP_CLASS, everything should be passed in
+	       memory.  */
+	    if (classes[0] != X86_64_SSE_CLASS)
+	      return 0;
+
+	    for (i = 1; i < words; i++)
+	      if (classes[i] != X86_64_SSEUP_CLASS)
+		return 0;
+	  }
+
+	/* Final merger cleanup.  */
+	for (i = 0; i < words; i++)
+	  {
+	    /* If one class is MEMORY, everything should be passed in
+	       memory.  */
+	    if (classes[i] == X86_64_MEMORY_CLASS)
+	      return 0;
+
+	    /* The X86_64_SSEUP_CLASS should be always preceded by
+	       X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
+	    if (classes[i] == X86_64_SSEUP_CLASS
+		&& classes[i - 1] != X86_64_SSE_CLASS
+		&& classes[i - 1] != X86_64_SSEUP_CLASS)
+	      {
+		/* The first one should never be X86_64_SSEUP_CLASS.  */
+		FFI_ASSERT (i != 0);
+		classes[i] = X86_64_SSE_CLASS;
+	      }
+
+	    /*  If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
+		everything should be passed in memory.  */
+	    if (classes[i] == X86_64_X87UP_CLASS
+		&& (classes[i - 1] != X86_64_X87_CLASS))
+	      {
+		/* The first one should never be X86_64_X87UP_CLASS.  */
+		FFI_ASSERT (i != 0);
+		return 0;
+	      }
+	  }
+	return words;
+      }
+
+    default:
+      FFI_ASSERT(0);
+    }
+  return 0; /* Never reached.  */
+}
+
+/* Examine the argument and return the number of registers required in each
+   class.  Return zero iff the parameter should be passed in memory, otherwise
+   the number of registers.  */
+
+static int
+examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
+		  _Bool in_return, int *pngpr, int *pnsse)
+{
+  int i, n, ngpr, nsse;
+
+  n = classify_argument (type, classes, 0);
+  if (n == 0)
+    return 0;
+
+  ngpr = nsse = 0;
+  for (i = 0; i < n; ++i)
+    switch (classes[i])
+      {
+      case X86_64_INTEGER_CLASS:
+      case X86_64_INTEGERSI_CLASS:
+	ngpr++;
+	break;
+      case X86_64_SSE_CLASS:
+      case X86_64_SSESF_CLASS:
+      case X86_64_SSEDF_CLASS:
+	nsse++;
+	break;
+      case X86_64_NO_CLASS:
+      case X86_64_SSEUP_CLASS:
+	break;
+      case X86_64_X87_CLASS:
+      case X86_64_X87UP_CLASS:
+      case X86_64_COMPLEX_X87_CLASS:
+	return in_return != 0;
+      default:
+	abort ();
+      }
+
+  *pngpr = ngpr;
+  *pnsse = nsse;
+
+  return n;
+}
+
+/* Perform machine dependent cif processing.  */
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+  int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
+  enum x86_64_reg_class classes[MAX_CLASSES];
+  size_t bytes;
+
+  gprcount = ssecount = 0;
+
+  flags = cif->rtype->type;
+  if (flags != FFI_TYPE_VOID)
+    {
+      n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+      if (n == 0)
+	{
+	  /* The return value is passed in memory.  A pointer to that
+	     memory is the first argument.  Allocate a register for it.  */
+	  gprcount++;
+	  /* We don't have to do anything in asm for the return.  */
+	  flags = FFI_TYPE_VOID;
+	}
+      else if (flags == FFI_TYPE_STRUCT)
+	{
+	  /* Mark which registers the result appears in.  */
+	  _Bool sse0 = SSE_CLASS_P (classes[0]);
+	  _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+	  if (sse0 && !sse1)
+	    flags |= 1 << 8;
+	  else if (!sse0 && sse1)
+	    flags |= 1 << 9;
+	  else if (sse0 && sse1)
+	    flags |= 1 << 10;
+	  /* Mark the true size of the structure.  */
+	  flags |= cif->rtype->size << 12;
+	}
+    }
+
+  /* Go over all arguments and determine the way they should be passed.
+     If it's in a register and there is space for it, let that be so. If
+     not, add its size to the stack byte count.  */
+  for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
+    {
+      if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = cif->arg_types[i]->alignment;
+
+	  if (align < 8)
+	    align = 8;
+
+	  bytes = ALIGN (bytes, align);
+	  bytes += cif->arg_types[i]->size;
+	}
+      else
+	{
+	  gprcount += ngpr;
+	  ssecount += nsse;
+	}
+    }
+  if (ssecount)
+    flags |= 1 << 11;
+  cif->flags = flags;
+  cif->bytes = ALIGN (bytes, 8);
+
+  return FFI_OK;
+}
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  enum x86_64_reg_class classes[MAX_CLASSES];
+  char *stack, *argp;
+  ffi_type **arg_types;
+  int gprcount, ssecount, ngpr, nsse, i, avn;
+  _Bool ret_in_memory;
+  struct register_args *reg_args;
+
+  /* Can't call 32-bit mode from 64-bit mode.  */
+  FFI_ASSERT (cif->abi == FFI_UNIX64);
+
+  /* If the return value is a struct and we don't have a return value
+     address then we need to make one.  Note the setting of flags to
+     VOID above in ffi_prep_cif_machdep.  */
+  ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
+		   && (cif->flags & 0xff) == FFI_TYPE_VOID);
+  if (rvalue == NULL && ret_in_memory)
+    rvalue = alloca (cif->rtype->size);
+
+  /* Allocate the space for the arguments, plus 4 words of temp space.  */
+  stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
+  reg_args = (struct register_args *) stack;
+  argp = stack + sizeof (struct register_args);
+
+  gprcount = ssecount = 0;
+
+  /* If the return value is passed in memory, add the pointer as the
+     first integer argument.  */
+  if (ret_in_memory)
+    reg_args->gpr[gprcount++] = (unsigned long) rvalue;
+
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+
+  for (i = 0; i < avn; ++i)
+    {
+      size_t size = arg_types[i]->size;
+      int n;
+
+      n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+      if (n == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = arg_types[i]->alignment;
+
+	  /* Stack arguments are *always* at least 8 byte aligned.  */
+	  if (align < 8)
+	    align = 8;
+
+	  /* Pass this argument in memory.  */
+	  argp = (void *) ALIGN (argp, align);
+	  memcpy (argp, avalue[i], size);
+	  argp += size;
+	}
+      else
+	{
+	  /* The argument is passed entirely in registers.  */
+	  char *a = (char *) avalue[i];
+	  int j;
+
+	  for (j = 0; j < n; j++, a += 8, size -= 8)
+	    {
+	      switch (classes[j])
+		{
+		case X86_64_INTEGER_CLASS:
+		case X86_64_INTEGERSI_CLASS:
+		  /* Sign-extend integer arguments passed in general
+		     purpose registers, to cope with the fact that
+		     LLVM incorrectly assumes that this will be done
+		     (the x86-64 PS ABI does not specify this). */
+		  switch (arg_types[i]->type)
+		    {
+		    case FFI_TYPE_SINT8:
+		      *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
+		      break;
+		    case FFI_TYPE_SINT16:
+		      *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
+		      break;
+		    case FFI_TYPE_SINT32:
+		      *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
+		      break;
+		    default:
+		      reg_args->gpr[gprcount] = 0;
+		      memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+		    }
+		  gprcount++;
+		  break;
+		case X86_64_SSE_CLASS:
+		case X86_64_SSEDF_CLASS:
+		  reg_args->sse[ssecount++].i64 = *(UINT64 *) a;
+		  break;
+		case X86_64_SSESF_CLASS:
+		  reg_args->sse[ssecount++].i32 = *(UINT32 *) a;
+		  break;
+		default:
+		  abort();
+		}
+	    }
+	}
+    }
+
+  ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
+		   cif->flags, rvalue, fn, ssecount);
+}
+
+
+extern void ffi_closure_unix64(void);
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+		      ffi_cif* cif,
+		      void (*fun)(ffi_cif*, void*, void**, void*),
+		      void *user_data,
+		      void *codeloc)
+{
+  volatile unsigned short *tramp;
+
+  /* Sanity check on the cif ABI.  */
+  {
+    int abi = cif->abi;
+    if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)))
+      return FFI_BAD_ABI;
+  }
+
+  tramp = (volatile unsigned short *) &closure->tramp[0];
+
+  tramp[0] = 0xbb49;		/* mov <code>, %r11	*/
+  *((unsigned long long * volatile) &tramp[1])
+    = (unsigned long) ffi_closure_unix64;
+  tramp[5] = 0xba49;		/* mov <data>, %r10	*/
+  *((unsigned long long * volatile) &tramp[6])
+    = (unsigned long) codeloc;
+
+  /* Set the carry bit iff the function uses any sse registers.
+     This is clc or stc, together with the first byte of the jmp.  */
+  tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
+
+  tramp[11] = 0xe3ff;			/* jmp *%r11    */
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+
+  return FFI_OK;
+}
+
+int
+ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
+			 struct register_args *reg_args, char *argp)
+{
+  ffi_cif *cif;
+  void **avalue;
+  ffi_type **arg_types;
+  long i, avn;
+  int gprcount, ssecount, ngpr, nsse;
+  int ret;
+
+  cif = closure->cif;
+  avalue = alloca(cif->nargs * sizeof(void *));
+  gprcount = ssecount = 0;
+
+  ret = cif->rtype->type;
+  if (ret != FFI_TYPE_VOID)
+    {
+      enum x86_64_reg_class classes[MAX_CLASSES];
+      int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+      if (n == 0)
+	{
+	  /* The return value goes in memory.  Arrange for the closure
+	     return value to go directly back to the original caller.  */
+	  rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
+	  /* We don't have to do anything in asm for the return.  */
+	  ret = FFI_TYPE_VOID;
+	}
+      else if (ret == FFI_TYPE_STRUCT && n == 2)
+	{
+	  /* Mark which register the second word of the structure goes in.  */
+	  _Bool sse0 = SSE_CLASS_P (classes[0]);
+	  _Bool sse1 = SSE_CLASS_P (classes[1]);
+	  if (!sse0 && sse1)
+	    ret |= 1 << 8;
+	  else if (sse0 && !sse1)
+	    ret |= 1 << 9;
+	}
+    }
+
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+  
+  for (i = 0; i < avn; ++i)
+    {
+      enum x86_64_reg_class classes[MAX_CLASSES];
+      int n;
+
+      n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+      if (n == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = arg_types[i]->alignment;
+
+	  /* Stack arguments are *always* at least 8 byte aligned.  */
+	  if (align < 8)
+	    align = 8;
+
+	  /* Pass this argument in memory.  */
+	  argp = (void *) ALIGN (argp, align);
+	  avalue[i] = argp;
+	  argp += arg_types[i]->size;
+	}
+      /* If the argument is in a single register, or two consecutive
+	 integer registers, then we can use that address directly.  */
+      else if (n == 1
+	       || (n == 2 && !(SSE_CLASS_P (classes[0])
+			       || SSE_CLASS_P (classes[1]))))
+	{
+	  /* The argument is in a single register.  */
+	  if (SSE_CLASS_P (classes[0]))
+	    {
+	      avalue[i] = &reg_args->sse[ssecount];
+	      ssecount += n;
+	    }
+	  else
+	    {
+	      avalue[i] = &reg_args->gpr[gprcount];
+	      gprcount += n;
+	    }
+	}
+      /* Otherwise, allocate space to make them consecutive.  */
+      else
+	{
+	  char *a = alloca (16);
+	  int j;
+
+	  avalue[i] = a;
+	  for (j = 0; j < n; j++, a += 8)
+	    {
+	      if (SSE_CLASS_P (classes[j]))
+		memcpy (a, &reg_args->sse[ssecount++], 8);
+	      else
+		memcpy (a, &reg_args->gpr[gprcount++], 8);
+	    }
+	}
+    }
+
+  /* Invoke the closure.  */
+  closure->fun (cif, rvalue, avalue, closure->user_data);
+
+  /* Tell assembly how to perform return type promotions.  */
+  return ret;
+}
+
+#endif /* __x86_64__ */
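As a worked example of the classification above (not part of this commit): a struct { double d; long l; } spans two eightbytes; classify_argument labels the first X86_64_SSEDF_CLASS and the second X86_64_INTEGER_CLASS, so ffi_call passes it in one SSE register plus one general-purpose register whenever both register files still have room. Describing such an aggregate to libffi looks roughly like:

/* Hypothetical aggregate: struct { double d; long l; } */
static ffi_type *point_fields[] = { &ffi_type_double, &ffi_type_sint64, NULL };
static ffi_type point_type = {
  0, 0,             /* size and alignment are filled in by initialize_aggregate()
                       (prep_cif.c) the first time the type is used in a cif */
  FFI_TYPE_STRUCT,
  point_fields
};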
diff --git a/lib/wrappers/libffi/gcc/prep_cif.c b/lib/wrappers/libffi/gcc/prep_cif.c
new file mode 100644
index 000000000..e8ec5cf1e
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/prep_cif.c
@@ -0,0 +1,237 @@
+/* -----------------------------------------------------------------------
+   prep_cif.c - Copyright (c) 2011, 2012  Anthony Green
+                Copyright (c) 1996, 1998, 2007  Red Hat, Inc.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+
+/* Round up to FFI_SIZEOF_ARG. */
+
+#define STACK_ARG_SIZE(x) ALIGN(x, FFI_SIZEOF_ARG)
+
+/* Perform machine independent initialization of aggregate type
+   specifications. */
+
+static ffi_status initialize_aggregate(ffi_type *arg)
+{
+  ffi_type **ptr;
+
+  if (UNLIKELY(arg == NULL || arg->elements == NULL))
+    return FFI_BAD_TYPEDEF;
+
+  arg->size = 0;
+  arg->alignment = 0;
+
+  ptr = &(arg->elements[0]);
+
+  if (UNLIKELY(ptr == 0))
+    return FFI_BAD_TYPEDEF;
+
+  while ((*ptr) != NULL)
+    {
+      if (UNLIKELY(((*ptr)->size == 0)
+		    && (initialize_aggregate((*ptr)) != FFI_OK)))
+	return FFI_BAD_TYPEDEF;
+
+      /* Perform a sanity check on the argument type */
+      FFI_ASSERT_VALID_TYPE(*ptr);
+
+      arg->size = ALIGN(arg->size, (*ptr)->alignment);
+      arg->size += (*ptr)->size;
+
+      arg->alignment = (arg->alignment > (*ptr)->alignment) ?
+	arg->alignment : (*ptr)->alignment;
+
+      ptr++;
+    }
+
+  /* Structure size includes tail padding.  This is important for
+     structures that fit in one register on ABIs like the PowerPC64
+     Linux ABI that right justify small structs in a register.
+     It's also needed for nested structure layout, for example
+     struct A { long a; char b; }; struct B { struct A x; char y; };
+     should find y at an offset of 2*sizeof(long) and result in a
+     total size of 3*sizeof(long).  */
+  arg->size = ALIGN (arg->size, arg->alignment);
+
+  if (arg->size == 0)
+    return FFI_BAD_TYPEDEF;
+  else
+    return FFI_OK;
+}
+
+#ifndef __CRIS__
+/* The CRIS ABI specifies structure elements to have byte
+   alignment only, so it completely overrides this function,
+   which assumes "natural" alignment and padding.  */
+
+/* Perform machine independent ffi_cif preparation, then call
+   machine dependent routine. */
+
+/* For non variadic functions isvariadic should be 0 and
+   nfixedargs==ntotalargs.
+
+   For variadic calls, isvariadic should be 1 and nfixedargs
+   and ntotalargs set as appropriate. nfixedargs must always be >=1 */
+
+
+ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
+			     unsigned int isvariadic,
+                             unsigned int nfixedargs,
+                             unsigned int ntotalargs,
+			     ffi_type *rtype, ffi_type **atypes)
+{
+  unsigned bytes = 0;
+  unsigned int i;
+  ffi_type **ptr;
+
+  FFI_ASSERT(cif != NULL);
+  FFI_ASSERT((!isvariadic) || (nfixedargs >= 1));
+  FFI_ASSERT(nfixedargs <= ntotalargs);
+
+#ifndef X86_WIN32
+  if (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI))
+    return FFI_BAD_ABI;
+#else
+  if (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI || abi == FFI_THISCALL))
+    return FFI_BAD_ABI;
+#endif
+
+  cif->abi = abi;
+  cif->arg_types = atypes;
+  cif->nargs = ntotalargs;
+  cif->rtype = rtype;
+
+  cif->flags = 0;
+
+  /* Initialize the return type if necessary */
+  if ((cif->rtype->size == 0) && (initialize_aggregate(cif->rtype) != FFI_OK))
+    return FFI_BAD_TYPEDEF;
+
+  /* Perform a sanity check on the return type */
+  FFI_ASSERT_VALID_TYPE(cif->rtype);
+
+  /* x86, x86-64 and s390 stack space allocation is handled in prep_machdep. */
+#if !defined M68K && !defined X86_ANY && !defined S390 && !defined PA
+  /* Make space for the return structure pointer */
+  if (cif->rtype->type == FFI_TYPE_STRUCT
+#ifdef SPARC
+      && (cif->abi != FFI_V9 || cif->rtype->size > 32)
+#endif
+#ifdef TILE
+      && (cif->rtype->size > 10 * FFI_SIZEOF_ARG)
+#endif
+#ifdef XTENSA
+      && (cif->rtype->size > 16)
+#endif
+
+     )
+    bytes = STACK_ARG_SIZE(sizeof(void*));
+#endif
+
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+    {
+
+      /* Initialize any uninitialized aggregate type definitions */
+      if (((*ptr)->size == 0) && (initialize_aggregate((*ptr)) != FFI_OK))
+	return FFI_BAD_TYPEDEF;
+
+      /* Perform a sanity check on the argument type, do this
+	 check after the initialization.  */
+      FFI_ASSERT_VALID_TYPE(*ptr);
+
+#if !defined X86_ANY && !defined S390 && !defined PA
+#ifdef SPARC
+      if (((*ptr)->type == FFI_TYPE_STRUCT
+	   && ((*ptr)->size > 16 || cif->abi != FFI_V9))
+	  || ((*ptr)->type == FFI_TYPE_LONGDOUBLE
+	      && cif->abi != FFI_V9))
+	bytes += sizeof(void*);
+      else
+#endif
+	{
+	  /* Add any padding if necessary */
+	  if (((*ptr)->alignment - 1) & bytes)
+	    bytes = ALIGN(bytes, (*ptr)->alignment);
+
+#ifdef TILE
+	  if (bytes < 10 * FFI_SIZEOF_ARG &&
+	      bytes + STACK_ARG_SIZE((*ptr)->size) > 10 * FFI_SIZEOF_ARG)
+	    {
+	      /* An argument is never split between the 10 parameter
+		 registers and the stack.  */
+	      bytes = 10 * FFI_SIZEOF_ARG;
+	    }
+#endif
+#ifdef XTENSA
+	  if (bytes <= 6*4 && bytes + STACK_ARG_SIZE((*ptr)->size) > 6*4)
+	    bytes = 6*4;
+#endif
+
+	  bytes += STACK_ARG_SIZE((*ptr)->size);
+	}
+#endif
+    }
+
+  cif->bytes = bytes;
+
+  /* Perform machine dependent cif processing */
+#ifdef FFI_TARGET_SPECIFIC_VARIADIC
+  if (isvariadic)
+	return ffi_prep_cif_machdep_var(cif, nfixedargs, ntotalargs);
+#endif
+
+  return ffi_prep_cif_machdep(cif);
+}
+#endif /* not __CRIS__ */
+
+ffi_status ffi_prep_cif(ffi_cif *cif, ffi_abi abi, unsigned int nargs,
+			     ffi_type *rtype, ffi_type **atypes)
+{
+  return ffi_prep_cif_core(cif, abi, 0, nargs, nargs, rtype, atypes);
+}
+
+ffi_status ffi_prep_cif_var(ffi_cif *cif,
+                            ffi_abi abi,
+                            unsigned int nfixedargs,
+                            unsigned int ntotalargs,
+                            ffi_type *rtype,
+                            ffi_type **atypes)
+{
+  return ffi_prep_cif_core(cif, abi, 1, nfixedargs, ntotalargs, rtype, atypes);
+}
+
+#if FFI_CLOSURES
+
+ffi_status
+ffi_prep_closure (ffi_closure* closure,
+		  ffi_cif* cif,
+		  void (*fun)(ffi_cif*,void*,void**,void*),
+		  void *user_data)
+{
+  return ffi_prep_closure_loc (closure, cif, fun, user_data, closure);
+}
+
+#endif
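The ffi_prep_cif_var entry point added above differs from ffi_prep_cif only in telling the backend which arguments are variadic. A minimal sketch (hypothetical, not part of this commit) of calling printf through it:

#include <ffi.h>
#include <stdio.h>

static int vararg_sketch(void)
{
  ffi_cif cif;
  ffi_type *argtypes[2] = { &ffi_type_pointer, &ffi_type_sint32 };
  char *fmt = "%d\n";
  int value = 7;
  void *argvalues[2] = { &fmt, &value };
  ffi_arg nprinted;

  /* One fixed argument (the format string), two arguments in total. */
  if (ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, 1, 2, &ffi_type_sint32, argtypes) != FFI_OK)
    return -1;
  ffi_call(&cif, (void (*)(void))printf, &nprinted, argvalues);
  return (int)nprinted;
}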
diff --git a/lib/wrappers/libffi/gcc/types.c b/lib/wrappers/libffi/gcc/types.c
new file mode 100644
index 000000000..0a11eb0fb
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/types.c
@@ -0,0 +1,77 @@
+/* -----------------------------------------------------------------------
+   types.c - Copyright (c) 1996, 1998  Red Hat, Inc.
+   
+   Predefined ffi_types needed by libffi.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+/* Hide the basic type definitions from the header file, so that we
+   can redefine them here as "const".  */
+#define LIBFFI_HIDE_BASIC_TYPES
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+/* Type definitions */
+
+#define FFI_TYPEDEF(name, type, id)		\
+struct struct_align_##name {			\
+  char c;					\
+  type x;					\
+};						\
+const ffi_type ffi_type_##name = {		\
+  sizeof(type),					\
+  offsetof(struct struct_align_##name, x),	\
+  id, NULL					\
+}
+
+/* Size and alignment are fake here. They must not be 0. */
+const ffi_type ffi_type_void = {
+  1, 1, FFI_TYPE_VOID, NULL
+};
+
+FFI_TYPEDEF(uint8, UINT8, FFI_TYPE_UINT8);
+FFI_TYPEDEF(sint8, SINT8, FFI_TYPE_SINT8);
+FFI_TYPEDEF(uint16, UINT16, FFI_TYPE_UINT16);
+FFI_TYPEDEF(sint16, SINT16, FFI_TYPE_SINT16);
+FFI_TYPEDEF(uint32, UINT32, FFI_TYPE_UINT32);
+FFI_TYPEDEF(sint32, SINT32, FFI_TYPE_SINT32);
+FFI_TYPEDEF(uint64, UINT64, FFI_TYPE_UINT64);
+FFI_TYPEDEF(sint64, SINT64, FFI_TYPE_SINT64);
+
+FFI_TYPEDEF(pointer, void*, FFI_TYPE_POINTER);
+
+FFI_TYPEDEF(float, float, FFI_TYPE_FLOAT);
+FFI_TYPEDEF(double, double, FFI_TYPE_DOUBLE);
+
+#ifdef __alpha__
+/* Even if we're not configured to default to 128-bit long double, 
+   maintain binary compatibility, as -mlong-double-128 can be used
+   at any time.  */
+/* Validate the hard-coded number below.  */
+# if defined(__LONG_DOUBLE_128__) && FFI_TYPE_LONGDOUBLE != 4
+#  error FFI_TYPE_LONGDOUBLE out of date
+# endif
+const ffi_type ffi_type_longdouble = { 16, 16, 4, NULL };
+#elif FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+FFI_TYPEDEF(longdouble, long double, FFI_TYPE_LONGDOUBLE);
+#endif
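The FFI_TYPEDEF macro above derives each primitive type's alignment without compiler extensions: it places the type right after a single char in a struct and takes offsetof of that member, which equals the padding the compiler had to insert, i.e. the type's alignment requirement. A standalone sketch of the same trick (illustration only, not part of the commit):

#include <stddef.h>
#include <stdio.h>

/* The offset of 'x' equals the alignment the compiler enforces for double. */
struct align_probe {
  char c;
  double x;
};

int main(void)
{
  printf("sizeof(double)  = %zu\n", sizeof(double));
  printf("alignof(double) = %zu\n", offsetof(struct align_probe, x));
  return 0;
}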
diff --git a/lib/wrappers/libffi/gcc/win32_asm.asm b/lib/wrappers/libffi/gcc/win32_asm.asm
new file mode 100644
index 000000000..ce3c4f3f3
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/win32_asm.asm
@@ -0,0 +1,759 @@
+/* -----------------------------------------------------------------------
+   win32.S - Copyright (c) 1996, 1998, 2001, 2002, 2009  Red Hat, Inc.
+	     Copyright (c) 2001  John Beniton
+	     Copyright (c) 2002  Ranjit Mathew
+	     Copyright (c) 2009  Daniel Witte
+			
+ 
+   X86 Foreign Function Interface
+ 
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+ 
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+ 
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   -----------------------------------------------------------------------
+   */
+ 
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffitarget.h>
+
+	.text
+ 
+        // This assumes we are using gas.
+        .balign 16
+	.globl	_ffi_call_win32
+#ifndef __OS2__
+	.def	_ffi_call_win32;	.scl	2;	.type	32;	.endef
+#endif
+_ffi_call_win32:
+.LFB1:
+        pushl %ebp
+.LCFI0:
+        movl  %esp,%ebp
+.LCFI1:
+        // Make room for all of the new args.
+        movl  20(%ebp),%ecx                                                     
+        subl  %ecx,%esp
+ 
+        movl  %esp,%eax
+ 
+        // Place all of the ffi_prep_args in position
+        pushl 12(%ebp)
+        pushl %eax
+        call  *8(%ebp)
+ 
+        // Return stack to previous state and call the function
+        addl  $8,%esp
+
+	// Handle fastcall and thiscall
+	cmpl $3, 16(%ebp)  // FFI_THISCALL
+	jz .do_thiscall
+	cmpl $4, 16(%ebp) // FFI_FASTCALL
+	jnz .do_fncall
+	movl (%esp), %ecx
+	movl 4(%esp), %edx
+	addl $8, %esp
+	jmp .do_fncall
+.do_thiscall:
+	movl (%esp), %ecx
+	addl $4, %esp
+
+.do_fncall:
+	 
+        // FIXME: Align the stack to a 128-bit boundary to avoid
+        // potential performance hits.
+
+        call  *32(%ebp)
+ 
+        // stdcall functions pop arguments off the stack themselves
+
+        // Load %ecx with the return type code
+        movl  24(%ebp),%ecx
+ 
+        // If the return value pointer is NULL, assume no return value.
+        cmpl  $0,28(%ebp)
+        jne   0f
+ 
+        // Even if there is no space for the return value, we are
+        // obliged to handle floating-point values.
+        cmpl  $FFI_TYPE_FLOAT,%ecx
+        jne   .Lnoretval
+        fstp  %st(0)
+ 
+        jmp   .Lepilogue
+
+0:
+	call	1f
+	// Do not insert anything here between the call and the jump table.
+.Lstore_table:
+	.long	.Lnoretval		/* FFI_TYPE_VOID */
+	.long	.Lretint		/* FFI_TYPE_INT */
+	.long	.Lretfloat		/* FFI_TYPE_FLOAT */
+	.long	.Lretdouble		/* FFI_TYPE_DOUBLE */
+	.long	.Lretlongdouble		/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lretuint8		/* FFI_TYPE_UINT8 */
+	.long	.Lretsint8		/* FFI_TYPE_SINT8 */
+	.long	.Lretuint16		/* FFI_TYPE_UINT16 */
+	.long	.Lretsint16		/* FFI_TYPE_SINT16 */
+	.long	.Lretint		/* FFI_TYPE_UINT32 */
+	.long	.Lretint		/* FFI_TYPE_SINT32 */
+	.long	.Lretint64		/* FFI_TYPE_UINT64 */
+	.long	.Lretint64		/* FFI_TYPE_SINT64 */
+	.long	.Lretstruct		/* FFI_TYPE_STRUCT */
+	.long	.Lretint		/* FFI_TYPE_POINTER */
+	.long	.Lretstruct1b		/* FFI_TYPE_SMALL_STRUCT_1B */
+	.long	.Lretstruct2b		/* FFI_TYPE_SMALL_STRUCT_2B */
+	.long	.Lretstruct4b		/* FFI_TYPE_SMALL_STRUCT_4B */
+	.long	.Lretstruct		/* FFI_TYPE_MS_STRUCT */
+1:
+	add	%ecx, %ecx
+	add	%ecx, %ecx
+	add	(%esp),%ecx
+	add	$4, %esp
+	jmp	*(%ecx)
+
+	/* Sign/zero extend as appropriate.  */
+.Lretsint8:
+	movsbl	%al, %eax
+	jmp	.Lretint
+
+.Lretsint16:
+	movswl	%ax, %eax
+	jmp	.Lretint
+
+.Lretuint8:
+	movzbl	%al, %eax
+	jmp	.Lretint
+
+.Lretuint16:
+	movzwl	%ax, %eax
+	jmp	.Lretint
+
+.Lretint:
+        // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movl  %eax,0(%ecx)
+        jmp   .Lepilogue
+ 
+.Lretfloat:
+         // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        fstps (%ecx)
+        jmp   .Lepilogue
+ 
+.Lretdouble:
+        // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        fstpl (%ecx)
+        jmp   .Lepilogue
+ 
+.Lretlongdouble:
+        // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        fstpt (%ecx)
+        jmp   .Lepilogue
+ 
+.Lretint64:
+        // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movl  %eax,0(%ecx)
+        movl  %edx,4(%ecx)
+	jmp   .Lepilogue
+
+.Lretstruct1b:
+        // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movb  %al,0(%ecx)
+        jmp   .Lepilogue
+ 
+.Lretstruct2b:
+        // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movw  %ax,0(%ecx)
+        jmp   .Lepilogue
+
+.Lretstruct4b:
+        // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movl  %eax,0(%ecx)
+        jmp   .Lepilogue
+
+.Lretstruct:
+        // Nothing to do!
+ 
+.Lnoretval:
+.Lepilogue:
+        movl %ebp,%esp
+        popl %ebp
+        ret
+.ffi_call_win32_end:
+        .balign 16
+	.globl	_ffi_closure_THISCALL
+#ifndef __OS2__
+	.def	_ffi_closure_THISCALL;	.scl	2;	.type	32;	.endef
+#endif
+_ffi_closure_THISCALL:
+	pushl	%ebp
+	movl	%esp, %ebp
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	12(%ebp), %edx  /* account for stub return address on stack */
+	jmp	.stub
+.LFE1:
+
+        // This assumes we are using gas.
+        .balign 16
+	.globl	_ffi_closure_SYSV
+#ifndef __OS2__
+	.def	_ffi_closure_SYSV;	.scl	2;	.type	32;	.endef
+#endif
+_ffi_closure_SYSV:
+.LFB3:
+	pushl	%ebp
+.LCFI4:
+	movl	%esp, %ebp
+.LCFI5:
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	8(%ebp), %edx
+.stub:
+	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+	call	_ffi_closure_SYSV_inner
+	movl	-12(%ebp), %ecx
+
+0:
+	call	1f
+	// Do not insert anything here between the call and the jump table.
+.Lcls_store_table:
+	.long	.Lcls_noretval		/* FFI_TYPE_VOID */
+	.long	.Lcls_retint		/* FFI_TYPE_INT */
+	.long	.Lcls_retfloat		/* FFI_TYPE_FLOAT */
+	.long	.Lcls_retdouble		/* FFI_TYPE_DOUBLE */
+	.long	.Lcls_retldouble	/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lcls_retuint8		/* FFI_TYPE_UINT8 */
+	.long	.Lcls_retsint8		/* FFI_TYPE_SINT8 */
+	.long	.Lcls_retuint16		/* FFI_TYPE_UINT16 */
+	.long	.Lcls_retsint16		/* FFI_TYPE_SINT16 */
+	.long	.Lcls_retint		/* FFI_TYPE_UINT32 */
+	.long	.Lcls_retint		/* FFI_TYPE_SINT32 */
+	.long	.Lcls_retllong		/* FFI_TYPE_UINT64 */
+	.long	.Lcls_retllong		/* FFI_TYPE_SINT64 */
+	.long	.Lcls_retstruct		/* FFI_TYPE_STRUCT */
+	.long	.Lcls_retint		/* FFI_TYPE_POINTER */
+	.long	.Lcls_retstruct1	/* FFI_TYPE_SMALL_STRUCT_1B */
+	.long	.Lcls_retstruct2	/* FFI_TYPE_SMALL_STRUCT_2B */
+	.long	.Lcls_retstruct4	/* FFI_TYPE_SMALL_STRUCT_4B */
+	.long	.Lcls_retmsstruct	/* FFI_TYPE_MS_STRUCT */
+
+1:
+	add	%eax, %eax
+	add	%eax, %eax
+	add	(%esp),%eax
+	add	$4, %esp
+	jmp	*(%eax)
+
+	/* Sign/zero extend as appropriate.  */
+.Lcls_retsint8:
+	movsbl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retsint16:
+	movswl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retuint8:
+	movzbl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retuint16:
+	movzwl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retint:
+	movl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retfloat:
+	flds	(%ecx)
+	jmp	.Lcls_epilogue
+
+.Lcls_retdouble:
+	fldl	(%ecx)
+	jmp	.Lcls_epilogue
+
+.Lcls_retldouble:
+	fldt	(%ecx)
+	jmp	.Lcls_epilogue
+
+.Lcls_retllong:
+	movl	(%ecx), %eax
+	movl	4(%ecx), %edx
+	jmp	.Lcls_epilogue
+
+.Lcls_retstruct1:
+	movsbl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retstruct2:
+	movswl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retstruct4:
+	movl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retstruct:
+        // Caller expects us to pop struct return value pointer hidden arg.
+	movl	%ebp, %esp
+	popl	%ebp
+	ret	$0x4
+
+.Lcls_retmsstruct:
+	// Caller expects us to return a pointer to the real return value.
+	mov	%ecx, %eax
+	// Caller doesn't expect us to pop struct return value pointer hidden arg.
+	jmp	.Lcls_epilogue
+
+.Lcls_noretval:
+.Lcls_epilogue:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+.ffi_closure_SYSV_end:
+.LFE3:
+
+#if !FFI_NO_RAW_API
+
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+        .balign 16
+	.globl	_ffi_closure_raw_THISCALL
+#ifndef __OS2__
+	.def	_ffi_closure_raw_THISCALL;	.scl	2;	.type	32;	.endef
+#endif
+_ffi_closure_raw_THISCALL:
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%esi
+	subl	$36, %esp
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+	movl	%edx, 12(%esp)	/* user_data */
+	leal	12(%ebp), %edx	/* __builtin_dwarf_cfa () */
+	jmp	.stubraw
+        // This assumes we are using gas.
+        .balign 16
+	.globl	_ffi_closure_raw_SYSV
+#ifndef __OS2__
+	.def	_ffi_closure_raw_SYSV;	.scl	2;	.type	32;	.endef
+#endif
+_ffi_closure_raw_SYSV:
+.LFB4:
+	pushl	%ebp
+.LCFI6:
+	movl	%esp, %ebp
+.LCFI7:
+	pushl	%esi
+.LCFI8:
+	subl	$36, %esp
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+	movl	%edx, 12(%esp)	/* user_data */
+	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
+.stubraw:
+	movl	%edx, 8(%esp)	/* raw_args */
+	leal	-24(%ebp), %edx
+	movl	%edx, 4(%esp)	/* &res */
+	movl	%esi, (%esp)	/* cif */
+	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
+	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
+0:
+	call	1f
+	// Do not insert anything here between the call and the jump table.
+.Lrcls_store_table:
+	.long	.Lrcls_noretval		/* FFI_TYPE_VOID */
+	.long	.Lrcls_retint		/* FFI_TYPE_INT */
+	.long	.Lrcls_retfloat		/* FFI_TYPE_FLOAT */
+	.long	.Lrcls_retdouble	/* FFI_TYPE_DOUBLE */
+	.long	.Lrcls_retldouble	/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lrcls_retuint8		/* FFI_TYPE_UINT8 */
+	.long	.Lrcls_retsint8		/* FFI_TYPE_SINT8 */
+	.long	.Lrcls_retuint16	/* FFI_TYPE_UINT16 */
+	.long	.Lrcls_retsint16	/* FFI_TYPE_SINT16 */
+	.long	.Lrcls_retint		/* FFI_TYPE_UINT32 */
+	.long	.Lrcls_retint		/* FFI_TYPE_SINT32 */
+	.long	.Lrcls_retllong		/* FFI_TYPE_UINT64 */
+	.long	.Lrcls_retllong		/* FFI_TYPE_SINT64 */
+	.long	.Lrcls_retstruct	/* FFI_TYPE_STRUCT */
+	.long	.Lrcls_retint		/* FFI_TYPE_POINTER */
+	.long	.Lrcls_retstruct1	/* FFI_TYPE_SMALL_STRUCT_1B */
+	.long	.Lrcls_retstruct2	/* FFI_TYPE_SMALL_STRUCT_2B */
+	.long	.Lrcls_retstruct4	/* FFI_TYPE_SMALL_STRUCT_4B */
+	.long	.Lrcls_retstruct	/* FFI_TYPE_MS_STRUCT */
+1:
+	add	%eax, %eax
+	add	%eax, %eax
+	add	(%esp),%eax
+	add	$4, %esp
+	jmp	*(%eax)
+
+	/* Sign/zero extend as appropriate.  */
+.Lrcls_retsint8:
+	movsbl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retsint16:
+	movswl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retuint8:
+	movzbl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retuint16:
+	movzwl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retint:
+	movl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retfloat:
+	flds	-24(%ebp)
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retdouble:
+	fldl	-24(%ebp)
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retldouble:
+	fldt	-24(%ebp)
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retllong:
+	movl	-24(%ebp), %eax
+	movl	-20(%ebp), %edx
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retstruct1:
+	movsbl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retstruct2:
+	movswl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retstruct4:
+	movl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retstruct:
+	// Nothing to do!
+
+.Lrcls_noretval:
+.Lrcls_epilogue:
+	addl	$36, %esp
+	popl	%esi
+	popl	%ebp
+	ret
+.ffi_closure_raw_SYSV_end:
+.LFE4:
+
+#endif /* !FFI_NO_RAW_API */
+
+        // This assumes we are using gas.
+	.balign	16
+	.globl	_ffi_closure_STDCALL
+#ifndef __OS2__
+	.def	_ffi_closure_STDCALL;	.scl	2;	.type	32;	.endef
+#endif
+_ffi_closure_STDCALL:
+.LFB5:
+	pushl	%ebp
+.LCFI9:
+	movl	%esp, %ebp
+.LCFI10:
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	12(%ebp), %edx  /* account for stub return address on stack */
+	movl	%edx, 4(%esp)	/* args */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+	call	_ffi_closure_SYSV_inner
+	movl	-12(%ebp), %ecx
+0:
+	call	1f
+	// Do not insert anything here between the call and the jump table.
+.Lscls_store_table:
+	.long	.Lscls_noretval		/* FFI_TYPE_VOID */
+	.long	.Lscls_retint		/* FFI_TYPE_INT */
+	.long	.Lscls_retfloat		/* FFI_TYPE_FLOAT */
+	.long	.Lscls_retdouble	/* FFI_TYPE_DOUBLE */
+	.long	.Lscls_retldouble	/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lscls_retuint8		/* FFI_TYPE_UINT8 */
+	.long	.Lscls_retsint8		/* FFI_TYPE_SINT8 */
+	.long	.Lscls_retuint16	/* FFI_TYPE_UINT16 */
+	.long	.Lscls_retsint16	/* FFI_TYPE_SINT16 */
+	.long	.Lscls_retint		/* FFI_TYPE_UINT32 */
+	.long	.Lscls_retint		/* FFI_TYPE_SINT32 */
+	.long	.Lscls_retllong		/* FFI_TYPE_UINT64 */
+	.long	.Lscls_retllong		/* FFI_TYPE_SINT64 */
+	.long	.Lscls_retstruct	/* FFI_TYPE_STRUCT */
+	.long	.Lscls_retint		/* FFI_TYPE_POINTER */
+	.long	.Lscls_retstruct1	/* FFI_TYPE_SMALL_STRUCT_1B */
+	.long	.Lscls_retstruct2	/* FFI_TYPE_SMALL_STRUCT_2B */
+	.long	.Lscls_retstruct4	/* FFI_TYPE_SMALL_STRUCT_4B */
+1:
+	add	%eax, %eax
+	add	%eax, %eax
+	add	(%esp),%eax
+	add	$4, %esp
+	jmp	*(%eax)
+
+	/* Sign/zero extend as appropriate.  */
+.Lscls_retsint8:
+	movsbl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retsint16:
+	movswl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retuint8:
+	movzbl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retuint16:
+	movzwl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retint:
+	movl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retfloat:
+	flds	(%ecx)
+	jmp	.Lscls_epilogue
+
+.Lscls_retdouble:
+	fldl	(%ecx)
+	jmp	.Lscls_epilogue
+
+.Lscls_retldouble:
+	fldt	(%ecx)
+	jmp	.Lscls_epilogue
+
+.Lscls_retllong:
+	movl	(%ecx), %eax
+	movl	4(%ecx), %edx
+	jmp	.Lscls_epilogue
+
+.Lscls_retstruct1:
+	movsbl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retstruct2:
+	movswl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retstruct4:
+	movl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retstruct:
+	// Nothing to do!
+
+.Lscls_noretval:
+.Lscls_epilogue:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+.ffi_closure_STDCALL_end:
+.LFE5:
+
+#ifndef __OS2__
+	.section	.eh_frame,"w"
+#endif
+.Lframe1:
+.LSCIE1:
+	.long	.LECIE1-.LASCIE1  /* Length of Common Information Entry */
+.LASCIE1:
+	.long	0x0	/* CIE Identifier Tag */
+	.byte	0x1	/* CIE Version */
+#ifdef __PIC__
+	.ascii "zR\0"	/* CIE Augmentation */
+#else
+	.ascii "\0"	/* CIE Augmentation */
+#endif
+	.byte	0x1	/* .uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x7c	/* .sleb128 -4; CIE Data Alignment Factor */
+	.byte	0x8	/* CIE RA Column */
+#ifdef __PIC__
+	.byte	0x1	/* .uleb128 0x1; Augmentation size */
+	.byte	0x1b	/* FDE Encoding (pcrel sdata4) */
+#endif
+	.byte	0xc	/* DW_CFA_def_cfa CFA = r4 + 4 = 4(%esp) */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x88	/* DW_CFA_offset, column 0x8 %eip at CFA + 1 * -4 */
+	.byte	0x1	/* .uleb128 0x1 */
+	.align 4
+.LECIE1:
+
+.LSFDE1:
+	.long	.LEFDE1-.LASFDE1	/* FDE Length */
+.LASFDE1:
+	.long	.LASFDE1-.Lframe1	/* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+	.long	.LFB1-.	/* FDE initial location */
+#else
+	.long	.LFB1
+#endif
+	.long	.LFE1-.LFB1	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	/* DW_CFA_xxx CFI instructions go here.  */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI0-.LFB1
+	.byte	0xe	/* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+	.byte	0x2	/* .uleb128 0x2 */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI1-.LCFI0
+	.byte	0xd	/* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+	.byte	0x5	/* .uleb128 0x5 */
+
+	/* End of DW_CFA_xxx CFI instructions.  */
+	.align 4
+.LEFDE1:
+
+
+.LSFDE3:
+	.long	.LEFDE3-.LASFDE3	/* FDE Length */
+.LASFDE3:
+	.long	.LASFDE3-.Lframe1	/* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+	.long	.LFB3-.	/* FDE initial location */
+#else
+	.long	.LFB3
+#endif
+	.long	.LFE3-.LFB3	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	/* DW_CFA_xxx CFI instructions go here.  */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI4-.LFB3
+	.byte	0xe	/* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+	.byte	0x2	/* .uleb128 0x2 */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI5-.LCFI4
+	.byte	0xd	/* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+	.byte	0x5	/* .uleb128 0x5 */
+
+	/* End of DW_CFA_xxx CFI instructions.  */
+	.align 4
+.LEFDE3:
+
+#if !FFI_NO_RAW_API
+
+.LSFDE4:
+	.long	.LEFDE4-.LASFDE4	/* FDE Length */
+.LASFDE4:
+	.long	.LASFDE4-.Lframe1	/* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+	.long	.LFB4-.	/* FDE initial location */
+#else
+	.long	.LFB4
+#endif
+	.long	.LFE4-.LFB4	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	/* DW_CFA_xxx CFI instructions go here.  */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI6-.LFB4
+	.byte	0xe	/* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+	.byte	0x2	/* .uleb128 0x2 */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI7-.LCFI6
+	.byte	0xd	/* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+	.byte	0x5	/* .uleb128 0x5 */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI8-.LCFI7
+	.byte	0x86	/* DW_CFA_offset, column 0x6 %esi at CFA + 3 * -4 */
+	.byte	0x3	/* .uleb128 0x3 */
+
+	/* End of DW_CFA_xxx CFI instructions.  */
+	.align 4
+.LEFDE4:
+
+#endif /* !FFI_NO_RAW_API */
+
+.LSFDE5:
+	.long	.LEFDE5-.LASFDE5	/* FDE Length */
+.LASFDE5:
+	.long	.LASFDE5-.Lframe1	/* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+	.long	.LFB5-.	/* FDE initial location */
+#else
+	.long	.LFB5
+#endif
+	.long	.LFE5-.LFB5	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	/* DW_CFA_xxx CFI instructions go here.  */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI9-.LFB5
+	.byte	0xe	/* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+	.byte	0x2	/* .uleb128 0x2 */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI10-.LCFI9
+	.byte	0xd	/* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+	.byte	0x5	/* .uleb128 0x5 */
+
+	/* End of DW_CFA_xxx CFI instructions.  */
+	.align 4
+.LEFDE5:
diff --git a/lib/wrappers/libffi/gcc/win32_asm.s b/lib/wrappers/libffi/gcc/win32_asm.s
new file mode 100644
index 000000000..7a3e7f16c
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/win32_asm.s
@@ -0,0 +1,736 @@
+# 1 "gcc\\win32_asm.asm"

+# 1 "<command-line>"

+# 1 "gcc\\win32_asm.asm"

+# 33 "gcc\\win32_asm.asm"

+# 1 "common/fficonfig.h" 1

+# 34 "gcc\\win32_asm.asm" 2

+# 1 "common/ffi.h" 1

+# 63 "common/ffi.h"

+# 1 "common/ffitarget.h" 1

+# 64 "common/ffi.h" 2

+# 35 "gcc\\win32_asm.asm" 2

+

+

+ .text

+

+

+        .balign 16

+ .globl _ffi_call_win32

+

+ .def _ffi_call_win32; .scl 2; .type 32; .endef

+

+_ffi_call_win32:

+.LFB1:

+        pushl %ebp

+.LCFI0:

+        movl %esp,%ebp

+.LCFI1:

+

+        movl 20(%ebp),%ecx

+        subl %ecx,%esp

+

+        movl %esp,%eax

+

+

+        pushl 12(%ebp)

+        pushl %eax

+        call *8(%ebp)

+

+

+        addl $8,%esp

+

+

+ cmpl $3, 16(%ebp)

+ jz .do_thiscall

+ cmpl $4, 16(%ebp)

+ jnz .do_fncall

+ movl (%esp), %ecx

+ movl 4(%esp), %edx

+ addl $8, %esp

+ jmp .do_fncall

+.do_thiscall:

+ movl (%esp), %ecx

+ addl $4, %esp

+

+.do_fncall:

+

+

+

+

+        call *32(%ebp)

+

+

+

+

+        movl 24(%ebp),%ecx

+

+

+        cmpl $0,28(%ebp)

+        jne 0f

+

+

+

+        cmpl $2,%ecx

+        jne .Lnoretval

+        fstp %st(0)

+

+        jmp .Lepilogue

+

+0:

+ call 1f

+

+.Lstore_table:

+ .long .Lnoretval

+ .long .Lretint

+ .long .Lretfloat

+ .long .Lretdouble

+ .long .Lretlongdouble

+ .long .Lretuint8

+ .long .Lretsint8

+ .long .Lretuint16

+ .long .Lretsint16

+ .long .Lretint

+ .long .Lretint

+ .long .Lretint64

+ .long .Lretint64

+ .long .Lretstruct

+ .long .Lretint

+ .long .Lretstruct1b

+ .long .Lretstruct2b

+ .long .Lretstruct4b

+ .long .Lretstruct

+1:

+ add %ecx, %ecx

+ add %ecx, %ecx

+ add (%esp),%ecx

+ add $4, %esp

+ jmp *(%ecx)

+

+

+.Lretsint8:

+ movsbl %al, %eax

+ jmp .Lretint

+

+.Lretsint16:

+ movswl %ax, %eax

+ jmp .Lretint

+

+.Lretuint8:

+ movzbl %al, %eax

+ jmp .Lretint

+

+.Lretuint16:

+ movzwl %ax, %eax

+ jmp .Lretint

+

+.Lretint:

+

+        movl 28(%ebp),%ecx

+        movl %eax,0(%ecx)

+        jmp .Lepilogue

+

+.Lretfloat:

+

+        movl 28(%ebp),%ecx

+        fstps (%ecx)

+        jmp .Lepilogue

+

+.Lretdouble:

+

+        movl 28(%ebp),%ecx

+        fstpl (%ecx)

+        jmp .Lepilogue

+

+.Lretlongdouble:

+

+        movl 28(%ebp),%ecx

+        fstpt (%ecx)

+        jmp .Lepilogue

+

+.Lretint64:

+

+        movl 28(%ebp),%ecx

+        movl %eax,0(%ecx)

+        movl %edx,4(%ecx)

+ jmp .Lepilogue

+

+.Lretstruct1b:

+

+        movl 28(%ebp),%ecx

+        movb %al,0(%ecx)

+        jmp .Lepilogue

+

+.Lretstruct2b:

+

+        movl 28(%ebp),%ecx

+        movw %ax,0(%ecx)

+        jmp .Lepilogue

+

+.Lretstruct4b:

+

+        movl 28(%ebp),%ecx

+        movl %eax,0(%ecx)

+        jmp .Lepilogue

+

+.Lretstruct:

+

+

+.Lnoretval:

+.Lepilogue:

+        movl %ebp,%esp

+        popl %ebp

+        ret

+.ffi_call_win32_end:

+        .balign 16

+ .globl _ffi_closure_THISCALL

+

+ .def _ffi_closure_THISCALL; .scl 2; .type 32; .endef

+

+_ffi_closure_THISCALL:

+ pushl %ebp

+ movl %esp, %ebp

+ subl $40, %esp

+ leal -24(%ebp), %edx

+ movl %edx, -12(%ebp)

+ leal 12(%ebp), %edx

+ jmp .stub

+.LFE1:

+

+

+        .balign 16

+ .globl _ffi_closure_SYSV

+

+ .def _ffi_closure_SYSV; .scl 2; .type 32; .endef

+

+_ffi_closure_SYSV:

+.LFB3:

+ pushl %ebp

+.LCFI4:

+ movl %esp, %ebp

+.LCFI5:

+ subl $40, %esp

+ leal -24(%ebp), %edx

+ movl %edx, -12(%ebp)

+ leal 8(%ebp), %edx

+.stub:

+ movl %edx, 4(%esp)

+ leal -12(%ebp), %edx

+ movl %edx, (%esp)

+ call _ffi_closure_SYSV_inner

+ movl -12(%ebp), %ecx

+

+0:

+ call 1f

+

+.Lcls_store_table:

+ .long .Lcls_noretval

+ .long .Lcls_retint

+ .long .Lcls_retfloat

+ .long .Lcls_retdouble

+ .long .Lcls_retldouble

+ .long .Lcls_retuint8

+ .long .Lcls_retsint8

+ .long .Lcls_retuint16

+ .long .Lcls_retsint16

+ .long .Lcls_retint

+ .long .Lcls_retint

+ .long .Lcls_retllong

+ .long .Lcls_retllong

+ .long .Lcls_retstruct

+ .long .Lcls_retint

+ .long .Lcls_retstruct1

+ .long .Lcls_retstruct2

+ .long .Lcls_retstruct4

+ .long .Lcls_retmsstruct

+

+1:

+ add %eax, %eax

+ add %eax, %eax

+ add (%esp),%eax

+ add $4, %esp

+ jmp *(%eax)

+

+

+.Lcls_retsint8:

+ movsbl (%ecx), %eax

+ jmp .Lcls_epilogue

+

+.Lcls_retsint16:

+ movswl (%ecx), %eax

+ jmp .Lcls_epilogue

+

+.Lcls_retuint8:

+ movzbl (%ecx), %eax

+ jmp .Lcls_epilogue

+

+.Lcls_retuint16:

+ movzwl (%ecx), %eax

+ jmp .Lcls_epilogue

+

+.Lcls_retint:

+ movl (%ecx), %eax

+ jmp .Lcls_epilogue

+

+.Lcls_retfloat:

+ flds (%ecx)

+ jmp .Lcls_epilogue

+

+.Lcls_retdouble:

+ fldl (%ecx)

+ jmp .Lcls_epilogue

+

+.Lcls_retldouble:

+ fldt (%ecx)

+ jmp .Lcls_epilogue

+

+.Lcls_retllong:

+ movl (%ecx), %eax

+ movl 4(%ecx), %edx

+ jmp .Lcls_epilogue

+

+.Lcls_retstruct1:

+ movsbl (%ecx), %eax

+ jmp .Lcls_epilogue

+

+.Lcls_retstruct2:

+ movswl (%ecx), %eax

+ jmp .Lcls_epilogue

+

+.Lcls_retstruct4:

+ movl (%ecx), %eax

+ jmp .Lcls_epilogue

+

+.Lcls_retstruct:

+

+ movl %ebp, %esp

+ popl %ebp

+ ret $0x4

+

+.Lcls_retmsstruct:

+

+ mov %ecx, %eax

+

+ jmp .Lcls_epilogue

+

+.Lcls_noretval:

+.Lcls_epilogue:

+ movl %ebp, %esp

+ popl %ebp

+ ret

+.ffi_closure_SYSV_end:

+.LFE3:

+

+

+

+

+

+

+

+        .balign 16

+ .globl _ffi_closure_raw_THISCALL

+

+ .def _ffi_closure_raw_THISCALL; .scl 2; .type 32; .endef

+

+_ffi_closure_raw_THISCALL:

+ pushl %ebp

+ movl %esp, %ebp

+ pushl %esi

+ subl $36, %esp

+ movl ((52 + 3) & ~3)(%eax), %esi

+ movl ((((52 + 3) & ~3) + 4) + 4)(%eax), %edx

+ movl %edx, 12(%esp)

+ leal 12(%ebp), %edx

+ jmp .stubraw

+

+        .balign 16

+ .globl _ffi_closure_raw_SYSV

+

+ .def _ffi_closure_raw_SYSV; .scl 2; .type 32; .endef

+

+_ffi_closure_raw_SYSV:

+.LFB4:

+ pushl %ebp

+.LCFI6:

+ movl %esp, %ebp

+.LCFI7:

+ pushl %esi

+.LCFI8:

+ subl $36, %esp

+ movl ((52 + 3) & ~3)(%eax), %esi

+ movl ((((52 + 3) & ~3) + 4) + 4)(%eax), %edx

+ movl %edx, 12(%esp)

+ leal 8(%ebp), %edx

+.stubraw:

+ movl %edx, 8(%esp)

+ leal -24(%ebp), %edx

+ movl %edx, 4(%esp)

+ movl %esi, (%esp)

+ call *(((52 + 3) & ~3) + 4)(%eax)

+ movl 20(%esi), %eax

+0:

+ call 1f

+

+.Lrcls_store_table:

+ .long .Lrcls_noretval

+ .long .Lrcls_retint

+ .long .Lrcls_retfloat

+ .long .Lrcls_retdouble

+ .long .Lrcls_retldouble

+ .long .Lrcls_retuint8

+ .long .Lrcls_retsint8

+ .long .Lrcls_retuint16

+ .long .Lrcls_retsint16

+ .long .Lrcls_retint

+ .long .Lrcls_retint

+ .long .Lrcls_retllong

+ .long .Lrcls_retllong

+ .long .Lrcls_retstruct

+ .long .Lrcls_retint

+ .long .Lrcls_retstruct1

+ .long .Lrcls_retstruct2

+ .long .Lrcls_retstruct4

+ .long .Lrcls_retstruct

+1:

+ add %eax, %eax

+ add %eax, %eax

+ add (%esp),%eax

+ add $4, %esp

+ jmp *(%eax)

+

+

+.Lrcls_retsint8:

+ movsbl -24(%ebp), %eax

+ jmp .Lrcls_epilogue

+

+.Lrcls_retsint16:

+ movswl -24(%ebp), %eax

+ jmp .Lrcls_epilogue

+

+.Lrcls_retuint8:

+ movzbl -24(%ebp), %eax

+ jmp .Lrcls_epilogue

+

+.Lrcls_retuint16:

+ movzwl -24(%ebp), %eax

+ jmp .Lrcls_epilogue

+

+.Lrcls_retint:

+ movl -24(%ebp), %eax

+ jmp .Lrcls_epilogue

+

+.Lrcls_retfloat:

+ flds -24(%ebp)

+ jmp .Lrcls_epilogue

+

+.Lrcls_retdouble:

+ fldl -24(%ebp)

+ jmp .Lrcls_epilogue

+

+.Lrcls_retldouble:

+ fldt -24(%ebp)

+ jmp .Lrcls_epilogue

+

+.Lrcls_retllong:

+ movl -24(%ebp), %eax

+ movl -20(%ebp), %edx

+ jmp .Lrcls_epilogue

+

+.Lrcls_retstruct1:

+ movsbl -24(%ebp), %eax

+ jmp .Lrcls_epilogue

+

+.Lrcls_retstruct2:

+ movswl -24(%ebp), %eax

+ jmp .Lrcls_epilogue

+

+.Lrcls_retstruct4:

+ movl -24(%ebp), %eax

+ jmp .Lrcls_epilogue

+

+.Lrcls_retstruct:

+

+

+.Lrcls_noretval:

+.Lrcls_epilogue:

+ addl $36, %esp

+ popl %esi

+ popl %ebp

+ ret

+.ffi_closure_raw_SYSV_end:

+.LFE4:

+

+

+

+

+ .balign 16

+ .globl _ffi_closure_STDCALL

+

+ .def _ffi_closure_STDCALL; .scl 2; .type 32; .endef

+

+_ffi_closure_STDCALL:

+.LFB5:

+ pushl %ebp

+.LCFI9:

+ movl %esp, %ebp

+.LCFI10:

+ subl $40, %esp

+ leal -24(%ebp), %edx

+ movl %edx, -12(%ebp)

+ leal 12(%ebp), %edx

+ movl %edx, 4(%esp)

+ leal -12(%ebp), %edx

+ movl %edx, (%esp)

+ call _ffi_closure_SYSV_inner

+ movl -12(%ebp), %ecx

+0:

+ call 1f

+

+.Lscls_store_table:

+ .long .Lscls_noretval

+ .long .Lscls_retint

+ .long .Lscls_retfloat

+ .long .Lscls_retdouble

+ .long .Lscls_retldouble

+ .long .Lscls_retuint8

+ .long .Lscls_retsint8

+ .long .Lscls_retuint16

+ .long .Lscls_retsint16

+ .long .Lscls_retint

+ .long .Lscls_retint

+ .long .Lscls_retllong

+ .long .Lscls_retllong

+ .long .Lscls_retstruct

+ .long .Lscls_retint

+ .long .Lscls_retstruct1

+ .long .Lscls_retstruct2

+ .long .Lscls_retstruct4

+1:

+ add %eax, %eax

+ add %eax, %eax

+ add (%esp),%eax

+ add $4, %esp

+ jmp *(%eax)

+

+

+.Lscls_retsint8:

+ movsbl (%ecx), %eax

+ jmp .Lscls_epilogue

+

+.Lscls_retsint16:

+ movswl (%ecx), %eax

+ jmp .Lscls_epilogue

+

+.Lscls_retuint8:

+ movzbl (%ecx), %eax

+ jmp .Lscls_epilogue

+

+.Lscls_retuint16:

+ movzwl (%ecx), %eax

+ jmp .Lscls_epilogue

+

+.Lscls_retint:

+ movl (%ecx), %eax

+ jmp .Lscls_epilogue

+

+.Lscls_retfloat:

+ flds (%ecx)

+ jmp .Lscls_epilogue

+

+.Lscls_retdouble:

+ fldl (%ecx)

+ jmp .Lscls_epilogue

+

+.Lscls_retldouble:

+ fldt (%ecx)

+ jmp .Lscls_epilogue

+

+.Lscls_retllong:

+ movl (%ecx), %eax

+ movl 4(%ecx), %edx

+ jmp .Lscls_epilogue

+

+.Lscls_retstruct1:

+ movsbl (%ecx), %eax

+ jmp .Lscls_epilogue

+

+.Lscls_retstruct2:

+ movswl (%ecx), %eax

+ jmp .Lscls_epilogue

+

+.Lscls_retstruct4:

+ movl (%ecx), %eax

+ jmp .Lscls_epilogue

+

+.Lscls_retstruct:

+

+

+.Lscls_noretval:

+.Lscls_epilogue:

+ movl %ebp, %esp

+ popl %ebp

+ ret

+.ffi_closure_STDCALL_end:

+.LFE5:

+

+

+ .section .eh_frame,"w"

+

+.Lframe1:

+.LSCIE1:

+ .long .LECIE1-.LASCIE1

+.LASCIE1:

+ .long 0x0

+ .byte 0x1

+

+

+

+ .ascii "\0"

+

+ .byte 0x1

+ .byte 0x7c

+ .byte 0x8

+

+

+

+

+ .byte 0xc

+ .byte 0x4

+ .byte 0x4

+ .byte 0x88

+ .byte 0x1

+ .align 4

+.LECIE1:

+

+.LSFDE1:

+ .long .LEFDE1-.LASFDE1

+.LASFDE1:

+ .long .LASFDE1-.Lframe1

+

+

+

+ .long .LFB1

+

+ .long .LFE1-.LFB1

+

+

+

+

+

+ .byte 0x4

+ .long .LCFI0-.LFB1

+ .byte 0xe

+ .byte 0x8

+ .byte 0x85

+ .byte 0x2

+

+ .byte 0x4

+ .long .LCFI1-.LCFI0

+ .byte 0xd

+ .byte 0x5

+

+

+ .align 4

+.LEFDE1:

+

+

+.LSFDE3:

+ .long .LEFDE3-.LASFDE3

+.LASFDE3:

+ .long .LASFDE3-.Lframe1

+

+

+

+ .long .LFB3

+

+ .long .LFE3-.LFB3

+

+

+

+

+

+ .byte 0x4

+ .long .LCFI4-.LFB3

+ .byte 0xe

+ .byte 0x8

+ .byte 0x85

+ .byte 0x2

+

+ .byte 0x4

+ .long .LCFI5-.LCFI4

+ .byte 0xd

+ .byte 0x5

+

+

+ .align 4

+.LEFDE3:

+

+

+

+.LSFDE4:

+ .long .LEFDE4-.LASFDE4

+.LASFDE4:

+ .long .LASFDE4-.Lframe1

+

+

+

+ .long .LFB4

+

+ .long .LFE4-.LFB4

+

+

+

+

+

+ .byte 0x4

+ .long .LCFI6-.LFB4

+ .byte 0xe

+ .byte 0x8

+ .byte 0x85

+ .byte 0x2

+

+ .byte 0x4

+ .long .LCFI7-.LCFI6

+ .byte 0xd

+ .byte 0x5

+

+ .byte 0x4

+ .long .LCFI8-.LCFI7

+ .byte 0x86

+ .byte 0x3

+

+

+ .align 4

+.LEFDE4:

+

+

+

+.LSFDE5:

+ .long .LEFDE5-.LASFDE5

+.LASFDE5:

+ .long .LASFDE5-.Lframe1

+

+

+

+ .long .LFB5

+

+ .long .LFE5-.LFB5

+

+

+

+

+

+ .byte 0x4

+ .long .LCFI9-.LFB5

+ .byte 0xe

+ .byte 0x8

+ .byte 0x85

+ .byte 0x2

+

+ .byte 0x4

+ .long .LCFI10-.LCFI9

+ .byte 0xd

+ .byte 0x5

+

+

+ .align 4

+.LEFDE5:

diff --git a/lib/wrappers/libffi/gcc/win64_asm.asm b/lib/wrappers/libffi/gcc/win64_asm.asm
new file mode 100644
index 000000000..1dc98f99a
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/win64_asm.asm
@@ -0,0 +1,467 @@
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+	
+/* Constants for ffi_call_win64 */	
+#define STACK 0
+#define PREP_ARGS_FN 32
+#define ECIF 40
+#define CIF_BYTES 48
+#define CIF_FLAGS 56
+#define RVALUE 64
+#define FN 72
+
+/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *),
+                   extended_cif *ecif, unsigned bytes, unsigned flags,
+                   unsigned *rvalue, void (*fn)());
+ */
+
+#ifdef _MSC_VER
+PUBLIC	ffi_call_win64
+
+EXTRN	__chkstk:NEAR
+EXTRN	ffi_closure_win64_inner:NEAR
+
+_TEXT	SEGMENT
+
+;;; ffi_closure_win64 will be called with these registers set:
+;;;    rax points to 'closure'
+;;;    r11 contains a bit mask that specifies which of the
+;;;    first four parameters are float or double
+;;;
+;;; It must move the parameters passed in registers to their stack location,
+;;; call ffi_closure_win64_inner for the actual work, then return the result.
+;;; 
+ffi_closure_win64 PROC FRAME
+	;; copy register arguments onto stack
+	test	r11, 1
+	jne	first_is_float	
+	mov	QWORD PTR [rsp+8], rcx
+	jmp	second
+first_is_float:
+	movlpd	QWORD PTR [rsp+8], xmm0
+
+second:
+	test	r11, 2
+	jne	second_is_float	
+	mov	QWORD PTR [rsp+16], rdx
+	jmp	third
+second_is_float:
+	movlpd	QWORD PTR [rsp+16], xmm1
+
+third:
+	test	r11, 4
+	jne	third_is_float	
+	mov	QWORD PTR [rsp+24], r8
+	jmp	fourth
+third_is_float:
+	movlpd	QWORD PTR [rsp+24], xmm2
+
+fourth:
+	test	r11, 8
+	jne	fourth_is_float	
+	mov	QWORD PTR [rsp+32], r9
+	jmp	done
+fourth_is_float:
+	movlpd	QWORD PTR [rsp+32], xmm3
+
+done:
+        .ALLOCSTACK 40
+	sub	rsp, 40
+        .ENDPROLOG
+	mov	rcx, rax	; context is first parameter
+	mov	rdx, rsp	; stack is second parameter
+	add	rdx, 48		; point to start of arguments
+	mov	rax, ffi_closure_win64_inner
+	call	rax		; call the real closure function
+	add	rsp, 40
+	movd	xmm0, rax	; If the closure returned a float,
+                                ; ffi_closure_win64_inner wrote it to rax
+	ret	0
+ffi_closure_win64 ENDP
+
+ffi_call_win64 PROC FRAME
+        ;; copy registers onto stack
+	mov	QWORD PTR [rsp+32], r9
+	mov	QWORD PTR [rsp+24], r8
+	mov	QWORD PTR [rsp+16], rdx
+	mov	QWORD PTR [rsp+8], rcx
+        .PUSHREG rbp
+	push	rbp
+        .ALLOCSTACK 48
+	sub	rsp, 48					; 00000030H
+        .SETFRAME rbp, 32
+	lea	rbp, QWORD PTR [rsp+32]
+        .ENDPROLOG
+
+	mov	eax, DWORD PTR CIF_BYTES[rbp]
+	add	rax, 15
+	and	rax, -16
+	call	__chkstk
+	sub	rsp, rax
+	lea	rax, QWORD PTR [rsp+32]
+	mov	QWORD PTR STACK[rbp], rax
+
+	mov	rdx, QWORD PTR ECIF[rbp]
+	mov	rcx, QWORD PTR STACK[rbp]
+	call	QWORD PTR PREP_ARGS_FN[rbp]
+
+	mov	rsp, QWORD PTR STACK[rbp]
+
+	movlpd	xmm3, QWORD PTR [rsp+24]
+	movd	r9, xmm3
+
+	movlpd	xmm2, QWORD PTR [rsp+16]
+	movd	r8, xmm2
+
+	movlpd	xmm1, QWORD PTR [rsp+8]
+	movd	rdx, xmm1
+
+	movlpd	xmm0, QWORD PTR [rsp]
+	movd	rcx, xmm0
+
+	call	QWORD PTR FN[rbp]
+ret_struct4b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B
+ 	jne	ret_struct2b$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	DWORD PTR [rcx], eax
+	jmp	ret_void$
+
+ret_struct2b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B
+ 	jne	ret_struct1b$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	WORD PTR [rcx], ax
+	jmp	ret_void$
+
+ret_struct1b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B
+ 	jne	ret_uint8$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	BYTE PTR [rcx], al
+	jmp	ret_void$
+
+ret_uint8$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8
+ 	jne	ret_sint8$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movzx   rax, al
+	mov	QWORD PTR [rcx], rax
+	jmp	ret_void$
+
+ret_sint8$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8
+ 	jne	ret_uint16$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movsx   rax, al
+	mov	QWORD PTR [rcx], rax
+	jmp	ret_void$
+
+ret_uint16$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16
+ 	jne	ret_sint16$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movzx   rax, ax
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_sint16$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16
+ 	jne	ret_uint32$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movsx   rax, ax
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_uint32$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32
+ 	jne	ret_sint32$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov     eax, eax
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_sint32$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32
+ 	jne	ret_float$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	cdqe
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_float$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT
+ 	jne	SHORT ret_double$
+
+ 	mov	rax, QWORD PTR RVALUE[rbp]
+ 	movss	DWORD PTR [rax], xmm0
+ 	jmp	SHORT ret_void$
+
+ret_double$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE
+ 	jne	SHORT ret_sint64$
+
+ 	mov	rax, QWORD PTR RVALUE[rbp]
+ 	movlpd	QWORD PTR [rax], xmm0
+ 	jmp	SHORT ret_void$
+
+ret_sint64$:
+  	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64
+  	jne	ret_void$
+
+ 	mov	rcx, QWORD PTR RVALUE[rbp]
+ 	mov	QWORD PTR [rcx], rax
+ 	jmp	SHORT ret_void$
+	
+ret_void$:
+	xor	rax, rax
+
+	lea	rsp, QWORD PTR [rbp+16]
+	pop	rbp
+	ret	0
+ffi_call_win64 ENDP
+_TEXT	ENDS
+END
+
+#else
+
+#ifdef SYMBOL_UNDERSCORE
+#define SYMBOL_NAME(name) _##name
+#else
+#define SYMBOL_NAME(name) name
+#endif
+
+.text
+
+.extern SYMBOL_NAME(ffi_closure_win64_inner)
+
+// ffi_closure_win64 will be called with these registers set:
+//    rax points to 'closure'
+//    r11 contains a bit mask that specifies which of the
+//    first four parameters are float or double
+//
+// It must move the parameters passed in registers to their stack location,
+// call ffi_closure_win64_inner for the actual work, then return the result.
+// 
+	.balign 16
+        .globl SYMBOL_NAME(ffi_closure_win64)
+SYMBOL_NAME(ffi_closure_win64):
+	// copy register arguments onto stack
+	test	$1,%r11
+	jne	.Lfirst_is_float	
+	mov	%rcx, 8(%rsp)
+	jmp	.Lsecond
+.Lfirst_is_float:
+	movlpd	%xmm0, 8(%rsp)
+
+.Lsecond:
+	test	$2, %r11
+	jne	.Lsecond_is_float	
+	mov	%rdx, 16(%rsp)
+	jmp	.Lthird
+.Lsecond_is_float:
+	movlpd	%xmm1, 16(%rsp)
+
+.Lthird:
+	test	$4, %r11
+	jne	.Lthird_is_float	
+	mov	%r8,24(%rsp)
+	jmp	.Lfourth
+.Lthird_is_float:
+	movlpd	%xmm2, 24(%rsp)
+
+.Lfourth:
+	test	$8, %r11
+	jne	.Lfourth_is_float	
+	mov	%r9, 32(%rsp)
+	jmp	.Ldone
+.Lfourth_is_float:
+	movlpd	%xmm3, 32(%rsp)
+
+.Ldone:
+// ALLOCSTACK 40
+	sub	$40, %rsp
+// ENDPROLOG
+	mov	%rax, %rcx	// context is first parameter
+	mov	%rsp, %rdx	// stack is second parameter
+	add	$48, %rdx	// point to start of arguments
+	mov	$SYMBOL_NAME(ffi_closure_win64_inner), %rax
+	callq	*%rax		// call the real closure function
+	add	$40, %rsp
+	movq	%rax, %xmm0	// If the closure returned a float,
+                                // ffi_closure_win64_inner wrote it to rax
+	retq
+.ffi_closure_win64_end:
+
+	.balign 16
+        .globl	SYMBOL_NAME(ffi_call_win64)
+SYMBOL_NAME(ffi_call_win64):
+        // copy registers onto stack
+	mov	%r9,32(%rsp)
+	mov	%r8,24(%rsp)
+	mov	%rdx,16(%rsp)
+	mov	%rcx,8(%rsp)
+        // PUSHREG rbp
+	push	%rbp
+        // ALLOCSTACK 48
+	sub	$48,%rsp
+        // SETFRAME rbp, 32
+	lea	32(%rsp),%rbp
+        // ENDPROLOG
+
+	mov	CIF_BYTES(%rbp),%eax
+	add	$15, %rax
+	and	$-16, %rax
+	cmpq	$0x1000, %rax
+	jb	Lch_done
+Lch_probe:
+	subq	$0x1000,%rsp
+	orl	$0x0, (%rsp)
+	subq	$0x1000,%rax
+	cmpq	$0x1000,%rax
+	ja	Lch_probe
+Lch_done:
+	subq	%rax, %rsp
+	orl	$0x0, (%rsp)
+	lea	32(%rsp), %rax
+	mov	%rax, STACK(%rbp)
+
+	mov	ECIF(%rbp), %rdx
+	mov	STACK(%rbp), %rcx
+	callq	*PREP_ARGS_FN(%rbp)
+
+	mov	STACK(%rbp), %rsp
+
+	movlpd	24(%rsp), %xmm3
+	movd	%xmm3, %r9
+
+	movlpd	16(%rsp), %xmm2
+	movd	%xmm2, %r8
+
+	movlpd	8(%rsp), %xmm1
+	movd	%xmm1, %rdx
+
+	movlpd	(%rsp), %xmm0
+	movd	%xmm0, %rcx
+
+	callq	*FN(%rbp)
+.Lret_struct4b:
+ 	cmpl	$FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp)
+ 	jne .Lret_struct2b
+
+	mov	RVALUE(%rbp), %rcx
+	mov	%eax, (%rcx)
+	jmp	.Lret_void
+
+.Lret_struct2b:
+	cmpl	$FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp)
+	jne .Lret_struct1b
+	
+	mov	RVALUE(%rbp), %rcx
+	mov	%ax, (%rcx)
+	jmp .Lret_void
+	
+.Lret_struct1b:
+	cmpl	$FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp)
+	jne .Lret_uint8
+	
+	mov	RVALUE(%rbp), %rcx
+	mov	%al, (%rcx)
+	jmp .Lret_void
+
+.Lret_uint8:
+	cmpl	$FFI_TYPE_UINT8, CIF_FLAGS(%rbp)
+	jne .Lret_sint8
+	
+        mov     RVALUE(%rbp), %rcx
+        movzbq  %al, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint8:
+	cmpl	$FFI_TYPE_SINT8, CIF_FLAGS(%rbp)
+	jne .Lret_uint16
+	
+        mov     RVALUE(%rbp), %rcx
+        movsbq  %al, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_uint16:
+	cmpl	$FFI_TYPE_UINT16, CIF_FLAGS(%rbp)
+	jne .Lret_sint16
+	
+        mov     RVALUE(%rbp), %rcx
+        movzwq  %ax, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint16:
+	cmpl	$FFI_TYPE_SINT16, CIF_FLAGS(%rbp)
+	jne .Lret_uint32
+	
+        mov     RVALUE(%rbp), %rcx
+        movswq  %ax, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_uint32:
+	cmpl	$FFI_TYPE_UINT32, CIF_FLAGS(%rbp)
+	jne .Lret_sint32
+	
+        mov     RVALUE(%rbp), %rcx
+        movl    %eax, %eax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint32:
+ 	cmpl	$FFI_TYPE_SINT32, CIF_FLAGS(%rbp)
+ 	jne	.Lret_float
+
+	mov	RVALUE(%rbp), %rcx
+	cltq
+	movq	%rax, (%rcx)
+	jmp	.Lret_void
+
+.Lret_float:
+ 	cmpl	$FFI_TYPE_FLOAT, CIF_FLAGS(%rbp)
+ 	jne	.Lret_double
+
+ 	mov	RVALUE(%rbp), %rax
+ 	movss	%xmm0, (%rax)
+ 	jmp	.Lret_void
+
+.Lret_double:
+ 	cmpl	$FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp)
+ 	jne	.Lret_sint64
+
+ 	mov	RVALUE(%rbp), %rax
+ 	movlpd	%xmm0, (%rax)
+ 	jmp	.Lret_void
+
+.Lret_sint64:
+  	cmpl	$FFI_TYPE_SINT64, CIF_FLAGS(%rbp)
+  	jne	.Lret_void
+
+ 	mov	RVALUE(%rbp), %rcx
+ 	mov	%rax, (%rcx)
+ 	jmp	.Lret_void
+	
+.Lret_void:
+	xor	%rax, %rax
+
+	lea	16(%rbp), %rsp
+	pop	%rbp
+	retq
+.ffi_call_win64_end:
+#endif /* !_MSC_VER */
+
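Both the MSVC and gas variants of ffi_call_win64 above end the same way: after the indirect call they compare the saved cif->flags against each FFI_TYPE_* code and store the value left in rax (or xmm0) into *rvalue with the matching width. A rough C restatement of that branch chain, purely for illustration; store_win64_result, raw_rax and raw_xmm0 are invented names, and the FFI_TYPE_SMALL_STRUCT_* codes are the ones defined in the bundled ffitarget.h:

#include <ffi.h>
#include <stdint.h>

static void store_win64_result(unsigned flags, uint64_t raw_rax,
                               double raw_xmm0, void *rvalue)
{
  switch (flags)
    {
    case FFI_TYPE_SMALL_STRUCT_4B: *(uint32_t *)rvalue = (uint32_t)raw_rax; break;
    case FFI_TYPE_SMALL_STRUCT_2B: *(uint16_t *)rvalue = (uint16_t)raw_rax; break;
    case FFI_TYPE_SMALL_STRUCT_1B: *(uint8_t  *)rvalue = (uint8_t)raw_rax;  break;
    /* Small integers are widened to a full 64-bit store, as in the asm. */
    case FFI_TYPE_UINT8:  *(uint64_t *)rvalue = (uint8_t)raw_rax;   break;
    case FFI_TYPE_SINT8:  *(int64_t  *)rvalue = (int8_t)raw_rax;    break;
    case FFI_TYPE_UINT16: *(uint64_t *)rvalue = (uint16_t)raw_rax;  break;
    case FFI_TYPE_SINT16: *(int64_t  *)rvalue = (int16_t)raw_rax;   break;
    case FFI_TYPE_UINT32: *(uint64_t *)rvalue = (uint32_t)raw_rax;  break;
    case FFI_TYPE_SINT32: *(int64_t  *)rvalue = (int32_t)raw_rax;   break;
    case FFI_TYPE_FLOAT:  *(float  *)rvalue = (float)raw_xmm0;      break;
    case FFI_TYPE_DOUBLE: *(double *)rvalue = raw_xmm0;             break;
    /* UINT64 (and, on win64, POINTER) were mapped to SINT64 by
       ffi_prep_cif_machdep, so one case covers them. */
    case FFI_TYPE_SINT64: *(uint64_t *)rvalue = raw_rax;            break;
    default: break;  /* void, and large structs returned via hidden pointer */
    }
}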
diff --git a/lib/wrappers/libffi/gcc/win64_asm.s b/lib/wrappers/libffi/gcc/win64_asm.s
new file mode 100644
index 000000000..f2c2df10d
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/win64_asm.s
@@ -0,0 +1,227 @@
+# 1 "gcc\\win64_asm.asm"

+# 1 "<command-line>"

+# 1 "gcc\\win64_asm.asm"

+

+# 1 "common/fficonfig.h" 1

+# 3 "gcc\\win64_asm.asm" 2

+# 1 "common/ffi.h" 1

+# 63 "common/ffi.h"

+# 1 "common/ffitarget.h" 1

+# 64 "common/ffi.h" 2

+# 4 "gcc\\win64_asm.asm" 2

+# 244 "gcc\\win64_asm.asm"

+.text

+

+.extern ffi_closure_win64_inner

+# 255 "gcc\\win64_asm.asm"

+ .balign 16

+        .globl ffi_closure_win64

+ffi_closure_win64:

+

+ test $1,%r11

+ jne .Lfirst_is_float

+ mov %rcx, 8(%rsp)

+ jmp .Lsecond

+.Lfirst_is_float:

+ movlpd %xmm0, 8(%rsp)

+

+.Lsecond:

+ test $2, %r11

+ jne .Lsecond_is_float

+ mov %rdx, 16(%rsp)

+ jmp .Lthird

+.Lsecond_is_float:

+ movlpd %xmm1, 16(%rsp)

+

+.Lthird:

+ test $4, %r11

+ jne .Lthird_is_float

+ mov %r8,24(%rsp)

+ jmp .Lfourth

+.Lthird_is_float:

+ movlpd %xmm2, 24(%rsp)

+

+.Lfourth:

+ test $8, %r11

+ jne .Lfourth_is_float

+ mov %r9, 32(%rsp)

+ jmp .Ldone

+.Lfourth_is_float:

+ movlpd %xmm3, 32(%rsp)

+

+.Ldone:

+

+ sub $40, %rsp

+

+ mov %rax, %rcx

+ mov %rsp, %rdx

+ add $48, %rdx

+ mov $SYMBOL_NAME(ffi_closure_win64_inner), %rax

+ callq *%rax

+ add $40, %rsp

+ movq %rax, %xmm0

+

+ retq

+.ffi_closure_win64_end:

+

+ .balign 16

+        .globl ffi_call_win64

+ffi_call_win64:

+

+ mov %r9,32(%rsp)

+ mov %r8,24(%rsp)

+ mov %rdx,16(%rsp)

+ mov %rcx,8(%rsp)

+

+ push %rbp

+

+ sub $48,%rsp

+

+ lea 32(%rsp),%rbp

+

+

+ mov 48(%rbp),%eax

+ add $15, %rax

+ and $-16, %rax

+ cmpq $0x1000, %rax

+ jb Lch_done

+Lch_probe:

+ subq $0x1000,%rsp

+ orl $0x0, (%rsp)

+ subq $0x1000,%rax

+ cmpq $0x1000,%rax

+ ja Lch_probe

+Lch_done:

+ subq %rax, %rsp

+ orl $0x0, (%rsp)

+ lea 32(%rsp), %rax

+ mov %rax, 0(%rbp)

+

+ mov 40(%rbp), %rdx

+ mov 0(%rbp), %rcx

+ callq *32(%rbp)

+

+ mov 0(%rbp), %rsp

+

+ movlpd 24(%rsp), %xmm3

+ movd %xmm3, %r9

+

+ movlpd 16(%rsp), %xmm2

+ movd %xmm2, %r8

+

+ movlpd 8(%rsp), %xmm1

+ movd %xmm1, %rdx

+

+ movlpd (%rsp), %xmm0

+ movd %xmm0, %rcx

+

+ callq *72(%rbp)

+.Lret_struct4b:

+  cmpl $FFI_TYPE_SMALL_STRUCT_4B, 56(%rbp)

+  jne .Lret_struct2b

+

+ mov 64(%rbp), %rcx

+ mov %eax, (%rcx)

+ jmp .Lret_void

+

+.Lret_struct2b:

+ cmpl $FFI_TYPE_SMALL_STRUCT_2B, 56(%rbp)

+ jne .Lret_struct1b

+

+ mov 64(%rbp), %rcx

+ mov %ax, (%rcx)

+ jmp .Lret_void

+

+.Lret_struct1b:

+ cmpl $FFI_TYPE_SMALL_STRUCT_1B, 56(%rbp)

+ jne .Lret_uint8

+

+ mov 64(%rbp), %rcx

+ mov %al, (%rcx)

+ jmp .Lret_void

+

+.Lret_uint8:

+ cmpl $FFI_TYPE_UINT8, 56(%rbp)

+ jne .Lret_sint8

+

+        mov 64(%rbp), %rcx

+        movzbq %al, %rax

+ movq %rax, (%rcx)

+ jmp .Lret_void

+

+.Lret_sint8:

+ cmpl $FFI_TYPE_SINT8, 56(%rbp)

+ jne .Lret_uint16

+

+        mov 64(%rbp), %rcx

+        movsbq %al, %rax

+ movq %rax, (%rcx)

+ jmp .Lret_void

+

+.Lret_uint16:

+ cmpl $FFI_TYPE_UINT16, 56(%rbp)

+ jne .Lret_sint16

+

+        mov 64(%rbp), %rcx

+        movzwq %ax, %rax

+ movq %rax, (%rcx)

+ jmp .Lret_void

+

+.Lret_sint16:

+ cmpl $FFI_TYPE_SINT16, 56(%rbp)

+ jne .Lret_uint32

+

+        mov 64(%rbp), %rcx

+        movswq %ax, %rax

+ movq %rax, (%rcx)

+ jmp .Lret_void

+

+.Lret_uint32:

+ cmpl $9, 56(%rbp)

+ jne .Lret_sint32

+

+        mov 64(%rbp), %rcx

+        movl %eax, %eax

+ movq %rax, (%rcx)

+ jmp .Lret_void

+

+.Lret_sint32:

+  cmpl $10, 56(%rbp)

+  jne .Lret_float

+

+ mov 64(%rbp), %rcx

+ cltq

+ movq %rax, (%rcx)

+ jmp .Lret_void

+

+.Lret_float:

+  cmpl $2, 56(%rbp)

+  jne .Lret_double

+

+  mov 64(%rbp), %rax

+  movss %xmm0, (%rax)

+  jmp .Lret_void

+

+.Lret_double:

+  cmpl $3, 56(%rbp)

+  jne .Lret_sint64

+

+  mov 64(%rbp), %rax

+  movlpd %xmm0, (%rax)

+  jmp .Lret_void

+

+.Lret_sint64:

+   cmpl $12, 56(%rbp)

+   jne .Lret_void

+

+  mov 64(%rbp), %rcx

+  mov %rax, (%rcx)

+  jmp .Lret_void

+

+.Lret_void:

+ xor %rax, %rax

+

+ lea 16(%rbp), %rsp

+ pop %rbp

+ retq

+.ffi_call_win64_end:

diff --git a/lib/wrappers/libffi.nim b/lib/wrappers/libffi/libffi.nim
index 514ce024f..1b6130103 100644
--- a/lib/wrappers/libffi.nim
+++ b/lib/wrappers/libffi/libffi.nim
@@ -26,12 +26,36 @@
 
 {.deadCodeElim: on.}
 
-when defined(windows): 
-  const libffidll* = "libffi.dll"
-elif defined(macosx): 
-  const libffidll* = "libffi.dylib"
-else: 
-  const libffidll* = "libffi.so"
+when defined(windows):
+  # on Windows we don't use a DLL but instead embed libffi directly:
+  {.pragma: mylib, header: r"ffi.h".}
+
+  #{.compile: r"common\malloc_closure.c".}
+  {.compile: r"common\raw_api.c".}
+  when defined(vcc):
+    {.compile: r"msvc\ffi.c".}
+    {.compile: r"msvc\prep_cif.c".}
+    {.compile: r"msvc\win32.c".}
+    {.compile: r"msvc\types.c".}
+    when defined(cpu64):
+      {.compile: r"msvc\win64_asm.asm".}
+    else:
+      {.compile: r"msvc\win32_asm.asm".}
+  else:
+    {.compile: r"gcc\ffi.c".}
+    {.compile: r"gcc\prep_cif.c".}
+    {.compile: r"gcc\types.c".}
+    {.compile: r"gcc\closures.c".}
+    when defined(cpu64):
+      {.compile: r"gcc\ffi64.c".}
+      {.compile: r"gcc\win64_asm.S".}
+    else:
+      {.compile: r"gcc\win32_asm.S".}
+
+elif defined(macosx):
+  {.pragma: mylib, dynlib: "libffi.dylib".}
+else:
+  {.pragma: mylib, dynlib: "libffi.so".}
 
 type
   TArg* = int
@@ -88,19 +112,19 @@ type
     elements*: ptr ptr TType
 
 var
-  type_void* {.importc: "ffi_type_void", dynlib: libffidll.}: TType
-  type_uint8* {.importc: "ffi_type_uint8", dynlib: libffidll.}: TType
-  type_sint8* {.importc: "ffi_type_sint8", dynlib: libffidll.}: TType
-  type_uint16* {.importc: "ffi_type_uint16", dynlib: libffidll.}: TType
-  type_sint16* {.importc: "ffi_type_sint16", dynlib: libffidll.}: TType
-  type_uint32* {.importc: "ffi_type_uint32", dynlib: libffidll.}: TType
-  type_sint32* {.importc: "ffi_type_sint32", dynlib: libffidll.}: TType
-  type_uint64* {.importc: "ffi_type_uint64", dynlib: libffidll.}: TType
-  type_sint64* {.importc: "ffi_type_sint64", dynlib: libffidll.}: TType
-  type_float* {.importc: "ffi_type_float", dynlib: libffidll.}: TType
-  type_double* {.importc: "ffi_type_double", dynlib: libffidll.}: TType
-  type_pointer* {.importc: "ffi_type_pointer", dynlib: libffidll.}: TType
-  type_longdouble* {.importc: "ffi_type_longdouble", dynlib: libffidll.}: TType
+  type_void* {.importc: "ffi_type_void", mylib.}: TType
+  type_uint8* {.importc: "ffi_type_uint8", mylib.}: TType
+  type_sint8* {.importc: "ffi_type_sint8", mylib.}: TType
+  type_uint16* {.importc: "ffi_type_uint16", mylib.}: TType
+  type_sint16* {.importc: "ffi_type_sint16", mylib.}: TType
+  type_uint32* {.importc: "ffi_type_uint32", mylib.}: TType
+  type_sint32* {.importc: "ffi_type_sint32", mylib.}: TType
+  type_uint64* {.importc: "ffi_type_uint64", mylib.}: TType
+  type_sint64* {.importc: "ffi_type_sint64", mylib.}: TType
+  type_float* {.importc: "ffi_type_float", mylib.}: TType
+  type_double* {.importc: "ffi_type_double", mylib.}: TType
+  type_pointer* {.importc: "ffi_type_pointer", mylib.}: TType
+  type_longdouble* {.importc: "ffi_type_longdouble", mylib.}: TType
 
 type 
   Tstatus* {.size: sizeof(cint).} = enum 
@@ -119,20 +143,18 @@ type
     sint*: TSArg
 
 proc raw_call*(cif: var Tcif; fn: proc () {.cdecl.}; rvalue: pointer; 
-               avalue: ptr TRaw) {.cdecl, importc: "ffi_raw_call", 
-                                   dynlib: libffidll.}
+               avalue: ptr TRaw) {.cdecl, importc: "ffi_raw_call", mylib.}
 proc ptrarray_to_raw*(cif: var Tcif; args: ptr pointer; raw: ptr TRaw) {.cdecl, 
-    importc: "ffi_ptrarray_to_raw", dynlib: libffidll.}
+    importc: "ffi_ptrarray_to_raw", mylib.}
 proc raw_to_ptrarray*(cif: var Tcif; raw: ptr TRaw; args: ptr pointer) {.cdecl, 
-    importc: "ffi_raw_to_ptrarray", dynlib: libffidll.}
-proc raw_size*(cif: var Tcif): int {.cdecl, importc: "ffi_raw_size", 
-                                     dynlib: libffidll.}
+    importc: "ffi_raw_to_ptrarray", mylib.}
+proc raw_size*(cif: var Tcif): int {.cdecl, importc: "ffi_raw_size", mylib.}
 
 proc prep_cif*(cif: var Tcif; abi: TABI; nargs: cuint; rtype: ptr TType; 
                atypes: ptr ptr TType): TStatus {.cdecl, importc: "ffi_prep_cif", 
-    dynlib: libffidll.}
+    mylib.}
 proc call*(cif: var Tcif; fn: proc () {.cdecl.}; rvalue: pointer; 
-           avalue: ptr pointer) {.cdecl, importc: "ffi_call", dynlib: libffidll.}
+           avalue: ptr pointer) {.cdecl, importc: "ffi_call", mylib.}
 
 # the same with an easier interface:
 type
@@ -141,9 +163,9 @@ type
 
 proc prep_cif*(cif: var Tcif; abi: TABI; nargs: cuint; rtype: ptr TType; 
                atypes: TParamList): TStatus {.cdecl, importc: "ffi_prep_cif",
-    dynlib: libffidll.}
+    mylib.}
 proc call*(cif: var Tcif; fn, rvalue: pointer;
-           avalue: TArgList) {.cdecl, importc: "ffi_call", dynlib: libffidll.}
+           avalue: TArgList) {.cdecl, importc: "ffi_call", mylib.}
 
 # Useful for eliminating compiler warnings 
 ##define FFI_FN(f) ((void (*)(void))f)
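The Nim procs above bind straight onto the C entry points (prep_cif onto ffi_prep_cif, call onto ffi_call, and the type_* variables onto the ffi_type_* globals defined in types.c). For reference, the direct C equivalent of a fixed-arity call through this API; a generic libffi sketch, not code from this commit, using pow from libm as the example callee:

#include <ffi.h>
#include <math.h>
#include <stdio.h>

int main(void)
{
  ffi_cif cif;
  ffi_type *argtypes[2] = { &ffi_type_double, &ffi_type_double };
  double base = 2.0, exponent = 10.0, result = 0.0;
  void *argvalues[2] = { &base, &exponent };

  /* Describe double pow(double, double), then call it indirectly. */
  if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &ffi_type_double, argtypes) == FFI_OK)
    ffi_call(&cif, FFI_FN(pow), &result, argvalues);

  printf("%f\n", result);   /* prints 1024.000000; link with -lm */
  return 0;
}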
diff --git a/lib/wrappers/libffi/msvc/ffi.c b/lib/wrappers/libffi/msvc/ffi.c
new file mode 100644
index 000000000..6e595e9fe
--- /dev/null
+++ b/lib/wrappers/libffi/msvc/ffi.c
@@ -0,0 +1,457 @@
+/* -----------------------------------------------------------------------
+   ffi.c - Copyright (c) 1996, 1998, 1999, 2001  Red Hat, Inc.
+           Copyright (c) 2002  Ranjit Mathew
+           Copyright (c) 2002  Bo Thorsen
+           Copyright (c) 2002  Roger Sayle
+   
+   x86 Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+/* ffi_prep_args is called by the assembly routine once stack space
+   has been allocated for the function's arguments */
+
+extern void Py_FatalError(const char *msg);
+
+/*@-exportheader@*/
+void ffi_prep_args(char *stack, extended_cif *ecif)
+/*@=exportheader@*/
+{
+  register unsigned int i;
+  register void **p_argv;
+  register char *argp;
+  register ffi_type **p_arg;
+
+  argp = stack;
+  if (ecif->cif->rtype->type == FFI_TYPE_STRUCT)
+    {
+      *(void **) argp = ecif->rvalue;
+      argp += sizeof(void *);
+    }
+
+  p_argv = ecif->avalue;
+
+  for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
+       i != 0;
+       i--, p_arg++)
+    {
+      size_t z;
+
+      /* Align if necessary */
+      if ((sizeof(void *) - 1) & (size_t) argp)
+	argp = (char *) ALIGN(argp, sizeof(void *));
+
+      z = (*p_arg)->size;
+      if (z < sizeof(int))
+	{
+	  z = sizeof(int);
+	  switch ((*p_arg)->type)
+	    {
+	    case FFI_TYPE_SINT8:
+	      *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv);
+	      break;
+
+	    case FFI_TYPE_UINT8:
+	      *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv);
+	      break;
+
+	    case FFI_TYPE_SINT16:
+	      *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv);
+	      break;
+
+	    case FFI_TYPE_UINT16:
+	      *(unsigned int *) argp = (unsigned int)*(UINT16 *)(* p_argv);
+	      break;
+
+	    case FFI_TYPE_SINT32:
+	      *(signed int *) argp = (signed int)*(SINT32 *)(* p_argv);
+	      break;
+
+	    case FFI_TYPE_UINT32:
+	      *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv);
+	      break;
+
+	    case FFI_TYPE_STRUCT:
+	      *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv);
+	      break;
+
+	    default:
+	      FFI_ASSERT(0);
+	    }
+	}
+      else
+	{
+	  memcpy(argp, *p_argv, z);
+	}
+      p_argv++;
+      argp += z;
+    }
+
+  if (argp >= stack && (unsigned)(argp - stack) > ecif->cif->bytes) 
+    {
+      Py_FatalError("FFI BUG: not enough stack space for arguments");
+    }
+  return;
+}
+
+/* Perform machine dependent cif processing */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+  /* Set the return type flag */
+  switch (cif->rtype->type)
+    {
+    case FFI_TYPE_VOID:
+    case FFI_TYPE_STRUCT:
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_DOUBLE:
+    case FFI_TYPE_LONGDOUBLE:
+      cif->flags = (unsigned) cif->rtype->type;
+      break;
+
+    case FFI_TYPE_UINT64:
+#ifdef _WIN64
+    case FFI_TYPE_POINTER:
+#endif
+      cif->flags = FFI_TYPE_SINT64;
+      break;
+
+    default:
+      cif->flags = FFI_TYPE_INT;
+      break;
+    }
+
+  return FFI_OK;
+}
+
+#ifdef _WIN32
+extern int
+ffi_call_x86(void (*)(char *, extended_cif *), 
+	     /*@out@*/ extended_cif *, 
+	     unsigned, unsigned, 
+	     /*@out@*/ unsigned *, 
+	     void (*fn)());
+#endif
+
+#ifdef _WIN64
+extern int
+ffi_call_AMD64(void (*)(char *, extended_cif *),
+		 /*@out@*/ extended_cif *,
+		 unsigned, unsigned,
+		 /*@out@*/ unsigned *,
+		 void (*fn)());
+#endif
+
+int
+ffi_call(/*@dependent@*/ ffi_cif *cif, 
+	 void (*fn)(), 
+	 /*@out@*/ void *rvalue, 
+	 /*@dependent@*/ void **avalue)
+{
+  extended_cif ecif;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+  
+  /* If the return value is a struct and we don't have a return	*/
+  /* value address then we need to make one		        */
+
+  if ((rvalue == NULL) && 
+      (cif->rtype->type == FFI_TYPE_STRUCT))
+    {
+      /*@-sysunrecog@*/
+      ecif.rvalue = alloca(cif->rtype->size);
+      /*@=sysunrecog@*/
+    }
+  else
+    ecif.rvalue = rvalue;
+    
+  
+  switch (cif->abi) 
+    {
+#if !defined(_WIN64)
+    case FFI_SYSV:
+    case FFI_STDCALL:
+      return ffi_call_x86(ffi_prep_args, &ecif, cif->bytes, 
+			  cif->flags, ecif.rvalue, fn);
+      break;
+#else
+    case FFI_SYSV:
+      /*@-usedef@*/
+      /* Function call needs at least 40 bytes stack size, on win64 AMD64 */
+      return ffi_call_AMD64(ffi_prep_args, &ecif, cif->bytes ? cif->bytes : 40,
+			   cif->flags, ecif.rvalue, fn);
+      /*@=usedef@*/
+      break;
+#endif
+
+    default:
+      FFI_ASSERT(0);
+      break;
+    }
+  return -1; /* theller: Hrm. */
+}
+
+
+/** private members **/
+
+static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
+					  void** args, ffi_cif* cif);
+/* This function is jumped to by the trampoline */
+
+#ifdef _WIN64
+void *
+#else
+static void __fastcall
+#endif
+ffi_closure_SYSV (ffi_closure *closure, int *argp)
+{
+  // this is our return value storage
+  long double    res;
+
+  // our various things...
+  ffi_cif       *cif;
+  void         **arg_area;
+  unsigned short rtype;
+  void          *resp = (void*)&res;
+  void *args = &argp[1];
+
+  cif         = closure->cif;
+  arg_area    = (void**) alloca (cif->nargs * sizeof (void*));  
+
+  /* this call will initialize ARG_AREA, such that each
+   * element in that array points to the corresponding 
+   * value on the stack; and if the function returns
+   * a structure, it will re-set RESP to point to the
+   * structure return address.  */
+
+  ffi_prep_incoming_args_SYSV(args, (void**)&resp, arg_area, cif);
+  
+  (closure->fun) (cif, resp, arg_area, closure->user_data);
+
+  rtype = cif->flags;
+
+#if defined(_WIN32) && !defined(_WIN64)
+#ifdef _MSC_VER
+  /* now, do a generic return based on the value of rtype */
+  if (rtype == FFI_TYPE_INT)
+    {
+	    _asm mov eax, resp ;
+	    _asm mov eax, [eax] ;
+    }
+  else if (rtype == FFI_TYPE_FLOAT)
+    {
+	    _asm mov eax, resp ;
+	    _asm fld DWORD PTR [eax] ;
+//      asm ("flds (%0)" : : "r" (resp) : "st" );
+    }
+  else if (rtype == FFI_TYPE_DOUBLE)
+    {
+	    _asm mov eax, resp ;
+	    _asm fld QWORD PTR [eax] ;
+//      asm ("fldl (%0)" : : "r" (resp) : "st", "st(1)" );
+    }
+  else if (rtype == FFI_TYPE_LONGDOUBLE)
+    {
+//      asm ("fldt (%0)" : : "r" (resp) : "st", "st(1)" );
+    }
+  else if (rtype == FFI_TYPE_SINT64)
+    {
+	    _asm mov edx, resp ;
+	    _asm mov eax, [edx] ;
+	    _asm mov edx, [edx + 4] ;
+//      asm ("movl 0(%0),%%eax;"
+//	   "movl 4(%0),%%edx" 
+//	   : : "r"(resp)
+//	   : "eax", "edx");
+    }
+#else
+  /* now, do a generic return based on the value of rtype */
+  if (rtype == FFI_TYPE_INT)
+    {
+      asm ("movl (%0),%%eax" : : "r" (resp) : "eax");
+    }
+  else if (rtype == FFI_TYPE_FLOAT)
+    {
+      asm ("flds (%0)" : : "r" (resp) : "st" );
+    }
+  else if (rtype == FFI_TYPE_DOUBLE)
+    {
+      asm ("fldl (%0)" : : "r" (resp) : "st", "st(1)" );
+    }
+  else if (rtype == FFI_TYPE_LONGDOUBLE)
+    {
+      asm ("fldt (%0)" : : "r" (resp) : "st", "st(1)" );
+    }
+  else if (rtype == FFI_TYPE_SINT64)
+    {
+      asm ("movl 0(%0),%%eax;"
+	   "movl 4(%0),%%edx" 
+	   : : "r"(resp)
+	   : "eax", "edx");
+    }
+#endif
+#endif
+
+#ifdef _WIN64
+  /* The result is returned in rax.  This does the right thing for
+     result types except for floats; we have to 'mov xmm0, rax' in the
+     caller to correct this.
+  */
+  return *(void **)resp;
+#endif
+}
+
+/*@-exportheader@*/
+static void 
+ffi_prep_incoming_args_SYSV(char *stack, void **rvalue,
+			    void **avalue, ffi_cif *cif)
+/*@=exportheader@*/
+{
+  register unsigned int i;
+  register void **p_argv;
+  register char *argp;
+  register ffi_type **p_arg;
+
+  argp = stack;
+
+  if ( cif->rtype->type == FFI_TYPE_STRUCT ) {
+    *rvalue = *(void **) argp;
+    argp += 4;
+  }
+
+  p_argv = avalue;
+
+  for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
+    {
+      size_t z;
+
+      /* Align if necessary */
+      if ((sizeof(char *) - 1) & (size_t) argp) {
+	argp = (char *) ALIGN(argp, sizeof(char*));
+      }
+
+      z = (*p_arg)->size;
+
+      /* because we're little endian, this is what it turns into.   */
+
+      *p_argv = (void*) argp;
+
+      p_argv++;
+      argp += z;
+    }
+  
+  return;
+}
+
+/* the cif must already be prep'ed */
+extern void ffi_closure_OUTER();
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+					  ffi_cif* cif,
+					  void (*fun)(ffi_cif*,void*,void**,void*),
+					  void *user_data,
+					  void *codeloc)
+{
+  short bytes;
+  char *tramp;
+#ifdef _WIN64
+  int mask = 0;
+#endif
+  FFI_ASSERT (cif->abi == FFI_SYSV);
+  
+  if (cif->abi == FFI_SYSV)
+    bytes = 0;
+#if !defined(_WIN64)
+  else if (cif->abi == FFI_STDCALL)
+    bytes = cif->bytes;
+#endif
+  else
+    return FFI_BAD_ABI;
+
+  tramp = &closure->tramp[0];
+
+#define BYTES(text) memcpy(tramp, text, sizeof(text)), tramp += sizeof(text)-1
+#define POINTER(x) *(void**)tramp = (void*)(x), tramp += sizeof(void*)
+#define SHORT(x) *(short*)tramp = x, tramp += sizeof(short)
+#define INT(x) *(int*)tramp = x, tramp += sizeof(int)
+
+#ifdef _WIN64
+  if (cif->nargs >= 1 &&
+      (cif->arg_types[0]->type == FFI_TYPE_FLOAT
+       || cif->arg_types[0]->type == FFI_TYPE_DOUBLE))
+    mask |= 1;
+  if (cif->nargs >= 2 &&
+      (cif->arg_types[1]->type == FFI_TYPE_FLOAT
+       || cif->arg_types[1]->type == FFI_TYPE_DOUBLE))
+    mask |= 2;
+  if (cif->nargs >= 3 &&
+      (cif->arg_types[2]->type == FFI_TYPE_FLOAT
+       || cif->arg_types[2]->type == FFI_TYPE_DOUBLE))
+    mask |= 4;
+  if (cif->nargs >= 4 &&
+      (cif->arg_types[3]->type == FFI_TYPE_FLOAT
+       || cif->arg_types[3]->type == FFI_TYPE_DOUBLE))
+    mask |= 8;
+
+  /* 41 BB ----         mov         r11d,mask */
+  BYTES("\x41\xBB"); INT(mask);
+
+  /* 48 B8 --------     mov         rax, closure			*/
+  BYTES("\x48\xB8"); POINTER(closure);
+
+  /* 49 BA --------     mov         r10, ffi_closure_OUTER */
+  BYTES("\x49\xBA"); POINTER(ffi_closure_OUTER);
+
+  /* 41 FF E2           jmp         r10 */
+  BYTES("\x41\xFF\xE2");
+
+#else
+
+  /* mov ecx, closure */
+  BYTES("\xb9"); POINTER(closure);
+
+  /* mov edx, esp */
+  BYTES("\x8b\xd4");
+
+  /* call ffi_closure_SYSV */
+  BYTES("\xe8"); POINTER((char*)&ffi_closure_SYSV - (tramp + 4));
+
+  /* ret bytes */
+  BYTES("\xc2");
+  SHORT(bytes);
+  
+#endif
+
+  if (tramp - &closure->tramp[0] > FFI_TRAMPOLINE_SIZE)
+    Py_FatalError("FFI_TRAMPOLINE_SIZE too small in " __FILE__);
+
+  closure->cif  = cif;
+  closure->user_data = user_data;
+  closure->fun  = fun;
+  return FFI_OK;
+}
diff --git a/lib/wrappers/libffi/msvc/prep_cif.c b/lib/wrappers/libffi/msvc/prep_cif.c
new file mode 100644
index 000000000..2650fa052
--- /dev/null
+++ b/lib/wrappers/libffi/msvc/prep_cif.c
@@ -0,0 +1,175 @@
+/* -----------------------------------------------------------------------
+   prep_cif.c - Copyright (c) 1996, 1998  Red Hat, Inc.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+
+
+/* Round up to FFI_SIZEOF_ARG. */
+
+#define STACK_ARG_SIZE(x) ALIGN(x, FFI_SIZEOF_ARG)
+
+/* Perform machine independent initialization of aggregate type
+   specifications. */
+
+static ffi_status initialize_aggregate(/*@out@*/ ffi_type *arg)
+{
+  ffi_type **ptr; 
+
+  FFI_ASSERT(arg != NULL);
+
+  /*@-usedef@*/
+
+  FFI_ASSERT(arg->elements != NULL);
+  FFI_ASSERT(arg->size == 0);
+  FFI_ASSERT(arg->alignment == 0);
+
+  ptr = &(arg->elements[0]);
+
+  while ((*ptr) != NULL)
+    {
+      if (((*ptr)->size == 0) && (initialize_aggregate((*ptr)) != FFI_OK))
+	return FFI_BAD_TYPEDEF;
+      
+      /* Perform a sanity check on the argument type */
+      FFI_ASSERT_VALID_TYPE(*ptr);
+
+      arg->size = ALIGN(arg->size, (*ptr)->alignment);
+      arg->size += (*ptr)->size;
+
+      arg->alignment = (arg->alignment > (*ptr)->alignment) ? 
+	arg->alignment : (*ptr)->alignment;
+
+      ptr++;
+    }
+
+  /* Structure size includes tail padding.  This is important for
+     structures that fit in one register on ABIs like the PowerPC64
+     Linux ABI that right justify small structs in a register.
+     It's also needed for nested structure layout, for example
+     struct A { long a; char b; }; struct B { struct A x; char y; };
+     should find y at an offset of 2*sizeof(long) and result in a
+     total size of 3*sizeof(long).  */
+  arg->size = ALIGN (arg->size, arg->alignment);
+
+  if (arg->size == 0)
+    return FFI_BAD_TYPEDEF;
+  else
+    return FFI_OK;
+
+  /*@=usedef@*/
+}
+
+/* Perform machine independent ffi_cif preparation, then call
+   machine dependent routine. */
+
+ffi_status ffi_prep_cif(/*@out@*/ /*@partial@*/ ffi_cif *cif, 
+			ffi_abi abi, unsigned int nargs, 
+			/*@dependent@*/ /*@out@*/ /*@partial@*/ ffi_type *rtype, 
+			/*@dependent@*/ ffi_type **atypes)
+{
+  unsigned bytes = 0;
+  unsigned int i;
+  ffi_type **ptr;
+
+  FFI_ASSERT(cif != NULL);
+  FFI_ASSERT((abi > FFI_FIRST_ABI) && (abi <= FFI_DEFAULT_ABI));
+
+  cif->abi = abi;
+  cif->arg_types = atypes;
+  cif->nargs = nargs;
+  cif->rtype = rtype;
+
+  cif->flags = 0;
+
+  /* Initialize the return type if necessary */
+  /*@-usedef@*/
+  if ((cif->rtype->size == 0) && (initialize_aggregate(cif->rtype) != FFI_OK))
+    return FFI_BAD_TYPEDEF;
+  /*@=usedef@*/
+
+  /* Perform a sanity check on the return type */
+  FFI_ASSERT_VALID_TYPE(cif->rtype);
+
+  /* x86-64 and s390 stack space allocation is handled in prep_machdep.  */
+#if !defined M68K && !defined __x86_64__ && !defined S390
+  /* Make space for the return structure pointer */
+  if (cif->rtype->type == FFI_TYPE_STRUCT
+      /* MSVC returns small structures in registers.  But we have a different
+      workaround: pretend an int32 or int64 return type, and convert to the
+      structure afterwards. */
+#ifdef SPARC
+      && (cif->abi != FFI_V9 || cif->rtype->size > 32)
+#endif
+      )
+    bytes = STACK_ARG_SIZE(sizeof(void*));
+#endif
+
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+    {
+
+      /* Initialize any uninitialized aggregate type definitions */
+      if (((*ptr)->size == 0) && (initialize_aggregate((*ptr)) != FFI_OK))
+	return FFI_BAD_TYPEDEF;
+
+      /* Perform a sanity check on the argument type, do this 
+	 check after the initialization.  */
+      FFI_ASSERT_VALID_TYPE(*ptr);
+
+#if !defined __x86_64__ && !defined S390
+#ifdef SPARC
+      if (((*ptr)->type == FFI_TYPE_STRUCT
+	   && ((*ptr)->size > 16 || cif->abi != FFI_V9))
+	  || ((*ptr)->type == FFI_TYPE_LONGDOUBLE
+	      && cif->abi != FFI_V9))
+	bytes += sizeof(void*);
+      else
+#endif
+	{
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+		/* Don't know if this is a libffi bug or not.  At least on
+		   Windows with MSVC, function call parameters are *not*
+		   aligned in the same way as structure fields are, they are
+		   only aligned in integer boundaries.
+
+		   This doesn't do any harm for cdecl functions and closures,
+		   since the caller cleans up the stack, but it is wrong for
+		   stdcall functions where the callee cleans.
+		*/
+
+	  /* Add any padding if necessary */
+	  if (((*ptr)->alignment - 1) & bytes)
+	    bytes = ALIGN(bytes, (*ptr)->alignment);
+	  
+#endif
+	  bytes += STACK_ARG_SIZE((*ptr)->size);
+	}
+#endif
+    }
+
+  cif->bytes = bytes;
+
+  /* Perform machine dependent cif processing */
+  return ffi_prep_cif_machdep(cif);
+}
diff --git a/lib/wrappers/libffi/msvc/types.c b/lib/wrappers/libffi/msvc/types.c
new file mode 100644
index 000000000..df32190d1
--- /dev/null
+++ b/lib/wrappers/libffi/msvc/types.c
@@ -0,0 +1,104 @@
+/* -----------------------------------------------------------------------
+   types.c - Copyright (c) 1996, 1998  Red Hat, Inc.
+   
+   Predefined ffi_types needed by libffi.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+/* Type definitions */
+
+#define FFI_INTEGRAL_TYPEDEF(n, s, a, t) ffi_type ffi_type_##n = { s, a, t, NULL }
+#define FFI_AGGREGATE_TYPEDEF(n, e) ffi_type ffi_type_##n = { 0, 0, FFI_TYPE_STRUCT, e }
+
+/* Size and alignment are fake here. They must not be 0. */
+FFI_INTEGRAL_TYPEDEF(void, 1, 1, FFI_TYPE_VOID);
+
+FFI_INTEGRAL_TYPEDEF(uint8, 1, 1, FFI_TYPE_UINT8);
+FFI_INTEGRAL_TYPEDEF(sint8, 1, 1, FFI_TYPE_SINT8);
+FFI_INTEGRAL_TYPEDEF(uint16, 2, 2, FFI_TYPE_UINT16);
+FFI_INTEGRAL_TYPEDEF(sint16, 2, 2, FFI_TYPE_SINT16);
+FFI_INTEGRAL_TYPEDEF(uint32, 4, 4, FFI_TYPE_UINT32);
+FFI_INTEGRAL_TYPEDEF(sint32, 4, 4, FFI_TYPE_SINT32);
+FFI_INTEGRAL_TYPEDEF(float, 4, 4, FFI_TYPE_FLOAT);
+
+#if defined ALPHA || defined SPARC64 || defined X86_64 || defined S390X \
+    || defined IA64
+
+FFI_INTEGRAL_TYPEDEF(pointer, 8, 8, FFI_TYPE_POINTER);
+
+#else
+
+FFI_INTEGRAL_TYPEDEF(pointer, 4, 4, FFI_TYPE_POINTER);
+
+#endif
+
+#if defined X86 || defined X86_WIN32 || defined ARM || defined M68K
+
+FFI_INTEGRAL_TYPEDEF(uint64, 8, 4, FFI_TYPE_UINT64);
+FFI_INTEGRAL_TYPEDEF(sint64, 8, 4, FFI_TYPE_SINT64);
+
+#elif defined SH
+
+FFI_INTEGRAL_TYPEDEF(uint64, 8, 4, FFI_TYPE_UINT64);
+FFI_INTEGRAL_TYPEDEF(sint64, 8, 4, FFI_TYPE_SINT64);
+
+#else
+
+FFI_INTEGRAL_TYPEDEF(uint64, 8, 8, FFI_TYPE_UINT64);
+FFI_INTEGRAL_TYPEDEF(sint64, 8, 8, FFI_TYPE_SINT64);
+
+#endif
+
+
+#if defined X86 || defined X86_WIN32 || defined M68K
+
+FFI_INTEGRAL_TYPEDEF(double, 8, 4, FFI_TYPE_DOUBLE);
+FFI_INTEGRAL_TYPEDEF(longdouble, 12, 4, FFI_TYPE_LONGDOUBLE);
+
+#elif defined ARM || defined SH || defined POWERPC_AIX || defined POWERPC_DARWIN
+
+FFI_INTEGRAL_TYPEDEF(double, 8, 4, FFI_TYPE_DOUBLE);
+FFI_INTEGRAL_TYPEDEF(longdouble, 8, 4, FFI_TYPE_LONGDOUBLE);
+
+#elif defined SPARC
+
+FFI_INTEGRAL_TYPEDEF(double, 8, 8, FFI_TYPE_DOUBLE);
+#ifdef SPARC64
+FFI_INTEGRAL_TYPEDEF(longdouble, 16, 16, FFI_TYPE_LONGDOUBLE);
+#else
+FFI_INTEGRAL_TYPEDEF(longdouble, 16, 8, FFI_TYPE_LONGDOUBLE);
+#endif
+
+#elif defined X86_64
+
+FFI_INTEGRAL_TYPEDEF(double, 8, 8, FFI_TYPE_DOUBLE);
+FFI_INTEGRAL_TYPEDEF(longdouble, 16, 16, FFI_TYPE_LONGDOUBLE);
+
+#else
+
+FFI_INTEGRAL_TYPEDEF(double, 8, 8, FFI_TYPE_DOUBLE);
+FFI_INTEGRAL_TYPEDEF(longdouble, 8, 8, FFI_TYPE_LONGDOUBLE);
+
+#endif
+
diff --git a/lib/wrappers/libffi/msvc/win32.c b/lib/wrappers/libffi/msvc/win32.c
new file mode 100644
index 000000000..d1149a85e
--- /dev/null
+++ b/lib/wrappers/libffi/msvc/win32.c
@@ -0,0 +1,162 @@
+/* -----------------------------------------------------------------------
+   win32.S - Copyright (c) 1996, 1998, 2001, 2002  Red Hat, Inc.
+	     Copyright (c) 2001  John Beniton
+	     Copyright (c) 2002  Ranjit Mathew
+			
+ 
+   X86 Foreign Function Interface
+ 
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+ 
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+ 
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+/* theller: almost verbatim translation from gas syntax to MSVC inline
+   assembler code. */
+
+/* theller: ffi_call_x86 now returns an integer - the difference of the stack
+   pointer before and after the function call.  If everything is ok, zero is
+   returned.  If stdcall functions are passed the wrong number of arguments,
+   the difference will be nonzero. */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+__declspec(naked) int
+ffi_call_x86(void (* prepfunc)(char *, extended_cif *), /* 8 */
+	     extended_cif *ecif, /* 12 */
+	     unsigned bytes, /* 16 */
+	     unsigned flags, /* 20 */
+	     unsigned *rvalue, /* 24 */
+	     void (*fn)()) /* 28 */
+{
+	_asm {
+		push ebp
+		mov ebp, esp
+
+		push esi // NEW: this register must be preserved across function calls
+// XXX SAVE ESP NOW!
+		mov esi, esp		// save stack pointer before the call
+
+// Make room for all of the new args.
+		mov ecx, [ebp+16]
+		sub esp, ecx		// sub esp, bytes
+		
+		mov eax, esp
+
+// Place all of the ffi_prep_args in position
+		push [ebp + 12] // ecif
+		push eax
+		call [ebp + 8] // prepfunc
+
+// Return stack to previous state and call the function
+		add esp, 8
+// FIXME: Align the stack to a 128-bit boundary to avoid
+// potential performance hits.
+		call [ebp + 28]
+
+// Load ecif->cif->abi
+		mov ecx, [ebp + 12]
+		mov ecx, [ecx]ecif.cif
+		mov ecx, [ecx]ecif.cif.abi
+		
+		cmp ecx, FFI_STDCALL
+		je noclean
+// STDCALL: Remove the space we pushed for the args
+		mov ecx, [ebp + 16]
+		add esp, ecx
+// CDECL: Caller has already cleaned the stack
+noclean:
+// Check that esp has the same value as before!
+		sub esi, esp
+
+// Load %ecx with the return type code
+		mov ecx, [ebp + 20]
+
+// If the return value pointer is NULL, assume no return value.
+/*
+  Intel asm is weird. We have to explicitly specify 'DWORD PTR' in the next instruction,
+  otherwise only one BYTE will be compared (instead of a DWORD)!
+ */
+		cmp DWORD PTR [ebp + 24], 0
+		jne sc_retint
+
+// Even if there is no space for the return value, we are
+// obliged to handle floating-point values.
+		cmp ecx, FFI_TYPE_FLOAT
+		jne sc_noretval
+//        fstp  %st(0)
+		fstp st(0)
+
+		jmp sc_epilogue
+
+sc_retint:
+		cmp ecx, FFI_TYPE_INT
+		jne sc_retfloat
+//        # Load %ecx with the pointer to storage for the return value
+		mov ecx, [ebp + 24]
+		mov [ecx + 0], eax
+		jmp sc_epilogue
+
+sc_retfloat:
+		cmp ecx, FFI_TYPE_FLOAT
+		jne sc_retdouble
+// Load %ecx with the pointer to storage for the return value
+		mov ecx, [ebp+24]
+//        fstps (%ecx)
+		fstp DWORD PTR [ecx]
+		jmp sc_epilogue
+
+sc_retdouble:
+		cmp ecx, FFI_TYPE_DOUBLE
+		jne sc_retlongdouble
+//        movl  24(%ebp),%ecx
+		mov ecx, [ebp+24]
+		fstp QWORD PTR [ecx]
+		jmp sc_epilogue
+
+		jmp sc_retlongdouble // avoid warning about unused label
+sc_retlongdouble:
+		cmp ecx, FFI_TYPE_LONGDOUBLE
+		jne sc_retint64
+// Load %ecx with the pointer to storage for the return value
+		mov ecx, [ebp+24]
+//        fstpt (%ecx)
+		fstp QWORD PTR [ecx] /* XXX ??? */
+		jmp sc_epilogue
+
+sc_retint64:
+		cmp ecx, FFI_TYPE_SINT64
+		jne sc_retstruct
+// Load %ecx with the pointer to storage for the return value
+		mov ecx, [ebp+24]
+		mov [ecx+0], eax
+		mov [ecx+4], edx
+
+sc_retstruct:
+// Nothing to do!
+
+sc_noretval:
+sc_epilogue:
+		mov eax, esi
+		pop esi // NEW restore: must be preserved across function calls
+		mov esp, ebp
+		pop ebp
+		ret
+	}
+}
diff --git a/lib/wrappers/libffi/msvc/win32_asm.asm b/lib/wrappers/libffi/msvc/win32_asm.asm
new file mode 100644
index 000000000..407185e6a
--- /dev/null
+++ b/lib/wrappers/libffi/msvc/win32_asm.asm
@@ -0,0 +1,470 @@
+/* -----------------------------------------------------------------------
+   win32.S - Copyright (c) 1996, 1998, 2001, 2002, 2009  Red Hat, Inc.
+	     Copyright (c) 2001  John Beniton
+	     Copyright (c) 2002  Ranjit Mathew
+	     Copyright (c) 2009  Daniel Witte
+			
+ 
+   X86 Foreign Function Interface
+ 
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+ 
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+ 
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   -----------------------------------------------------------------------
+   */
+ 
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+.386
+.MODEL FLAT, C
+
+EXTRN ffi_closure_SYSV_inner:NEAR
+
+_TEXT SEGMENT
+
+ffi_call_win32 PROC NEAR,
+    ffi_prep_args : NEAR PTR DWORD,
+    ecif          : NEAR PTR DWORD,
+    cif_abi       : DWORD,
+    cif_bytes     : DWORD,
+    cif_flags     : DWORD,
+    rvalue        : NEAR PTR DWORD,
+    fn            : NEAR PTR DWORD
+
+        ;; Make room for all of the new args.
+        mov  ecx, cif_bytes
+        sub  esp, ecx
+
+        mov  eax, esp
+
+        ;; Place all of the ffi_prep_args in position
+        push ecif
+        push eax
+        call ffi_prep_args
+
+        ;; Return stack to previous state and call the function
+        add  esp, 8
+
+	;; Handle thiscall and fastcall
+	cmp cif_abi, 3 ;; FFI_THISCALL
+	jz do_thiscall
+	cmp cif_abi, 4 ;; FFI_FASTCALL
+	jnz do_stdcall
+	mov ecx, DWORD PTR [esp]
+	mov edx, DWORD PTR [esp+4]
+	add esp, 8
+	jmp do_stdcall
+do_thiscall:
+	mov ecx, DWORD PTR [esp]
+	add esp, 4
+do_stdcall:
+        call fn
+
+        ;; cdecl:   we restore esp in the epilogue, so there's no need to
+        ;;          remove the space we pushed for the args.
+        ;; stdcall: the callee has already cleaned the stack.
+
+        ;; Load ecx with the return type code
+        mov  ecx, cif_flags
+
+        ;; If the return value pointer is NULL, assume no return value.
+        cmp  rvalue, 0
+        jne  ca_jumptable
+
+        ;; Even if there is no space for the return value, we are
+        ;; obliged to handle floating-point values.
+        cmp  ecx, FFI_TYPE_FLOAT
+        jne  ca_epilogue
+        fstp st(0)
+
+        jmp  ca_epilogue
+
+ca_jumptable:
+        jmp  [ca_jumpdata + 4 * ecx]
+ca_jumpdata:
+        ;; Do not insert anything here between label and jump table.
+        dd offset ca_epilogue       ;; FFI_TYPE_VOID
+        dd offset ca_retint         ;; FFI_TYPE_INT
+        dd offset ca_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset ca_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset ca_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset ca_retuint8       ;; FFI_TYPE_UINT8
+        dd offset ca_retsint8       ;; FFI_TYPE_SINT8
+        dd offset ca_retuint16      ;; FFI_TYPE_UINT16
+        dd offset ca_retsint16      ;; FFI_TYPE_SINT16
+        dd offset ca_retint         ;; FFI_TYPE_UINT32
+        dd offset ca_retint         ;; FFI_TYPE_SINT32
+        dd offset ca_retint64       ;; FFI_TYPE_UINT64
+        dd offset ca_retint64       ;; FFI_TYPE_SINT64
+        dd offset ca_epilogue       ;; FFI_TYPE_STRUCT
+        dd offset ca_retint         ;; FFI_TYPE_POINTER
+        dd offset ca_retstruct1b    ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset ca_retstruct2b    ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset ca_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+        dd offset ca_epilogue       ;; FFI_TYPE_MS_STRUCT
+
+        /* Sign/zero extend as appropriate.  */
+ca_retuint8:
+        movzx eax, al
+        jmp   ca_retint
+
+ca_retsint8:
+        movsx eax, al
+        jmp   ca_retint
+
+ca_retuint16:
+        movzx eax, ax
+        jmp   ca_retint
+
+ca_retsint16:
+        movsx eax, ax
+        jmp   ca_retint
+
+ca_retint:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], eax
+        jmp   ca_epilogue
+
+ca_retint64:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], eax
+        mov   [ecx + 4], edx
+        jmp   ca_epilogue
+
+ca_retfloat:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        fstp  DWORD PTR [ecx]
+        jmp   ca_epilogue
+
+ca_retdouble:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        fstp  QWORD PTR [ecx]
+        jmp   ca_epilogue
+
+ca_retlongdouble:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        fstp  TBYTE PTR [ecx]
+        jmp   ca_epilogue
+
+ca_retstruct1b:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], al
+        jmp   ca_epilogue
+
+ca_retstruct2b:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], ax
+        jmp   ca_epilogue
+
+ca_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_call_win32 ENDP
+
+ffi_closure_THISCALL PROC NEAR FORCEFRAME
+	sub	esp, 40
+	lea	edx, [ebp -24]
+	mov	[ebp - 12], edx	/* resp */
+	lea	edx, [ebp + 12]  /* account for stub return address on stack */
+	jmp	stub
+ffi_closure_THISCALL ENDP
+
+ffi_closure_SYSV PROC NEAR FORCEFRAME
+    ;; the ffi_closure ctx is passed in eax by the trampoline.
+
+        sub  esp, 40
+        lea  edx, [ebp - 24]
+        mov  [ebp - 12], edx         ;; resp
+        lea  edx, [ebp + 8]
+stub::
+        mov  [esp + 8], edx          ;; args
+        lea  edx, [ebp - 12]
+        mov  [esp + 4], edx          ;; &resp
+        mov  [esp], eax              ;; closure
+        call ffi_closure_SYSV_inner
+        mov  ecx, [ebp - 12]
+
+cs_jumptable:
+        jmp  [cs_jumpdata + 4 * eax]
+cs_jumpdata:
+        ;; Do not insert anything here between the label and jump table.
+        dd offset cs_epilogue       ;; FFI_TYPE_VOID
+        dd offset cs_retint         ;; FFI_TYPE_INT
+        dd offset cs_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset cs_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset cs_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset cs_retuint8       ;; FFI_TYPE_UINT8
+        dd offset cs_retsint8       ;; FFI_TYPE_SINT8
+        dd offset cs_retuint16      ;; FFI_TYPE_UINT16
+        dd offset cs_retsint16      ;; FFI_TYPE_SINT16
+        dd offset cs_retint         ;; FFI_TYPE_UINT32
+        dd offset cs_retint         ;; FFI_TYPE_SINT32
+        dd offset cs_retint64       ;; FFI_TYPE_UINT64
+        dd offset cs_retint64       ;; FFI_TYPE_SINT64
+        dd offset cs_retstruct      ;; FFI_TYPE_STRUCT
+        dd offset cs_retint         ;; FFI_TYPE_POINTER
+        dd offset cs_retsint8       ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset cs_retsint16      ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset cs_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+        dd offset cs_retmsstruct    ;; FFI_TYPE_MS_STRUCT
+
+cs_retuint8:
+        movzx eax, BYTE PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retsint8:
+        movsx eax, BYTE PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retuint16:
+        movzx eax, WORD PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retsint16:
+        movsx eax, WORD PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retint:
+        mov   eax, [ecx]
+        jmp   cs_epilogue
+
+cs_retint64:
+        mov   eax, [ecx + 0]
+        mov   edx, [ecx + 4]
+        jmp   cs_epilogue
+
+cs_retfloat:
+        fld   DWORD PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retdouble:
+        fld   QWORD PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retlongdouble:
+        fld   TBYTE PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retstruct:
+        ;; Caller expects us to pop struct return value pointer hidden arg.
+        ;; Epilogue code is autogenerated.
+        ret	4
+
+cs_retmsstruct:
+        ;; Caller expects us to return a pointer to the real return value.
+        mov   eax, ecx
+        ;; Caller doesn't expect us to pop the struct return value pointer hidden arg.
+        jmp   cs_epilogue
+
+cs_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_closure_SYSV ENDP
+
+#if !FFI_NO_RAW_API
+
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+
+ffi_closure_raw_THISCALL PROC NEAR USES esi FORCEFRAME
+	sub esp, 36
+	mov  esi, [eax + RAW_CLOSURE_CIF_OFFSET]        ;; closure->cif
+	mov  edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET]  ;; closure->user_data
+	mov [esp + 12], edx
+	lea edx, [ebp + 12]
+	jmp stubraw
+ffi_closure_raw_THISCALL ENDP
+
+ffi_closure_raw_SYSV PROC NEAR USES esi FORCEFRAME
+    ;; the ffi_closure ctx is passed in eax by the trampoline.
+
+        sub  esp, 40
+        mov  esi, [eax + RAW_CLOSURE_CIF_OFFSET]        ;; closure->cif
+        mov  edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET]  ;; closure->user_data
+        mov  [esp + 12], edx                            ;; user_data
+        lea  edx, [ebp + 8]
+stubraw::
+        mov  [esp + 8], edx                             ;; raw_args
+        lea  edx, [ebp - 24]
+        mov  [esp + 4], edx                             ;; &res
+        mov  [esp], esi                                 ;; cif
+        call DWORD PTR [eax + RAW_CLOSURE_FUN_OFFSET]   ;; closure->fun
+        mov  eax, [esi + CIF_FLAGS_OFFSET]              ;; cif->flags
+        lea  ecx, [ebp - 24]
+
+cr_jumptable:
+        jmp  [cr_jumpdata + 4 * eax]
+cr_jumpdata:
+        ;; Do not insert anything here between the label and jump table.
+        dd offset cr_epilogue       ;; FFI_TYPE_VOID
+        dd offset cr_retint         ;; FFI_TYPE_INT
+        dd offset cr_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset cr_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset cr_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset cr_retuint8       ;; FFI_TYPE_UINT8
+        dd offset cr_retsint8       ;; FFI_TYPE_SINT8
+        dd offset cr_retuint16      ;; FFI_TYPE_UINT16
+        dd offset cr_retsint16      ;; FFI_TYPE_SINT16
+        dd offset cr_retint         ;; FFI_TYPE_UINT32
+        dd offset cr_retint         ;; FFI_TYPE_SINT32
+        dd offset cr_retint64       ;; FFI_TYPE_UINT64
+        dd offset cr_retint64       ;; FFI_TYPE_SINT64
+        dd offset cr_epilogue       ;; FFI_TYPE_STRUCT
+        dd offset cr_retint         ;; FFI_TYPE_POINTER
+        dd offset cr_retsint8       ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset cr_retsint16      ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset cr_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+        dd offset cr_epilogue       ;; FFI_TYPE_MS_STRUCT
+
+cr_retuint8:
+        movzx eax, BYTE PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retsint8:
+        movsx eax, BYTE PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retuint16:
+        movzx eax, WORD PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retsint16:
+        movsx eax, WORD PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retint:
+        mov   eax, [ecx]
+        jmp   cr_epilogue
+
+cr_retint64:
+        mov   eax, [ecx + 0]
+        mov   edx, [ecx + 4]
+        jmp   cr_epilogue
+
+cr_retfloat:
+        fld   DWORD PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retdouble:
+        fld   QWORD PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retlongdouble:
+        fld   TBYTE PTR [ecx]
+        jmp   cr_epilogue
+
+cr_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_closure_raw_SYSV ENDP
+
+#endif /* !FFI_NO_RAW_API */
+
+ffi_closure_STDCALL PROC NEAR FORCEFRAME
+    ;; the ffi_closure ctx is passed in eax by the trampoline.
+
+        sub  esp, 40
+        lea  edx, [ebp - 24]
+        mov  [ebp - 12], edx         ;; resp
+        lea  edx, [ebp + 12]         ;; account for stub return address on stack
+        mov  [esp + 8], edx          ;; args
+        lea  edx, [ebp - 12]
+        mov  [esp + 4], edx          ;; &resp
+        mov  [esp], eax              ;; closure
+        call ffi_closure_SYSV_inner
+        mov  ecx, [ebp - 12]
+
+cd_jumptable:
+        jmp  [cd_jumpdata + 4 * eax]
+cd_jumpdata:
+        ;; Do not insert anything here between the label and jump table.
+        dd offset cd_epilogue       ;; FFI_TYPE_VOID
+        dd offset cd_retint         ;; FFI_TYPE_INT
+        dd offset cd_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset cd_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset cd_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset cd_retuint8       ;; FFI_TYPE_UINT8
+        dd offset cd_retsint8       ;; FFI_TYPE_SINT8
+        dd offset cd_retuint16      ;; FFI_TYPE_UINT16
+        dd offset cd_retsint16      ;; FFI_TYPE_SINT16
+        dd offset cd_retint         ;; FFI_TYPE_UINT32
+        dd offset cd_retint         ;; FFI_TYPE_SINT32
+        dd offset cd_retint64       ;; FFI_TYPE_UINT64
+        dd offset cd_retint64       ;; FFI_TYPE_SINT64
+        dd offset cd_epilogue       ;; FFI_TYPE_STRUCT
+        dd offset cd_retint         ;; FFI_TYPE_POINTER
+        dd offset cd_retsint8       ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset cd_retsint16      ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset cd_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+
+cd_retuint8:
+        movzx eax, BYTE PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retsint8:
+        movsx eax, BYTE PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retuint16:
+        movzx eax, WORD PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retsint16:
+        movsx eax, WORD PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retint:
+        mov   eax, [ecx]
+        jmp   cd_epilogue
+
+cd_retint64:
+        mov   eax, [ecx + 0]
+        mov   edx, [ecx + 4]
+        jmp   cd_epilogue
+
+cd_retfloat:
+        fld   DWORD PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retdouble:
+        fld   QWORD PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retlongdouble:
+        fld   TBYTE PTR [ecx]
+        jmp   cd_epilogue
+
+cd_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_closure_STDCALL ENDP
+
+_TEXT ENDS
+END
diff --git a/lib/wrappers/libffi/msvc/win64_asm.asm b/lib/wrappers/libffi/msvc/win64_asm.asm
new file mode 100644
index 000000000..301188bc9
--- /dev/null
+++ b/lib/wrappers/libffi/msvc/win64_asm.asm
@@ -0,0 +1,156 @@
+PUBLIC	ffi_call_AMD64
+
+EXTRN	__chkstk:NEAR
+EXTRN	ffi_closure_SYSV:NEAR
+
+_TEXT	SEGMENT
+
+;;; ffi_closure_OUTER will be called with these registers set:
+;;;    rax points to 'closure'
+;;;    r11 contains a bit mask that specifies which of the
+;;;    first four parameters are float or double
+;;;
+;;; It must move the parameters passed in registers to their stack location,
+;;; call ffi_closure_SYSV for the actual work, then return the result.
+;;; 
+ffi_closure_OUTER PROC FRAME
+	;; save actual arguments to their stack space.
+	test	r11, 1
+	jne	first_is_float	
+	mov	QWORD PTR [rsp+8], rcx
+	jmp	second
+first_is_float:
+	movlpd	QWORD PTR [rsp+8], xmm0
+
+second:
+	test	r11, 2
+	jne	second_is_float	
+	mov	QWORD PTR [rsp+16], rdx
+	jmp	third
+second_is_float:
+	movlpd	QWORD PTR [rsp+16], xmm1
+
+third:
+	test	r11, 4
+	jne	third_is_float	
+	mov	QWORD PTR [rsp+24], r8
+	jmp	forth
+third_is_float:
+	movlpd	QWORD PTR [rsp+24], xmm2
+
+forth:
+	test	r11, 8
+	jne	forth_is_float	
+	mov	QWORD PTR [rsp+32], r9
+	jmp	done
+forth_is_float:
+	movlpd	QWORD PTR [rsp+32], xmm3
+
+done:
+.ALLOCSTACK 40
+	sub	rsp, 40
+.ENDPROLOG
+	mov	rcx, rax	; context is first parameter
+	mov	rdx, rsp	; stack is second parameter
+	add	rdx, 40		; correct our own area
+	mov	rax, ffi_closure_SYSV
+	call	rax		; call the real closure function
+	;; Here, code is missing that handles float return values
+	add	rsp, 40
+	movd	xmm0, rax	; In case the closure returned a float.
+	ret	0
+ffi_closure_OUTER ENDP
+
+
+;;; ffi_call_AMD64
+
+stack$ = 0
+prepfunc$ = 32
+ecif$ = 40
+bytes$ = 48
+flags$ = 56
+rvalue$ = 64
+fn$ = 72
+
+ffi_call_AMD64 PROC FRAME
+
+	mov	QWORD PTR [rsp+32], r9
+	mov	QWORD PTR [rsp+24], r8
+	mov	QWORD PTR [rsp+16], rdx
+	mov	QWORD PTR [rsp+8], rcx
+.PUSHREG rbp
+	push	rbp
+.ALLOCSTACK 48
+	sub	rsp, 48					; 00000030H
+.SETFRAME rbp, 32
+	lea	rbp, QWORD PTR [rsp+32]
+.ENDPROLOG
+
+	mov	eax, DWORD PTR bytes$[rbp]
+	add	rax, 15
+	and	rax, -16
+	call	__chkstk
+	sub	rsp, rax
+	lea	rax, QWORD PTR [rsp+32]
+	mov	QWORD PTR stack$[rbp], rax
+
+	mov	rdx, QWORD PTR ecif$[rbp]
+	mov	rcx, QWORD PTR stack$[rbp]
+	call	QWORD PTR prepfunc$[rbp]
+
+	mov	rsp, QWORD PTR stack$[rbp]
+
+	movlpd	xmm3, QWORD PTR [rsp+24]
+	movd	r9, xmm3
+
+	movlpd	xmm2, QWORD PTR [rsp+16]
+	movd	r8, xmm2
+
+	movlpd	xmm1, QWORD PTR [rsp+8]
+	movd	rdx, xmm1
+
+	movlpd	xmm0, QWORD PTR [rsp]
+	movd	rcx, xmm0
+
+	call	QWORD PTR fn$[rbp]
+ret_int$:
+ 	cmp	DWORD PTR flags$[rbp], 1 ; FFI_TYPE_INT
+ 	jne	ret_float$
+
+	mov	rcx, QWORD PTR rvalue$[rbp]
+	mov	DWORD PTR [rcx], eax
+	jmp	SHORT ret_nothing$
+
+ret_float$:
+ 	cmp	DWORD PTR flags$[rbp], 2 ; FFI_TYPE_FLOAT
+ 	jne	SHORT ret_double$
+
+ 	mov	rax, QWORD PTR rvalue$[rbp]
+ 	movlpd	QWORD PTR [rax], xmm0
+ 	jmp	SHORT ret_nothing$
+
+ret_double$:
+ 	cmp	DWORD PTR flags$[rbp], 3 ; FFI_TYPE_DOUBLE
+ 	jne	SHORT ret_int64$
+
+ 	mov	rax, QWORD PTR rvalue$[rbp]
+ 	movlpd	QWORD PTR [rax], xmm0
+ 	jmp	SHORT ret_nothing$
+
+ret_int64$:
+  	cmp	DWORD PTR flags$[rbp], 12 ; FFI_TYPE_SINT64
+  	jne	ret_nothing$
+
+ 	mov	rcx, QWORD PTR rvalue$[rbp]
+ 	mov	QWORD PTR [rcx], rax
+ 	jmp	SHORT ret_nothing$
+	
+ret_nothing$:
+	xor	eax, eax
+
+	lea	rsp, QWORD PTR [rbp+16]
+	pop	rbp
+	ret	0
+ffi_call_AMD64 ENDP
+_TEXT	ENDS
+END
diff --git a/tests/compile/tbindtypedesc.nim b/tests/compile/tbindtypedesc.nim
index dd4ef854c..4ebfd12bb 100644
--- a/tests/compile/tbindtypedesc.nim
+++ b/tests/compile/tbindtypedesc.nim
@@ -44,6 +44,10 @@ good(genericParamOnce(TFoo, TFoo))
 bad(genericParamOnce(string, int))
 bad(genericParamOnce(TFoo, float))
 
+type
+  type1 = typedesc
+  type2 = typedesc
+
 proc typePairs(A, B: type1; C, D: type2) = nil
 
 good(typePairs(int, int, TFoo, TFOO))
diff --git a/tests/compile/tcomputedgoto.nim b/tests/compile/tcomputedgoto.nim
index 2c3b4bbd4..b21fc07a3 100644
--- a/tests/compile/tcomputedgoto.nim
+++ b/tests/compile/tcomputedgoto.nim
@@ -1,16 +1,16 @@
 discard """
-  output: '''yeah A
-yeah A
-yeah CD
-yeah CD
-yeah A
-yeah CD
-yeah CD
-yeah A
-yeah B
-yeah A
-yeah A
-yeah A'''
+  output: '''yeah A enumB
+yeah A enumB
+yeah CD enumD
+yeah CD enumE
+yeah A enumB
+yeah CD enumE
+yeah CD enumD
+yeah A enumB
+yeah B enumC
+yeah A enumB
+yeah A enumB
+yeah A enumB'''
 """
 
 type
@@ -32,13 +32,14 @@ proc vm() =
   while true:
     {.computedGoto.}
     let instr = instructions[pc]
+    let ra = instr.succ # instr.regA
     case instr
     of enumA:
-      echo "yeah A"
+      echo "yeah A ", ra
     of enumC, enumD:
-      echo "yeah CD"
+      echo "yeah CD ", ra
     of enumB:
-      echo "yeah B"
+      echo "yeah B ", ra
     of enumE:
       break
     inc(pc)
diff --git a/tests/compile/tircbot.nim b/tests/compile/tircbot.nim
index d16c99b69..6008838ff 100644
--- a/tests/compile/tircbot.nim
+++ b/tests/compile/tircbot.nim
@@ -257,7 +257,7 @@ proc limitCommitMsg(m: string): string =
 proc handleWebMessage(state: PState, line: string) =
   echo("Got message from hub: " & line)
   var json = parseJson(line)
-  if json.existsKey("payload"):
+  if json.hasKey("payload"):
     for i in 0..min(4, json["payload"]["commits"].len-1):
       var commit = json["payload"]["commits"][i]
       # Create the message
@@ -273,8 +273,8 @@ proc handleWebMessage(state: PState, line: string) =
 
       # Send message to #nimrod.
       state.ircClient.privmsg(joinChans[0], message)
-  elif json.existsKey("redisinfo"):
-    assert json["redisinfo"].existsKey("port")
+  elif json.hasKey("redisinfo"):
+    assert json["redisinfo"].hasKey("port")
     #let redisPort = json["redisinfo"]["port"].num
     state.dbConnected = true
 
diff --git a/tests/compile/tmacro2.nim b/tests/compile/tmacro2.nim
index e7bc648db..39324e497 100644
--- a/tests/compile/tmacro2.nim
+++ b/tests/compile/tmacro2.nim
@@ -26,7 +26,7 @@ macro importImpl_forward(name, returns): stmt {.immediate.} =
   p2.add newIdentNode("errors")
   p2.add newNimNode(nnkVarTy)
   p2.add newNimNode(nnkEmpty)
-  p2[1].add newNimNOde(nnkBracketExpr)
+  p2[1].add newNimNode(nnkBracketExpr)
   p2[1][0].add newIdentNode("seq")
   p2[1][0].add newIdentNode("string")
   res[3].add p2
diff --git a/tests/patterns/tor.nim b/tests/patterns/tor.nim
index 7de1a7fa1..833418919 100644
--- a/tests/patterns/tor.nim
+++ b/tests/patterns/tor.nim
@@ -1,5 +1,5 @@
 discard """
-  output: '''110
+  output: '''3060
 true'''
 """
 
diff --git a/tests/reject/t99bott.nim b/tests/reject/t99bott.nim
index 7ebfd61e9..d18cb0d5c 100644
--- a/tests/reject/t99bott.nim
+++ b/tests/reject/t99bott.nim
@@ -1,7 +1,7 @@
 discard """
   file: "t99bott.nim"
   line: 26
-  errormsg: "constant expression expected"
+  errormsg: "cannot evaluate at compile time: bn"
   disabled: false
 """
 ## 99 Bottles of Beer
diff --git a/tests/reject/tbind2.nim b/tests/reject/tbind2.nim
index 72a9844bb..e8e21ad02 100644
--- a/tests/reject/tbind2.nim
+++ b/tests/reject/tbind2.nim
@@ -1,6 +1,6 @@
 discard """
   file: "tbind2.nim"
-  line: 12
+  line: 14
   errormsg: "ambiguous call"
 """
 # Test the new ``bind`` keyword for templates
diff --git a/tests/reject/tdisallowif.nim b/tests/reject/tdisallowif.nim
index 10f54288a..18dfd1c82 100644
--- a/tests/reject/tdisallowif.nim
+++ b/tests/reject/tdisallowif.nim
@@ -1,6 +1,7 @@
 discard """
   line: 24
   errormsg: "usage of 'disallowIf' is a user-defined error"
+  disabled: true
 """
 
 template optZero{x+x}(x: int): int = x*3
@@ -25,4 +26,4 @@ if s[0] != "hi":
   echo "do it"
   echo "more branches"
 else:
-  nil
+  discard
diff --git a/tests/reject/teffects1.nim b/tests/reject/teffects1.nim
index 1c6c4bed8..f5eb56dc8 100644
--- a/tests/reject/teffects1.nim
+++ b/tests/reject/teffects1.nim
@@ -1,5 +1,5 @@
 discard """
-  line: 1804
+  line: 1840
   file: "system.nim"
   errormsg: "can raise an unlisted exception: ref EIO"
 """
diff --git a/tests/reject/tenummix.nim b/tests/reject/tenummix.nim
index f58e7989d..aaf0be2cb 100644
--- a/tests/reject/tenummix.nim
+++ b/tests/reject/tenummix.nim
@@ -1,6 +1,6 @@
 discard """
-  file: "system.nim"
-  line: 696
+  file: "tenummix.nim"
+  line: 11
   errormsg: "type mismatch"
 """
 
diff --git a/tests/reject/tnot.nim b/tests/reject/tnot.nim
index 1985ef666..cd0f538e6 100644
--- a/tests/reject/tnot.nim
+++ b/tests/reject/tnot.nim
@@ -1,5 +1,6 @@
 discard """
-  file: "system.nim"
+  file: "tnot.nim"
+  line: 14
   errormsg: "type mismatch"
 """
 # BUG: following compiles, but should not:
diff --git a/tests/reject/twrongconst.nim b/tests/reject/twrongconst.nim
index 16fe3bff6..e5b8a15bd 100644
--- a/tests/reject/twrongconst.nim
+++ b/tests/reject/twrongconst.nim
@@ -1,6 +1,6 @@
 discard """
-  output: "Error: constant expression expected"
-  line: 7
+  output: "Error: cannot evaluate at compile time: x"
+  line: 10
 """
 
 var x: array[100, char] 
diff --git a/tests/reject/twrongiter.nim b/tests/reject/twrongiter.nim
index 2d2502a6a..33394219b 100644
--- a/tests/reject/twrongiter.nim
+++ b/tests/reject/twrongiter.nim
@@ -1,5 +1,5 @@
 discard """
-line: 14
+line: 12
 errormsg: "type mismatch"
 """
 
diff --git a/todo.txt b/todo.txt
index da7585500..a6301ce0b 100644
--- a/todo.txt
+++ b/todo.txt
@@ -2,10 +2,7 @@ version 0.9.4
 =============
 
 - new VM:
-  - implement the glue to replace evals.nim
-  - implement missing magics
   - implement overflow checking
-  - implement the FFI
 
 - make 'bind' default for templates and introduce 'mixin'
 - special rule for ``[]=``
@@ -19,8 +16,9 @@ version 0.9.4
 Bugs
 ====
 
+- bug: 'type T = ref T' not recognized as illegal recursion
+- bug: type conversions concerning proc types are weird
 - compilation of niminst takes way too long. looks like a regression
-- simple closure iterator doesn't work
 - docgen: sometimes effects are listed twice
 - 'result' is not properly cleaned for NRVO --> use uninit checking instead
 - sneaking with qualifiedLookup() is really broken!
diff --git a/tools/detect/detect.nim b/tools/detect/detect.nim
index cf61c2823..87b682ad5 100644
--- a/tools/detect/detect.nim
+++ b/tools/detect/detect.nim
@@ -10,7 +10,7 @@
 import os, strutils
 
 const
-  cc = "gcc -o $1 $1.c"
+  cc = "gcc -o $# $#.c"
 
   cfile = """
 /* Generated by detect.nim */
@@ -37,6 +37,7 @@ var
   tl = ""
 
 proc myExec(cmd: string): bool = 
+  echo "CMD ", cmd
   return execShellCmd(cmd) == 0
 
 proc header(s: string): bool = 
@@ -46,7 +47,7 @@ proc header(s: string): bool =
     f.write("#include $1\n" % s)
     f.write("int main() { return 0; }\n")
     close(f)
-    result = myExec(cc % testh)
+    result = myExec(cc % [testh.addFileExt(ExeExt), testh])
     removeFile(addFileExt(testh, "c"))
   if result:
     addf(hd, "#include $1\n", s)
@@ -60,13 +61,16 @@ proc main =
   if open(f, addFileExt(gen, "c"), fmWrite): 
     f.write(cfile % [hd, tl, system.hostOS, system.hostCPU])
     close(f)
-  if not myExec(cc % gen): quit(1)
-  if not myExec("./" & gen): quit(1)
-  removeFile(addFileExt(gen, "c"))
+  if not myExec(cc % [gen.addFileExt(ExeExt), gen]): quit(1)
+  when defined(windows):
+    if not myExec(gen.addFileExt(ExeExt)): quit(1)
+  else:
+    if not myExec("./" & gen): quit(1)
+  #removeFile(addFileExt(gen, "c"))
   echo("Success")
 
 proc v(name: string, typ: TTypeKind=cint) = 
-  var n = if name[0] == '_': copy(name, 1) else: name
+  var n = if name[0] == '_': substr(name, 1) else: name
   var t = $typ
   case typ
   of pointer: 
@@ -369,7 +373,7 @@ if header("<pthread.h>"):
   #v("PTHREAD_MUTEX_INITIALIZER") 
   v("PTHREAD_MUTEX_NORMAL") 
   v("PTHREAD_MUTEX_RECURSIVE") #{.importc, header: "<pthread.h>".}: cint
-  v("PTHREAD_ONCE_INIT") #{.importc, header: "<pthread.h>".}: cint
+  #v("PTHREAD_ONCE_INIT") #{.importc, header: "<pthread.h>".}: cint
   v("PTHREAD_PRIO_INHERIT") #{.importc, header: "<pthread.h>".}: cint
   v("PTHREAD_PRIO_NONE") #{.importc, header: "<pthread.h>".}: cint
   v("PTHREAD_PRIO_PROTECT") #{.importc, header: "<pthread.h>".}: cint
@@ -820,5 +824,8 @@ if header("<spawn.h>"):
   v("POSIX_SPAWN_SETSIGDEF")
   v("POSIX_SPAWN_SETSIGMASK")
 
-main()
+if header("<stdio.h>"):
+  v "_IOFBF"
+  v "_IONBF"
 
+main()
diff --git a/tools/detect/macosx_consts.nim b/tools/detect/macosx_consts.nim
new file mode 100644
index 000000000..e8be56e5f
--- /dev/null
+++ b/tools/detect/macosx_consts.nim
@@ -0,0 +1,629 @@
+# Generated by detect.nim
+const
+  AIO_ALLDONE* = cint(1)
+  AIO_CANCELED* = cint(2)
+  AIO_NOTCANCELED* = cint(4)
+  LIO_NOP* = cint(0)
+  LIO_NOWAIT* = cint(1)
+  LIO_READ* = cint(1)
+  LIO_WAIT* = cint(2)
+  LIO_WRITE* = cint(2)
+  RTLD_LAZY* = cint(1)
+  RTLD_NOW* = cint(2)
+  RTLD_GLOBAL* = cint(8)
+  RTLD_LOCAL* = cint(4)
+  E2BIG* = cint(7)
+  EACCES* = cint(13)
+  EADDRINUSE* = cint(48)
+  EADDRNOTAVAIL* = cint(49)
+  EAFNOSUPPORT* = cint(47)
+  EAGAIN* = cint(35)
+  EALREADY* = cint(37)
+  EBADF* = cint(9)
+  EBADMSG* = cint(94)
+  EBUSY* = cint(16)
+  ECANCELED* = cint(89)
+  ECHILD* = cint(10)
+  ECONNABORTED* = cint(53)
+  ECONNREFUSED* = cint(61)
+  ECONNRESET* = cint(54)
+  EDEADLK* = cint(11)
+  EDESTADDRREQ* = cint(39)
+  EDOM* = cint(33)
+  EDQUOT* = cint(69)
+  EEXIST* = cint(17)
+  EFAULT* = cint(14)
+  EFBIG* = cint(27)
+  EHOSTUNREACH* = cint(65)
+  EIDRM* = cint(90)
+  EILSEQ* = cint(92)
+  EINPROGRESS* = cint(36)
+  EINTR* = cint(4)
+  EINVAL* = cint(22)
+  EIO* = cint(5)
+  EISCONN* = cint(56)
+  EISDIR* = cint(21)
+  ELOOP* = cint(62)
+  EMFILE* = cint(24)
+  EMLINK* = cint(31)
+  EMSGSIZE* = cint(40)
+  EMULTIHOP* = cint(95)
+  ENAMETOOLONG* = cint(63)
+  ENETDOWN* = cint(50)
+  ENETRESET* = cint(52)
+  ENETUNREACH* = cint(51)
+  ENFILE* = cint(23)
+  ENOBUFS* = cint(55)
+  ENODATA* = cint(96)
+  ENODEV* = cint(19)
+  ENOENT* = cint(2)
+  ENOEXEC* = cint(8)
+  ENOLCK* = cint(77)
+  ENOLINK* = cint(97)
+  ENOMEM* = cint(12)
+  ENOMSG* = cint(91)
+  ENOPROTOOPT* = cint(42)
+  ENOSPC* = cint(28)
+  ENOSR* = cint(98)
+  ENOSTR* = cint(99)
+  ENOSYS* = cint(78)
+  ENOTCONN* = cint(57)
+  ENOTDIR* = cint(20)
+  ENOTEMPTY* = cint(66)
+  ENOTSOCK* = cint(38)
+  ENOTSUP* = cint(45)
+  ENOTTY* = cint(25)
+  ENXIO* = cint(6)
+  EOPNOTSUPP* = cint(102)
+  EOVERFLOW* = cint(84)
+  EPERM* = cint(1)
+  EPIPE* = cint(32)
+  EPROTO* = cint(100)
+  EPROTONOSUPPORT* = cint(43)
+  EPROTOTYPE* = cint(41)
+  ERANGE* = cint(34)
+  EROFS* = cint(30)
+  ESPIPE* = cint(29)
+  ESRCH* = cint(3)
+  ESTALE* = cint(70)
+  ETIME* = cint(101)
+  ETIMEDOUT* = cint(60)
+  ETXTBSY* = cint(26)
+  EWOULDBLOCK* = cint(35)
+  EXDEV* = cint(18)
+  F_DUPFD* = cint(0)
+  F_GETFD* = cint(1)
+  F_SETFD* = cint(2)
+  F_GETFL* = cint(3)
+  F_SETFL* = cint(4)
+  F_GETLK* = cint(7)
+  F_SETLK* = cint(8)
+  F_SETLKW* = cint(9)
+  F_GETOWN* = cint(5)
+  F_SETOWN* = cint(6)
+  FD_CLOEXEC* = cint(1)
+  F_RDLCK* = cint(1)
+  F_UNLCK* = cint(2)
+  F_WRLCK* = cint(3)
+  O_CREAT* = cint(512)
+  O_EXCL* = cint(2048)
+  O_NOCTTY* = cint(131072)
+  O_TRUNC* = cint(1024)
+  O_APPEND* = cint(8)
+  O_DSYNC* = cint(4194304)
+  O_NONBLOCK* = cint(4)
+  O_SYNC* = cint(128)
+  O_ACCMODE* = cint(3)
+  O_RDONLY* = cint(0)
+  O_RDWR* = cint(2)
+  O_WRONLY* = cint(1)
+  FE_DIVBYZERO* = cint(4)
+  FE_INEXACT* = cint(32)
+  FE_INVALID* = cint(1)
+  FE_OVERFLOW* = cint(8)
+  FE_UNDERFLOW* = cint(16)
+  FE_ALL_EXCEPT* = cint(63)
+  FE_DOWNWARD* = cint(1024)
+  FE_TONEAREST* = cint(0)
+  FE_TOWARDZERO* = cint(3072)
+  FE_UPWARD* = cint(2048)
+  FE_DFL_ENV* = when defined(amd64): cast[pointer](0x7fff9533b1b4)
+                else: cast[pointer](0x904797f4)
+  MM_HARD* = cint(1)
+  MM_SOFT* = cint(2)
+  MM_FIRM* = cint(4)
+  MM_APPL* = cint(16)
+  MM_UTIL* = cint(32)
+  MM_OPSYS* = cint(64)
+  MM_RECOVER* = cint(4096)
+  MM_NRECOV* = cint(8192)
+  MM_HALT* = cint(1)
+  MM_ERROR* = cint(2)
+  MM_WARNING* = cint(3)
+  MM_INFO* = cint(4)
+  MM_NOSEV* = cint(0)
+  MM_PRINT* = cint(256)
+  MM_CONSOLE* = cint(512)
+  MM_OK* = cint(0)
+  MM_NOTOK* = cint(3)
+  MM_NOMSG* = cint(1)
+  MM_NOCON* = cint(2)
+  FNM_NOMATCH* = cint(1)
+  FNM_PATHNAME* = cint(2)
+  FNM_PERIOD* = cint(4)
+  FNM_NOESCAPE* = cint(1)
+  FNM_NOSYS* = cint(-1)
+  FTW_F* = cint(0)
+  FTW_D* = cint(1)
+  FTW_DNR* = cint(2)
+  FTW_DP* = cint(3)
+  FTW_NS* = cint(4)
+  FTW_SL* = cint(5)
+  FTW_SLN* = cint(6)
+  FTW_PHYS* = cint(1)
+  FTW_MOUNT* = cint(2)
+  FTW_DEPTH* = cint(4)
+  FTW_CHDIR* = cint(8)
+  GLOB_APPEND* = cint(1)
+  GLOB_DOOFFS* = cint(2)
+  GLOB_ERR* = cint(4)
+  GLOB_MARK* = cint(8)
+  GLOB_NOCHECK* = cint(16)
+  GLOB_NOESCAPE* = cint(8192)
+  GLOB_NOSORT* = cint(32)
+  GLOB_ABORTED* = cint(-2)
+  GLOB_NOMATCH* = cint(-3)
+  GLOB_NOSPACE* = cint(-1)
+  GLOB_NOSYS* = cint(-4)
+  CODESET* = cint(0)
+  D_T_FMT* = cint(1)
+  D_FMT* = cint(2)
+  T_FMT* = cint(3)
+  T_FMT_AMPM* = cint(4)
+  AM_STR* = cint(5)
+  PM_STR* = cint(6)
+  DAY_1* = cint(7)
+  DAY_2* = cint(8)
+  DAY_3* = cint(9)
+  DAY_4* = cint(10)
+  DAY_5* = cint(11)
+  DAY_6* = cint(12)
+  DAY_7* = cint(13)
+  ABDAY_1* = cint(14)
+  ABDAY_2* = cint(15)
+  ABDAY_3* = cint(16)
+  ABDAY_4* = cint(17)
+  ABDAY_5* = cint(18)
+  ABDAY_6* = cint(19)
+  ABDAY_7* = cint(20)
+  MON_1* = cint(21)
+  MON_2* = cint(22)
+  MON_3* = cint(23)
+  MON_4* = cint(24)
+  MON_5* = cint(25)
+  MON_6* = cint(26)
+  MON_7* = cint(27)
+  MON_8* = cint(28)
+  MON_9* = cint(29)
+  MON_10* = cint(30)
+  MON_11* = cint(31)
+  MON_12* = cint(32)
+  ABMON_1* = cint(33)
+  ABMON_2* = cint(34)
+  ABMON_3* = cint(35)
+  ABMON_4* = cint(36)
+  ABMON_5* = cint(37)
+  ABMON_6* = cint(38)
+  ABMON_7* = cint(39)
+  ABMON_8* = cint(40)
+  ABMON_9* = cint(41)
+  ABMON_10* = cint(42)
+  ABMON_11* = cint(43)
+  ABMON_12* = cint(44)
+  ERA* = cint(45)
+  ERA_D_FMT* = cint(46)
+  ERA_D_T_FMT* = cint(47)
+  ERA_T_FMT* = cint(48)
+  ALT_DIGITS* = cint(49)
+  RADIXCHAR* = cint(50)
+  THOUSEP* = cint(51)
+  YESEXPR* = cint(52)
+  NOEXPR* = cint(53)
+  CRNCYSTR* = cint(56)
+  LC_ALL* = cint(0)
+  LC_COLLATE* = cint(1)
+  LC_CTYPE* = cint(2)
+  LC_MESSAGES* = cint(6)
+  LC_MONETARY* = cint(3)
+  LC_NUMERIC* = cint(4)
+  LC_TIME* = cint(5)
+  PTHREAD_CANCEL_ASYNCHRONOUS* = cint(0)
+  PTHREAD_CANCEL_ENABLE* = cint(1)
+  PTHREAD_CANCEL_DEFERRED* = cint(2)
+  PTHREAD_CANCEL_DISABLE* = cint(0)
+  PTHREAD_CREATE_DETACHED* = cint(2)
+  PTHREAD_CREATE_JOINABLE* = cint(1)
+  PTHREAD_EXPLICIT_SCHED* = cint(2)
+  PTHREAD_INHERIT_SCHED* = cint(1)
+  PTHREAD_MUTEX_DEFAULT* = cint(0)
+  PTHREAD_MUTEX_ERRORCHECK* = cint(1)
+  PTHREAD_MUTEX_NORMAL* = cint(0)
+  PTHREAD_MUTEX_RECURSIVE* = cint(2)
+  PTHREAD_PRIO_INHERIT* = cint(1)
+  PTHREAD_PRIO_NONE* = cint(0)
+  PTHREAD_PRIO_PROTECT* = cint(2)
+  PTHREAD_PROCESS_SHARED* = cint(1)
+  PTHREAD_PROCESS_PRIVATE* = cint(2)
+  PTHREAD_SCOPE_PROCESS* = cint(2)
+  PTHREAD_SCOPE_SYSTEM* = cint(1)
+  F_OK* = cint(0)
+  R_OK* = cint(4)
+  W_OK* = cint(2)
+  X_OK* = cint(1)
+  CS_PATH* = cint(1)
+  CS_POSIX_V6_ILP32_OFF32_CFLAGS* = cint(2)
+  CS_POSIX_V6_ILP32_OFF32_LDFLAGS* = cint(3)
+  CS_POSIX_V6_ILP32_OFF32_LIBS* = cint(4)
+  CS_POSIX_V6_ILP32_OFFBIG_CFLAGS* = cint(5)
+  CS_POSIX_V6_ILP32_OFFBIG_LDFLAGS* = cint(6)
+  CS_POSIX_V6_ILP32_OFFBIG_LIBS* = cint(7)
+  CS_POSIX_V6_LP64_OFF64_CFLAGS* = cint(8)
+  CS_POSIX_V6_LP64_OFF64_LDFLAGS* = cint(9)
+  CS_POSIX_V6_LP64_OFF64_LIBS* = cint(10)
+  CS_POSIX_V6_LPBIG_OFFBIG_CFLAGS* = cint(11)
+  CS_POSIX_V6_LPBIG_OFFBIG_LDFLAGS* = cint(12)
+  CS_POSIX_V6_LPBIG_OFFBIG_LIBS* = cint(13)
+  CS_POSIX_V6_WIDTH_RESTRICTED_ENVS* = cint(14)
+  F_LOCK* = cint(1)
+  F_TEST* = cint(3)
+  F_TLOCK* = cint(2)
+  F_ULOCK* = cint(0)
+  PC_2_SYMLINKS* = cint(15)
+  PC_ALLOC_SIZE_MIN* = cint(16)
+  PC_ASYNC_IO* = cint(17)
+  PC_CHOWN_RESTRICTED* = cint(7)
+  PC_FILESIZEBITS* = cint(18)
+  PC_LINK_MAX* = cint(1)
+  PC_MAX_CANON* = cint(2)
+  PC_MAX_INPUT* = cint(3)
+  PC_NAME_MAX* = cint(4)
+  PC_NO_TRUNC* = cint(8)
+  PC_PATH_MAX* = cint(5)
+  PC_PIPE_BUF* = cint(6)
+  PC_PRIO_IO* = cint(19)
+  PC_REC_INCR_XFER_SIZE* = cint(20)
+  PC_REC_MIN_XFER_SIZE* = cint(22)
+  PC_REC_XFER_ALIGN* = cint(23)
+  PC_SYMLINK_MAX* = cint(24)
+  PC_SYNC_IO* = cint(25)
+  PC_VDISABLE* = cint(9)
+  SC_2_C_BIND* = cint(18)
+  SC_2_C_DEV* = cint(19)
+  SC_2_CHAR_TERM* = cint(20)
+  SC_2_FORT_DEV* = cint(21)
+  SC_2_FORT_RUN* = cint(22)
+  SC_2_LOCALEDEF* = cint(23)
+  SC_2_PBS* = cint(59)
+  SC_2_PBS_ACCOUNTING* = cint(60)
+  SC_2_PBS_CHECKPOINT* = cint(61)
+  SC_2_PBS_LOCATE* = cint(62)
+  SC_2_PBS_MESSAGE* = cint(63)
+  SC_2_PBS_TRACK* = cint(64)
+  SC_2_SW_DEV* = cint(24)
+  SC_2_UPE* = cint(25)
+  SC_2_VERSION* = cint(17)
+  SC_ADVISORY_INFO* = cint(65)
+  SC_AIO_LISTIO_MAX* = cint(42)
+  SC_AIO_MAX* = cint(43)
+  SC_AIO_PRIO_DELTA_MAX* = cint(44)
+  SC_ARG_MAX* = cint(1)
+  SC_ASYNCHRONOUS_IO* = cint(28)
+  SC_ATEXIT_MAX* = cint(107)
+  SC_BARRIERS* = cint(66)
+  SC_BC_BASE_MAX* = cint(9)
+  SC_BC_DIM_MAX* = cint(10)
+  SC_BC_SCALE_MAX* = cint(11)
+  SC_BC_STRING_MAX* = cint(12)
+  SC_CHILD_MAX* = cint(2)
+  SC_CLK_TCK* = cint(3)
+  SC_CLOCK_SELECTION* = cint(67)
+  SC_COLL_WEIGHTS_MAX* = cint(13)
+  SC_CPUTIME* = cint(68)
+  SC_DELAYTIMER_MAX* = cint(45)
+  SC_EXPR_NEST_MAX* = cint(14)
+  SC_FSYNC* = cint(38)
+  SC_GETGR_R_SIZE_MAX* = cint(70)
+  SC_GETPW_R_SIZE_MAX* = cint(71)
+  SC_HOST_NAME_MAX* = cint(72)
+  SC_IOV_MAX* = cint(56)
+  SC_IPV6* = cint(118)
+  SC_JOB_CONTROL* = cint(6)
+  SC_LINE_MAX* = cint(15)
+  SC_LOGIN_NAME_MAX* = cint(73)
+  SC_MAPPED_FILES* = cint(47)
+  SC_MEMLOCK* = cint(30)
+  SC_MEMLOCK_RANGE* = cint(31)
+  SC_MEMORY_PROTECTION* = cint(32)
+  SC_MESSAGE_PASSING* = cint(33)
+  SC_MONOTONIC_CLOCK* = cint(74)
+  SC_MQ_OPEN_MAX* = cint(46)
+  SC_MQ_PRIO_MAX* = cint(75)
+  SC_NGROUPS_MAX* = cint(4)
+  SC_OPEN_MAX* = cint(5)
+  SC_PAGE_SIZE* = cint(29)
+  SC_PRIORITIZED_IO* = cint(34)
+  SC_PRIORITY_SCHEDULING* = cint(35)
+  SC_RAW_SOCKETS* = cint(119)
+  SC_RE_DUP_MAX* = cint(16)
+  SC_READER_WRITER_LOCKS* = cint(76)
+  SC_REALTIME_SIGNALS* = cint(36)
+  SC_REGEXP* = cint(77)
+  SC_RTSIG_MAX* = cint(48)
+  SC_SAVED_IDS* = cint(7)
+  SC_SEM_NSEMS_MAX* = cint(49)
+  SC_SEM_VALUE_MAX* = cint(50)
+  SC_SEMAPHORES* = cint(37)
+  SC_SHARED_MEMORY_OBJECTS* = cint(39)
+  SC_SHELL* = cint(78)
+  SC_SIGQUEUE_MAX* = cint(51)
+  SC_SPAWN* = cint(79)
+  SC_SPIN_LOCKS* = cint(80)
+  SC_SPORADIC_SERVER* = cint(81)
+  SC_SS_REPL_MAX* = cint(126)
+  SC_STREAM_MAX* = cint(26)
+  SC_SYMLOOP_MAX* = cint(120)
+  SC_SYNCHRONIZED_IO* = cint(40)
+  SC_THREAD_ATTR_STACKADDR* = cint(82)
+  SC_THREAD_ATTR_STACKSIZE* = cint(83)
+  SC_THREAD_CPUTIME* = cint(84)
+  SC_THREAD_DESTRUCTOR_ITERATIONS* = cint(85)
+  SC_THREAD_KEYS_MAX* = cint(86)
+  SC_THREAD_PRIO_INHERIT* = cint(87)
+  SC_THREAD_PRIO_PROTECT* = cint(88)
+  SC_THREAD_PRIORITY_SCHEDULING* = cint(89)
+  SC_THREAD_PROCESS_SHARED* = cint(90)
+  SC_THREAD_SAFE_FUNCTIONS* = cint(91)
+  SC_THREAD_SPORADIC_SERVER* = cint(92)
+  SC_THREAD_STACK_MIN* = cint(93)
+  SC_THREAD_THREADS_MAX* = cint(94)
+  SC_THREADS* = cint(96)
+  SC_TIMEOUTS* = cint(95)
+  SC_TIMER_MAX* = cint(52)
+  SC_TIMERS* = cint(41)
+  SC_TRACE* = cint(97)
+  SC_TRACE_EVENT_FILTER* = cint(98)
+  SC_TRACE_EVENT_NAME_MAX* = cint(127)
+  SC_TRACE_INHERIT* = cint(99)
+  SC_TRACE_LOG* = cint(100)
+  SC_TRACE_NAME_MAX* = cint(128)
+  SC_TRACE_SYS_MAX* = cint(129)
+  SC_TRACE_USER_EVENT_MAX* = cint(130)
+  SC_TTY_NAME_MAX* = cint(101)
+  SC_TYPED_MEMORY_OBJECTS* = cint(102)
+  SC_TZNAME_MAX* = cint(27)
+  SC_V6_ILP32_OFF32* = cint(103)
+  SC_V6_ILP32_OFFBIG* = cint(104)
+  SC_V6_LP64_OFF64* = cint(105)
+  SC_V6_LPBIG_OFFBIG* = cint(106)
+  SC_VERSION* = cint(8)
+  SC_XBS5_ILP32_OFF32* = cint(122)
+  SC_XBS5_ILP32_OFFBIG* = cint(123)
+  SC_XBS5_LP64_OFF64* = cint(124)
+  SC_XBS5_LPBIG_OFFBIG* = cint(125)
+  SC_XOPEN_CRYPT* = cint(108)
+  SC_XOPEN_ENH_I18N* = cint(109)
+  SC_XOPEN_LEGACY* = cint(110)
+  SC_XOPEN_REALTIME* = cint(111)
+  SC_XOPEN_REALTIME_THREADS* = cint(112)
+  SC_XOPEN_SHM* = cint(113)
+  SC_XOPEN_STREAMS* = cint(114)
+  SC_XOPEN_UNIX* = cint(115)
+  SC_XOPEN_VERSION* = cint(116)
+  SEEK_SET* = cint(0)
+  SEEK_CUR* = cint(1)
+  SEEK_END* = cint(2)
+  SEM_FAILED* = cast[pointer](-1)
+  IPC_CREAT* = cint(512)
+  IPC_EXCL* = cint(1024)
+  IPC_NOWAIT* = cint(2048)
+  IPC_PRIVATE* = cint(0)
+  IPC_RMID* = cint(0)
+  IPC_SET* = cint(1)
+  IPC_STAT* = cint(2)
+  S_IFMT* = cint(61440)
+  S_IFBLK* = cint(24576)
+  S_IFCHR* = cint(8192)
+  S_IFIFO* = cint(4096)
+  S_IFREG* = cint(32768)
+  S_IFDIR* = cint(16384)
+  S_IFLNK* = cint(40960)
+  S_IFSOCK* = cint(49152)
+  S_IRWXU* = cint(448)
+  S_IRUSR* = cint(256)
+  S_IWUSR* = cint(128)
+  S_IXUSR* = cint(64)
+  S_IRWXG* = cint(56)
+  S_IRGRP* = cint(32)
+  S_IWGRP* = cint(16)
+  S_IXGRP* = cint(8)
+  S_IRWXO* = cint(7)
+  S_IROTH* = cint(4)
+  S_IWOTH* = cint(2)
+  S_IXOTH* = cint(1)
+  S_ISUID* = cint(2048)
+  S_ISGID* = cint(1024)
+  S_ISVTX* = cint(512)
+  ST_RDONLY* = cint(1)
+  ST_NOSUID* = cint(2)
+  PROT_READ* = cint(1)
+  PROT_WRITE* = cint(2)
+  PROT_EXEC* = cint(4)
+  PROT_NONE* = cint(0)
+  MAP_SHARED* = cint(1)
+  MAP_PRIVATE* = cint(2)
+  MAP_FIXED* = cint(16)
+  MS_ASYNC* = cint(1)
+  MS_SYNC* = cint(16)
+  MS_INVALIDATE* = cint(2)
+  MCL_CURRENT* = cint(1)
+  MCL_FUTURE* = cint(2)
+  MAP_FAILED* = cast[pointer](-1)
+  POSIX_MADV_NORMAL* = cint(0)
+  POSIX_MADV_SEQUENTIAL* = cint(2)
+  POSIX_MADV_RANDOM* = cint(1)
+  POSIX_MADV_WILLNEED* = cint(3)
+  POSIX_MADV_DONTNEED* = cint(4)
+  CLOCKS_PER_SEC* = clong(1000000)
+  WNOHANG* = cint(1)
+  WUNTRACED* = cint(2)
+  WEXITED* = cint(4)
+  WSTOPPED* = cint(8)
+  WCONTINUED* = cint(16)
+  WNOWAIT* = cint(32)
+  SIGEV_NONE* = cint(0)
+  SIGEV_SIGNAL* = cint(1)
+  SIGEV_THREAD* = cint(3)
+  SIGABRT* = cint(6)
+  SIGALRM* = cint(14)
+  SIGBUS* = cint(10)
+  SIGCHLD* = cint(20)
+  SIGCONT* = cint(19)
+  SIGFPE* = cint(8)
+  SIGHUP* = cint(1)
+  SIGILL* = cint(4)
+  SIGINT* = cint(2)
+  SIGKILL* = cint(9)
+  SIGPIPE* = cint(13)
+  SIGQUIT* = cint(3)
+  SIGSEGV* = cint(11)
+  SIGSTOP* = cint(17)
+  SIGTERM* = cint(15)
+  SIGTSTP* = cint(18)
+  SIGTTIN* = cint(21)
+  SIGTTOU* = cint(22)
+  SIGUSR1* = cint(30)
+  SIGUSR2* = cint(31)
+  SIGPROF* = cint(27)
+  SIGSYS* = cint(12)
+  SIGTRAP* = cint(5)
+  SIGURG* = cint(16)
+  SIGVTALRM* = cint(26)
+  SIGXCPU* = cint(24)
+  SIGXFSZ* = cint(25)
+  SA_NOCLDSTOP* = cint(8)
+  SIG_BLOCK* = cint(1)
+  SIG_UNBLOCK* = cint(2)
+  SIG_SETMASK* = cint(3)
+  SA_ONSTACK* = cint(1)
+  SA_RESETHAND* = cint(4)
+  SA_RESTART* = cint(2)
+  SA_SIGINFO* = cint(64)
+  SA_NOCLDWAIT* = cint(32)
+  SA_NODEFER* = cint(16)
+  SS_ONSTACK* = cint(1)
+  SS_DISABLE* = cint(4)
+  MINSIGSTKSZ* = cint(32768)
+  SIGSTKSZ* = cint(131072)
+  NL_SETD* = cint(1)
+  NL_CAT_LOCALE* = cint(1)
+  SCHED_FIFO* = cint(4)
+  SCHED_RR* = cint(2)
+  SCHED_OTHER* = cint(1)
+  FD_SETSIZE* = cint(1024)
+  SCM_RIGHTS* = cint(1)
+  SOCK_DGRAM* = cint(2)
+  SOCK_RAW* = cint(3)
+  SOCK_SEQPACKET* = cint(5)
+  SOCK_STREAM* = cint(1)
+  SOL_SOCKET* = cint(65535)
+  SO_ACCEPTCONN* = cint(2)
+  SO_BROADCAST* = cint(32)
+  SO_DEBUG* = cint(1)
+  SO_DONTROUTE* = cint(16)
+  SO_ERROR* = cint(4103)
+  SO_KEEPALIVE* = cint(8)
+  SO_LINGER* = cint(128)
+  SO_OOBINLINE* = cint(256)
+  SO_RCVBUF* = cint(4098)
+  SO_RCVLOWAT* = cint(4100)
+  SO_RCVTIMEO* = cint(4102)
+  SO_REUSEADDR* = cint(4)
+  SO_SNDBUF* = cint(4097)
+  SO_SNDLOWAT* = cint(4099)
+  SO_SNDTIMEO* = cint(4101)
+  SO_TYPE* = cint(4104)
+  SOMAXCONN* = cint(128)
+  MSG_CTRUNC* = cint(32)
+  MSG_DONTROUTE* = cint(4)
+  MSG_EOR* = cint(8)
+  MSG_OOB* = cint(1)
+  MSG_PEEK* = cint(2)
+  MSG_TRUNC* = cint(16)
+  MSG_WAITALL* = cint(64)
+  AF_INET* = cint(2)
+  AF_INET6* = cint(30)
+  AF_UNIX* = cint(1)
+  AF_UNSPEC* = cint(0)
+  SHUT_RD* = cint(0)
+  SHUT_RDWR* = cint(2)
+  SHUT_WR* = cint(1)
+  IPPROTO_IP* = cint(0)
+  IPPROTO_IPV6* = cint(41)
+  IPPROTO_ICMP* = cint(1)
+  IPPROTO_RAW* = cint(255)
+  IPPROTO_TCP* = cint(6)
+  IPPROTO_UDP* = cint(17)
+  INADDR_ANY* = cint(0)
+  INADDR_BROADCAST* = cint(-1)
+  INET_ADDRSTRLEN* = cint(16)
+  IPV6_JOIN_GROUP* = cint(12)
+  IPV6_LEAVE_GROUP* = cint(13)
+  IPV6_MULTICAST_HOPS* = cint(10)
+  IPV6_MULTICAST_IF* = cint(9)
+  IPV6_MULTICAST_LOOP* = cint(11)
+  IPV6_UNICAST_HOPS* = cint(4)
+  IPV6_V6ONLY* = cint(27)
+  IPPORT_RESERVED* = cint(1024)
+  HOST_NOT_FOUND* = cint(1)
+  NO_DATA* = cint(4)
+  NO_RECOVERY* = cint(3)
+  TRY_AGAIN* = cint(2)
+  AI_PASSIVE* = cint(1)
+  AI_CANONNAME* = cint(2)
+  AI_NUMERICHOST* = cint(4)
+  AI_NUMERICSERV* = cint(4096)
+  AI_V4MAPPED* = cint(2048)
+  AI_ALL* = cint(256)
+  AI_ADDRCONFIG* = cint(1024)
+  NI_NOFQDN* = cint(1)
+  NI_NUMERICHOST* = cint(2)
+  NI_NAMEREQD* = cint(4)
+  NI_NUMERICSERV* = cint(8)
+  NI_DGRAM* = cint(16)
+  EAI_AGAIN* = cint(2)
+  EAI_BADFLAGS* = cint(3)
+  EAI_FAIL* = cint(4)
+  EAI_FAMILY* = cint(5)
+  EAI_MEMORY* = cint(6)
+  EAI_NONAME* = cint(8)
+  EAI_SERVICE* = cint(9)
+  EAI_SOCKTYPE* = cint(10)
+  EAI_SYSTEM* = cint(11)
+  EAI_OVERFLOW* = cint(14)
+  POLLIN* = cshort(1)
+  POLLRDNORM* = cshort(64)
+  POLLRDBAND* = cshort(128)
+  POLLPRI* = cshort(2)
+  POLLOUT* = cshort(4)
+  POLLWRNORM* = cshort(4)
+  POLLWRBAND* = cshort(256)
+  POLLERR* = cshort(8)
+  POLLHUP* = cshort(16)
+  POLLNVAL* = cshort(32)
+  POSIX_SPAWN_RESETIDS* = cint(1)
+  POSIX_SPAWN_SETPGROUP* = cint(2)
+  POSIX_SPAWN_SETSIGDEF* = cint(4)
+  POSIX_SPAWN_SETSIGMASK* = cint(8)
+  IOFBF* = cint(0)
+  IONBF* = cint(2)
+
diff --git a/tools/detect/windows_amd64_consts.nim b/tools/detect/windows_amd64_consts.nim
new file mode 100644
index 000000000..d72c9786d
--- /dev/null
+++ b/tools/detect/windows_amd64_consts.nim
@@ -0,0 +1,152 @@
+# Generated by detect.nim
+const
+  E2BIG* = cint(7)
+  EACCES* = cint(13)
+  EADDRINUSE* = cint(100)
+  EADDRNOTAVAIL* = cint(101)
+  EAFNOSUPPORT* = cint(102)
+  EAGAIN* = cint(11)
+  EALREADY* = cint(103)
+  EBADF* = cint(9)
+  EBUSY* = cint(16)
+  ECANCELED* = cint(105)
+  ECHILD* = cint(10)
+  ECONNABORTED* = cint(106)
+  ECONNREFUSED* = cint(107)
+  ECONNRESET* = cint(108)
+  EDEADLK* = cint(36)
+  EDESTADDRREQ* = cint(109)
+  EDOM* = cint(33)
+  EEXIST* = cint(17)
+  EFAULT* = cint(14)
+  EFBIG* = cint(27)
+  EHOSTUNREACH* = cint(110)
+  EILSEQ* = cint(42)
+  EINPROGRESS* = cint(112)
+  EINTR* = cint(4)
+  EINVAL* = cint(22)
+  EIO* = cint(5)
+  EISCONN* = cint(113)
+  EISDIR* = cint(21)
+  ELOOP* = cint(114)
+  EMFILE* = cint(24)
+  EMLINK* = cint(31)
+  EMSGSIZE* = cint(115)
+  ENAMETOOLONG* = cint(38)
+  ENETDOWN* = cint(116)
+  ENETRESET* = cint(117)
+  ENETUNREACH* = cint(118)
+  ENFILE* = cint(23)
+  ENOBUFS* = cint(119)
+  ENODEV* = cint(19)
+  ENOENT* = cint(2)
+  ENOEXEC* = cint(8)
+  ENOLCK* = cint(39)
+  ENOMEM* = cint(12)
+  ENOPROTOOPT* = cint(123)
+  ENOSPC* = cint(28)
+  ENOSYS* = cint(40)
+  ENOTCONN* = cint(126)
+  ENOTDIR* = cint(20)
+  ENOTEMPTY* = cint(41)
+  ENOTSOCK* = cint(128)
+  ENOTSUP* = cint(129)
+  ENOTTY* = cint(25)
+  ENXIO* = cint(6)
+  EOPNOTSUPP* = cint(130)
+  EOVERFLOW* = cint(132)
+  EPERM* = cint(1)
+  EPIPE* = cint(32)
+  EPROTO* = cint(134)
+  EPROTONOSUPPORT* = cint(135)
+  EPROTOTYPE* = cint(136)
+  ERANGE* = cint(34)
+  EROFS* = cint(30)
+  ESPIPE* = cint(29)
+  ESRCH* = cint(3)
+  ETIMEDOUT* = cint(138)
+  EWOULDBLOCK* = cint(140)
+  EXDEV* = cint(18)
+  O_CREAT* = cint(256)
+  O_EXCL* = cint(1024)
+  O_TRUNC* = cint(512)
+  O_APPEND* = cint(8)
+  O_ACCMODE* = cint(3)
+  O_RDONLY* = cint(0)
+  O_RDWR* = cint(2)
+  O_WRONLY* = cint(1)
+  FE_DIVBYZERO* = cint(4)
+  FE_INEXACT* = cint(32)
+  FE_INVALID* = cint(1)
+  FE_OVERFLOW* = cint(8)
+  FE_UNDERFLOW* = cint(16)
+  FE_ALL_EXCEPT* = cint(63)
+  FE_DOWNWARD* = cint(1024)
+  FE_TONEAREST* = cint(0)
+  FE_TOWARDZERO* = cint(3072)
+  FE_UPWARD* = cint(2048)
+  FE_DFL_ENV* = pointer(nil)
+  LC_ALL* = cint(0)
+  LC_COLLATE* = cint(1)
+  LC_CTYPE* = cint(2)
+  LC_MONETARY* = cint(3)
+  LC_NUMERIC* = cint(4)
+  LC_TIME* = cint(5)
+  PTHREAD_BARRIER_SERIAL_THREAD* = cint(1)
+  PTHREAD_CANCEL_ASYNCHRONOUS* = cint(2)
+  PTHREAD_CANCEL_ENABLE* = cint(1)
+  PTHREAD_CANCEL_DEFERRED* = cint(0)
+  PTHREAD_CANCEL_DISABLE* = cint(0)
+  PTHREAD_CREATE_DETACHED* = cint(4)
+  PTHREAD_CREATE_JOINABLE* = cint(0)
+  PTHREAD_EXPLICIT_SCHED* = cint(0)
+  PTHREAD_INHERIT_SCHED* = cint(8)
+  PTHREAD_MUTEX_DEFAULT* = cint(0)
+  PTHREAD_MUTEX_ERRORCHECK* = cint(1)
+  PTHREAD_MUTEX_NORMAL* = cint(0)
+  PTHREAD_MUTEX_RECURSIVE* = cint(2)
+  PTHREAD_PRIO_INHERIT* = cint(8)
+  PTHREAD_PRIO_NONE* = cint(0)
+  PTHREAD_PRIO_PROTECT* = cint(16)
+  PTHREAD_PROCESS_SHARED* = cint(1)
+  PTHREAD_PROCESS_PRIVATE* = cint(0)
+  PTHREAD_SCOPE_PROCESS* = cint(0)
+  PTHREAD_SCOPE_SYSTEM* = cint(16)
+  F_OK* = cint(0)
+  R_OK* = cint(4)
+  W_OK* = cint(2)
+  X_OK* = cint(1)
+  SEEK_SET* = cint(0)
+  SEEK_CUR* = cint(1)
+  SEEK_END* = cint(2)
+  SEM_FAILED* = pointer(nil)
+  S_IFMT* = cint(61440)
+  S_IFBLK* = cint(12288)
+  S_IFCHR* = cint(8192)
+  S_IFIFO* = cint(4096)
+  S_IFREG* = cint(32768)
+  S_IFDIR* = cint(16384)
+  S_IRWXU* = cint(448)
+  S_IRUSR* = cint(256)
+  S_IWUSR* = cint(128)
+  S_IXUSR* = cint(64)
+  CLOCKS_PER_SEC* = clong(1000)
+  CLOCK_PROCESS_CPUTIME_ID* = cint(2)
+  CLOCK_THREAD_CPUTIME_ID* = cint(3)
+  CLOCK_REALTIME* = cint(0)
+  TIMER_ABSTIME* = cint(1)
+  CLOCK_MONOTONIC* = cint(1)
+  SIGABRT* = cint(22)
+  SIGFPE* = cint(8)
+  SIGILL* = cint(4)
+  SIGINT* = cint(2)
+  SIGSEGV* = cint(11)
+  SIGTERM* = cint(15)
+  SIG_BLOCK* = cint(0)
+  SIG_UNBLOCK* = cint(1)
+  SIG_SETMASK* = cint(2)
+  SCHED_FIFO* = cint(1)
+  SCHED_RR* = cint(2)
+  SCHED_OTHER* = cint(0)
+  IOFBF* = cint(0)
+  IONBF* = cint(4)
diff --git a/tools/detect/windows_i386_consts.nim b/tools/detect/windows_i386_consts.nim
new file mode 100644
index 000000000..cd6c475f4
--- /dev/null
+++ b/tools/detect/windows_i386_consts.nim
@@ -0,0 +1,96 @@
+# Generated by detect.nim
+const
+  E2BIG* = cint(7)
+  EACCES* = cint(13)
+  EAGAIN* = cint(11)
+  EBADF* = cint(9)
+  EBUSY* = cint(16)
+  ECHILD* = cint(10)
+  EDEADLK* = cint(36)
+  EDOM* = cint(33)
+  EEXIST* = cint(17)
+  EFAULT* = cint(14)
+  EFBIG* = cint(27)
+  EILSEQ* = cint(42)
+  EINTR* = cint(4)
+  EINVAL* = cint(22)
+  EIO* = cint(5)
+  EISDIR* = cint(21)
+  EMFILE* = cint(24)
+  EMLINK* = cint(31)
+  ENAMETOOLONG* = cint(38)
+  ENFILE* = cint(23)
+  ENODEV* = cint(19)
+  ENOENT* = cint(2)
+  ENOEXEC* = cint(8)
+  ENOLCK* = cint(39)
+  ENOMEM* = cint(12)
+  ENOSPC* = cint(28)
+  ENOSYS* = cint(40)
+  ENOTDIR* = cint(20)
+  ENOTEMPTY* = cint(41)
+  ENOTSUP* = cint(48)
+  ENOTTY* = cint(25)
+  ENXIO* = cint(6)
+  EPERM* = cint(1)
+  EPIPE* = cint(32)
+  ERANGE* = cint(34)
+  EROFS* = cint(30)
+  ESPIPE* = cint(29)
+  ESRCH* = cint(3)
+  ETIMEDOUT* = cint(10060)
+  EXDEV* = cint(18)
+  O_CREAT* = cint(256)
+  O_EXCL* = cint(1024)
+  O_TRUNC* = cint(512)
+  O_APPEND* = cint(8)
+  O_ACCMODE* = cint(3)
+  O_RDONLY* = cint(0)
+  O_RDWR* = cint(2)
+  O_WRONLY* = cint(1)
+  FE_DIVBYZERO* = cint(4)
+  FE_INEXACT* = cint(32)
+  FE_INVALID* = cint(1)
+  FE_OVERFLOW* = cint(8)
+  FE_UNDERFLOW* = cint(16)
+  FE_ALL_EXCEPT* = cint(63)
+  FE_DOWNWARD* = cint(1024)
+  FE_TONEAREST* = cint(0)
+  FE_TOWARDZERO* = cint(3072)
+  FE_UPWARD* = cint(2048)
+  FE_DFL_ENV* = pointer(nil)
+  LC_ALL* = cint(0)
+  LC_COLLATE* = cint(1)
+  LC_CTYPE* = cint(2)
+  LC_MONETARY* = cint(3)
+  LC_NUMERIC* = cint(4)
+  LC_TIME* = cint(5)
+  F_OK* = cint(0)
+  R_OK* = cint(4)
+  W_OK* = cint(2)
+  X_OK* = cint(1)
+  SEEK_SET* = cint(0)
+  SEEK_CUR* = cint(1)
+  SEEK_END* = cint(2)
+  S_IFMT* = cint(61440)
+  S_IFBLK* = cint(12288)
+  S_IFCHR* = cint(8192)
+  S_IFIFO* = cint(4096)
+  S_IFREG* = cint(32768)
+  S_IFDIR* = cint(16384)
+  S_IRWXU* = cint(448)
+  S_IRUSR* = cint(256)
+  S_IWUSR* = cint(128)
+  S_IXUSR* = cint(64)
+  CLOCKS_PER_SEC* = clong(1000)
+  SIGABRT* = cint(22)
+  SIGFPE* = cint(8)
+  SIGILL* = cint(4)
+  SIGINT* = cint(2)
+  SIGSEGV* = cint(11)
+  SIGTERM* = cint(15)
+  SIG_BLOCK* = cint(0)
+  SIG_UNBLOCK* = cint(1)
+  SIG_SETMASK* = cint(2)
+  IOFBF* = cint(0)
+  IONBF* = cint(4)
diff --git a/web/news.txt b/web/news.txt
index 4f40d6484..d6d269649 100644
--- a/web/news.txt
+++ b/web/news.txt
@@ -28,6 +28,8 @@ Changes affecting backwards compatibility
   require an error code to be passed to them. This error code can be retrieved
   using the new ``OSLastError`` proc.
 - ``os.parentDir`` now returns "" if there is no parent dir.
+- In CGI scripts, stacktraces are shown to the user only if
+  cgi.setStackTraceStdout is used.
 
 Compiler Additions
 ------------------
@@ -40,8 +42,10 @@ Compiler Additions
   over the generated code.
 - The compiler now supports a ``computedGoto`` pragma to support very fast
   dispatching for interpreters and the like.
-- In CGI scripts stacktraces are shown user only if cgi.setStackTraceStdout
-  is used.
+- The old evaluation engine has been replaced by a proper register-based
+  virtual machine. This fixes numerous bugs for ``nimrod i`` and for macro
+  evaluation.
+
 
 Language Additions
 ------------------
@@ -55,7 +59,7 @@ Language Additions
   OOP-like syntactic sugar.
 - Added ``delegator pragma`` for handling calls to missing procs and fields at
   compile-time.
-- Support for user-defined type classes have been added.
+- Support for user-defined type classes has been added.
 
 
 Tools improvements