32 files changed, 7364 insertions, 112 deletions
diff --git a/lib/core/macros.nim b/lib/core/macros.nim
index 52ee9326f..8ffd268ff 100644
--- a/lib/core/macros.nim
+++ b/lib/core/macros.nim
@@ -158,7 +158,13 @@ proc `intVal=`*(n: PNimrodNode, val: biggestInt) {.magic: "NSetIntVal".}
 proc `floatVal=`*(n: PNimrodNode, val: biggestFloat) {.magic: "NSetFloatVal".}
 proc `symbol=`*(n: PNimrodNode, val: PNimrodSymbol) {.magic: "NSetSymbol".}
 proc `ident=`*(n: PNimrodNode, val: TNimrodIdent) {.magic: "NSetIdent".}
-proc `typ=`*(n: PNimrodNode, typ: typedesc) {.magic: "NSetType".}
+#proc `typ=`*(n: PNimrodNode, typ: typedesc) {.magic: "NSetType".}
+# this is not sound! Unfortunately forbidding 'typ=' is not enough, as you
+# can easily do:
+#   let bracket = semCheck([1, 2])
+#   let fake = semCheck(2.0)
+#   bracket[0] = fake  # constructs a mixed array with ints and floats!
+
 proc `strVal=`*(n: PNimrodNode, val: string) {.magic: "NSetStrVal".}
 
 proc newNimNode*(kind: TNimrodNodeKind,
diff --git a/lib/packages/docutils/highlite.nim b/lib/packages/docutils/highlite.nim
index 69da2bba8..4bfdf5e58 100644
--- a/lib/packages/docutils/highlite.nim
+++ b/lib/packages/docutils/highlite.nim
@@ -53,7 +53,7 @@ const
     "interface", "is", "isnot", "iterator", "lambda", "let", "macro", "method",
     "mixin", "mod", "nil", "not", "notin", "object", "of", "or", "out", "proc",
     "ptr", "raise", "ref", "return", "shared", "shl", "shr", "static",
-    "template", "try", "tuple", "type", "var", "when", "while", "with",
+    "template", "try", "tuple", "type", "using", "var", "when", "while", "with",
     "without", "xor", "yield"]
 
 proc getSourceLanguage*(name: string): TSourceLanguage = 
diff --git a/lib/pure/encodings.nim b/lib/pure/encodings.nim
index ce4238409..bc849bfe8 100644
--- a/lib/pure/encodings.nim
+++ b/lib/pure/encodings.nim
@@ -14,15 +14,15 @@ import os, parseutils, strutils
 
 when not defined(windows):
   type
-    TConverter = object {.pure, final.}
+    TConverter = object
     PConverter* = ptr TConverter ## can convert between two character sets
-    
+
 else:
   type
     TCodePage = distinct int32
-    PConverter* = object {.pure.}
+    PConverter* = object
       dest, src: TCodePage
-    
+
 type
   EInvalidEncoding* = object of EInvalidValue ## exception that is raised
                                               ## for encoding errors
@@ -425,7 +425,7 @@ else:
           dst = cast[cstring](cast[int](cstring(result)) + offset)
           outLen = len(result) - offset
         else:
-          OSError()
+          OSError(lerr.TOSErrorCode)
     # iconv has a buffer that needs flushing, specially if the last char is 
     # not '\0'
     discard iconv(c, nil, nil, dst, outlen)
diff --git a/lib/pure/os.nim b/lib/pure/os.nim
index d74cb1fb9..2e15587f4 100644
--- a/lib/pure/os.nim
+++ b/lib/pure/os.nim
@@ -165,8 +165,12 @@ else: # UNIX-like operating system
     DynlibFormat* = when defined(macosx): "lib$1.dylib" else: "lib$1.so"
 
 when defined(posix):
-  var
-    pathMax {.importc: "PATH_MAX", header: "<stdlib.h>".}: cint
+  when NoFakeVars:
+    const pathMax = 5000 # doesn't matter really. The concept of PATH_MAX
+                         # doesn't work anymore on modern OSes.
+  else:
+    var
+      pathMax {.importc: "PATH_MAX", header: "<stdlib.h>".}: cint
 
 const
   ExtSep* = '.'
@@ -341,16 +345,6 @@ when defined(windows):
     template FindNextFile(a, b: expr): expr = FindNextFileW(a, b)
     template getCommandLine(): expr = getCommandLineW()
 
-    proc skipFindData(f: TWIN32_FIND_DATA): bool {.inline.} =
-      let 
-        nul = 0
-        dot = ord('.')
-      result = (f.cFilename[0].int == dot)
-      if result:
-        result = (f.cFilename[1].int in {dot, nul})
-        if result:
-          result = (f.cFilename[2].int == nul)
-
     template getFilename(f: expr): expr =
       $cast[WideCString](addr(f.cFilename[0]))
   else:
@@ -358,18 +352,13 @@ when defined(windows):
     template FindNextFile(a, b: expr): expr = FindNextFileA(a, b)
     template getCommandLine(): expr = getCommandLineA()
 
-    proc skipFindData(f: TWIN32_FIND_DATA): bool {.inline.} =
-      let 
-        nul = '\0'
-        dot = '.'
-      result = (f.cFilename[0] == dot)
-      if result:
-        result = (f.cFilename[1] in {dot, nul})
-        if result:
-          result = (f.cFilename[2] == nul)
-
     template getFilename(f: expr): expr = $f.cFilename
 
+  proc skipFindData(f: TWIN32_FIND_DATA): bool {.inline.} =
+    const dot = ord('.')
+    result = f.cFilename[0].int == dot and(f.cFilename[1].int == 0 or
+             f.cFilename[1].int == dot and f.cFilename[2].int == 0)
+
 proc existsFile*(filename: string): bool {.rtl, extern: "nos$1",
                                           tags: [FReadDir].} =
   ## Returns true if the file exists, false otherwise.
@@ -468,20 +457,21 @@ proc setCurrentDir*(newDir: string) {.inline, tags: [].} =
   ## `newDir` cannot been set.
   when defined(Windows):
     when useWinUnicode:
-      if SetCurrentDirectoryW(newWideCString(newDir)) == 0'i32: OSError(OSLastError())
+      if SetCurrentDirectoryW(newWideCString(newDir)) == 0'i32:
+        OSError(OSLastError())
     else:
       if SetCurrentDirectoryA(newDir) == 0'i32: OSError(OSLastError())
   else:
     if chdir(newDir) != 0'i32: OSError(OSLastError())
 
-proc JoinPath*(head, tail: string): string {.
+proc joinPath*(head, tail: string): string {.
   noSideEffect, rtl, extern: "nos$1".} =
   ## Joins two directory names to one.
   ##
   ## For example on Unix:
   ##
   ## .. code-block:: nimrod
-  ##   JoinPath("usr", "lib")
+  ##   joinPath("usr", "lib")
   ##
   ## results in:
   ##
@@ -495,10 +485,10 @@ proc JoinPath*(head, tail: string): string {.
   ## examples on Unix:
   ##
   ## .. code-block:: nimrod
-  ##   assert JoinPath("usr", "") == "usr/"
-  ##   assert JoinPath("", "lib") == "lib"
-  ##   assert JoinPath("", "/lib") == "/lib"
-  ##   assert JoinPath("usr/", "/lib") == "usr/lib"
+  ##   assert joinPath("usr", "") == "usr/"
+  ##   assert joinPath("", "lib") == "lib"
+  ##   assert joinPath("", "/lib") == "/lib"
+  ##   assert joinPath("usr/", "/lib") == "usr/lib"
   if len(head) == 0:
     result = tail
   elif head[len(head)-1] in {DirSep, AltSep}:
@@ -512,14 +502,14 @@ proc JoinPath*(head, tail: string): string {.
     else:
       result = head & DirSep & tail
 
-proc JoinPath*(parts: varargs[string]): string {.noSideEffect,
+proc joinPath*(parts: varargs[string]): string {.noSideEffect,
   rtl, extern: "nos$1OpenArray".} =
-  ## The same as `JoinPath(head, tail)`, but works with any number of directory
+  ## The same as `joinPath(head, tail)`, but works with any number of directory
   ## parts. You need to pass at least one element or the proc will assert in
   ## debug builds and crash on release builds.
   result = parts[0]
   for i in 1..high(parts):
-    result = JoinPath(result, parts[i])
+    result = joinPath(result, parts[i])
 
 proc `/` * (head, tail: string): string {.noSideEffect.} =
   ## The same as ``JoinPath(head, tail)``
@@ -531,9 +521,9 @@ proc `/` * (head, tail: string): string {.noSideEffect.} =
   ##   assert "" / "lib" == "lib"
   ##   assert "" / "/lib" == "/lib"
   ##   assert "usr/" / "/lib" == "usr/lib"
-  return JoinPath(head, tail)
+  return joinPath(head, tail)
 
-proc SplitPath*(path: string): tuple[head, tail: string] {.
+proc splitPath*(path: string): tuple[head, tail: string] {.
   noSideEffect, rtl, extern: "nos$1".} =
   ## Splits a directory into (head, tail), so that
   ## ``JoinPath(head, tail) == path``.
@@ -541,11 +531,11 @@ proc SplitPath*(path: string): tuple[head, tail: string] {.
   ## Examples:
   ##
   ## .. code-block:: nimrod
-  ##   SplitPath("usr/local/bin") -> ("usr/local", "bin")
-  ##   SplitPath("usr/local/bin/") -> ("usr/local/bin", "")
-  ##   SplitPath("bin") -> ("", "bin")
-  ##   SplitPath("/bin") -> ("", "bin")
-  ##   SplitPath("") -> ("", "")
+  ##   splitPath("usr/local/bin") -> ("usr/local", "bin")
+  ##   splitPath("usr/local/bin/") -> ("usr/local/bin", "")
+  ##   splitPath("bin") -> ("", "bin")
+  ##   splitPath("/bin") -> ("", "bin")
+  ##   splitPath("") -> ("", "")
   var sepPos = -1
   for i in countdown(len(path)-1, 0):
     if path[i] in {dirsep, altsep}:
@@ -699,7 +689,7 @@ proc expandFilename*(filename: string): string {.rtl, extern: "nos$1",
     if r.isNil: OSError(OSLastError())
     setlen(result, c_strlen(result))
 
-proc ChangeFileExt*(filename, ext: string): string {.
+proc changeFileExt*(filename, ext: string): string {.
   noSideEffect, rtl, extern: "nos$1".} =
   ## Changes the file extension to `ext`.
   ##
@@ -971,7 +961,10 @@ proc moveFile*(source, dest: string) {.rtl, extern: "nos$1",
     raise newException(EOS, $strerror(errno))
 
 when not defined(ENOENT) and not defined(Windows):
-  var ENOENT {.importc, header: "<errno.h>".}: cint
+  when NoFakeVars:
+    const ENOENT = cint(2) # 2 on most systems including Solaris
+  else:
+    var ENOENT {.importc, header: "<errno.h>".}: cint
 
 when defined(Windows):
   when useWinUnicode:
@@ -1028,7 +1021,7 @@ when defined(windows):
     proc strEnd(cstr: wideCString, c = 0'i32): wideCString {.
       importc: "wcschr", header: "<string.h>".}
   else:
-    proc strEnd(cstr: CString, c = 0'i32): CString {.
+    proc strEnd(cstr: cstring, c = 0'i32): cstring {.
       importc: "strchr", header: "<string.h>".}
 
   proc getEnvVarsC() =
@@ -1333,7 +1326,7 @@ proc copyDir*(source, dest: string) {.rtl, extern: "nos$1",
       copyFile(path, dest / noSource)
     of pcDir:
       copyDir(path, dest / noSource)
-    else: nil
+    else: discard
 
 proc parseCmdLine*(c: string): seq[string] {.
   noSideEffect, rtl, extern: "nos$1".} =
diff --git a/lib/pure/sockets.nim b/lib/pure/sockets.nim
index 66bb1e6a9..684def978 100644
--- a/lib/pure/sockets.nim
+++ b/lib/pure/sockets.nim
@@ -131,7 +131,7 @@ type
 
   ETimeout* = object of ESynch
 
-let
+const
   InvalidSocket*: TSocket = nil ## invalid socket
 
 when defined(windows):
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index fe71cb77b..e290226d2 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -93,7 +93,7 @@ proc normalize*(s: string): string {.noSideEffect, procvar,
   var j = 0
   for i in 0..len(s) - 1:
     if s[i] in {'A'..'Z'}:
-      result[j] = Chr(Ord(s[i]) + (Ord('a') - Ord('A')))
+      result[j] = chr(ord(s[i]) + (ord('a') - ord('A')))
       inc j
     elif s[i] != '_':
       result[j] = s[i]
@@ -1022,8 +1022,8 @@ proc editDistance*(a, b: string): int {.noSideEffect,
 
 # floating point formating:
 
-proc c_sprintf(buf, frmt: CString) {.nodecl, importc: "sprintf", varargs,
-                                     noSideEffect.}
+proc c_sprintf(buf, frmt: CString) {.header: "<stdio.h>", importc: "sprintf",
+                                     varargs, noSideEffect.}
 
 type
   TFloatFormat* = enum ## the different modes of floating point formating
diff --git a/lib/system.nim b/lib/system.nim
index 106eb04a3..14be9cc21 100644
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -186,7 +186,10 @@ proc `..`*[T](b: T): TSlice[T] {.noSideEffect, inline.} =
 when not defined(niminheritable):
   {.pragma: inheritable.}
 
-when not defined(JS) and not defined(NimrodVM):
+const NoFakeVars* = defined(NimrodVM) ## true if the backend doesn't support \
+  ## "fake variables" like 'var EBADF {.importc.}: cint'.
+
+when not defined(JS):
   type
     TGenericSeq {.compilerproc, pure, inheritable.} = object
       len, reserved: int
@@ -195,7 +198,8 @@ when not defined(JS) and not defined(NimrodVM):
     NimStringDesc {.compilerproc, final.} = object of TGenericSeq
       data: array[0..100_000_000, char]
     NimString = ptr NimStringDesc
-    
+
+when not defined(JS) and not defined(NimrodVM):
   template space(s: PGenericSeq): int {.dirty.} =
     s.reserved and not seqShallowFlag
 
diff --git a/lib/system/ansi_c.nim b/lib/system/ansi_c.nim
index 13e8496d2..398656d0a 100644
--- a/lib/system/ansi_c.nim
+++ b/lib/system/ansi_c.nim
@@ -40,21 +40,40 @@ var
 
 # constants faked as variables:
 when not defined(SIGINT):
-  var 
-    SIGINT {.importc: "SIGINT", nodecl.}: cint
-    SIGSEGV {.importc: "SIGSEGV", nodecl.}: cint
-    SIGABRT {.importc: "SIGABRT", nodecl.}: cint
-    SIGFPE {.importc: "SIGFPE", nodecl.}: cint
-    SIGILL {.importc: "SIGILL", nodecl.}: cint
+  when NoFakeVars:
+    when defined(windows):
+      const
+        SIGABRT = cint(22)
+        SIGFPE = cint(8)
+        SIGILL = cint(4)
+        SIGINT = cint(2)
+        SIGSEGV = cint(11)
+        SIGTERM = cint(15)
+    elif defined(macosx):
+      const
+        SIGABRT = cint(6)
+        SIGFPE = cint(8)
+        SIGILL = cint(4)
+        SIGINT = cint(2)
+        SIGSEGV = cint(11)
+        SIGTERM = cint(15)
+    else:
+      {.error: "SIGABRT not ported to your platform".}
+  else:
+    var
+      SIGINT {.importc: "SIGINT", nodecl.}: cint
+      SIGSEGV {.importc: "SIGSEGV", nodecl.}: cint
+      SIGABRT {.importc: "SIGABRT", nodecl.}: cint
+      SIGFPE {.importc: "SIGFPE", nodecl.}: cint
+      SIGILL {.importc: "SIGILL", nodecl.}: cint
 
 when defined(macosx):
-  var
-    SIGBUS {.importc: "SIGBUS", nodecl.}: cint
-      # hopefully this does not lead to new bugs
+  when NoFakeVars:
+    const SIGBUS = cint(10)
+  else:
+    var SIGBUS {.importc: "SIGBUS", nodecl.}: cint
 else:
-  var
-    SIGBUS {.importc: "SIGSEGV", nodecl.}: cint
-      # only Mac OS X has this shit
+  template SIGBUS: expr = SIGSEGV
 
 proc c_longjmp(jmpb: C_JmpBuf, retval: cint) {.
   header: "<setjmp.h>", importc: "longjmp".}
@@ -111,16 +130,22 @@ proc c_realloc(p: pointer, newsize: int): pointer {.
 
 when hostOS != "standalone":
   when not defined(errno):
-    var errno {.importc, header: "<errno.h>".}: cint ## error variable
+    when defined(NimrodVM):
+      var vmErrnoWrapper {.importc.}: ptr cint
+      template errno: expr = 
+        bind vmErrnoWrapper
+        vmErrnoWrapper[]
+    else:
+      var errno {.importc, header: "<errno.h>".}: cint ## error variable
 proc strerror(errnum: cint): cstring {.importc, header: "<string.h>".}
 
-proc c_remove(filename: CString): cint {.
+proc c_remove(filename: cstring): cint {.
   importc: "remove", header: "<stdio.h>".}
-proc c_rename(oldname, newname: CString): cint {.
+proc c_rename(oldname, newname: cstring): cint {.
   importc: "rename", header: "<stdio.h>".}
 
-proc c_system(cmd: CString): cint {.importc: "system", header: "<stdlib.h>".}
-proc c_getenv(env: CString): CString {.importc: "getenv", header: "<stdlib.h>".}
-proc c_putenv(env: CString): cint {.importc: "putenv", header: "<stdlib.h>".}
+proc c_system(cmd: cstring): cint {.importc: "system", header: "<stdlib.h>".}
+proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
+proc c_putenv(env: cstring): cint {.importc: "putenv", header: "<stdlib.h>".}
 
 {.pop}
diff --git a/lib/system/sysio.nim b/lib/system/sysio.nim
index a877f8b28..8d9400a7d 100644
--- a/lib/system/sysio.nim
+++ b/lib/system/sysio.nim
@@ -45,9 +45,21 @@ proc setvbuf(stream: TFile, buf: pointer, typ, size: cint): cint {.
 proc write(f: TFile, c: cstring) = fputs(c, f)
 {.pop.}
 
-var
-  IOFBF {.importc: "_IOFBF", nodecl.}: cint
-  IONBF {.importc: "_IONBF", nodecl.}: cint
+when NoFakeVars:
+  when defined(windows):
+    const
+      IOFBF = cint(0)
+      IONBF = cint(4)
+  elif defined(macosx):
+    const
+      IOFBF = cint(0)
+      IONBF = cint(2)
+  else:
+    {.error: "IOFBF not ported to your platform".}
+else:
+  var
+    IOFBF {.importc: "_IOFBF", nodecl.}: cint
+    IONBF {.importc: "_IONBF", nodecl.}: cint
 
 const
   buf_size = 4000
@@ -149,7 +161,7 @@ proc writeFile(filename, content: string) =
   finally:
     close(f)
 
-proc EndOfFile(f: TFile): bool =
+proc endOfFile(f: TFile): bool =
   # do not blame me; blame the ANSI C standard this is so brain-damaged
   var c = fgetc(f)
   ungetc(c, f)
@@ -194,9 +206,9 @@ const
     # should not be translated.
 
 
-proc Open(f: var TFile, filename: string,
+proc open(f: var TFile, filename: string,
           mode: TFileMode = fmRead,
-          bufSize: int = -1): Bool =
+          bufSize: int = -1): bool =
   var p: pointer = fopen(filename, FormatOpen[mode])
   result = (p != nil)
   f = cast[TFile](p)
@@ -223,10 +235,10 @@ proc fwrite(buf: Pointer, size, n: int, f: TFile): int {.
 proc readBuffer(f: TFile, buffer: pointer, len: int): int =
   result = fread(buffer, 1, len, f)
 
-proc ReadBytes(f: TFile, a: var openarray[int8], start, len: int): int =
+proc readBytes(f: TFile, a: var openarray[int8], start, len: int): int =
   result = readBuffer(f, addr(a[start]), len)
 
-proc ReadChars(f: TFile, a: var openarray[char], start, len: int): int =
+proc readChars(f: TFile, a: var openarray[char], start, len: int): int =
   result = readBuffer(f, addr(a[start]), len)
 
 {.push stackTrace:off, profiler:off.}
diff --git a/lib/system/widestrs.nim b/lib/system/widestrs.nim
index cf1f0910c..d856cc830 100644
--- a/lib/system/widestrs.nim
+++ b/lib/system/widestrs.nim
@@ -9,6 +9,9 @@
 
 ## Nimrod support for C/C++'s `wide strings`:idx:. This is part of the system
 ## module! Do not import it directly!
+
+when not defined(NimString):
+  {.error: "You must not import this module explicitly".}
 
 type
   TUtf16Char* = distinct int16
@@ -101,7 +104,8 @@ proc newWideCString*(s: cstring): WideCString =
   if s.isNil: return nil
 
   when not defined(c_strlen):
-    proc c_strlen(a: CString): int {.nodecl, noSideEffect, importc: "strlen".}
+    proc c_strlen(a: cstring): int {.
+      header: "<string.h>", noSideEffect, importc: "strlen".}
 
   let L = cstrlen(s)
   result = newWideCString(s, L)
diff --git a/lib/wrappers/libffi/common/ffi.h b/lib/wrappers/libffi/common/ffi.h
new file mode 100644
index 000000000..07d650eac
--- /dev/null
+++ b/lib/wrappers/libffi/common/ffi.h
@@ -0,0 +1,331 @@
+/* -----------------------------------------------------------------*-C-*-
+   libffi 2.00-beta - Copyright (c) 1996-2003  Red Hat, Inc.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+
+   ----------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------
+   The basic API is described in the README file.
+
+   The raw API is designed to bypass some of the argument packing
+   and unpacking on architectures for which it can be avoided.
+
+   The closure API allows interpreted functions to be packaged up
+   inside a C function pointer, so that they can be called as C functions,
+   with no understanding on the client side that they are interpreted.
+   It can also be used in other cases in which it is necessary to package
+   up a user specified parameter and a function pointer as a single
+   function pointer.
+
+   The closure API must be implemented in order to get its functionality,
+   e.g. for use by gij.  Routines are provided to emulate the raw API
+   if the underlying platform doesn't allow faster implementation.
+
+   More details on the raw and cloure API can be found in:
+
+   http://gcc.gnu.org/ml/java/1999-q3/msg00138.html
+
+   and
+
+   http://gcc.gnu.org/ml/java/1999-q3/msg00174.html
+   -------------------------------------------------------------------- */
+
+#ifndef LIBFFI_H
+#define LIBFFI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Specify which architecture libffi is configured for. */
+//XXX #define X86
+
+/* ---- System configuration information --------------------------------- */
+
+#include <ffitarget.h>
+
+#ifndef LIBFFI_ASM
+
+#include <stddef.h>
+#include <limits.h>
+
+/* LONG_LONG_MAX is not always defined (not if STRICT_ANSI, for example).
+   But we can find it either under the correct ANSI name, or under GNU
+   C's internal name.  */
+#ifdef LONG_LONG_MAX
+# define FFI_LONG_LONG_MAX LONG_LONG_MAX
+#else
+# ifdef LLONG_MAX
+#  define FFI_LONG_LONG_MAX LLONG_MAX
+# else
+#  ifdef __GNUC__
+#   define FFI_LONG_LONG_MAX __LONG_LONG_MAX__
+#  endif
+#  ifdef _MSC_VER
+#   define FFI_LONG_LONG_MAX _I64_MAX
+#  endif
+# endif
+#endif
+
+#if SCHAR_MAX == 127
+# define ffi_type_uchar                ffi_type_uint8
+# define ffi_type_schar                ffi_type_sint8
+#else
+ #error "char size not supported"
+#endif
+
+#if SHRT_MAX == 32767
+# define ffi_type_ushort       ffi_type_uint16
+# define ffi_type_sshort       ffi_type_sint16
+#elif SHRT_MAX == 2147483647
+# define ffi_type_ushort       ffi_type_uint32
+# define ffi_type_sshort       ffi_type_sint32
+#else
+ #error "short size not supported"
+#endif
+
+#if INT_MAX == 32767
+# define ffi_type_uint         ffi_type_uint16
+# define ffi_type_sint         ffi_type_sint16
+#elif INT_MAX == 2147483647
+# define ffi_type_uint         ffi_type_uint32
+# define ffi_type_sint         ffi_type_sint32
+#elif INT_MAX == 9223372036854775807
+# define ffi_type_uint         ffi_type_uint64
+# define ffi_type_sint         ffi_type_sint64
+#else
+ #error "int size not supported"
+#endif
+
+#define ffi_type_ulong         ffi_type_uint64
+#define ffi_type_slong         ffi_type_sint64
+#if LONG_MAX == 2147483647
+# if FFI_LONG_LONG_MAX != 9223372036854775807
+  #error "no 64-bit data type supported"
+# endif
+#elif LONG_MAX != 9223372036854775807
+ #error "long size not supported"
+#endif
+
+/* The closure code assumes that this works on pointers, i.e. a size_t	*/
+/* can hold a pointer.							*/
+
+typedef struct _ffi_type
+{
+  size_t size;
+  unsigned short alignment;
+  unsigned short type;
+  /*@null@*/ struct _ffi_type **elements;
+} ffi_type;
+
+/* These are defined in types.c */
+extern const ffi_type ffi_type_void;
+extern const ffi_type ffi_type_uint8;
+extern const ffi_type ffi_type_sint8;
+extern const ffi_type ffi_type_uint16;
+extern const ffi_type ffi_type_sint16;
+extern const ffi_type ffi_type_uint32;
+extern const ffi_type ffi_type_sint32;
+extern const ffi_type ffi_type_uint64;
+extern const ffi_type ffi_type_sint64;
+extern const ffi_type ffi_type_float;
+extern const ffi_type ffi_type_double;
+extern const ffi_type ffi_type_longdouble;
+extern const ffi_type ffi_type_pointer;
+
+
+typedef enum {
+  FFI_OK = 0,
+  FFI_BAD_TYPEDEF,
+  FFI_BAD_ABI 
+} ffi_status;
+
+typedef unsigned FFI_TYPE;
+
+typedef struct {
+  ffi_abi abi;
+  unsigned nargs;
+  /*@dependent@*/ ffi_type **arg_types;
+  /*@dependent@*/ ffi_type *rtype;
+  unsigned bytes;
+  unsigned flags;
+#ifdef FFI_EXTRA_CIF_FIELDS
+  FFI_EXTRA_CIF_FIELDS;
+#endif
+} ffi_cif;
+
+/* ---- Definitions for the raw API -------------------------------------- */
+
+#ifdef _WIN64
+#define FFI_SIZEOF_ARG 8
+#else
+#define FFI_SIZEOF_ARG 4
+#endif
+
+typedef union {
+  ffi_sarg  sint;
+  ffi_arg   uint;
+  float	    flt;
+  char      data[FFI_SIZEOF_ARG];
+  void*     ptr;
+} ffi_raw;
+
+void ffi_raw_call (/*@dependent@*/ ffi_cif *cif, 
+		   void (*fn)(), 
+		   /*@out@*/ void *rvalue, 
+		   /*@dependent@*/ ffi_raw *avalue);
+
+void ffi_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_raw *raw);
+void ffi_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args);
+size_t ffi_raw_size (ffi_cif *cif);
+
+/* This is analogous to the raw API, except it uses Java parameter	*/
+/* packing, even on 64-bit machines.  I.e. on 64-bit machines		*/
+/* longs and doubles are followed by an empty 64-bit word.		*/
+
+void ffi_java_raw_call (/*@dependent@*/ ffi_cif *cif, 
+		        void (*fn)(), 
+		        /*@out@*/ void *rvalue, 
+		        /*@dependent@*/ ffi_raw *avalue);
+
+void ffi_java_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_raw *raw);
+void ffi_java_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args);
+size_t ffi_java_raw_size (ffi_cif *cif);
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#if FFI_CLOSURES
+
+typedef struct {
+  char tramp[FFI_TRAMPOLINE_SIZE];
+  ffi_cif   *cif;
+  void     (*fun)(ffi_cif*,void*,void**,void*);
+  void      *user_data;
+} ffi_closure;
+
+void ffi_closure_free(void *);
+void *ffi_closure_alloc (size_t size, void **code);
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure*,
+		  ffi_cif *,
+		  void (*fun)(ffi_cif*,void*,void**,void*),
+		  void *user_data,
+		  void *codeloc);
+
+typedef struct {
+  char tramp[FFI_TRAMPOLINE_SIZE];
+
+  ffi_cif   *cif;
+
+#if !FFI_NATIVE_RAW_API
+
+  /* if this is enabled, then a raw closure has the same layout 
+     as a regular closure.  We use this to install an intermediate 
+     handler to do the transaltion, void** -> ffi_raw*. */
+
+  void     (*translate_args)(ffi_cif*,void*,void**,void*);
+  void      *this_closure;
+
+#endif
+
+  void     (*fun)(ffi_cif*,void*,ffi_raw*,void*);
+  void      *user_data;
+
+} ffi_raw_closure;
+
+ffi_status
+ffi_prep_raw_closure (ffi_raw_closure*,
+		      ffi_cif *cif,
+		      void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+		      void *user_data);
+
+ffi_status
+ffi_prep_java_raw_closure (ffi_raw_closure*,
+		           ffi_cif *cif,
+		           void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+		           void *user_data);
+
+#endif /* FFI_CLOSURES */
+
+/* ---- Public interface definition -------------------------------------- */
+
+ffi_status ffi_prep_cif(/*@out@*/ /*@partial@*/ ffi_cif *cif, 
+			ffi_abi abi,
+			unsigned int nargs, 
+			/*@dependent@*/ /*@out@*/ /*@partial@*/ ffi_type *rtype, 
+			/*@dependent@*/ ffi_type **atypes);
+
+void
+ffi_call(/*@dependent@*/ ffi_cif *cif, 
+	 void (*fn)(), 
+	 /*@out@*/ void *rvalue, 
+	 /*@dependent@*/ void **avalue);
+
+/* Useful for eliminating compiler warnings */
+#define FFI_FN(f) ((void (*)())f)
+
+/* ---- Definitions shared with assembly code ---------------------------- */
+
+#endif
+
+/* If these change, update src/mips/ffitarget.h. */
+#define FFI_TYPE_VOID       0    
+#define FFI_TYPE_INT        1
+#define FFI_TYPE_FLOAT      2    
+#define FFI_TYPE_DOUBLE     3
+#if 1
+#define FFI_TYPE_LONGDOUBLE 4
+#else
+#define FFI_TYPE_LONGDOUBLE FFI_TYPE_DOUBLE
+#endif
+#define FFI_TYPE_UINT8      5   
+#define FFI_TYPE_SINT8      6
+#define FFI_TYPE_UINT16     7 
+#define FFI_TYPE_SINT16     8
+#define FFI_TYPE_UINT32     9
+#define FFI_TYPE_SINT32     10
+#define FFI_TYPE_UINT64     11
+#define FFI_TYPE_SINT64     12
+#define FFI_TYPE_STRUCT     13
+#define FFI_TYPE_POINTER    14
+
+/* This should always refer to the last type code (for sanity checks) */
+#define FFI_TYPE_LAST       FFI_TYPE_POINTER
+
+#define FFI_HIDDEN /* no idea what the origial definition looks like ... */
+
+#ifdef __GNUC__
+#  define LIKELY(x) __builtin_expect(x, 1)
+#  define UNLIKELY(x) __builtin_expect(x, 0)
+#else
+#  define LIKELY(x) (x)
+#  define UNLIKELY(x) (x)
+#endif
+
+#define MAYBE_UNUSED
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/lib/wrappers/libffi/common/ffi_common.h b/lib/wrappers/libffi/common/ffi_common.h
new file mode 100644
index 000000000..43fb83b48
--- /dev/null
+++ b/lib/wrappers/libffi/common/ffi_common.h
@@ -0,0 +1,77 @@
+/* -----------------------------------------------------------------------
+   ffi_common.h - Copyright (c) 1996  Red Hat, Inc.
+
+   Common internal definitions and macros. Only necessary for building
+   libffi.
+   ----------------------------------------------------------------------- */
+
+#ifndef FFI_COMMON_H
+#define FFI_COMMON_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <fficonfig.h>
+#include <malloc.h>
+
+/* Check for the existence of memcpy. */
+#if STDC_HEADERS
+# include <string.h>
+#else
+# ifndef HAVE_MEMCPY
+#  define memcpy(d, s, n) bcopy ((s), (d), (n))
+# endif
+#endif
+
+#if defined(FFI_DEBUG) 
+#include <stdio.h>
+#endif
+
+#ifdef FFI_DEBUG
+/*@exits@*/ void ffi_assert(/*@temp@*/ char *expr, /*@temp@*/ char *file, int line);
+void ffi_stop_here(void);
+void ffi_type_test(/*@temp@*/ /*@out@*/ ffi_type *a, /*@temp@*/ char *file, int line);
+
+#define FFI_ASSERT(x) ((x) ? (void)0 : ffi_assert(#x, __FILE__,__LINE__))
+#define FFI_ASSERT_AT(x, f, l) ((x) ? 0 : ffi_assert(#x, (f), (l)))
+#define FFI_ASSERT_VALID_TYPE(x) ffi_type_test (x, __FILE__, __LINE__)
+#else
+#define FFI_ASSERT(x) 
+#define FFI_ASSERT_AT(x, f, l)
+#define FFI_ASSERT_VALID_TYPE(x)
+#endif
+
+#define ALIGN(v, a)  (((((size_t) (v))-1) | ((a)-1))+1)
+
+/* Perform machine dependent cif processing */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif);
+
+/* Extended cif, used in callback from assembly routine */
+typedef struct
+{
+  /*@dependent@*/ ffi_cif *cif;
+  /*@dependent@*/ void *rvalue;
+  /*@dependent@*/ void **avalue;
+} extended_cif;
+
+/* Terse sized type definitions.  */
+typedef unsigned int UINT8  __attribute__((__mode__(__QI__)));
+typedef signed int   SINT8  __attribute__((__mode__(__QI__)));
+typedef unsigned int UINT16 __attribute__((__mode__(__HI__)));
+typedef signed int   SINT16 __attribute__((__mode__(__HI__)));
+typedef unsigned int UINT32 __attribute__((__mode__(__SI__)));
+typedef signed int   SINT32 __attribute__((__mode__(__SI__)));
+typedef unsigned int UINT64 __attribute__((__mode__(__DI__)));
+typedef signed int   SINT64 __attribute__((__mode__(__DI__)));
+
+typedef float FLOAT32;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+
diff --git a/lib/wrappers/libffi/common/fficonfig.h b/lib/wrappers/libffi/common/fficonfig.h
new file mode 100644
index 000000000..c14f653ec
--- /dev/null
+++ b/lib/wrappers/libffi/common/fficonfig.h
@@ -0,0 +1,96 @@
+/* fficonfig.h.  Originally created by configure, now hand_maintained for MSVC. */
+
+/* fficonfig.h.  Generated automatically by configure.  */
+/* fficonfig.h.in.  Generated automatically from configure.in by autoheader.  */
+
+/* Define this for MSVC, but not for mingw32! */
+#ifdef _MSC_VER
+#define __attribute__(x) /* */
+#endif
+#define alloca _alloca
+
+/*----------------------------------------------------------------*/
+
+/* Define if using alloca.c.  */
+/* #undef C_ALLOCA */
+
+/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems.
+   This function is required for alloca.c support on those systems.  */
+/* #undef CRAY_STACKSEG_END */
+
+/* Define if you have alloca, as a function or macro.  */
+#define HAVE_ALLOCA 1
+
+/* Define if you have <alloca.h> and it should be used (not on Ultrix).  */
+/* #define HAVE_ALLOCA_H 1 */
+
+/* If using the C implementation of alloca, define if you know the
+   direction of stack growth for your system; otherwise it will be
+   automatically deduced at run-time.
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown
+ */
+/* #undef STACK_DIRECTION */
+
+/* Define if you have the ANSI C header files.  */
+#define STDC_HEADERS 1
+
+/* Define if you have the memcpy function.  */
+#define HAVE_MEMCPY 1
+
+/* Define if read-only mmap of a plain file works. */
+//#define HAVE_MMAP_FILE 1
+
+/* Define if mmap of /dev/zero works. */
+//#define HAVE_MMAP_DEV_ZERO 1
+
+/* Define if mmap with MAP_ANON(YMOUS) works. */
+//#define HAVE_MMAP_ANON 1
+
+/* The number of bytes in type double */
+#define SIZEOF_DOUBLE 8
+
+/* The number of bytes in type long double */
+#define SIZEOF_LONG_DOUBLE 12
+
+/* Define if you have the long double type and it is bigger than a double */
+#define HAVE_LONG_DOUBLE 1
+
+/* whether byteorder is bigendian */
+/* #undef WORDS_BIGENDIAN */
+
+/* Define if the host machine stores words of multi-word integers in
+   big-endian order. */
+/* #undef HOST_WORDS_BIG_ENDIAN */
+
+/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */
+#define BYTEORDER 1234
+
+/* Define if your assembler and linker support unaligned PC relative relocs. */
+/* #undef HAVE_AS_SPARC_UA_PCREL */
+
+/* Define if your assembler supports .register. */
+/* #undef HAVE_AS_REGISTER_PSEUDO_OP */
+
+/* Define if .eh_frame sections should be read-only. */
+/* #undef HAVE_RO_EH_FRAME */
+
+/* Define to the flags needed for the .section .eh_frame directive. */
+/* #define EH_FRAME_FLAGS "aw" */
+
+/* Define to the flags needed for the .section .eh_frame directive. */
+/* #define EH_FRAME_FLAGS "aw" */
+
+/* Define this if you want extra debugging. */
+/* #undef FFI_DEBUG */
+
+/* Define this is you do not want support for aggregate types. */
+/* #undef FFI_NO_STRUCTS */
+
+/* Define this is you do not want support for the raw API. */
+/* #undef FFI_NO_RAW_API */
+
+/* Define this if you are using Purify and want to suppress spurious messages. */
+/* #undef USING_PURIFY */
+
diff --git a/lib/wrappers/libffi/common/ffitarget.h b/lib/wrappers/libffi/common/ffitarget.h
new file mode 100644
index 000000000..d8d60f2e7
--- /dev/null
+++ b/lib/wrappers/libffi/common/ffitarget.h
@@ -0,0 +1,150 @@
+/* -----------------------------------------------------------------*-C-*-
+   ffitarget.h - Copyright (c) 2012  Anthony Green
+                 Copyright (c) 1996-2003, 2010  Red Hat, Inc.
+                 Copyright (C) 2008  Free Software Foundation, Inc.
+
+   Target configuration macros for x86 and x86-64.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+
+   ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source.  Use ffi.h instead."
+#endif
+
+/* ---- System specific configurations ----------------------------------- */
+
+/* For code common to all platforms on x86 and x86_64. */
+#define X86_ANY
+
+#if (defined(WIN32) || defined(_WIN32) || defined(__WIN32__))
+#  if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64)
+#    define X86_64
+#    define X86_WIN64
+#  else
+#    define X86_32
+#    define X86_WIN32
+#  endif
+#endif
+
+#if defined (X86_64) && defined (__i386__)
+#undef X86_64
+#define X86
+#endif
+
+#ifdef X86_WIN64
+#define FFI_SIZEOF_ARG 8
+#define USE_BUILTIN_FFS 0 /* not yet implemented in mingw-64 */
+#endif
+
+/* ---- Generic type definitions ----------------------------------------- */
+
+#ifndef LIBFFI_ASM
+#ifdef X86_WIN64
+#ifdef _MSC_VER
+typedef unsigned __int64       ffi_arg;
+typedef __int64                ffi_sarg;
+#else
+typedef unsigned long long     ffi_arg;
+typedef long long              ffi_sarg;
+#endif
+#else
+#if defined __x86_64__ && defined __ILP32__
+#define FFI_SIZEOF_ARG 8
+#define FFI_SIZEOF_JAVA_RAW  4
+typedef unsigned long long     ffi_arg;
+typedef long long              ffi_sarg;
+#else
+typedef unsigned long          ffi_arg;
+typedef signed long            ffi_sarg;
+#endif
+#endif
+
+typedef enum ffi_abi {
+  FFI_FIRST_ABI = 0,
+
+  /* ---- Intel x86 Win32 ---------- */
+#ifdef X86_WIN32
+  FFI_SYSV,
+  FFI_STDCALL,
+  FFI_THISCALL,
+  FFI_FASTCALL,
+  FFI_MS_CDECL,
+  FFI_LAST_ABI,
+#ifdef _MSC_VER
+  FFI_DEFAULT_ABI = FFI_MS_CDECL
+#else
+  FFI_DEFAULT_ABI = FFI_SYSV
+#endif
+
+#elif defined(X86_WIN64)
+  FFI_WIN64,
+  FFI_LAST_ABI,
+  FFI_DEFAULT_ABI = FFI_WIN64
+
+#else
+  /* ---- Intel x86 and AMD x86-64 - */
+  FFI_SYSV,
+  FFI_UNIX64,   /* Unix variants all use the same ABI for x86-64  */
+  FFI_LAST_ABI,
+#if defined(__i386__) || defined(__i386)
+  FFI_DEFAULT_ABI = FFI_SYSV
+#else
+  FFI_DEFAULT_ABI = FFI_UNIX64
+#endif
+#endif
+} ffi_abi;
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)
+#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
+#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
+#define FFI_TYPE_MS_STRUCT       (FFI_TYPE_LAST + 4)
+
+#if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
+#define FFI_TRAMPOLINE_SIZE 24
+#define FFI_NATIVE_RAW_API 0
+#else
+#ifdef X86_WIN32
+#define FFI_TRAMPOLINE_SIZE 52
+#else
+#ifdef X86_WIN64
+#define FFI_TRAMPOLINE_SIZE 29
+#define FFI_NATIVE_RAW_API 0
+#define FFI_NO_RAW_API 1
+#else
+#define FFI_TRAMPOLINE_SIZE 10
+#endif
+#endif
+#ifndef X86_WIN64
+#define FFI_NATIVE_RAW_API 1	/* x86 has native raw api support */
+#endif
+#endif
+
+#endif
+
diff --git a/lib/wrappers/libffi/common/malloc_closure.c b/lib/wrappers/libffi/common/malloc_closure.c
new file mode 100644
index 000000000..5b33aa4ca
--- /dev/null
+++ b/lib/wrappers/libffi/common/malloc_closure.c
@@ -0,0 +1,110 @@
+#include <ffi.h>
+#ifdef MS_WIN32
+#include <windows.h>
+#else
+#include <sys/mman.h>
+#include <unistd.h>
+# if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#  define MAP_ANONYMOUS MAP_ANON
+# endif
+#endif
+#include "ctypes.h"
+
+/* BLOCKSIZE can be adjusted.  Larger blocksize will take a larger memory
+   overhead, but allocate less blocks from the system.  It may be that some
+   systems have a limit of how many mmap'd blocks can be open.
+*/
+
+#define BLOCKSIZE _pagesize
+
+/* #define MALLOC_CLOSURE_DEBUG */ /* enable for some debugging output */
+
+/******************************************************************/
+
+typedef union _tagITEM {
+    ffi_closure closure;
+    union _tagITEM *next;
+} ITEM;
+
+static ITEM *free_list;
+static int _pagesize;
+
+static void more_core(void)
+{
+    ITEM *item;
+    int count, i;
+
+/* determine the pagesize */
+#ifdef MS_WIN32
+    if (!_pagesize) {
+        SYSTEM_INFO systeminfo;
+        GetSystemInfo(&systeminfo);
+        _pagesize = systeminfo.dwPageSize;
+    }
+#else
+    if (!_pagesize) {
+#ifdef _SC_PAGESIZE
+        _pagesize = sysconf(_SC_PAGESIZE);
+#else
+        _pagesize = getpagesize();
+#endif
+    }
+#endif
+
+    /* calculate the number of nodes to allocate */
+    count = BLOCKSIZE / sizeof(ITEM);
+
+    /* allocate a memory block */
+#ifdef MS_WIN32
+    item = (ITEM *)VirtualAlloc(NULL,
+                                           count * sizeof(ITEM),
+                                           MEM_COMMIT,
+                                           PAGE_EXECUTE_READWRITE);
+    if (item == NULL)
+        return;
+#else
+    item = (ITEM *)mmap(NULL,
+                        count * sizeof(ITEM),
+                        PROT_READ | PROT_WRITE | PROT_EXEC,
+                        MAP_PRIVATE | MAP_ANONYMOUS,
+                        -1,
+                        0);
+    if (item == (void *)MAP_FAILED)
+        return;
+#endif
+
+#ifdef MALLOC_CLOSURE_DEBUG
+    printf("block at %p allocated (%d bytes), %d ITEMs\n",
+           item, count * sizeof(ITEM), count);
+#endif
+    /* put them into the free list */
+    for (i = 0; i < count; ++i) {
+        item->next = free_list;
+        free_list = item;
+        ++item;
+    }
+}
+
+/******************************************************************/
+
+/* put the item back into the free list */
+void ffi_closure_free(void *p)
+{
+    ITEM *item = (ITEM *)p;
+    item->next = free_list;
+    free_list = item;
+}
+
+/* return one item from the free list, allocating more if needed */
+void *ffi_closure_alloc(size_t ignored, void** codeloc)
+{
+    ITEM *item;
+    if (!free_list)
+        more_core();
+    if (!free_list)
+        return NULL;
+    item = free_list;
+    free_list = item->next;
+    *codeloc = (void *)item;
+    return (void *)item;
+}
diff --git a/lib/wrappers/libffi/common/raw_api.c b/lib/wrappers/libffi/common/raw_api.c
new file mode 100644
index 000000000..ce21372e2
--- /dev/null
+++ b/lib/wrappers/libffi/common/raw_api.c
@@ -0,0 +1,254 @@
+/* -----------------------------------------------------------------------
+   raw_api.c - Copyright (c) 1999, 2008  Red Hat, Inc.
+
+   Author: Kresten Krab Thorup <krab@gnu.org>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+/* This file defines generic functions for use with the raw api. */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#if !FFI_NO_RAW_API
+
+size_t
+ffi_raw_size (ffi_cif *cif)
+{
+  size_t result = 0;
+  int i;
+
+  ffi_type **at = cif->arg_types;
+
+  for (i = cif->nargs-1; i >= 0; i--, at++)
+    {
+#if !FFI_NO_STRUCTS
+      if ((*at)->type == FFI_TYPE_STRUCT)
+	result += ALIGN (sizeof (void*), FFI_SIZEOF_ARG);
+      else
+#endif
+	result += ALIGN ((*at)->size, FFI_SIZEOF_ARG);
+    }
+
+  return result;
+}
+
+
+void
+ffi_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args)
+{
+  unsigned i;
+  ffi_type **tp = cif->arg_types;
+
+#if WORDS_BIGENDIAN
+
+  for (i = 0; i < cif->nargs; i++, tp++, args++)
+    {	  
+      switch ((*tp)->type)
+	{
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT8:
+	  *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 1);
+	  break;
+	  
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT16:
+	  *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 2);
+	  break;
+
+#if FFI_SIZEOF_ARG >= 4	  
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT32:
+	  *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 4);
+	  break;
+#endif
+	
+#if !FFI_NO_STRUCTS  
+	case FFI_TYPE_STRUCT:
+	  *args = (raw++)->ptr;
+	  break;
+#endif
+
+	case FFI_TYPE_POINTER:
+	  *args = (void*) &(raw++)->ptr;
+	  break;
+	  
+	default:
+	  *args = raw;
+	  raw += ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
+	}
+    }
+
+#else /* WORDS_BIGENDIAN */
+
+#if !PDP
+
+  /* then assume little endian */
+  for (i = 0; i < cif->nargs; i++, tp++, args++)
+    {	  
+#if !FFI_NO_STRUCTS
+      if ((*tp)->type == FFI_TYPE_STRUCT)
+	{
+	  *args = (raw++)->ptr;
+	}
+      else
+#endif
+	{
+	  *args = (void*) raw;
+	  raw += ALIGN ((*tp)->size, sizeof (void*)) / sizeof (void*);
+	}
+    }
+
+#else
+#error "pdp endian not supported"
+#endif /* ! PDP */
+
+#endif /* WORDS_BIGENDIAN */
+}
+
+void
+ffi_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_raw *raw)
+{
+  unsigned i;
+  ffi_type **tp = cif->arg_types;
+
+  for (i = 0; i < cif->nargs; i++, tp++, args++)
+    {	  
+      switch ((*tp)->type)
+	{
+	case FFI_TYPE_UINT8:
+	  (raw++)->uint = *(UINT8*) (*args);
+	  break;
+
+	case FFI_TYPE_SINT8:
+	  (raw++)->sint = *(SINT8*) (*args);
+	  break;
+
+	case FFI_TYPE_UINT16:
+	  (raw++)->uint = *(UINT16*) (*args);
+	  break;
+
+	case FFI_TYPE_SINT16:
+	  (raw++)->sint = *(SINT16*) (*args);
+	  break;
+
+#if FFI_SIZEOF_ARG >= 4
+	case FFI_TYPE_UINT32:
+	  (raw++)->uint = *(UINT32*) (*args);
+	  break;
+
+	case FFI_TYPE_SINT32:
+	  (raw++)->sint = *(SINT32*) (*args);
+	  break;
+#endif
+
+#if !FFI_NO_STRUCTS
+	case FFI_TYPE_STRUCT:
+	  (raw++)->ptr = *args;
+	  break;
+#endif
+
+	case FFI_TYPE_POINTER:
+	  (raw++)->ptr = **(void***) args;
+	  break;
+
+	default:
+	  memcpy ((void*) raw->data, (void*)*args, (*tp)->size);
+	  raw += ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
+	}
+    }
+}
+
+#if !FFI_NATIVE_RAW_API
+
+
+/* This is a generic definition of ffi_raw_call, to be used if the
+ * native system does not provide a machine-specific implementation.
+ * Having this, allows code to be written for the raw API, without
+ * the need for system-specific code to handle input in that format;
+ * these following couple of functions will handle the translation forth
+ * and back automatically. */
+
+void ffi_raw_call (ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *raw)
+{
+  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
+  ffi_raw_to_ptrarray (cif, raw, avalue);
+  ffi_call (cif, fn, rvalue, avalue);
+}
+
+#if FFI_CLOSURES		/* base system provides closures */
+
+static void
+ffi_translate_args (ffi_cif *cif, void *rvalue,
+		    void **avalue, void *user_data)
+{
+  ffi_raw *raw = (ffi_raw*)alloca (ffi_raw_size (cif));
+  ffi_raw_closure *cl = (ffi_raw_closure*)user_data;
+
+  ffi_ptrarray_to_raw (cif, avalue, raw);
+  (*cl->fun) (cif, rvalue, raw, cl->user_data);
+}
+
+ffi_status
+ffi_prep_raw_closure_loc (ffi_raw_closure* cl,
+			  ffi_cif *cif,
+			  void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+			  void *user_data,
+			  void *codeloc)
+{
+  ffi_status status;
+
+  status = ffi_prep_closure_loc ((ffi_closure*) cl,
+				 cif,
+				 &ffi_translate_args,
+				 codeloc,
+				 codeloc);
+  if (status == FFI_OK)
+    {
+      cl->fun       = fun;
+      cl->user_data = user_data;
+    }
+
+  return status;
+}
+
+#endif /* FFI_CLOSURES */
+#endif /* !FFI_NATIVE_RAW_API */
+
+#if FFI_CLOSURES
+
+/* Again, here is the generic version of ffi_prep_raw_closure, which
+ * will install an intermediate "hub" for translation of arguments from
+ * the pointer-array format, to the raw format */
+
+ffi_status
+ffi_prep_raw_closure (ffi_raw_closure* cl,
+		      ffi_cif *cif,
+		      void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+		      void *user_data)
+{
+  return ffi_prep_raw_closure_loc (cl, cif, fun, user_data, cl);
+}
+
+#endif /* FFI_CLOSURES */
+
+#endif /* !FFI_NO_RAW_API */
diff --git a/lib/wrappers/libffi/gcc/closures.c b/lib/wrappers/libffi/gcc/closures.c
new file mode 100644
index 000000000..c0ee06891
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/closures.c
@@ -0,0 +1,627 @@
+/* -----------------------------------------------------------------------
+   closures.c - Copyright (c) 2007, 2009, 2010  Red Hat, Inc.
+                Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc
+                Copyright (c) 2011 Plausible Labs Cooperative, Inc.
+
+   Code to allocate and deallocate memory for closures.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#if defined __linux__ && !defined _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#if !FFI_MMAP_EXEC_WRIT && !FFI_EXEC_TRAMPOLINE_TABLE
+# if __gnu_linux__
+/* This macro indicates it may be forbidden to map anonymous memory
+   with both write and execute permission.  Code compiled when this
+   option is defined will attempt to map such pages once, but if it
+   fails, it falls back to creating a temporary file in a writable and
+   executable filesystem and mapping pages from it into separate
+   locations in the virtual memory space, one location writable and
+   another executable.  */
+#  define FFI_MMAP_EXEC_WRIT 1
+#  define HAVE_MNTENT 1
+# endif
+# if defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)
+/* Windows systems may have Data Execution Protection (DEP) enabled, 
+   which requires the use of VirtualMalloc/VirtualFree to alloc/free
+   executable memory. */
+#  define FFI_MMAP_EXEC_WRIT 1
+# endif
+#endif
+
+#if FFI_MMAP_EXEC_WRIT && !defined FFI_MMAP_EXEC_SELINUX
+# ifdef __linux__
+/* When defined to 1 check for SELinux and if SELinux is active,
+   don't attempt PROT_EXEC|PROT_WRITE mapping at all, as that
+   might cause audit messages.  */
+#  define FFI_MMAP_EXEC_SELINUX 1
+# endif
+#endif
+
+#if FFI_CLOSURES
+
+# if FFI_EXEC_TRAMPOLINE_TABLE
+
+// Per-target implementation; It's unclear what can reasonable be shared
+// between two OS/architecture implementations.
+
+# elif FFI_MMAP_EXEC_WRIT /* !FFI_EXEC_TRAMPOLINE_TABLE */
+
+#define USE_LOCKS 1
+#define USE_DL_PREFIX 1
+#ifdef __GNUC__
+#ifndef USE_BUILTIN_FFS
+#define USE_BUILTIN_FFS 1
+#endif
+#endif
+
+/* We need to use mmap, not sbrk.  */
+#define HAVE_MORECORE 0
+
+/* We could, in theory, support mremap, but it wouldn't buy us anything.  */
+#define HAVE_MREMAP 0
+
+/* We have no use for this, so save some code and data.  */
+#define NO_MALLINFO 1
+
+/* We need all allocations to be in regular segments, otherwise we
+   lose track of the corresponding code address.  */
+#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
+
+/* Don't allocate more than a page unless needed.  */
+#define DEFAULT_GRANULARITY ((size_t)malloc_getpagesize)
+
+#if FFI_CLOSURE_TEST
+/* Don't release single pages, to avoid a worst-case scenario of
+   continuously allocating and releasing single pages, but release
+   pairs of pages, which should do just as well given that allocations
+   are likely to be small.  */
+#define DEFAULT_TRIM_THRESHOLD ((size_t)malloc_getpagesize)
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#ifndef _MSC_VER
+#include <unistd.h>
+#endif
+#include <string.h>
+#include <stdio.h>
+#if !defined(X86_WIN32) && !defined(X86_WIN64)
+#ifdef HAVE_MNTENT
+#include <mntent.h>
+#endif /* HAVE_MNTENT */
+#include <sys/param.h>
+#include <pthread.h>
+
+/* We don't want sys/mman.h to be included after we redefine mmap and
+   dlmunmap.  */
+#include <sys/mman.h>
+#define LACKS_SYS_MMAN_H 1
+
+#if FFI_MMAP_EXEC_SELINUX
+#include <sys/statfs.h>
+#include <stdlib.h>
+
+static int selinux_enabled = -1;
+
+static int
+selinux_enabled_check (void)
+{
+  struct statfs sfs;
+  FILE *f;
+  char *buf = NULL;
+  size_t len = 0;
+
+  if (statfs ("/selinux", &sfs) >= 0
+      && (unsigned int) sfs.f_type == 0xf97cff8cU)
+    return 1;
+  f = fopen ("/proc/mounts", "r");
+  if (f == NULL)
+    return 0;
+  while (getline (&buf, &len, f) >= 0)
+    {
+      char *p = strchr (buf, ' ');
+      if (p == NULL)
+        break;
+      p = strchr (p + 1, ' ');
+      if (p == NULL)
+        break;
+      if (strncmp (p + 1, "selinuxfs ", 10) == 0)
+        {
+          free (buf);
+          fclose (f);
+          return 1;
+        }
+    }
+  free (buf);
+  fclose (f);
+  return 0;
+}
+
+#define is_selinux_enabled() (selinux_enabled >= 0 ? selinux_enabled \
+			      : (selinux_enabled = selinux_enabled_check ()))
+
+#else
+
+#define is_selinux_enabled() 0
+
+#endif /* !FFI_MMAP_EXEC_SELINUX */
+
+/* On PaX enable kernels that have MPROTECT enable we can't use PROT_EXEC. */
+#ifdef FFI_MMAP_EXEC_EMUTRAMP_PAX
+#include <stdlib.h>
+
+static int emutramp_enabled = -1;
+
+static int
+emutramp_enabled_check (void)
+{
+  if (getenv ("FFI_DISABLE_EMUTRAMP") == NULL)
+    return 1;
+  else
+    return 0;
+}
+
+#define is_emutramp_enabled() (emutramp_enabled >= 0 ? emutramp_enabled \
+                               : (emutramp_enabled = emutramp_enabled_check ()))
+#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */
+
+#elif defined (__CYGWIN__) || defined(__INTERIX)
+
+#include <sys/mman.h>
+
+/* Cygwin is Linux-like, but not quite that Linux-like.  */
+#define is_selinux_enabled() 0
+
+#endif /* !defined(X86_WIN32) && !defined(X86_WIN64) */
+
+#ifndef FFI_MMAP_EXEC_EMUTRAMP_PAX
+#define is_emutramp_enabled() 0
+#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */
+
+#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX)
+/* Use these for mmap and munmap within dlmalloc.c.  */
+static void *dlmmap(void *, size_t, int, int, int, off_t);
+static int dlmunmap(void *, size_t);
+#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */
+
+#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX)
+
+/* A mutex used to synchronize access to *exec* variables in this file.  */
+static pthread_mutex_t open_temp_exec_file_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* A file descriptor of a temporary file from which we'll map
+   executable pages.  */
+static int execfd = -1;
+
+/* The amount of space already allocated from the temporary file.  */
+static size_t execsize = 0;
+
+/* Open a temporary file name, and immediately unlink it.  */
+static int
+open_temp_exec_file_name (char *name)
+{
+  int fd = mkstemp (name);
+
+  if (fd != -1)
+    unlink (name);
+
+  return fd;
+}
+
+/* Open a temporary file in the named directory.  */
+static int
+open_temp_exec_file_dir (const char *dir)
+{
+  static const char suffix[] = "/ffiXXXXXX";
+  int lendir = strlen (dir);
+  char *tempname = __builtin_alloca (lendir + sizeof (suffix));
+
+  if (!tempname)
+    return -1;
+
+  memcpy (tempname, dir, lendir);
+  memcpy (tempname + lendir, suffix, sizeof (suffix));
+
+  return open_temp_exec_file_name (tempname);
+}
+
+/* Open a temporary file in the directory in the named environment
+   variable.  */
+static int
+open_temp_exec_file_env (const char *envvar)
+{
+  const char *value = getenv (envvar);
+
+  if (!value)
+    return -1;
+
+  return open_temp_exec_file_dir (value);
+}
+
+#ifdef HAVE_MNTENT
+/* Open a temporary file in an executable and writable mount point
+   listed in the mounts file.  Subsequent calls with the same mounts
+   keep searching for mount points in the same file.  Providing NULL
+   as the mounts file closes the file.  */
+static int
+open_temp_exec_file_mnt (const char *mounts)
+{
+  static const char *last_mounts;
+  static FILE *last_mntent;
+
+  if (mounts != last_mounts)
+    {
+      if (last_mntent)
+	endmntent (last_mntent);
+
+      last_mounts = mounts;
+
+      if (mounts)
+	last_mntent = setmntent (mounts, "r");
+      else
+	last_mntent = NULL;
+    }
+
+  if (!last_mntent)
+    return -1;
+
+  for (;;)
+    {
+      int fd;
+      struct mntent mnt;
+      char buf[MAXPATHLEN * 3];
+
+      if (getmntent_r (last_mntent, &mnt, buf, sizeof (buf)) == NULL)
+	return -1;
+
+      if (hasmntopt (&mnt, "ro")
+	  || hasmntopt (&mnt, "noexec")
+	  || access (mnt.mnt_dir, W_OK))
+	continue;
+
+      fd = open_temp_exec_file_dir (mnt.mnt_dir);
+
+      if (fd != -1)
+	return fd;
+    }
+}
+#endif /* HAVE_MNTENT */
+
+/* Instructions to look for a location to hold a temporary file that
+   can be mapped in for execution.  */
+static struct
+{
+  int (*func)(const char *);
+  const char *arg;
+  int repeat;
+} open_temp_exec_file_opts[] = {
+  { open_temp_exec_file_env, "TMPDIR", 0 },
+  { open_temp_exec_file_dir, "/tmp", 0 },
+  { open_temp_exec_file_dir, "/var/tmp", 0 },
+  { open_temp_exec_file_dir, "/dev/shm", 0 },
+  { open_temp_exec_file_env, "HOME", 0 },
+#ifdef HAVE_MNTENT
+  { open_temp_exec_file_mnt, "/etc/mtab", 1 },
+  { open_temp_exec_file_mnt, "/proc/mounts", 1 },
+#endif /* HAVE_MNTENT */
+};
+
+/* Current index into open_temp_exec_file_opts.  */
+static int open_temp_exec_file_opts_idx = 0;
+
+/* Reset a current multi-call func, then advances to the next entry.
+   If we're at the last, go back to the first and return nonzero,
+   otherwise return zero.  */
+static int
+open_temp_exec_file_opts_next (void)
+{
+  if (open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat)
+    open_temp_exec_file_opts[open_temp_exec_file_opts_idx].func (NULL);
+
+  open_temp_exec_file_opts_idx++;
+  if (open_temp_exec_file_opts_idx
+      == (sizeof (open_temp_exec_file_opts)
+	  / sizeof (*open_temp_exec_file_opts)))
+    {
+      open_temp_exec_file_opts_idx = 0;
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Return a file descriptor of a temporary zero-sized file in a
+   writable and exexutable filesystem.  */
+static int
+open_temp_exec_file (void)
+{
+  int fd;
+
+  do
+    {
+      fd = open_temp_exec_file_opts[open_temp_exec_file_opts_idx].func
+	(open_temp_exec_file_opts[open_temp_exec_file_opts_idx].arg);
+
+      if (!open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat
+	  || fd == -1)
+	{
+	  if (open_temp_exec_file_opts_next ())
+	    break;
+	}
+    }
+  while (fd == -1);
+
+  return fd;
+}
+
+/* Map in a chunk of memory from the temporary exec file into separate
+   locations in the virtual memory address space, one writable and one
+   executable.  Returns the address of the writable portion, after
+   storing an offset to the corresponding executable portion at the
+   last word of the requested chunk.  */
+static void *
+dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset)
+{
+  void *ptr;
+
+  if (execfd == -1)
+    {
+      open_temp_exec_file_opts_idx = 0;
+    retry_open:
+      execfd = open_temp_exec_file ();
+      if (execfd == -1)
+	return MFAIL;
+    }
+
+  offset = execsize;
+
+  if (ftruncate (execfd, offset + length))
+    return MFAIL;
+
+  flags &= ~(MAP_PRIVATE | MAP_ANONYMOUS);
+  flags |= MAP_SHARED;
+
+  ptr = mmap (NULL, length, (prot & ~PROT_WRITE) | PROT_EXEC,
+	      flags, execfd, offset);
+  if (ptr == MFAIL)
+    {
+      if (!offset)
+	{
+	  close (execfd);
+	  goto retry_open;
+	}
+      ftruncate (execfd, offset);
+      return MFAIL;
+    }
+  else if (!offset
+	   && open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat)
+    open_temp_exec_file_opts_next ();
+
+  start = mmap (start, length, prot, flags, execfd, offset);
+
+  if (start == MFAIL)
+    {
+      munmap (ptr, length);
+      ftruncate (execfd, offset);
+      return start;
+    }
+
+  mmap_exec_offset ((char *)start, length) = (char*)ptr - (char*)start;
+
+  execsize += length;
+
+  return start;
+}
+
+/* Map in a writable and executable chunk of memory if possible.
+   Failing that, fall back to dlmmap_locked.  */
+static void *
+dlmmap (void *start, size_t length, int prot,
+	int flags, int fd, off_t offset)
+{
+  void *ptr;
+
+  assert (start == NULL && length % malloc_getpagesize == 0
+	  && prot == (PROT_READ | PROT_WRITE)
+	  && flags == (MAP_PRIVATE | MAP_ANONYMOUS)
+	  && fd == -1 && offset == 0);
+
+#if FFI_CLOSURE_TEST
+  printf ("mapping in %zi\n", length);
+#endif
+
+  if (execfd == -1 && is_emutramp_enabled ())
+    {
+      ptr = mmap (start, length, prot & ~PROT_EXEC, flags, fd, offset);
+      return ptr;
+    }
+
+  if (execfd == -1 && !is_selinux_enabled ())
+    {
+      ptr = mmap (start, length, prot | PROT_EXEC, flags, fd, offset);
+
+      if (ptr != MFAIL || (errno != EPERM && errno != EACCES))
+	/* Cool, no need to mess with separate segments.  */
+	return ptr;
+
+      /* If MREMAP_DUP is ever introduced and implemented, try mmap
+	 with ((prot & ~PROT_WRITE) | PROT_EXEC) and mremap with
+	 MREMAP_DUP and prot at this point.  */
+    }
+
+  if (execsize == 0 || execfd == -1)
+    {
+      pthread_mutex_lock (&open_temp_exec_file_mutex);
+      ptr = dlmmap_locked (start, length, prot, flags, offset);
+      pthread_mutex_unlock (&open_temp_exec_file_mutex);
+
+      return ptr;
+    }
+
+  return dlmmap_locked (start, length, prot, flags, offset);
+}
+
+/* Release memory at the given address, as well as the corresponding
+   executable page if it's separate.  */
+static int
+dlmunmap (void *start, size_t length)
+{
+  /* We don't bother decreasing execsize or truncating the file, since
+     we can't quite tell whether we're unmapping the end of the file.
+     We don't expect frequent deallocation anyway.  If we did, we
+     could locate pages in the file by writing to the pages being
+     deallocated and checking that the file contents change.
+     Yuck.  */
+  msegmentptr seg = segment_holding (gm, start);
+  void *code;
+
+#if FFI_CLOSURE_TEST
+  printf ("unmapping %zi\n", length);
+#endif
+
+  if (seg && (code = add_segment_exec_offset (start, seg)) != start)
+    {
+      int ret = munmap (code, length);
+      if (ret)
+	return ret;
+    }
+
+  return munmap (start, length);
+}
+
+#if FFI_CLOSURE_FREE_CODE
+/* Return segment holding given code address.  */
+static msegmentptr
+segment_holding_code (mstate m, char* addr)
+{
+  msegmentptr sp = &m->seg;
+  for (;;) {
+    if (addr >= add_segment_exec_offset (sp->base, sp)
+	&& addr < add_segment_exec_offset (sp->base, sp) + sp->size)
+      return sp;
+    if ((sp = sp->next) == 0)
+      return 0;
+  }
+}
+#endif
+
+#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */
+
+/* Allocate a chunk of memory with the given size.  Returns a pointer
+   to the writable address, and sets *CODE to the executable
+   corresponding virtual address.  */
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+  *code = malloc(size);
+  return *code;
+#if 0
+  void *ptr;
+
+  if (!code)
+    return NULL;
+
+  ptr = dlmalloc (size);
+
+  if (ptr)
+    {
+      msegmentptr seg = segment_holding (gm, ptr);
+
+      *code = add_segment_exec_offset (ptr, seg);
+    }
+
+  return ptr;
+#endif
+}
+
+/* Release a chunk of memory allocated with ffi_closure_alloc.  If
+   FFI_CLOSURE_FREE_CODE is nonzero, the given address can be the
+   writable or the executable address given.  Otherwise, only the
+   writable address can be provided here.  */
+void
+ffi_closure_free (void *ptr)
+{
+#if 0
+#if FFI_CLOSURE_FREE_CODE
+  msegmentptr seg = segment_holding_code(gm, ptr);
+
+  if (seg)
+    ptr = sub_segment_exec_offset(ptr, seg);
+#endif
+
+  dlfree(ptr);
+#endif
+  free(ptr);
+}
+
+
+#if FFI_CLOSURE_TEST
+/* Do some internal sanity testing to make sure allocation and
+   deallocation of pages are working as intended.  */
+int main ()
+{
+  void *p[3];
+#define GET(idx, len) do { p[idx] = dlmalloc (len); printf ("allocated %zi for p[%i]\n", (len), (idx)); } while (0)
+#define PUT(idx) do { printf ("freeing p[%i]\n", (idx)); dlfree (p[idx]); } while (0)
+  GET (0, malloc_getpagesize / 2);
+  GET (1, 2 * malloc_getpagesize - 64 * sizeof (void*));
+  PUT (1);
+  GET (1, 2 * malloc_getpagesize);
+  GET (2, malloc_getpagesize / 2);
+  PUT (1);
+  PUT (0);
+  PUT (2);
+  return 0;
+}
+#endif /* FFI_CLOSURE_TEST */
+# else /* ! FFI_MMAP_EXEC_WRIT */
+
+/* On many systems, memory returned by malloc is writable and
+   executable, so just use it.  */
+
+#include <stdlib.h>
+
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+  if (!code)
+    return NULL;
+
+  return *code = malloc (size);
+}
+
+void
+ffi_closure_free (void *ptr)
+{
+  free (ptr);
+}
+
+# endif /* ! FFI_MMAP_EXEC_WRIT */
+#endif /* FFI_CLOSURES */
diff --git a/lib/wrappers/libffi/gcc/ffi.c b/lib/wrappers/libffi/gcc/ffi.c
new file mode 100644
index 000000000..0600414d4
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/ffi.c
@@ -0,0 +1,841 @@
+/* -----------------------------------------------------------------------
+   ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008  Red Hat, Inc.
+           Copyright (c) 2002  Ranjit Mathew
+           Copyright (c) 2002  Bo Thorsen
+           Copyright (c) 2002  Roger Sayle
+           Copyright (C) 2008, 2010  Free Software Foundation, Inc.
+
+   x86 Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#if !defined(__x86_64__) || defined(_WIN64)
+
+#ifdef _WIN64
+#include <windows.h>
+#endif
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+/* ffi_prep_args is called by the assembly routine once stack space
+   has been allocated for the function's arguments */
+
+void ffi_prep_args(char *stack, extended_cif *ecif)
+{
+  register unsigned int i;
+  register void **p_argv;
+  register char *argp;
+  register ffi_type **p_arg;
+#ifdef X86_WIN32
+  size_t p_stack_args[2];
+  void *p_stack_data[2];
+  char *argp2 = stack;
+  int stack_args_count = 0;
+  int cabi = ecif->cif->abi;
+#endif
+
+  argp = stack;
+
+  if ((ecif->cif->flags == FFI_TYPE_STRUCT
+       || ecif->cif->flags == FFI_TYPE_MS_STRUCT)
+#ifdef X86_WIN64
+      && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2
+          && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8)
+#endif
+      )
+    {
+      *(void **) argp = ecif->rvalue;
+#ifdef X86_WIN32
+      /* For fastcall/thiscall this is first register-passed
+         argument.  */
+      if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL)
+	{
+	  p_stack_args[stack_args_count] = sizeof (void*);
+	  p_stack_data[stack_args_count] = argp;
+	  ++stack_args_count;
+	}
+#endif
+      argp += sizeof(void*);
+    }
+
+  p_argv = ecif->avalue;
+
+  for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
+       i != 0;
+       i--, p_arg++)
+    {
+      size_t z;
+
+      /* Align if necessary */
+      if ((sizeof(void*) - 1) & (size_t) argp)
+        argp = (char *) ALIGN(argp, sizeof(void*));
+
+      z = (*p_arg)->size;
+#ifdef X86_WIN64
+      if (z > sizeof(ffi_arg)
+          || ((*p_arg)->type == FFI_TYPE_STRUCT
+              && (z != 1 && z != 2 && z != 4 && z != 8))
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+          || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
+#endif
+          )
+        {
+          z = sizeof(ffi_arg);
+          *(void **)argp = *p_argv;
+        }
+      else if ((*p_arg)->type == FFI_TYPE_FLOAT)
+        {
+          memcpy(argp, *p_argv, z);
+        }
+      else
+#endif
+      if (z < sizeof(ffi_arg))
+        {
+          z = sizeof(ffi_arg);
+          switch ((*p_arg)->type)
+            {
+            case FFI_TYPE_SINT8:
+              *(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_UINT8:
+              *(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_SINT16:
+              *(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_UINT16:
+              *(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_SINT32:
+              *(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_UINT32:
+              *(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_STRUCT:
+              *(ffi_arg *) argp = *(ffi_arg *)(* p_argv);
+              break;
+
+            default:
+              FFI_ASSERT(0);
+            }
+        }
+      else
+        {
+          memcpy(argp, *p_argv, z);
+        }
+
+#ifdef X86_WIN32
+    /* For thiscall/fastcall convention register-passed arguments
+       are the first two none-floating-point arguments with a size
+       smaller or equal to sizeof (void*).  */
+    if ((cabi == FFI_THISCALL && stack_args_count < 1)
+        || (cabi == FFI_FASTCALL && stack_args_count < 2))
+      {
+	if (z <= 4
+	    && ((*p_arg)->type != FFI_TYPE_FLOAT
+	        && (*p_arg)->type != FFI_TYPE_STRUCT))
+	  {
+	    p_stack_args[stack_args_count] = z;
+	    p_stack_data[stack_args_count] = argp;
+	    ++stack_args_count;
+	  }
+      }
+#endif
+      p_argv++;
+#ifdef X86_WIN64
+      argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+#else
+      argp += z;
+#endif
+    }
+
+#ifdef X86_WIN32
+  /* We need to move the register-passed arguments for thiscall/fastcall
+     on top of stack, so that those can be moved to registers ecx/edx by
+     call-handler.  */
+  if (stack_args_count > 0)
+    {
+      size_t zz = (p_stack_args[0] + 3) & ~3;
+      char *h;
+
+      /* Move first argument to top-stack position.  */
+      if (p_stack_data[0] != argp2)
+	{
+	  h = alloca (zz + 1);
+	  memcpy (h, p_stack_data[0], zz);
+	  memmove (argp2 + zz, argp2,
+	           (size_t) ((char *) p_stack_data[0] - (char*)argp2));
+	  memcpy (argp2, h, zz);
+	}
+
+      argp2 += zz;
+      --stack_args_count;
+      if (zz > 4)
+	stack_args_count = 0;
+
+      /* If we have a second argument, then move it on top
+         after the first one.  */
+      if (stack_args_count > 0 && p_stack_data[1] != argp2)
+	{
+	  zz = p_stack_args[1];
+	  zz = (zz + 3) & ~3;
+	  h = alloca (zz + 1);
+	  h = alloca (zz + 1);
+	  memcpy (h, p_stack_data[1], zz);
+	  memmove (argp2 + zz, argp2, (size_t) ((char*) p_stack_data[1] - (char*)argp2));
+	  memcpy (argp2, h, zz);
+	}
+    }
+#endif
+  return;
+}
+
+/* Perform machine dependent cif processing */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+  unsigned int i;
+  ffi_type **ptr;
+
+  /* Set the return type flag */
+  switch (cif->rtype->type)
+    {
+    case FFI_TYPE_VOID:
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_SINT16:
+#ifdef X86_WIN64
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_SINT32:
+#endif
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_DOUBLE:
+#ifndef X86_WIN64
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+    case FFI_TYPE_LONGDOUBLE:
+#endif
+#endif
+      cif->flags = (unsigned) cif->rtype->type;
+      break;
+
+    case FFI_TYPE_UINT64:
+#ifdef X86_WIN64
+    case FFI_TYPE_POINTER:
+#endif
+      cif->flags = FFI_TYPE_SINT64;
+      break;
+
+    case FFI_TYPE_STRUCT:
+#ifndef X86
+      if (cif->rtype->size == 1)
+        {
+          cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */
+        }
+      else if (cif->rtype->size == 2)
+        {
+          cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */
+        }
+      else if (cif->rtype->size == 4)
+        {
+#ifdef X86_WIN64
+          cif->flags = FFI_TYPE_SMALL_STRUCT_4B;
+#else
+          cif->flags = FFI_TYPE_INT; /* same as int type */
+#endif
+        }
+      else if (cif->rtype->size == 8)
+        {
+          cif->flags = FFI_TYPE_SINT64; /* same as int64 type */
+        }
+      else
+#endif
+        {
+#ifdef X86_WIN32
+          if (cif->abi == FFI_MS_CDECL)
+            cif->flags = FFI_TYPE_MS_STRUCT;
+          else
+#endif
+            cif->flags = FFI_TYPE_STRUCT;
+          /* allocate space for return value pointer */
+          cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG);
+        }
+      break;
+
+    default:
+#ifdef X86_WIN64
+      cif->flags = FFI_TYPE_SINT64;
+      break;
+    case FFI_TYPE_INT:
+      cif->flags = FFI_TYPE_SINT32;
+#else
+      cif->flags = FFI_TYPE_INT;
+#endif
+      break;
+    }
+
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+    {
+      if (((*ptr)->alignment - 1) & cif->bytes)
+        cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment);
+      cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG);
+    }
+
+#ifdef X86_WIN64
+  /* ensure space for storing four registers */
+  cif->bytes += 4 * sizeof(ffi_arg);
+#endif
+
+  cif->bytes = (cif->bytes + 15) & ~0xF;
+
+  return FFI_OK;
+}
+
+#ifdef X86_WIN64
+extern int
+ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *,
+               unsigned, unsigned, unsigned *, void (*fn)(void));
+#elif defined(X86_WIN32)
+extern void
+ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *,
+               unsigned, unsigned, unsigned, unsigned *, void (*fn)(void));
+#else
+extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
+                          unsigned, unsigned, unsigned *, void (*fn)(void));
+#endif
+
+void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  extended_cif ecif;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+  
+  /* If the return value is a struct and we don't have a return */
+  /* value address then we need to make one                     */
+
+#ifdef X86_WIN64
+  if (rvalue == NULL
+      && cif->flags == FFI_TYPE_STRUCT
+      && cif->rtype->size != 1 && cif->rtype->size != 2
+      && cif->rtype->size != 4 && cif->rtype->size != 8)
+    {
+      ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF);
+    }
+#else
+  if (rvalue == NULL
+      && (cif->flags == FFI_TYPE_STRUCT
+          || cif->flags == FFI_TYPE_MS_STRUCT))
+    {
+      ecif.rvalue = alloca(cif->rtype->size);
+    }
+#endif
+  else
+    ecif.rvalue = rvalue;
+    
+  
+  switch (cif->abi) 
+    {
+#ifdef X86_WIN64
+    case FFI_WIN64:
+      ffi_call_win64(ffi_prep_args, &ecif, cif->bytes,
+                     cif->flags, ecif.rvalue, fn);
+      break;
+#elif defined(X86_WIN32)
+    case FFI_SYSV:
+    case FFI_STDCALL:
+    case FFI_MS_CDECL:
+      ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
+		     ecif.rvalue, fn);
+      break;
+    case FFI_THISCALL:
+    case FFI_FASTCALL:
+      {
+	unsigned int abi = cif->abi;
+	unsigned int i, passed_regs = 0;
+
+	if (cif->flags == FFI_TYPE_STRUCT)
+	  ++passed_regs;
+
+	for (i=0; i < cif->nargs && passed_regs < 2;i++)
+	  {
+	    size_t sz;
+
+	    if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
+	        || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
+	      continue;
+	    sz = (cif->arg_types[i]->size + 3) & ~3;
+	    if (sz == 0 || sz > 4)
+	      continue;
+	    ++passed_regs;
+	  }
+	if (passed_regs < 2 && abi == FFI_FASTCALL)
+	  abi = FFI_THISCALL;
+	if (passed_regs < 1 && abi == FFI_THISCALL)
+	  abi = FFI_STDCALL;
+        ffi_call_win32(ffi_prep_args, &ecif, abi, cif->bytes, cif->flags,
+                       ecif.rvalue, fn);
+      }
+      break;
+#else
+    case FFI_SYSV:
+      ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue,
+                    fn);
+      break;
+#endif
+    default:
+      FFI_ASSERT(0);
+      break;
+    }
+}
+
+
+/** private members **/
+
+/* The following __attribute__((regparm(1))) decorations will have no effect
+   on MSVC or SUNPRO_C -- standard conventions apply. */
+static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
+                                         void** args, ffi_cif* cif);
+void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
+     __attribute__ ((regparm(1)));
+unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
+     __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
+     __attribute__ ((regparm(1)));
+#ifdef X86_WIN32
+void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *)
+     __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *)
+     __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *)
+     __attribute__ ((regparm(1)));
+#endif
+#ifdef X86_WIN64
+void FFI_HIDDEN ffi_closure_win64 (ffi_closure *);
+#endif
+
+/* This function is jumped to by the trampoline */
+
+#ifdef X86_WIN64
+void * FFI_HIDDEN
+ffi_closure_win64_inner (ffi_closure *closure, void *args) {
+  ffi_cif       *cif;
+  void         **arg_area;
+  void          *result;
+  void          *resp = &result;
+
+  cif         = closure->cif;
+  arg_area    = (void**) alloca (cif->nargs * sizeof (void*));  
+
+  /* this call will initialize ARG_AREA, such that each
+   * element in that array points to the corresponding 
+   * value on the stack; and if the function returns
+   * a structure, it will change RESP to point to the
+   * structure return address.  */
+
+  ffi_prep_incoming_args_SYSV(args, &resp, arg_area, cif);
+  
+  (closure->fun) (cif, resp, arg_area, closure->user_data);
+
+  /* The result is returned in rax.  This does the right thing for
+     result types except for floats; we have to 'mov xmm0, rax' in the
+     caller to correct this.
+     TODO: structure sizes of 3 5 6 7 are returned by reference, too!!!
+  */
+  return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp;
+}
+
+#else
+unsigned int FFI_HIDDEN __attribute__ ((regparm(1)))
+ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args)
+{
+  /* our various things...  */
+  ffi_cif       *cif;
+  void         **arg_area;
+
+  cif         = closure->cif;
+  arg_area    = (void**) alloca (cif->nargs * sizeof (void*));  
+
+  /* this call will initialize ARG_AREA, such that each
+   * element in that array points to the corresponding 
+   * value on the stack; and if the function returns
+   * a structure, it will change RESP to point to the
+   * structure return address.  */
+
+  ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif);
+
+  (closure->fun) (cif, *respp, arg_area, closure->user_data);
+
+  return cif->flags;
+}
+#endif /* !X86_WIN64 */
+
+static void
+ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
+                            ffi_cif *cif)
+{
+  register unsigned int i;
+  register void **p_argv;
+  register char *argp;
+  register ffi_type **p_arg;
+
+  argp = stack;
+
+#ifdef X86_WIN64
+  if (cif->rtype->size > sizeof(ffi_arg)
+      || (cif->flags == FFI_TYPE_STRUCT
+          && (cif->rtype->size != 1 && cif->rtype->size != 2
+              && cif->rtype->size != 4 && cif->rtype->size != 8))) {
+    *rvalue = *(void **) argp;
+    argp += sizeof(void *);
+  }
+#else
+  if ( cif->flags == FFI_TYPE_STRUCT
+       || cif->flags == FFI_TYPE_MS_STRUCT ) {
+    *rvalue = *(void **) argp;
+    argp += sizeof(void *);
+  }
+#endif
+
+  p_argv = avalue;
+
+  for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
+    {
+      size_t z;
+
+      /* Align if necessary */
+      if ((sizeof(void*) - 1) & (size_t) argp) {
+        argp = (char *) ALIGN(argp, sizeof(void*));
+      }
+
+#ifdef X86_WIN64
+      if ((*p_arg)->size > sizeof(ffi_arg)
+          || ((*p_arg)->type == FFI_TYPE_STRUCT
+              && ((*p_arg)->size != 1 && (*p_arg)->size != 2
+                  && (*p_arg)->size != 4 && (*p_arg)->size != 8)))
+        {
+          z = sizeof(void *);
+          *p_argv = *(void **)argp;
+        }
+      else
+#endif
+        {
+          z = (*p_arg)->size;
+          
+          /* because we're little endian, this is what it turns into.   */
+          
+          *p_argv = (void*) argp;
+        }
+          
+      p_argv++;
+#ifdef X86_WIN64
+      argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+#else
+      argp += z;
+#endif
+    }
+  
+  return;
+}
+
+#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   void*  __fun = (void*)(FUN); \
+   void*  __ctx = (void*)(CTX); \
+   *(unsigned char*) &__tramp[0] = 0x41; \
+   *(unsigned char*) &__tramp[1] = 0xbb; \
+   *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 */ \
+   *(unsigned char*) &__tramp[6] = 0x48; \
+   *(unsigned char*) &__tramp[7] = 0xb8; \
+   *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax */ \
+   *(unsigned char *)  &__tramp[16] = 0x49; \
+   *(unsigned char *)  &__tramp[17] = 0xba; \
+   *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 */ \
+   *(unsigned char *)  &__tramp[26] = 0x41; \
+   *(unsigned char *)  &__tramp[27] = 0xff; \
+   *(unsigned char *)  &__tramp[28] = 0xe2; /* jmp %r10 */ \
+ }
+
+/* How to make a trampoline.  Derived from gcc/config/i386/i386.c. */
+
+#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 10);  \
+   *(unsigned char*) &__tramp[0] = 0xb8; \
+   *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+   *(unsigned char *)  &__tramp[5] = 0xe9; \
+   *(unsigned int*)  &__tramp[6] = __dis; /* jmp __fun  */ \
+ }
+
+#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 49);  \
+   unsigned short __size = (unsigned short)(SIZE); \
+   *(unsigned int *) &__tramp[0] = 0x8324048b;	/* mov (%esp), %eax */ \
+   *(unsigned int *) &__tramp[4] = 0x4c890cec;	/* sub $12, %esp */ \
+   *(unsigned int *) &__tramp[8] = 0x04890424;	/* mov %ecx, 4(%esp) */ \
+   *(unsigned char*) &__tramp[12] = 0x24;	/* mov %eax, (%esp) */ \
+   *(unsigned char*) &__tramp[13] = 0xb8; \
+   *(unsigned int *) &__tramp[14] = __size;	/* mov __size, %eax */ \
+   *(unsigned int *) &__tramp[18] = 0x08244c8d;	/* lea 8(%esp), %ecx */ \
+   *(unsigned int *) &__tramp[22] = 0x4802e8c1; /* shr $2, %eax ; dec %eax */ \
+   *(unsigned short*) &__tramp[26] = 0x0b74;	/* jz 1f */ \
+   *(unsigned int *) &__tramp[28] = 0x8908518b;	/* 2b: mov 8(%ecx), %edx */ \
+   *(unsigned int *) &__tramp[32] = 0x04c18311; /* mov %edx, (%ecx) ; add $4, %ecx */ \
+   *(unsigned char*) &__tramp[36] = 0x48;	/* dec %eax */ \
+   *(unsigned short*) &__tramp[37] = 0xf575;	/* jnz 2b ; 1f: */ \
+   *(unsigned char*) &__tramp[39] = 0xb8; \
+   *(unsigned int*)  &__tramp[40] = __ctx; /* movl __ctx, %eax */ \
+   *(unsigned char *)  &__tramp[44] = 0xe8; \
+   *(unsigned int*)  &__tramp[45] = __dis; /* call __fun  */ \
+   *(unsigned char*)  &__tramp[49] = 0xc2; /* ret  */ \
+   *(unsigned short*)  &__tramp[50] = (__size + 8); /* ret (__size + 8)  */ \
+ }
+
+#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE)  \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 10); \
+   unsigned short __size = (unsigned short)(SIZE); \
+   *(unsigned char*) &__tramp[0] = 0xb8; \
+   *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+   *(unsigned char *)  &__tramp[5] = 0xe8; \
+   *(unsigned int*)  &__tramp[6] = __dis; /* call __fun  */ \
+   *(unsigned char *)  &__tramp[10] = 0xc2; \
+   *(unsigned short*)  &__tramp[11] = __size; /* ret __size  */ \
+ }
+
+/* the cif must already be prep'ed */
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+                      ffi_cif* cif,
+                      void (*fun)(ffi_cif*,void*,void**,void*),
+                      void *user_data,
+                      void *codeloc)
+{
+#ifdef X86_WIN64
+#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE)
+#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0)
+  if (cif->abi == FFI_WIN64) 
+    {
+      int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3);
+      FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0],
+                                 &ffi_closure_win64,
+                                 codeloc, mask);
+      /* make sure we can execute here */
+    }
+#else
+  if (cif->abi == FFI_SYSV)
+    {
+      FFI_INIT_TRAMPOLINE (&closure->tramp[0],
+                           &ffi_closure_SYSV,
+                           (void*)codeloc);
+    }
+#ifdef X86_WIN32
+  else if (cif->abi == FFI_THISCALL)
+    {
+      FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0],
+				    &ffi_closure_THISCALL,
+				    (void*)codeloc,
+				    cif->bytes);
+    }
+  else if (cif->abi == FFI_STDCALL)
+    {
+      FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
+                                   &ffi_closure_STDCALL,
+                                   (void*)codeloc, cif->bytes);
+    }
+  else if (cif->abi == FFI_MS_CDECL)
+    {
+      FFI_INIT_TRAMPOLINE (&closure->tramp[0],
+                           &ffi_closure_SYSV,
+                           (void*)codeloc);
+    }
+#endif /* X86_WIN32 */
+#endif /* !X86_WIN64 */
+  else
+    {
+      return FFI_BAD_ABI;
+    }
+    
+  closure->cif  = cif;
+  closure->user_data = user_data;
+  closure->fun  = fun;
+
+  return FFI_OK;
+}
+
+/* ------- Native raw API support -------------------------------- */
+
+#if !FFI_NO_RAW_API
+
+ffi_status
+ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
+                          ffi_cif* cif,
+                          void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+                          void *user_data,
+                          void *codeloc)
+{
+  int i;
+
+  if (cif->abi != FFI_SYSV) {
+#ifdef X86_WIN32
+    if (cif->abi != FFI_THISCALL)
+#endif
+    return FFI_BAD_ABI;
+  }
+
+  /* we currently don't support certain kinds of arguments for raw
+     closures.  This should be implemented by a separate assembly
+     language routine, since it would require argument processing,
+     something we don't do now for performance.  */
+
+  for (i = cif->nargs-1; i >= 0; i--)
+    {
+      FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_STRUCT);
+      FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE);
+    }
+  
+#ifdef X86_WIN32
+  if (cif->abi == FFI_SYSV)
+    {
+#endif
+  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV,
+                       codeloc);
+#ifdef X86_WIN32
+    }
+  else if (cif->abi == FFI_THISCALL)
+    {
+      FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL,
+				    codeloc, cif->bytes);
+    }
+#endif
+  closure->cif  = cif;
+  closure->user_data = user_data;
+  closure->fun  = fun;
+
+  return FFI_OK;
+}
+
+static void 
+ffi_prep_args_raw(char *stack, extended_cif *ecif)
+{
+  memcpy (stack, ecif->avalue, ecif->cif->bytes);
+}
+
+/* we borrow this routine from libffi (it must be changed, though, to
+ * actually call the function passed in the first argument.  as of
+ * libffi-1.20, this is not the case.)
+ */
+
+void
+ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
+{
+  extended_cif ecif;
+  void **avalue = (void **)fake_avalue;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+  
+  /* If the return value is a struct and we don't have a return */
+  /* value address then we need to make one                     */
+
+  if (rvalue == NULL
+      && (cif->flags == FFI_TYPE_STRUCT
+          || cif->flags == FFI_TYPE_MS_STRUCT))
+    {
+      ecif.rvalue = alloca(cif->rtype->size);
+    }
+  else
+    ecif.rvalue = rvalue;
+    
+  
+  switch (cif->abi) 
+    {
+#ifdef X86_WIN32
+    case FFI_SYSV:
+    case FFI_STDCALL:
+    case FFI_MS_CDECL:
+      ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags,
+		     ecif.rvalue, fn);
+      break;
+    case FFI_THISCALL:
+    case FFI_FASTCALL:
+      {
+	unsigned int abi = cif->abi;
+	unsigned int i, passed_regs = 0;
+
+	if (cif->flags == FFI_TYPE_STRUCT)
+	  ++passed_regs;
+
+	for (i=0; i < cif->nargs && passed_regs < 2;i++)
+	  {
+	    size_t sz;
+
+	    if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
+	        || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
+	      continue;
+	    sz = (cif->arg_types[i]->size + 3) & ~3;
+	    if (sz == 0 || sz > 4)
+	      continue;
+	    ++passed_regs;
+	  }
+	if (passed_regs < 2 && abi == FFI_FASTCALL)
+	  cif->abi = abi = FFI_THISCALL;
+	if (passed_regs < 1 && abi == FFI_THISCALL)
+	  cif->abi = abi = FFI_STDCALL;
+        ffi_call_win32(ffi_prep_args_raw, &ecif, abi, cif->bytes, cif->flags,
+                       ecif.rvalue, fn);
+      }
+      break;
+#else
+    case FFI_SYSV:
+      ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
+                    ecif.rvalue, fn);
+      break;
+#endif
+    default:
+      FFI_ASSERT(0);
+      break;
+    }
+}
+
+#endif
+
+#endif /* !__x86_64__  || X86_WIN64 */
+
diff --git a/lib/wrappers/libffi/gcc/ffi64.c b/lib/wrappers/libffi/gcc/ffi64.c
new file mode 100644
index 000000000..2014af24c
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/ffi64.c
@@ -0,0 +1,673 @@
+/* -----------------------------------------------------------------------
+   ffi64.c - Copyright (c) 2013  The Written Word, Inc.
+             Copyright (c) 2011  Anthony Green
+             Copyright (c) 2008, 2010  Red Hat, Inc.
+             Copyright (c) 2002, 2007  Bo Thorsen <bo@suse.de>
+             
+   x86-64 Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+#include <stdarg.h>
+
+#ifdef __x86_64__
+
+#define MAX_GPR_REGS 6
+#define MAX_SSE_REGS 8
+
+#if defined(__INTEL_COMPILER)
+#define UINT128 __m128
+#else
+#if defined(__SUNPRO_C)
+#include <sunmedia_types.h>
+#define UINT128 __m128i
+#else
+#define UINT128 __int128_t
+#endif
+#endif
+
+union big_int_union
+{
+  UINT32 i32;
+  UINT64 i64;
+  UINT128 i128;
+};
+
+struct register_args
+{
+  /* Registers for argument passing.  */
+  UINT64 gpr[MAX_GPR_REGS];
+  union big_int_union sse[MAX_SSE_REGS]; 
+};
+
+extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+			     void *raddr, void (*fnaddr)(void), unsigned ssecount);
+
+/* All reference to register classes here is identical to the code in
+   gcc/config/i386/i386.c. Do *not* change one without the other.  */
+
+/* Register class used for passing given 64bit part of the argument.
+   These represent classes as documented by the PS ABI, with the
+   exception of SSESF, SSEDF classes, that are basically SSE class,
+   just gcc will use SF or DFmode move instead of DImode to avoid
+   reformatting penalties.
+
+   Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
+   whenever possible (upper half does contain padding).  */
+enum x86_64_reg_class
+  {
+    X86_64_NO_CLASS,
+    X86_64_INTEGER_CLASS,
+    X86_64_INTEGERSI_CLASS,
+    X86_64_SSE_CLASS,
+    X86_64_SSESF_CLASS,
+    X86_64_SSEDF_CLASS,
+    X86_64_SSEUP_CLASS,
+    X86_64_X87_CLASS,
+    X86_64_X87UP_CLASS,
+    X86_64_COMPLEX_X87_CLASS,
+    X86_64_MEMORY_CLASS
+  };
+
+#define MAX_CLASSES 4
+
+#define SSE_CLASS_P(X)	((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)
+
+/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
+   of this code is to classify each 8bytes of incoming argument by the register
+   class and assign registers accordingly.  */
+
+/* Return the union class of CLASS1 and CLASS2.
+   See the x86-64 PS ABI for details.  */
+
+static enum x86_64_reg_class
+merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
+{
+  /* Rule #1: If both classes are equal, this is the resulting class.  */
+  if (class1 == class2)
+    return class1;
+
+  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
+     the other class.  */
+  if (class1 == X86_64_NO_CLASS)
+    return class2;
+  if (class2 == X86_64_NO_CLASS)
+    return class1;
+
+  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
+  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
+    return X86_64_MEMORY_CLASS;
+
+  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
+  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
+      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
+    return X86_64_INTEGERSI_CLASS;
+  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
+      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
+    return X86_64_INTEGER_CLASS;
+
+  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
+     MEMORY is used.  */
+  if (class1 == X86_64_X87_CLASS
+      || class1 == X86_64_X87UP_CLASS
+      || class1 == X86_64_COMPLEX_X87_CLASS
+      || class2 == X86_64_X87_CLASS
+      || class2 == X86_64_X87UP_CLASS
+      || class2 == X86_64_COMPLEX_X87_CLASS)
+    return X86_64_MEMORY_CLASS;
+
+  /* Rule #6: Otherwise class SSE is used.  */
+  return X86_64_SSE_CLASS;
+}
+
+/* Classify the argument of type TYPE and mode MODE.
+   CLASSES will be filled by the register class used to pass each word
+   of the operand.  The number of words is returned.  In case the parameter
+   should be passed in memory, 0 is returned. As a special case for zero
+   sized containers, classes[0] will be NO_CLASS and 1 is returned.
+
+   See the x86-64 PS ABI for details.
+*/
+static int
+classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
+		   size_t byte_offset)
+{
+  switch (type->type)
+    {
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_SINT16:
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_POINTER:
+      {
+	int size = byte_offset + type->size;
+
+	if (size <= 4)
+	  {
+	    classes[0] = X86_64_INTEGERSI_CLASS;
+	    return 1;
+	  }
+	else if (size <= 8)
+	  {
+	    classes[0] = X86_64_INTEGER_CLASS;
+	    return 1;
+	  }
+	else if (size <= 12)
+	  {
+	    classes[0] = X86_64_INTEGER_CLASS;
+	    classes[1] = X86_64_INTEGERSI_CLASS;
+	    return 2;
+	  }
+	else if (size <= 16)
+	  {
+	    classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
+	    return 2;
+	  }
+	else
+	  FFI_ASSERT (0);
+      }
+    case FFI_TYPE_FLOAT:
+      if (!(byte_offset % 8))
+	classes[0] = X86_64_SSESF_CLASS;
+      else
+	classes[0] = X86_64_SSE_CLASS;
+      return 1;
+    case FFI_TYPE_DOUBLE:
+      classes[0] = X86_64_SSEDF_CLASS;
+      return 1;
+    case FFI_TYPE_LONGDOUBLE:
+      classes[0] = X86_64_X87_CLASS;
+      classes[1] = X86_64_X87UP_CLASS;
+      return 2;
+    case FFI_TYPE_STRUCT:
+      {
+	const int UNITS_PER_WORD = 8;
+	int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+	ffi_type **ptr; 
+	int i;
+	enum x86_64_reg_class subclasses[MAX_CLASSES];
+
+	/* If the struct is larger than 32 bytes, pass it on the stack.  */
+	if (type->size > 32)
+	  return 0;
+
+	for (i = 0; i < words; i++)
+	  classes[i] = X86_64_NO_CLASS;
+
+	/* Zero sized arrays or structures are NO_CLASS.  We return 0 to
+	   signalize memory class, so handle it as special case.  */
+	if (!words)
+	  {
+	    classes[0] = X86_64_NO_CLASS;
+	    return 1;
+	  }
+
+	/* Merge the fields of structure.  */
+	for (ptr = type->elements; *ptr != NULL; ptr++)
+	  {
+	    int num;
+
+	    byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
+
+	    num = classify_argument (*ptr, subclasses, byte_offset % 8);
+	    if (num == 0)
+	      return 0;
+	    for (i = 0; i < num; i++)
+	      {
+		int pos = byte_offset / 8;
+		classes[i + pos] =
+		  merge_classes (subclasses[i], classes[i + pos]);
+	      }
+
+	    byte_offset += (*ptr)->size;
+	  }
+
+	if (words > 2)
+	  {
+	    /* When size > 16 bytes, if the first one isn't
+	       X86_64_SSE_CLASS or any other ones aren't
+	       X86_64_SSEUP_CLASS, everything should be passed in
+	       memory.  */
+	    if (classes[0] != X86_64_SSE_CLASS)
+	      return 0;
+
+	    for (i = 1; i < words; i++)
+	      if (classes[i] != X86_64_SSEUP_CLASS)
+		return 0;
+	  }
+
+	/* Final merger cleanup.  */
+	for (i = 0; i < words; i++)
+	  {
+	    /* If one class is MEMORY, everything should be passed in
+	       memory.  */
+	    if (classes[i] == X86_64_MEMORY_CLASS)
+	      return 0;
+
+	    /* The X86_64_SSEUP_CLASS should be always preceded by
+	       X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
+	    if (classes[i] == X86_64_SSEUP_CLASS
+		&& classes[i - 1] != X86_64_SSE_CLASS
+		&& classes[i - 1] != X86_64_SSEUP_CLASS)
+	      {
+		/* The first one should never be X86_64_SSEUP_CLASS.  */
+		FFI_ASSERT (i != 0);
+		classes[i] = X86_64_SSE_CLASS;
+	      }
+
+	    /*  If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
+		everything should be passed in memory.  */
+	    if (classes[i] == X86_64_X87UP_CLASS
+		&& (classes[i - 1] != X86_64_X87_CLASS))
+	      {
+		/* The first one should never be X86_64_X87UP_CLASS.  */
+		FFI_ASSERT (i != 0);
+		return 0;
+	      }
+	  }
+	return words;
+      }
+
+    default:
+      FFI_ASSERT(0);
+    }
+  return 0; /* Never reached.  */
+}
+
+/* Examine the argument and return set number of register required in each
+   class.  Return zero iff parameter should be passed in memory, otherwise
+   the number of registers.  */
+
+static int
+examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
+		  _Bool in_return, int *pngpr, int *pnsse)
+{
+  int i, n, ngpr, nsse;
+
+  n = classify_argument (type, classes, 0);
+  if (n == 0)
+    return 0;
+
+  ngpr = nsse = 0;
+  for (i = 0; i < n; ++i)
+    switch (classes[i])
+      {
+      case X86_64_INTEGER_CLASS:
+      case X86_64_INTEGERSI_CLASS:
+	ngpr++;
+	break;
+      case X86_64_SSE_CLASS:
+      case X86_64_SSESF_CLASS:
+      case X86_64_SSEDF_CLASS:
+	nsse++;
+	break;
+      case X86_64_NO_CLASS:
+      case X86_64_SSEUP_CLASS:
+	break;
+      case X86_64_X87_CLASS:
+      case X86_64_X87UP_CLASS:
+      case X86_64_COMPLEX_X87_CLASS:
+	return in_return != 0;
+      default:
+	abort ();
+      }
+
+  *pngpr = ngpr;
+  *pnsse = nsse;
+
+  return n;
+}
+
+/* Perform machine dependent cif processing.  */
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+  int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
+  enum x86_64_reg_class classes[MAX_CLASSES];
+  size_t bytes;
+
+  gprcount = ssecount = 0;
+
+  flags = cif->rtype->type;
+  if (flags != FFI_TYPE_VOID)
+    {
+      n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+      if (n == 0)
+	{
+	  /* The return value is passed in memory.  A pointer to that
+	     memory is the first argument.  Allocate a register for it.  */
+	  gprcount++;
+	  /* We don't have to do anything in asm for the return.  */
+	  flags = FFI_TYPE_VOID;
+	}
+      else if (flags == FFI_TYPE_STRUCT)
+	{
+	  /* Mark which registers the result appears in.  */
+	  _Bool sse0 = SSE_CLASS_P (classes[0]);
+	  _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+	  if (sse0 && !sse1)
+	    flags |= 1 << 8;
+	  else if (!sse0 && sse1)
+	    flags |= 1 << 9;
+	  else if (sse0 && sse1)
+	    flags |= 1 << 10;
+	  /* Mark the true size of the structure.  */
+	  flags |= cif->rtype->size << 12;
+	}
+    }
+
+  /* Go over all arguments and determine the way they should be passed.
+     If it's in a register and there is space for it, let that be so. If
+     not, add it's size to the stack byte count.  */
+  for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
+    {
+      if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = cif->arg_types[i]->alignment;
+
+	  if (align < 8)
+	    align = 8;
+
+	  bytes = ALIGN (bytes, align);
+	  bytes += cif->arg_types[i]->size;
+	}
+      else
+	{
+	  gprcount += ngpr;
+	  ssecount += nsse;
+	}
+    }
+  if (ssecount)
+    flags |= 1 << 11;
+  cif->flags = flags;
+  cif->bytes = ALIGN (bytes, 8);
+
+  return FFI_OK;
+}
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  enum x86_64_reg_class classes[MAX_CLASSES];
+  char *stack, *argp;
+  ffi_type **arg_types;
+  int gprcount, ssecount, ngpr, nsse, i, avn;
+  _Bool ret_in_memory;
+  struct register_args *reg_args;
+
+  /* Can't call 32-bit mode from 64-bit mode.  */
+  FFI_ASSERT (cif->abi == FFI_UNIX64);
+
+  /* If the return value is a struct and we don't have a return value
+     address then we need to make one.  Note the setting of flags to
+     VOID above in ffi_prep_cif_machdep.  */
+  ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
+		   && (cif->flags & 0xff) == FFI_TYPE_VOID);
+  if (rvalue == NULL && ret_in_memory)
+    rvalue = alloca (cif->rtype->size);
+
+  /* Allocate the space for the arguments, plus 4 words of temp space.  */
+  stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
+  reg_args = (struct register_args *) stack;
+  argp = stack + sizeof (struct register_args);
+
+  gprcount = ssecount = 0;
+
+  /* If the return value is passed in memory, add the pointer as the
+     first integer argument.  */
+  if (ret_in_memory)
+    reg_args->gpr[gprcount++] = (unsigned long) rvalue;
+
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+
+  for (i = 0; i < avn; ++i)
+    {
+      size_t size = arg_types[i]->size;
+      int n;
+
+      n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+      if (n == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = arg_types[i]->alignment;
+
+	  /* Stack arguments are *always* at least 8 byte aligned.  */
+	  if (align < 8)
+	    align = 8;
+
+	  /* Pass this argument in memory.  */
+	  argp = (void *) ALIGN (argp, align);
+	  memcpy (argp, avalue[i], size);
+	  argp += size;
+	}
+      else
+	{
+	  /* The argument is passed entirely in registers.  */
+	  char *a = (char *) avalue[i];
+	  int j;
+
+	  for (j = 0; j < n; j++, a += 8, size -= 8)
+	    {
+	      switch (classes[j])
+		{
+		case X86_64_INTEGER_CLASS:
+		case X86_64_INTEGERSI_CLASS:
+		  /* Sign-extend integer arguments passed in general
+		     purpose registers, to cope with the fact that
+		     LLVM incorrectly assumes that this will be done
+		     (the x86-64 PS ABI does not specify this). */
+		  switch (arg_types[i]->type)
+		    {
+		    case FFI_TYPE_SINT8:
+		      *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
+		      break;
+		    case FFI_TYPE_SINT16:
+		      *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
+		      break;
+		    case FFI_TYPE_SINT32:
+		      *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
+		      break;
+		    default:
+		      reg_args->gpr[gprcount] = 0;
+		      memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+		    }
+		  gprcount++;
+		  break;
+		case X86_64_SSE_CLASS:
+		case X86_64_SSEDF_CLASS:
+		  reg_args->sse[ssecount++].i64 = *(UINT64 *) a;
+		  break;
+		case X86_64_SSESF_CLASS:
+		  reg_args->sse[ssecount++].i32 = *(UINT32 *) a;
+		  break;
+		default:
+		  abort();
+		}
+	    }
+	}
+    }
+
+  ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
+		   cif->flags, rvalue, fn, ssecount);
+}
+
+
+extern void ffi_closure_unix64(void);
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+		      ffi_cif* cif,
+		      void (*fun)(ffi_cif*, void*, void**, void*),
+		      void *user_data,
+		      void *codeloc)
+{
+  volatile unsigned short *tramp;
+
+  /* Sanity check on the cif ABI.  */
+  {
+    int abi = cif->abi;
+    if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)))
+      return FFI_BAD_ABI;
+  }
+
+  tramp = (volatile unsigned short *) &closure->tramp[0];
+
+  tramp[0] = 0xbb49;		/* mov <code>, %r11	*/
+  *((unsigned long long * volatile) &tramp[1])
+    = (unsigned long) ffi_closure_unix64;
+  tramp[5] = 0xba49;		/* mov <data>, %r10	*/
+  *((unsigned long long * volatile) &tramp[6])
+    = (unsigned long) codeloc;
+
+  /* Set the carry bit iff the function uses any sse registers.
+     This is clc or stc, together with the first byte of the jmp.  */
+  tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
+
+  tramp[11] = 0xe3ff;			/* jmp *%r11    */
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+
+  return FFI_OK;
+}
+
+int
+ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
+			 struct register_args *reg_args, char *argp)
+{
+  ffi_cif *cif;
+  void **avalue;
+  ffi_type **arg_types;
+  long i, avn;
+  int gprcount, ssecount, ngpr, nsse;
+  int ret;
+
+  cif = closure->cif;
+  avalue = alloca(cif->nargs * sizeof(void *));
+  gprcount = ssecount = 0;
+
+  ret = cif->rtype->type;
+  if (ret != FFI_TYPE_VOID)
+    {
+      enum x86_64_reg_class classes[MAX_CLASSES];
+      int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+      if (n == 0)
+	{
+	  /* The return value goes in memory.  Arrange for the closure
+	     return value to go directly back to the original caller.  */
+	  rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
+	  /* We don't have to do anything in asm for the return.  */
+	  ret = FFI_TYPE_VOID;
+	}
+      else if (ret == FFI_TYPE_STRUCT && n == 2)
+	{
+	  /* Mark which register the second word of the structure goes in.  */
+	  _Bool sse0 = SSE_CLASS_P (classes[0]);
+	  _Bool sse1 = SSE_CLASS_P (classes[1]);
+	  if (!sse0 && sse1)
+	    ret |= 1 << 8;
+	  else if (sse0 && !sse1)
+	    ret |= 1 << 9;
+	}
+    }
+
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+  
+  for (i = 0; i < avn; ++i)
+    {
+      enum x86_64_reg_class classes[MAX_CLASSES];
+      int n;
+
+      n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+      if (n == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = arg_types[i]->alignment;
+
+	  /* Stack arguments are *always* at least 8 byte aligned.  */
+	  if (align < 8)
+	    align = 8;
+
+	  /* Pass this argument in memory.  */
+	  argp = (void *) ALIGN (argp, align);
+	  avalue[i] = argp;
+	  argp += arg_types[i]->size;
+	}
+      /* If the argument is in a single register, or two consecutive
+	 integer registers, then we can use that address directly.  */
+      else if (n == 1
+	       || (n == 2 && !(SSE_CLASS_P (classes[0])
+			       || SSE_CLASS_P (classes[1]))))
+	{
+	  /* The argument is in a single register.  */
+	  if (SSE_CLASS_P (classes[0]))
+	    {
+	      avalue[i] = &reg_args->sse[ssecount];
+	      ssecount += n;
+	    }
+	  else
+	    {
+	      avalue[i] = &reg_args->gpr[gprcount];
+	      gprcount += n;
+	    }
+	}
+      /* Otherwise, allocate space to make them consecutive.  */
+      else
+	{
+	  char *a = alloca (16);
+	  int j;
+
+	  avalue[i] = a;
+	  for (j = 0; j < n; j++, a += 8)
+	    {
+	      if (SSE_CLASS_P (classes[j]))
+		memcpy (a, &reg_args->sse[ssecount++], 8);
+	      else
+		memcpy (a, &reg_args->gpr[gprcount++], 8);
+	    }
+	}
+    }
+
+  /* Invoke the closure.  */
+  closure->fun (cif, rvalue, avalue, closure->user_data);
+
+  /* Tell assembly how to perform return type promotions.  */
+  return ret;
+}
+
+#endif /* __x86_64__ */
diff --git a/lib/wrappers/libffi/gcc/prep_cif.c b/lib/wrappers/libffi/gcc/prep_cif.c
new file mode 100644
index 000000000..e8ec5cf1e
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/prep_cif.c
@@ -0,0 +1,237 @@
+/* -----------------------------------------------------------------------
+   prep_cif.c - Copyright (c) 2011, 2012  Anthony Green
+                Copyright (c) 1996, 1998, 2007  Red Hat, Inc.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+
+/* Round up to FFI_SIZEOF_ARG. */
+
+#define STACK_ARG_SIZE(x) ALIGN(x, FFI_SIZEOF_ARG)
+
+/* Perform machine independent initialization of aggregate type
+   specifications. */
+
+static ffi_status initialize_aggregate(ffi_type *arg)
+{
+  ffi_type **ptr;
+
+  if (UNLIKELY(arg == NULL || arg->elements == NULL))
+    return FFI_BAD_TYPEDEF;
+
+  arg->size = 0;
+  arg->alignment = 0;
+
+  ptr = &(arg->elements[0]);
+
+  if (UNLIKELY(ptr == 0))
+    return FFI_BAD_TYPEDEF;
+
+  while ((*ptr) != NULL)
+    {
+      if (UNLIKELY(((*ptr)->size == 0)
+		    && (initialize_aggregate((*ptr)) != FFI_OK)))
+	return FFI_BAD_TYPEDEF;
+
+      /* Perform a sanity check on the argument type */
+      FFI_ASSERT_VALID_TYPE(*ptr);
+
+      arg->size = ALIGN(arg->size, (*ptr)->alignment);
+      arg->size += (*ptr)->size;
+
+      arg->alignment = (arg->alignment > (*ptr)->alignment) ?
+	arg->alignment : (*ptr)->alignment;
+
+      ptr++;
+    }
+
+  /* Structure size includes tail padding.  This is important for
+     structures that fit in one register on ABIs like the PowerPC64
+     Linux ABI that right justify small structs in a register.
+     It's also needed for nested structure layout, for example
+     struct A { long a; char b; }; struct B { struct A x; char y; };
+     should find y at an offset of 2*sizeof(long) and result in a
+     total size of 3*sizeof(long).  */
+  arg->size = ALIGN (arg->size, arg->alignment);
+
+  if (arg->size == 0)
+    return FFI_BAD_TYPEDEF;
+  else
+    return FFI_OK;
+}
+
+#ifndef __CRIS__
+/* The CRIS ABI specifies structure elements to have byte
+   alignment only, so it completely overrides this functions,
+   which assumes "natural" alignment and padding.  */
+
+/* Perform machine independent ffi_cif preparation, then call
+   machine dependent routine. */
+
+/* For non variadic functions isvariadic should be 0 and
+   nfixedargs==ntotalargs.
+
+   For variadic calls, isvariadic should be 1 and nfixedargs
+   and ntotalargs set as appropriate. nfixedargs must always be >=1 */
+
+
+ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
+			     unsigned int isvariadic,
+                             unsigned int nfixedargs,
+                             unsigned int ntotalargs,
+			     ffi_type *rtype, ffi_type **atypes)
+{
+  unsigned bytes = 0;
+  unsigned int i;
+  ffi_type **ptr;
+
+  FFI_ASSERT(cif != NULL);
+  FFI_ASSERT((!isvariadic) || (nfixedargs >= 1));
+  FFI_ASSERT(nfixedargs <= ntotalargs);
+
+#ifndef X86_WIN32
+  if (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI))
+    return FFI_BAD_ABI;
+#else
+  if (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI || abi == FFI_THISCALL))
+    return FFI_BAD_ABI;
+#endif
+
+  cif->abi = abi;
+  cif->arg_types = atypes;
+  cif->nargs = ntotalargs;
+  cif->rtype = rtype;
+
+  cif->flags = 0;
+
+  /* Initialize the return type if necessary */
+  if ((cif->rtype->size == 0) && (initialize_aggregate(cif->rtype) != FFI_OK))
+    return FFI_BAD_TYPEDEF;
+
+  /* Perform a sanity check on the return type */
+  FFI_ASSERT_VALID_TYPE(cif->rtype);
+
+  /* x86, x86-64 and s390 stack space allocation is handled in prep_machdep. */
+#if !defined M68K && !defined X86_ANY && !defined S390 && !defined PA
+  /* Make space for the return structure pointer */
+  if (cif->rtype->type == FFI_TYPE_STRUCT
+#ifdef SPARC
+      && (cif->abi != FFI_V9 || cif->rtype->size > 32)
+#endif
+#ifdef TILE
+      && (cif->rtype->size > 10 * FFI_SIZEOF_ARG)
+#endif
+#ifdef XTENSA
+      && (cif->rtype->size > 16)
+#endif
+
+     )
+    bytes = STACK_ARG_SIZE(sizeof(void*));
+#endif
+
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+    {
+
+      /* Initialize any uninitialized aggregate type definitions */
+      if (((*ptr)->size == 0) && (initialize_aggregate((*ptr)) != FFI_OK))
+	return FFI_BAD_TYPEDEF;
+
+      /* Perform a sanity check on the argument type, do this
+	 check after the initialization.  */
+      FFI_ASSERT_VALID_TYPE(*ptr);
+
+#if !defined X86_ANY && !defined S390 && !defined PA
+#ifdef SPARC
+      if (((*ptr)->type == FFI_TYPE_STRUCT
+	   && ((*ptr)->size > 16 || cif->abi != FFI_V9))
+	  || ((*ptr)->type == FFI_TYPE_LONGDOUBLE
+	      && cif->abi != FFI_V9))
+	bytes += sizeof(void*);
+      else
+#endif
+	{
+	  /* Add any padding if necessary */
+	  if (((*ptr)->alignment - 1) & bytes)
+	    bytes = ALIGN(bytes, (*ptr)->alignment);
+
+#ifdef TILE
+	  if (bytes < 10 * FFI_SIZEOF_ARG &&
+	      bytes + STACK_ARG_SIZE((*ptr)->size) > 10 * FFI_SIZEOF_ARG)
+	    {
+	      /* An argument is never split between the 10 parameter
+		 registers and the stack.  */
+	      bytes = 10 * FFI_SIZEOF_ARG;
+	    }
+#endif
+#ifdef XTENSA
+	  if (bytes <= 6*4 && bytes + STACK_ARG_SIZE((*ptr)->size) > 6*4)
+	    bytes = 6*4;
+#endif
+
+	  bytes += STACK_ARG_SIZE((*ptr)->size);
+	}
+#endif
+    }
+
+  cif->bytes = bytes;
+
+  /* Perform machine dependent cif processing */
+#ifdef FFI_TARGET_SPECIFIC_VARIADIC
+  if (isvariadic)
+	return ffi_prep_cif_machdep_var(cif, nfixedargs, ntotalargs);
+#endif
+
+  return ffi_prep_cif_machdep(cif);
+}
+#endif /* not __CRIS__ */
+
+ffi_status ffi_prep_cif(ffi_cif *cif, ffi_abi abi, unsigned int nargs,
+			     ffi_type *rtype, ffi_type **atypes)
+{
+  return ffi_prep_cif_core(cif, abi, 0, nargs, nargs, rtype, atypes);
+}
+
+ffi_status ffi_prep_cif_var(ffi_cif *cif,
+                            ffi_abi abi,
+                            unsigned int nfixedargs,
+                            unsigned int ntotalargs,
+                            ffi_type *rtype,
+                            ffi_type **atypes)
+{
+  return ffi_prep_cif_core(cif, abi, 1, nfixedargs, ntotalargs, rtype, atypes);
+}
+
+#if FFI_CLOSURES
+
+ffi_status
+ffi_prep_closure (ffi_closure* closure,
+		  ffi_cif* cif,
+		  void (*fun)(ffi_cif*,void*,void**,void*),
+		  void *user_data)
+{
+  return ffi_prep_closure_loc (closure, cif, fun, user_data, closure);
+}
+
+#endif
diff --git a/lib/wrappers/libffi/gcc/types.c b/lib/wrappers/libffi/gcc/types.c
new file mode 100644
index 000000000..0a11eb0fb
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/types.c
@@ -0,0 +1,77 @@
+/* -----------------------------------------------------------------------
+   types.c - Copyright (c) 1996, 1998  Red Hat, Inc.
+   
+   Predefined ffi_types needed by libffi.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+/* Hide the basic type definitions from the header file, so that we
+   can redefine them here as "const".  */
+#define LIBFFI_HIDE_BASIC_TYPES
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+/* Type definitions */
+
+#define FFI_TYPEDEF(name, type, id)		\
+struct struct_align_##name {			\
+  char c;					\
+  type x;					\
+};						\
+const ffi_type ffi_type_##name = {		\
+  sizeof(type),					\
+  offsetof(struct struct_align_##name, x),	\
+  id, NULL					\
+}
+
+/* Size and alignment are fake here. They must not be 0. */
+const ffi_type ffi_type_void = {
+  1, 1, FFI_TYPE_VOID, NULL
+};
+
+FFI_TYPEDEF(uint8, UINT8, FFI_TYPE_UINT8);
+FFI_TYPEDEF(sint8, SINT8, FFI_TYPE_SINT8);
+FFI_TYPEDEF(uint16, UINT16, FFI_TYPE_UINT16);
+FFI_TYPEDEF(sint16, SINT16, FFI_TYPE_SINT16);
+FFI_TYPEDEF(uint32, UINT32, FFI_TYPE_UINT32);
+FFI_TYPEDEF(sint32, SINT32, FFI_TYPE_SINT32);
+FFI_TYPEDEF(uint64, UINT64, FFI_TYPE_UINT64);
+FFI_TYPEDEF(sint64, SINT64, FFI_TYPE_SINT64);
+
+FFI_TYPEDEF(pointer, void*, FFI_TYPE_POINTER);
+
+FFI_TYPEDEF(float, float, FFI_TYPE_FLOAT);
+FFI_TYPEDEF(double, double, FFI_TYPE_DOUBLE);
+
+#ifdef __alpha__
+/* Even if we're not configured to default to 128-bit long double, 
+   maintain binary compatibility, as -mlong-double-128 can be used
+   at any time.  */
+/* Validate the hard-coded number below.  */
+# if defined(__LONG_DOUBLE_128__) && FFI_TYPE_LONGDOUBLE != 4
+#  error FFI_TYPE_LONGDOUBLE out of date
+# endif
+const ffi_type ffi_type_longdouble = { 16, 16, 4, NULL };
+#elif FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+FFI_TYPEDEF(longdouble, long double, FFI_TYPE_LONGDOUBLE);
+#endif
diff --git a/lib/wrappers/libffi/gcc/win32_asm.asm b/lib/wrappers/libffi/gcc/win32_asm.asm
new file mode 100644
index 000000000..ce3c4f3f3
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/win32_asm.asm
@@ -0,0 +1,759 @@
+/* -----------------------------------------------------------------------
+   win32.S - Copyright (c) 1996, 1998, 2001, 2002, 2009  Red Hat, Inc.
+	     Copyright (c) 2001  John Beniton
+	     Copyright (c) 2002  Ranjit Mathew
+	     Copyright (c) 2009  Daniel Witte
+			
+ 
+   X86 Foreign Function Interface
+ 
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+ 
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+ 
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   -----------------------------------------------------------------------
+   */
+ 
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffitarget.h>
+
+	.text
+ 
+        // This assumes we are using gas.
+        .balign 16
+	.globl	_ffi_call_win32
+#ifndef __OS2__
+	.def	_ffi_call_win32;	.scl	2;	.type	32;	.endef
+#endif
+_ffi_call_win32:
+.LFB1:
+        pushl %ebp
+.LCFI0:
+        movl  %esp,%ebp
+.LCFI1:
+        // Make room for all of the new args.
+        movl  20(%ebp),%ecx                                                     
+        subl  %ecx,%esp
+ 
+        movl  %esp,%eax
+ 
+        // Place all of the ffi_prep_args in position
+        pushl 12(%ebp)
+        pushl %eax
+        call  *8(%ebp)
+ 
+        // Return stack to previous state and call the function
+        addl  $8,%esp
+
+	// Handle fastcall and thiscall
+	cmpl $3, 16(%ebp)  // FFI_THISCALL
+	jz .do_thiscall
+	cmpl $4, 16(%ebp) // FFI_FASTCALL
+	jnz .do_fncall
+	movl (%esp), %ecx
+	movl 4(%esp), %edx
+	addl $8, %esp
+	jmp .do_fncall
+.do_thiscall:
+	movl (%esp), %ecx
+	addl $4, %esp
+
+.do_fncall:
+	 
+        // FIXME: Align the stack to a 128-bit boundary to avoid
+        // potential performance hits.
+
+        call  *32(%ebp)
+ 
+        // stdcall functions pop arguments off the stack themselves
+
+        // Load %ecx with the return type code
+        movl  24(%ebp),%ecx
+ 
+        // If the return value pointer is NULL, assume no return value.
+        cmpl  $0,28(%ebp)
+        jne   0f
+ 
+        // Even if there is no space for the return value, we are
+        // obliged to handle floating-point values.
+        cmpl  $FFI_TYPE_FLOAT,%ecx
+        jne   .Lnoretval
+        fstp  %st(0)
+ 
+        jmp   .Lepilogue
+
+0:
+	call	1f
+	// Do not insert anything here between the call and the jump table.
+.Lstore_table:
+	.long	.Lnoretval		/* FFI_TYPE_VOID */
+	.long	.Lretint		/* FFI_TYPE_INT */
+	.long	.Lretfloat		/* FFI_TYPE_FLOAT */
+	.long	.Lretdouble		/* FFI_TYPE_DOUBLE */
+	.long	.Lretlongdouble		/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lretuint8		/* FFI_TYPE_UINT8 */
+	.long	.Lretsint8		/* FFI_TYPE_SINT8 */
+	.long	.Lretuint16		/* FFI_TYPE_UINT16 */
+	.long	.Lretsint16		/* FFI_TYPE_SINT16 */
+	.long	.Lretint		/* FFI_TYPE_UINT32 */
+	.long	.Lretint		/* FFI_TYPE_SINT32 */
+	.long	.Lretint64		/* FFI_TYPE_UINT64 */
+	.long	.Lretint64		/* FFI_TYPE_SINT64 */
+	.long	.Lretstruct		/* FFI_TYPE_STRUCT */
+	.long	.Lretint		/* FFI_TYPE_POINTER */
+	.long	.Lretstruct1b		/* FFI_TYPE_SMALL_STRUCT_1B */
+	.long	.Lretstruct2b		/* FFI_TYPE_SMALL_STRUCT_2B */
+	.long	.Lretstruct4b		/* FFI_TYPE_SMALL_STRUCT_4B */
+	.long	.Lretstruct		/* FFI_TYPE_MS_STRUCT */
+1:
+	add	%ecx, %ecx
+	add	%ecx, %ecx
+	add	(%esp),%ecx
+	add	$4, %esp
+	jmp	*(%ecx)
+
+	/* Sign/zero extend as appropriate.  */
+.Lretsint8:
+	movsbl	%al, %eax
+	jmp	.Lretint
+
+.Lretsint16:
+	movswl	%ax, %eax
+	jmp	.Lretint
+
+.Lretuint8:
+	movzbl	%al, %eax
+	jmp	.Lretint
+
+.Lretuint16:
+	movzwl	%ax, %eax
+	jmp	.Lretint
+
+.Lretint:
+        // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movl  %eax,0(%ecx)
+        jmp   .Lepilogue
+ 
+.Lretfloat:
+         // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        fstps (%ecx)
+        jmp   .Lepilogue
+ 
+.Lretdouble:
+        // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        fstpl (%ecx)
+        jmp   .Lepilogue
+ 
+.Lretlongdouble:
+        // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        fstpt (%ecx)
+        jmp   .Lepilogue
+ 
+.Lretint64:
+        // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movl  %eax,0(%ecx)
+        movl  %edx,4(%ecx)
+	jmp   .Lepilogue
+
+.Lretstruct1b:
+        // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movb  %al,0(%ecx)
+        jmp   .Lepilogue
+ 
+.Lretstruct2b:
+        // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movw  %ax,0(%ecx)
+        jmp   .Lepilogue
+
+.Lretstruct4b:
+        // Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movl  %eax,0(%ecx)
+        jmp   .Lepilogue
+
+.Lretstruct:
+        // Nothing to do!
+ 
+.Lnoretval:
+.Lepilogue:
+        movl %ebp,%esp
+        popl %ebp
+        ret
+.ffi_call_win32_end:
+        .balign 16
+	.globl	_ffi_closure_THISCALL
+#ifndef __OS2__
+	.def	_ffi_closure_THISCALL;	.scl	2;	.type	32;	.endef
+#endif
+_ffi_closure_THISCALL:
+	pushl	%ebp
+	movl	%esp, %ebp
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	12(%ebp), %edx  /* account for stub return address on stack */
+	jmp	.stub
+.LFE1:
+
+        // This assumes we are using gas.
+        .balign 16
+	.globl	_ffi_closure_SYSV
+#ifndef __OS2__
+	.def	_ffi_closure_SYSV;	.scl	2;	.type	32;	.endef
+#endif
+_ffi_closure_SYSV:
+.LFB3:
+	pushl	%ebp
+.LCFI4:
+	movl	%esp, %ebp
+.LCFI5:
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	8(%ebp), %edx
+.stub:
+	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+	call	_ffi_closure_SYSV_inner
+	movl	-12(%ebp), %ecx
+
+0:
+	call	1f
+	// Do not insert anything here between the call and the jump table.
+.Lcls_store_table:
+	.long	.Lcls_noretval		/* FFI_TYPE_VOID */
+	.long	.Lcls_retint		/* FFI_TYPE_INT */
+	.long	.Lcls_retfloat		/* FFI_TYPE_FLOAT */
+	.long	.Lcls_retdouble		/* FFI_TYPE_DOUBLE */
+	.long	.Lcls_retldouble	/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lcls_retuint8		/* FFI_TYPE_UINT8 */
+	.long	.Lcls_retsint8		/* FFI_TYPE_SINT8 */
+	.long	.Lcls_retuint16		/* FFI_TYPE_UINT16 */
+	.long	.Lcls_retsint16		/* FFI_TYPE_SINT16 */
+	.long	.Lcls_retint		/* FFI_TYPE_UINT32 */
+	.long	.Lcls_retint		/* FFI_TYPE_SINT32 */
+	.long	.Lcls_retllong		/* FFI_TYPE_UINT64 */
+	.long	.Lcls_retllong		/* FFI_TYPE_SINT64 */
+	.long	.Lcls_retstruct		/* FFI_TYPE_STRUCT */
+	.long	.Lcls_retint		/* FFI_TYPE_POINTER */
+	.long	.Lcls_retstruct1	/* FFI_TYPE_SMALL_STRUCT_1B */
+	.long	.Lcls_retstruct2	/* FFI_TYPE_SMALL_STRUCT_2B */
+	.long	.Lcls_retstruct4	/* FFI_TYPE_SMALL_STRUCT_4B */
+	.long	.Lcls_retmsstruct	/* FFI_TYPE_MS_STRUCT */
+
+1:
+	add	%eax, %eax
+	add	%eax, %eax
+	add	(%esp),%eax
+	add	$4, %esp
+	jmp	*(%eax)
+
+	/* Sign/zero extend as appropriate.  */
+.Lcls_retsint8:
+	movsbl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retsint16:
+	movswl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retuint8:
+	movzbl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retuint16:
+	movzwl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retint:
+	movl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retfloat:
+	flds	(%ecx)
+	jmp	.Lcls_epilogue
+
+.Lcls_retdouble:
+	fldl	(%ecx)
+	jmp	.Lcls_epilogue
+
+.Lcls_retldouble:
+	fldt	(%ecx)
+	jmp	.Lcls_epilogue
+
+.Lcls_retllong:
+	movl	(%ecx), %eax
+	movl	4(%ecx), %edx
+	jmp	.Lcls_epilogue
+
+.Lcls_retstruct1:
+	movsbl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retstruct2:
+	movswl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retstruct4:
+	movl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+
+.Lcls_retstruct:
+        // Caller expects us to pop struct return value pointer hidden arg.
+	movl	%ebp, %esp
+	popl	%ebp
+	ret	$0x4
+
+.Lcls_retmsstruct:
+	// Caller expects us to return a pointer to the real return value.
+	mov	%ecx, %eax
+	// Caller doesn't expects us to pop struct return value pointer hidden arg.
+	jmp	.Lcls_epilogue
+
+.Lcls_noretval:
+.Lcls_epilogue:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+.ffi_closure_SYSV_end:
+.LFE3:
+
+#if !FFI_NO_RAW_API
+
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+        .balign 16
+	.globl	_ffi_closure_raw_THISCALL
+#ifndef __OS2__
+	.def	_ffi_closure_raw_THISCALL;	.scl	2;	.type	32;	.endef
+#endif
+_ffi_closure_raw_THISCALL:
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%esi
+	subl	$36, %esp
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+	movl	%edx, 12(%esp)	/* user_data */
+	leal	12(%ebp), %edx	/* __builtin_dwarf_cfa () */
+	jmp	.stubraw
+        // This assumes we are using gas.
+        .balign 16
+	.globl	_ffi_closure_raw_SYSV
+#ifndef __OS2__
+	.def	_ffi_closure_raw_SYSV;	.scl	2;	.type	32;	.endef
+#endif
+_ffi_closure_raw_SYSV:
+.LFB4:
+	pushl	%ebp
+.LCFI6:
+	movl	%esp, %ebp
+.LCFI7:
+	pushl	%esi
+.LCFI8:
+	subl	$36, %esp
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+	movl	%edx, 12(%esp)	/* user_data */
+	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
+.stubraw:
+	movl	%edx, 8(%esp)	/* raw_args */
+	leal	-24(%ebp), %edx
+	movl	%edx, 4(%esp)	/* &res */
+	movl	%esi, (%esp)	/* cif */
+	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
+	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
+0:
+	call	1f
+	// Do not insert anything here between the call and the jump table.
+.Lrcls_store_table:
+	.long	.Lrcls_noretval		/* FFI_TYPE_VOID */
+	.long	.Lrcls_retint		/* FFI_TYPE_INT */
+	.long	.Lrcls_retfloat		/* FFI_TYPE_FLOAT */
+	.long	.Lrcls_retdouble	/* FFI_TYPE_DOUBLE */
+	.long	.Lrcls_retldouble	/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lrcls_retuint8		/* FFI_TYPE_UINT8 */
+	.long	.Lrcls_retsint8		/* FFI_TYPE_SINT8 */
+	.long	.Lrcls_retuint16	/* FFI_TYPE_UINT16 */
+	.long	.Lrcls_retsint16	/* FFI_TYPE_SINT16 */
+	.long	.Lrcls_retint		/* FFI_TYPE_UINT32 */
+	.long	.Lrcls_retint		/* FFI_TYPE_SINT32 */
+	.long	.Lrcls_retllong		/* FFI_TYPE_UINT64 */
+	.long	.Lrcls_retllong		/* FFI_TYPE_SINT64 */
+	.long	.Lrcls_retstruct	/* FFI_TYPE_STRUCT */
+	.long	.Lrcls_retint		/* FFI_TYPE_POINTER */
+	.long	.Lrcls_retstruct1	/* FFI_TYPE_SMALL_STRUCT_1B */
+	.long	.Lrcls_retstruct2	/* FFI_TYPE_SMALL_STRUCT_2B */
+	.long	.Lrcls_retstruct4	/* FFI_TYPE_SMALL_STRUCT_4B */
+	.long	.Lrcls_retstruct	/* FFI_TYPE_MS_STRUCT */
+1:
+	add	%eax, %eax
+	add	%eax, %eax
+	add	(%esp),%eax
+	add	$4, %esp
+	jmp	*(%eax)
+
+	/* Sign/zero extend as appropriate.  */
+.Lrcls_retsint8:
+	movsbl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retsint16:
+	movswl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retuint8:
+	movzbl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retuint16:
+	movzwl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retint:
+	movl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retfloat:
+	flds	-24(%ebp)
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retdouble:
+	fldl	-24(%ebp)
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retldouble:
+	fldt	-24(%ebp)
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retllong:
+	movl	-24(%ebp), %eax
+	movl	-20(%ebp), %edx
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retstruct1:
+	movsbl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retstruct2:
+	movswl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retstruct4:
+	movl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+
+.Lrcls_retstruct:
+	// Nothing to do!
+
+.Lrcls_noretval:
+.Lrcls_epilogue:
+	addl	$36, %esp
+	popl	%esi
+	popl	%ebp
+	ret
+.ffi_closure_raw_SYSV_end:
+.LFE4:
+
+#endif /* !FFI_NO_RAW_API */
+
+        // This assumes we are using gas.
+	.balign	16
+	.globl	_ffi_closure_STDCALL
+#ifndef __OS2__
+	.def	_ffi_closure_STDCALL;	.scl	2;	.type	32;	.endef
+#endif
+_ffi_closure_STDCALL:
+.LFB5:
+	pushl	%ebp
+.LCFI9:
+	movl	%esp, %ebp
+.LCFI10:
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	12(%ebp), %edx  /* account for stub return address on stack */
+	movl	%edx, 4(%esp)	/* args */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+	call	_ffi_closure_SYSV_inner
+	movl	-12(%ebp), %ecx
+0:
+	call	1f
+	// Do not insert anything here between the call and the jump table.
+.Lscls_store_table:
+	.long	.Lscls_noretval		/* FFI_TYPE_VOID */
+	.long	.Lscls_retint		/* FFI_TYPE_INT */
+	.long	.Lscls_retfloat		/* FFI_TYPE_FLOAT */
+	.long	.Lscls_retdouble	/* FFI_TYPE_DOUBLE */
+	.long	.Lscls_retldouble	/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lscls_retuint8		/* FFI_TYPE_UINT8 */
+	.long	.Lscls_retsint8		/* FFI_TYPE_SINT8 */
+	.long	.Lscls_retuint16	/* FFI_TYPE_UINT16 */
+	.long	.Lscls_retsint16	/* FFI_TYPE_SINT16 */
+	.long	.Lscls_retint		/* FFI_TYPE_UINT32 */
+	.long	.Lscls_retint		/* FFI_TYPE_SINT32 */
+	.long	.Lscls_retllong		/* FFI_TYPE_UINT64 */
+	.long	.Lscls_retllong		/* FFI_TYPE_SINT64 */
+	.long	.Lscls_retstruct	/* FFI_TYPE_STRUCT */
+	.long	.Lscls_retint		/* FFI_TYPE_POINTER */
+	.long	.Lscls_retstruct1	/* FFI_TYPE_SMALL_STRUCT_1B */
+	.long	.Lscls_retstruct2	/* FFI_TYPE_SMALL_STRUCT_2B */
+	.long	.Lscls_retstruct4	/* FFI_TYPE_SMALL_STRUCT_4B */
+1:
+	add	%eax, %eax
+	add	%eax, %eax
+	add	(%esp),%eax
+	add	$4, %esp
+	jmp	*(%eax)
+
+	/* Sign/zero extend as appropriate.  */
+.Lscls_retsint8:
+	movsbl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retsint16:
+	movswl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retuint8:
+	movzbl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retuint16:
+	movzwl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retint:
+	movl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retfloat:
+	flds	(%ecx)
+	jmp	.Lscls_epilogue
+
+.Lscls_retdouble:
+	fldl	(%ecx)
+	jmp	.Lscls_epilogue
+
+.Lscls_retldouble:
+	fldt	(%ecx)
+	jmp	.Lscls_epilogue
+
+.Lscls_retllong:
+	movl	(%ecx), %eax
+	movl	4(%ecx), %edx
+	jmp	.Lscls_epilogue
+
+.Lscls_retstruct1:
+	movsbl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retstruct2:
+	movswl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retstruct4:
+	movl	(%ecx), %eax
+	jmp	.Lscls_epilogue
+
+.Lscls_retstruct:
+	// Nothing to do!
+
+.Lscls_noretval:
+.Lscls_epilogue:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+.ffi_closure_STDCALL_end:
+.LFE5:
+
+#ifndef __OS2__
+	.section	.eh_frame,"w"
+#endif
+.Lframe1:
+.LSCIE1:
+	.long	.LECIE1-.LASCIE1  /* Length of Common Information Entry */
+.LASCIE1:
+	.long	0x0	/* CIE Identifier Tag */
+	.byte	0x1	/* CIE Version */
+#ifdef __PIC__
+	.ascii "zR\0"	/* CIE Augmentation */
+#else
+	.ascii "\0"	/* CIE Augmentation */
+#endif
+	.byte	0x1	/* .uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x7c	/* .sleb128 -4; CIE Data Alignment Factor */
+	.byte	0x8	/* CIE RA Column */
+#ifdef __PIC__
+	.byte	0x1	/* .uleb128 0x1; Augmentation size */
+	.byte	0x1b	/* FDE Encoding (pcrel sdata4) */
+#endif
+	.byte	0xc	/* DW_CFA_def_cfa CFA = r4 + 4 = 4(%esp) */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x88	/* DW_CFA_offset, column 0x8 %eip at CFA + 1 * -4 */
+	.byte	0x1	/* .uleb128 0x1 */
+	.align 4
+.LECIE1:
+
+.LSFDE1:
+	.long	.LEFDE1-.LASFDE1	/* FDE Length */
+.LASFDE1:
+	.long	.LASFDE1-.Lframe1	/* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+	.long	.LFB1-.	/* FDE initial location */
+#else
+	.long	.LFB1
+#endif
+	.long	.LFE1-.LFB1	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	/* DW_CFA_xxx CFI instructions go here.  */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI0-.LFB1
+	.byte	0xe	/* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+	.byte	0x2	/* .uleb128 0x2 */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI1-.LCFI0
+	.byte	0xd	/* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+	.byte	0x5	/* .uleb128 0x5 */
+
+	/* End of DW_CFA_xxx CFI instructions.  */
+	.align 4
+.LEFDE1:
+
+
+.LSFDE3:
+	.long	.LEFDE3-.LASFDE3	/* FDE Length */
+.LASFDE3:
+	.long	.LASFDE3-.Lframe1	/* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+	.long	.LFB3-.	/* FDE initial location */
+#else
+	.long	.LFB3
+#endif
+	.long	.LFE3-.LFB3	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	/* DW_CFA_xxx CFI instructions go here.  */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI4-.LFB3
+	.byte	0xe	/* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+	.byte	0x2	/* .uleb128 0x2 */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI5-.LCFI4
+	.byte	0xd	/* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+	.byte	0x5	/* .uleb128 0x5 */
+
+	/* End of DW_CFA_xxx CFI instructions.  */
+	.align 4
+.LEFDE3:
+
+#if !FFI_NO_RAW_API
+
+.LSFDE4:
+	.long	.LEFDE4-.LASFDE4	/* FDE Length */
+.LASFDE4:
+	.long	.LASFDE4-.Lframe1	/* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+	.long	.LFB4-.	/* FDE initial location */
+#else
+	.long	.LFB4
+#endif
+	.long	.LFE4-.LFB4	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	/* DW_CFA_xxx CFI instructions go here.  */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI6-.LFB4
+	.byte	0xe	/* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+	.byte	0x2	/* .uleb128 0x2 */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI7-.LCFI6
+	.byte	0xd	/* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+	.byte	0x5	/* .uleb128 0x5 */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI8-.LCFI7
+	.byte	0x86	/* DW_CFA_offset, column 0x6 %esi at CFA + 3 * -4 */
+	.byte	0x3	/* .uleb128 0x3 */
+
+	/* End of DW_CFA_xxx CFI instructions.  */
+	.align 4
+.LEFDE4:
+
+#endif /* !FFI_NO_RAW_API */
+
+.LSFDE5:
+	.long	.LEFDE5-.LASFDE5	/* FDE Length */
+.LASFDE5:
+	.long	.LASFDE5-.Lframe1	/* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+	.long	.LFB5-.	/* FDE initial location */
+#else
+	.long	.LFB5
+#endif
+	.long	.LFE5-.LFB5	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	/* DW_CFA_xxx CFI instructions go here.  */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI9-.LFB5
+	.byte	0xe	/* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+	.byte	0x2	/* .uleb128 0x2 */
+
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI10-.LCFI9
+	.byte	0xd	/* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+	.byte	0x5	/* .uleb128 0x5 */
+
+	/* End of DW_CFA_xxx CFI instructions.  */
+	.align 4
+.LEFDE5:
diff --git a/lib/wrappers/libffi/gcc/win32_asm.s b/lib/wrappers/libffi/gcc/win32_asm.s
new file mode 100644
index 000000000..7a3e7f16c
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/win32_asm.s
@@ -0,0 +1,736 @@
+# 1 "gcc\\win32_asm.asm"
+# 1 "<command-line>"
+# 1 "gcc\\win32_asm.asm"
+# 33 "gcc\\win32_asm.asm"
+# 1 "common/fficonfig.h" 1
+# 34 "gcc\\win32_asm.asm" 2
+# 1 "common/ffi.h" 1
+# 63 "common/ffi.h"
+# 1 "common/ffitarget.h" 1
+# 64 "common/ffi.h" 2
+# 35 "gcc\\win32_asm.asm" 2
+
+
+ .text
+
+
+        .balign 16
+ .globl _ffi_call_win32
+
+ .def _ffi_call_win32; .scl 2; .type 32; .endef
+
+_ffi_call_win32:
+.LFB1:
+        pushl %ebp
+.LCFI0:
+        movl %esp,%ebp
+.LCFI1:
+
+        movl 20(%ebp),%ecx
+        subl %ecx,%esp
+
+        movl %esp,%eax
+
+
+        pushl 12(%ebp)
+        pushl %eax
+        call *8(%ebp)
+
+
+        addl $8,%esp
+
+
+ cmpl $3, 16(%ebp)
+ jz .do_thiscall
+ cmpl $4, 16(%ebp)
+ jnz .do_fncall
+ movl (%esp), %ecx
+ movl 4(%esp), %edx
+ addl $8, %esp
+ jmp .do_fncall
+.do_thiscall:
+ movl (%esp), %ecx
+ addl $4, %esp
+
+.do_fncall:
+
+
+
+
+        call *32(%ebp)
+
+
+
+
+        movl 24(%ebp),%ecx
+
+
+        cmpl $0,28(%ebp)
+        jne 0f
+
+
+
+        cmpl $2,%ecx
+        jne .Lnoretval
+        fstp %st(0)
+
+        jmp .Lepilogue
+
+0:
+ call 1f
+
+.Lstore_table:
+ .long .Lnoretval
+ .long .Lretint
+ .long .Lretfloat
+ .long .Lretdouble
+ .long .Lretlongdouble
+ .long .Lretuint8
+ .long .Lretsint8
+ .long .Lretuint16
+ .long .Lretsint16
+ .long .Lretint
+ .long .Lretint
+ .long .Lretint64
+ .long .Lretint64
+ .long .Lretstruct
+ .long .Lretint
+ .long .Lretstruct1b
+ .long .Lretstruct2b
+ .long .Lretstruct4b
+ .long .Lretstruct
+1:
+ add %ecx, %ecx
+ add %ecx, %ecx
+ add (%esp),%ecx
+ add $4, %esp
+ jmp *(%ecx)
+
+
+.Lretsint8:
+ movsbl %al, %eax
+ jmp .Lretint
+
+.Lretsint16:
+ movswl %ax, %eax
+ jmp .Lretint
+
+.Lretuint8:
+ movzbl %al, %eax
+ jmp .Lretint
+
+.Lretuint16:
+ movzwl %ax, %eax
+ jmp .Lretint
+
+.Lretint:
+
+        movl 28(%ebp),%ecx
+        movl %eax,0(%ecx)
+        jmp .Lepilogue
+
+.Lretfloat:
+
+        movl 28(%ebp),%ecx
+        fstps (%ecx)
+        jmp .Lepilogue
+
+.Lretdouble:
+
+        movl 28(%ebp),%ecx
+        fstpl (%ecx)
+        jmp .Lepilogue
+
+.Lretlongdouble:
+
+        movl 28(%ebp),%ecx
+        fstpt (%ecx)
+        jmp .Lepilogue
+
+.Lretint64:
+
+        movl 28(%ebp),%ecx
+        movl %eax,0(%ecx)
+        movl %edx,4(%ecx)
+ jmp .Lepilogue
+
+.Lretstruct1b:
+
+        movl 28(%ebp),%ecx
+        movb %al,0(%ecx)
+        jmp .Lepilogue
+
+.Lretstruct2b:
+
+        movl 28(%ebp),%ecx
+        movw %ax,0(%ecx)
+        jmp .Lepilogue
+
+.Lretstruct4b:
+
+        movl 28(%ebp),%ecx
+        movl %eax,0(%ecx)
+        jmp .Lepilogue
+
+.Lretstruct:
+
+
+.Lnoretval:
+.Lepilogue:
+        movl %ebp,%esp
+        popl %ebp
+        ret
+.ffi_call_win32_end:
+        .balign 16
+ .globl _ffi_closure_THISCALL
+
+ .def _ffi_closure_THISCALL; .scl 2; .type 32; .endef
+
+_ffi_closure_THISCALL:
+ pushl %ebp
+ movl %esp, %ebp
+ subl $40, %esp
+ leal -24(%ebp), %edx
+ movl %edx, -12(%ebp)
+ leal 12(%ebp), %edx
+ jmp .stub
+.LFE1:
+
+
+        .balign 16
+ .globl _ffi_closure_SYSV
+
+ .def _ffi_closure_SYSV; .scl 2; .type 32; .endef
+
+_ffi_closure_SYSV:
+.LFB3:
+ pushl %ebp
+.LCFI4:
+ movl %esp, %ebp
+.LCFI5:
+ subl $40, %esp
+ leal -24(%ebp), %edx
+ movl %edx, -12(%ebp)
+ leal 8(%ebp), %edx
+.stub:
+ movl %edx, 4(%esp)
+ leal -12(%ebp), %edx
+ movl %edx, (%esp)
+ call _ffi_closure_SYSV_inner
+ movl -12(%ebp), %ecx
+
+0:
+ call 1f
+
+.Lcls_store_table:
+ .long .Lcls_noretval
+ .long .Lcls_retint
+ .long .Lcls_retfloat
+ .long .Lcls_retdouble
+ .long .Lcls_retldouble
+ .long .Lcls_retuint8
+ .long .Lcls_retsint8
+ .long .Lcls_retuint16
+ .long .Lcls_retsint16
+ .long .Lcls_retint
+ .long .Lcls_retint
+ .long .Lcls_retllong
+ .long .Lcls_retllong
+ .long .Lcls_retstruct
+ .long .Lcls_retint
+ .long .Lcls_retstruct1
+ .long .Lcls_retstruct2
+ .long .Lcls_retstruct4
+ .long .Lcls_retmsstruct
+
+1:
+ add %eax, %eax
+ add %eax, %eax
+ add (%esp),%eax
+ add $4, %esp
+ jmp *(%eax)
+
+
+.Lcls_retsint8:
+ movsbl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retsint16:
+ movswl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retuint8:
+ movzbl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retuint16:
+ movzwl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retint:
+ movl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retfloat:
+ flds (%ecx)
+ jmp .Lcls_epilogue
+
+.Lcls_retdouble:
+ fldl (%ecx)
+ jmp .Lcls_epilogue
+
+.Lcls_retldouble:
+ fldt (%ecx)
+ jmp .Lcls_epilogue
+
+.Lcls_retllong:
+ movl (%ecx), %eax
+ movl 4(%ecx), %edx
+ jmp .Lcls_epilogue
+
+.Lcls_retstruct1:
+ movsbl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retstruct2:
+ movswl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retstruct4:
+ movl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retstruct:
+
+ movl %ebp, %esp
+ popl %ebp
+ ret $0x4
+
+.Lcls_retmsstruct:
+
+ mov %ecx, %eax
+
+ jmp .Lcls_epilogue
+
+.Lcls_noretval:
+.Lcls_epilogue:
+ movl %ebp, %esp
+ popl %ebp
+ ret
+.ffi_closure_SYSV_end:
+.LFE3:
+
+
+
+
+
+
+
+        .balign 16
+ .globl _ffi_closure_raw_THISCALL
+
+ .def _ffi_closure_raw_THISCALL; .scl 2; .type 32; .endef
+
+_ffi_closure_raw_THISCALL:
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %esi
+ subl $36, %esp
+ movl ((52 + 3) & ~3)(%eax), %esi
+ movl ((((52 + 3) & ~3) + 4) + 4)(%eax), %edx
+ movl %edx, 12(%esp)
+ leal 12(%ebp), %edx
+ jmp .stubraw
+
+        .balign 16
+ .globl _ffi_closure_raw_SYSV
+
+ .def _ffi_closure_raw_SYSV; .scl 2; .type 32; .endef
+
+_ffi_closure_raw_SYSV:
+.LFB4:
+ pushl %ebp
+.LCFI6:
+ movl %esp, %ebp
+.LCFI7:
+ pushl %esi
+.LCFI8:
+ subl $36, %esp
+ movl ((52 + 3) & ~3)(%eax), %esi
+ movl ((((52 + 3) & ~3) + 4) + 4)(%eax), %edx
+ movl %edx, 12(%esp)
+ leal 8(%ebp), %edx
+.stubraw:
+ movl %edx, 8(%esp)
+ leal -24(%ebp), %edx
+ movl %edx, 4(%esp)
+ movl %esi, (%esp)
+ call *(((52 + 3) & ~3) + 4)(%eax)
+ movl 20(%esi), %eax
+0:
+ call 1f
+
+.Lrcls_store_table:
+ .long .Lrcls_noretval
+ .long .Lrcls_retint
+ .long .Lrcls_retfloat
+ .long .Lrcls_retdouble
+ .long .Lrcls_retldouble
+ .long .Lrcls_retuint8
+ .long .Lrcls_retsint8
+ .long .Lrcls_retuint16
+ .long .Lrcls_retsint16
+ .long .Lrcls_retint
+ .long .Lrcls_retint
+ .long .Lrcls_retllong
+ .long .Lrcls_retllong
+ .long .Lrcls_retstruct
+ .long .Lrcls_retint
+ .long .Lrcls_retstruct1
+ .long .Lrcls_retstruct2
+ .long .Lrcls_retstruct4
+ .long .Lrcls_retstruct
+1:
+ add %eax, %eax
+ add %eax, %eax
+ add (%esp),%eax
+ add $4, %esp
+ jmp *(%eax)
+
+
+.Lrcls_retsint8:
+ movsbl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retsint16:
+ movswl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retuint8:
+ movzbl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retuint16:
+ movzwl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retint:
+ movl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retfloat:
+ flds -24(%ebp)
+ jmp .Lrcls_epilogue
+
+.Lrcls_retdouble:
+ fldl -24(%ebp)
+ jmp .Lrcls_epilogue
+
+.Lrcls_retldouble:
+ fldt -24(%ebp)
+ jmp .Lrcls_epilogue
+
+.Lrcls_retllong:
+ movl -24(%ebp), %eax
+ movl -20(%ebp), %edx
+ jmp .Lrcls_epilogue
+
+.Lrcls_retstruct1:
+ movsbl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retstruct2:
+ movswl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retstruct4:
+ movl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retstruct:
+
+
+.Lrcls_noretval:
+.Lrcls_epilogue:
+ addl $36, %esp
+ popl %esi
+ popl %ebp
+ ret
+.ffi_closure_raw_SYSV_end:
+.LFE4:
+
+
+
+
+ .balign 16
+ .globl _ffi_closure_STDCALL
+
+ .def _ffi_closure_STDCALL; .scl 2; .type 32; .endef
+
+_ffi_closure_STDCALL:
+.LFB5:
+ pushl %ebp
+.LCFI9:
+ movl %esp, %ebp
+.LCFI10:
+ subl $40, %esp
+ leal -24(%ebp), %edx
+ movl %edx, -12(%ebp)
+ leal 12(%ebp), %edx
+ movl %edx, 4(%esp)
+ leal -12(%ebp), %edx
+ movl %edx, (%esp)
+ call _ffi_closure_SYSV_inner
+ movl -12(%ebp), %ecx
+0:
+ call 1f
+
+.Lscls_store_table:
+ .long .Lscls_noretval
+ .long .Lscls_retint
+ .long .Lscls_retfloat
+ .long .Lscls_retdouble
+ .long .Lscls_retldouble
+ .long .Lscls_retuint8
+ .long .Lscls_retsint8
+ .long .Lscls_retuint16
+ .long .Lscls_retsint16
+ .long .Lscls_retint
+ .long .Lscls_retint
+ .long .Lscls_retllong
+ .long .Lscls_retllong
+ .long .Lscls_retstruct
+ .long .Lscls_retint
+ .long .Lscls_retstruct1
+ .long .Lscls_retstruct2
+ .long .Lscls_retstruct4
+1:
+ add %eax, %eax
+ add %eax, %eax
+ add (%esp),%eax
+ add $4, %esp
+ jmp *(%eax)
+
+
+.Lscls_retsint8:
+ movsbl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retsint16:
+ movswl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retuint8:
+ movzbl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retuint16:
+ movzwl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retint:
+ movl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retfloat:
+ flds (%ecx)
+ jmp .Lscls_epilogue
+
+.Lscls_retdouble:
+ fldl (%ecx)
+ jmp .Lscls_epilogue
+
+.Lscls_retldouble:
+ fldt (%ecx)
+ jmp .Lscls_epilogue
+
+.Lscls_retllong:
+ movl (%ecx), %eax
+ movl 4(%ecx), %edx
+ jmp .Lscls_epilogue
+
+.Lscls_retstruct1:
+ movsbl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retstruct2:
+ movswl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retstruct4:
+ movl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retstruct:
+
+
+.Lscls_noretval:
+.Lscls_epilogue:
+ movl %ebp, %esp
+ popl %ebp
+ ret
+.ffi_closure_STDCALL_end:
+.LFE5:
+
+
+ .section .eh_frame,"w"
+
+.Lframe1:
+.LSCIE1:
+ .long .LECIE1-.LASCIE1
+.LASCIE1:
+ .long 0x0
+ .byte 0x1
+
+
+
+ .ascii "\0"
+
+ .byte 0x1
+ .byte 0x7c
+ .byte 0x8
+
+
+
+
+ .byte 0xc
+ .byte 0x4
+ .byte 0x4
+ .byte 0x88
+ .byte 0x1
+ .align 4
+.LECIE1:
+
+.LSFDE1:
+ .long .LEFDE1-.LASFDE1
+.LASFDE1:
+ .long .LASFDE1-.Lframe1
+
+
+
+ .long .LFB1
+
+ .long .LFE1-.LFB1
+
+
+
+
+
+ .byte 0x4
+ .long .LCFI0-.LFB1
+ .byte 0xe
+ .byte 0x8
+ .byte 0x85
+ .byte 0x2
+
+ .byte 0x4
+ .long .LCFI1-.LCFI0
+ .byte 0xd
+ .byte 0x5
+
+
+ .align 4
+.LEFDE1:
+
+
+.LSFDE3:
+ .long .LEFDE3-.LASFDE3
+.LASFDE3:
+ .long .LASFDE3-.Lframe1
+
+
+
+ .long .LFB3
+
+ .long .LFE3-.LFB3
+
+
+
+
+
+ .byte 0x4
+ .long .LCFI4-.LFB3
+ .byte 0xe
+ .byte 0x8
+ .byte 0x85
+ .byte 0x2
+
+ .byte 0x4
+ .long .LCFI5-.LCFI4
+ .byte 0xd
+ .byte 0x5
+
+
+ .align 4
+.LEFDE3:
+
+
+
+.LSFDE4:
+ .long .LEFDE4-.LASFDE4
+.LASFDE4:
+ .long .LASFDE4-.Lframe1
+
+
+
+ .long .LFB4
+
+ .long .LFE4-.LFB4
+
+
+
+
+
+ .byte 0x4
+ .long .LCFI6-.LFB4
+ .byte 0xe
+ .byte 0x8
+ .byte 0x85
+ .byte 0x2
+
+ .byte 0x4
+ .long .LCFI7-.LCFI6
+ .byte 0xd
+ .byte 0x5
+
+ .byte 0x4
+ .long .LCFI8-.LCFI7
+ .byte 0x86
+ .byte 0x3
+
+
+ .align 4
+.LEFDE4:
+
+
+
+.LSFDE5:
+ .long .LEFDE5-.LASFDE5
+.LASFDE5:
+ .long .LASFDE5-.Lframe1
+
+
+
+ .long .LFB5
+
+ .long .LFE5-.LFB5
+
+
+
+
+
+ .byte 0x4
+ .long .LCFI9-.LFB5
+ .byte 0xe
+ .byte 0x8
+ .byte 0x85
+ .byte 0x2
+
+ .byte 0x4
+ .long .LCFI10-.LCFI9
+ .byte 0xd
+ .byte 0x5
+
+
+ .align 4
+.LEFDE5:
diff --git a/lib/wrappers/libffi/gcc/win64_asm.asm b/lib/wrappers/libffi/gcc/win64_asm.asm
new file mode 100644
index 000000000..1dc98f99a
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/win64_asm.asm
@@ -0,0 +1,467 @@
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+	
+/* Constants for ffi_call_win64 */	
+#define STACK 0
+#define PREP_ARGS_FN 32
+#define ECIF 40
+#define CIF_BYTES 48
+#define CIF_FLAGS 56
+#define RVALUE 64
+#define FN 72
+
+/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *),
+                   extended_cif *ecif, unsigned bytes, unsigned flags,
+                   unsigned *rvalue, void (*fn)());
+ */
+
+#ifdef _MSC_VER
+PUBLIC	ffi_call_win64
+
+EXTRN	__chkstk:NEAR
+EXTRN	ffi_closure_win64_inner:NEAR
+
+_TEXT	SEGMENT
+
+;;; ffi_closure_win64 will be called with these registers set:
+;;;    rax points to 'closure'
+;;;    r11 contains a bit mask that specifies which of the
+;;;    first four parameters are float or double
+;;;
+;;; It must move the parameters passed in registers to their stack location,
+;;; call ffi_closure_win64_inner for the actual work, then return the result.
+;;; 
+ffi_closure_win64 PROC FRAME
+	;; copy register arguments onto stack
+	test	r11, 1
+	jne	first_is_float	
+	mov	QWORD PTR [rsp+8], rcx
+	jmp	second
+first_is_float:
+	movlpd	QWORD PTR [rsp+8], xmm0
+
+second:
+	test	r11, 2
+	jne	second_is_float	
+	mov	QWORD PTR [rsp+16], rdx
+	jmp	third
+second_is_float:
+	movlpd	QWORD PTR [rsp+16], xmm1
+
+third:
+	test	r11, 4
+	jne	third_is_float	
+	mov	QWORD PTR [rsp+24], r8
+	jmp	fourth
+third_is_float:
+	movlpd	QWORD PTR [rsp+24], xmm2
+
+fourth:
+	test	r11, 8
+	jne	fourth_is_float	
+	mov	QWORD PTR [rsp+32], r9
+	jmp	done
+fourth_is_float:
+	movlpd	QWORD PTR [rsp+32], xmm3
+
+done:
+        .ALLOCSTACK 40
+	sub	rsp, 40
+        .ENDPROLOG
+	mov	rcx, rax	; context is first parameter
+	mov	rdx, rsp	; stack is second parameter
+	add	rdx, 48		; point to start of arguments
+	mov	rax, ffi_closure_win64_inner
+	call	rax		; call the real closure function
+	add	rsp, 40
+	movd	xmm0, rax	; If the closure returned a float,
+                                ; ffi_closure_win64_inner wrote it to rax
+	ret	0
+ffi_closure_win64 ENDP
+
+ffi_call_win64 PROC FRAME
+        ;; copy registers onto stack
+	mov	QWORD PTR [rsp+32], r9
+	mov	QWORD PTR [rsp+24], r8
+	mov	QWORD PTR [rsp+16], rdx
+	mov	QWORD PTR [rsp+8], rcx
+        .PUSHREG rbp
+	push	rbp
+        .ALLOCSTACK 48
+	sub	rsp, 48					; 00000030H
+        .SETFRAME rbp, 32
+	lea	rbp, QWORD PTR [rsp+32]
+        .ENDPROLOG
+
+	mov	eax, DWORD PTR CIF_BYTES[rbp]
+	add	rax, 15
+	and	rax, -16
+	call	__chkstk
+	sub	rsp, rax
+	lea	rax, QWORD PTR [rsp+32]
+	mov	QWORD PTR STACK[rbp], rax
+
+	mov	rdx, QWORD PTR ECIF[rbp]
+	mov	rcx, QWORD PTR STACK[rbp]
+	call	QWORD PTR PREP_ARGS_FN[rbp]
+
+	mov	rsp, QWORD PTR STACK[rbp]
+
+	movlpd	xmm3, QWORD PTR [rsp+24]
+	movd	r9, xmm3
+
+	movlpd	xmm2, QWORD PTR [rsp+16]
+	movd	r8, xmm2
+
+	movlpd	xmm1, QWORD PTR [rsp+8]
+	movd	rdx, xmm1
+
+	movlpd	xmm0, QWORD PTR [rsp]
+	movd	rcx, xmm0
+
+	call	QWORD PTR FN[rbp]
+ret_struct4b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B
+ 	jne	ret_struct2b$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	DWORD PTR [rcx], eax
+	jmp	ret_void$
+
+ret_struct2b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B
+ 	jne	ret_struct1b$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	WORD PTR [rcx], ax
+	jmp	ret_void$
+
+ret_struct1b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B
+ 	jne	ret_uint8$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	BYTE PTR [rcx], al
+	jmp	ret_void$
+
+ret_uint8$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8
+ 	jne	ret_sint8$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movzx   rax, al
+	mov	QWORD PTR [rcx], rax
+	jmp	ret_void$
+
+ret_sint8$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8
+ 	jne	ret_uint16$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movsx   rax, al
+	mov	QWORD PTR [rcx], rax
+	jmp	ret_void$
+
+ret_uint16$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16
+ 	jne	ret_sint16$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movzx   rax, ax
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_sint16$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16
+ 	jne	ret_uint32$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movsx   rax, ax
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_uint32$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32
+ 	jne	ret_sint32$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov     eax, eax
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_sint32$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32
+ 	jne	ret_float$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	cdqe
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_float$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT
+ 	jne	SHORT ret_double$
+
+ 	mov	rax, QWORD PTR RVALUE[rbp]
+ 	movss	DWORD PTR [rax], xmm0
+ 	jmp	SHORT ret_void$
+
+ret_double$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE
+ 	jne	SHORT ret_sint64$
+
+ 	mov	rax, QWORD PTR RVALUE[rbp]
+ 	movlpd	QWORD PTR [rax], xmm0
+ 	jmp	SHORT ret_void$
+
+ret_sint64$:
+  	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64
+  	jne	ret_void$
+
+ 	mov	rcx, QWORD PTR RVALUE[rbp]
+ 	mov	QWORD PTR [rcx], rax
+ 	jmp	SHORT ret_void$
+	
+ret_void$:
+	xor	rax, rax
+
+	lea	rsp, QWORD PTR [rbp+16]
+	pop	rbp
+	ret	0
+ffi_call_win64 ENDP
+_TEXT	ENDS
+END
+
+#else
+
+#ifdef SYMBOL_UNDERSCORE
+#define SYMBOL_NAME(name) _##name
+#else
+#define SYMBOL_NAME(name) name
+#endif
+
+.text
+
+.extern SYMBOL_NAME(ffi_closure_win64_inner)
+
+// ffi_closure_win64 will be called with these registers set:
+//    rax points to 'closure'
+//    r11 contains a bit mask that specifies which of the
+//    first four parameters are float or double
+// // It must move the parameters passed in registers to their stack location,
+// call ffi_closure_win64_inner for the actual work, then return the result.
+// 
+	.balign 16
+        .globl SYMBOL_NAME(ffi_closure_win64)
+SYMBOL_NAME(ffi_closure_win64):
+	// copy register arguments onto stack
+	test	$1,%r11
+	jne	.Lfirst_is_float	
+	mov	%rcx, 8(%rsp)
+	jmp	.Lsecond
+.Lfirst_is_float:
+	movlpd	%xmm0, 8(%rsp)
+
+.Lsecond:
+	test	$2, %r11
+	jne	.Lsecond_is_float	
+	mov	%rdx, 16(%rsp)
+	jmp	.Lthird
+.Lsecond_is_float:
+	movlpd	%xmm1, 16(%rsp)
+
+.Lthird:
+	test	$4, %r11
+	jne	.Lthird_is_float	
+	mov	%r8,24(%rsp)
+	jmp	.Lfourth
+.Lthird_is_float:
+	movlpd	%xmm2, 24(%rsp)
+
+.Lfourth:
+	test	$8, %r11
+	jne	.Lfourth_is_float	
+	mov	%r9, 32(%rsp)
+	jmp	.Ldone
+.Lfourth_is_float:
+	movlpd	%xmm3, 32(%rsp)
+
+.Ldone:
+// ALLOCSTACK 40
+	sub	$40, %rsp
+// ENDPROLOG
+	mov	%rax, %rcx	// context is first parameter
+	mov	%rsp, %rdx	// stack is second parameter
+	add	$48, %rdx	// point to start of arguments
+	mov	$SYMBOL_NAME(ffi_closure_win64_inner), %rax
+	callq	*%rax		// call the real closure function
+	add	$40, %rsp
+	movq	%rax, %xmm0	// If the closure returned a float,
+                                // ffi_closure_win64_inner wrote it to rax
+	retq
+.ffi_closure_win64_end:
+
+	.balign 16
+        .globl	SYMBOL_NAME(ffi_call_win64)
+SYMBOL_NAME(ffi_call_win64):
+        // copy registers onto stack
+	mov	%r9,32(%rsp)
+	mov	%r8,24(%rsp)
+	mov	%rdx,16(%rsp)
+	mov	%rcx,8(%rsp)
+        // PUSHREG rbp
+	push	%rbp
+        // ALLOCSTACK 48
+	sub	$48,%rsp
+        // SETFRAME rbp, 32
+	lea	32(%rsp),%rbp
+        // ENDPROLOG
+
+	mov	CIF_BYTES(%rbp),%eax
+	add	$15, %rax
+	and	$-16, %rax
+	cmpq	$0x1000, %rax
+	jb	Lch_done
+Lch_probe:
+	subq	$0x1000,%rsp
+	orl	$0x0, (%rsp)
+	subq	$0x1000,%rax
+	cmpq	$0x1000,%rax
+	ja	Lch_probe
+Lch_done:
+	subq	%rax, %rsp
+	orl	$0x0, (%rsp)
+	lea	32(%rsp), %rax
+	mov	%rax, STACK(%rbp)
+
+	mov	ECIF(%rbp), %rdx
+	mov	STACK(%rbp), %rcx
+	callq	*PREP_ARGS_FN(%rbp)
+
+	mov	STACK(%rbp), %rsp
+
+	movlpd	24(%rsp), %xmm3
+	movd	%xmm3, %r9
+
+	movlpd	16(%rsp), %xmm2
+	movd	%xmm2, %r8
+
+	movlpd	8(%rsp), %xmm1
+	movd	%xmm1, %rdx
+
+	movlpd	(%rsp), %xmm0
+	movd	%xmm0, %rcx
+
+	callq	*FN(%rbp)
+.Lret_struct4b:
+ 	cmpl	$FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp)
+ 	jne .Lret_struct2b
+
+	mov	RVALUE(%rbp), %rcx
+	mov	%eax, (%rcx)
+	jmp	.Lret_void
+
+.Lret_struct2b:
+	cmpl	$FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp)
+	jne .Lret_struct1b
+	
+	mov	RVALUE(%rbp), %rcx
+	mov	%ax, (%rcx)
+	jmp .Lret_void
+	
+.Lret_struct1b:
+	cmpl	$FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp)
+	jne .Lret_uint8
+	
+	mov	RVALUE(%rbp), %rcx
+	mov	%al, (%rcx)
+	jmp .Lret_void
+
+.Lret_uint8:
+	cmpl	$FFI_TYPE_UINT8, CIF_FLAGS(%rbp)
+	jne .Lret_sint8
+	
+        mov     RVALUE(%rbp), %rcx
+        movzbq  %al, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint8:
+	cmpl	$FFI_TYPE_SINT8, CIF_FLAGS(%rbp)
+	jne .Lret_uint16
+	
+        mov     RVALUE(%rbp), %rcx
+        movsbq  %al, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_uint16:
+	cmpl	$FFI_TYPE_UINT16, CIF_FLAGS(%rbp)
+	jne .Lret_sint16
+	
+        mov     RVALUE(%rbp), %rcx
+        movzwq  %ax, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint16:
+	cmpl	$FFI_TYPE_SINT16, CIF_FLAGS(%rbp)
+	jne .Lret_uint32
+	
+        mov     RVALUE(%rbp), %rcx
+        movswq  %ax, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_uint32:
+	cmpl	$FFI_TYPE_UINT32, CIF_FLAGS(%rbp)
+	jne .Lret_sint32
+	
+        mov     RVALUE(%rbp), %rcx
+        movl    %eax, %eax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint32:
+ 	cmpl	$FFI_TYPE_SINT32, CIF_FLAGS(%rbp)
+ 	jne	.Lret_float
+
+	mov	RVALUE(%rbp), %rcx
+	cltq
+	movq	%rax, (%rcx)
+	jmp	.Lret_void
+
+.Lret_float:
+ 	cmpl	$FFI_TYPE_FLOAT, CIF_FLAGS(%rbp)
+ 	jne	.Lret_double
+
+ 	mov	RVALUE(%rbp), %rax
+ 	movss	%xmm0, (%rax)
+ 	jmp	.Lret_void
+
+.Lret_double:
+ 	cmpl	$FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp)
+ 	jne	.Lret_sint64
+
+ 	mov	RVALUE(%rbp), %rax
+ 	movlpd	%xmm0, (%rax)
+ 	jmp	.Lret_void
+
+.Lret_sint64:
+  	cmpl	$FFI_TYPE_SINT64, CIF_FLAGS(%rbp)
+  	jne	.Lret_void
+
+ 	mov	RVALUE(%rbp), %rcx
+ 	mov	%rax, (%rcx)
+ 	jmp	.Lret_void
+	
+.Lret_void:
+	xor	%rax, %rax
+
+	lea	16(%rbp), %rsp
+	pop	%rbp
+	retq
+.ffi_call_win64_end:
+#endif /* !_MSC_VER */
+
diff --git a/lib/wrappers/libffi/gcc/win64_asm.s b/lib/wrappers/libffi/gcc/win64_asm.s
new file mode 100644
index 000000000..f2c2df10d
--- /dev/null
+++ b/lib/wrappers/libffi/gcc/win64_asm.s
@@ -0,0 +1,227 @@
+# 1 "gcc\\win64_asm.asm"
+# 1 "<command-line>"
+# 1 "gcc\\win64_asm.asm"
+
+# 1 "common/fficonfig.h" 1
+# 3 "gcc\\win64_asm.asm" 2
+# 1 "common/ffi.h" 1
+# 63 "common/ffi.h"
+# 1 "common/ffitarget.h" 1
+# 64 "common/ffi.h" 2
+# 4 "gcc\\win64_asm.asm" 2
+# 244 "gcc\\win64_asm.asm"
+.text
+
+.extern ffi_closure_win64_inner
+# 255 "gcc\\win64_asm.asm"
+ .balign 16
+        .globl ffi_closure_win64
+ffi_closure_win64:
+
+ test $1,%r11
+ jne .Lfirst_is_float
+ mov %rcx, 8(%rsp)
+ jmp .Lsecond
+.Lfirst_is_float:
+ movlpd %xmm0, 8(%rsp)
+
+.Lsecond:
+ test $2, %r11
+ jne .Lsecond_is_float
+ mov %rdx, 16(%rsp)
+ jmp .Lthird
+.Lsecond_is_float:
+ movlpd %xmm1, 16(%rsp)
+
+.Lthird:
+ test $4, %r11
+ jne .Lthird_is_float
+ mov %r8,24(%rsp)
+ jmp .Lfourth
+.Lthird_is_float:
+ movlpd %xmm2, 24(%rsp)
+
+.Lfourth:
+ test $8, %r11
+ jne .Lfourth_is_float
+ mov %r9, 32(%rsp)
+ jmp .Ldone
+.Lfourth_is_float:
+ movlpd %xmm3, 32(%rsp)
+
+.Ldone:
+
+ sub $40, %rsp
+
+ mov %rax, %rcx
+ mov %rsp, %rdx
+ add $48, %rdx
+ mov $SYMBOL_NAME(ffi_closure_win64_inner), %rax
+ callq *%rax
+ add $40, %rsp
+ movq %rax, %xmm0
+
+ retq
+.ffi_closure_win64_end:
+
+ .balign 16
+        .globl ffi_call_win64
+ffi_call_win64:
+
+ mov %r9,32(%rsp)
+ mov %r8,24(%rsp)
+ mov %rdx,16(%rsp)
+ mov %rcx,8(%rsp)
+
+ push %rbp
+
+ sub $48,%rsp
+
+ lea 32(%rsp),%rbp
+
+
+ mov 48(%rbp),%eax
+ add $15, %rax
+ and $-16, %rax
+ cmpq $0x1000, %rax
+ jb Lch_done
+Lch_probe:
+ subq $0x1000,%rsp
+ orl $0x0, (%rsp)
+ subq $0x1000,%rax
+ cmpq $0x1000,%rax
+ ja Lch_probe
+Lch_done:
+ subq %rax, %rsp
+ orl $0x0, (%rsp)
+ lea 32(%rsp), %rax
+ mov %rax, 0(%rbp)
+
+ mov 40(%rbp), %rdx
+ mov 0(%rbp), %rcx
+ callq *32(%rbp)
+
+ mov 0(%rbp), %rsp
+
+ movlpd 24(%rsp), %xmm3
+ movd %xmm3, %r9
+
+ movlpd 16(%rsp), %xmm2
+ movd %xmm2, %r8
+
+ movlpd 8(%rsp), %xmm1
+ movd %xmm1, %rdx
+
+ movlpd (%rsp), %xmm0
+ movd %xmm0, %rcx
+
+ callq *72(%rbp)
+.Lret_struct4b:
+  cmpl $FFI_TYPE_SMALL_STRUCT_4B, 56(%rbp)
+  jne .Lret_struct2b
+
+ mov 64(%rbp), %rcx
+ mov %eax, (%rcx)
+ jmp .Lret_void
+
+.Lret_struct2b:
+ cmpl $FFI_TYPE_SMALL_STRUCT_2B, 56(%rbp)
+ jne .Lret_struct1b
+
+ mov 64(%rbp), %rcx
+ mov %ax, (%rcx)
+ jmp .Lret_void
+
+.Lret_struct1b:
+ cmpl $FFI_TYPE_SMALL_STRUCT_1B, 56(%rbp)
+ jne .Lret_uint8
+
+ mov 64(%rbp), %rcx
+ mov %al, (%rcx)
+ jmp .Lret_void
+
+.Lret_uint8:
+ cmpl $FFI_TYPE_UINT8, 56(%rbp)
+ jne .Lret_sint8
+
+        mov 64(%rbp), %rcx
+        movzbq %al, %rax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_sint8:
+ cmpl $FFI_TYPE_SINT8, 56(%rbp)
+ jne .Lret_uint16
+
+        mov 64(%rbp), %rcx
+        movsbq %al, %rax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_uint16:
+ cmpl $FFI_TYPE_UINT16, 56(%rbp)
+ jne .Lret_sint16
+
+        mov 64(%rbp), %rcx
+        movzwq %ax, %rax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_sint16:
+ cmpl $FFI_TYPE_SINT16, 56(%rbp)
+ jne .Lret_uint32
+
+        mov 64(%rbp), %rcx
+        movswq %ax, %rax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_uint32:
+ cmpl $9, 56(%rbp)
+ jne .Lret_sint32
+
+        mov 64(%rbp), %rcx
+        movl %eax, %eax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_sint32:
+  cmpl $10, 56(%rbp)
+  jne .Lret_float
+
+ mov 64(%rbp), %rcx
+ cltq
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_float:
+  cmpl $2, 56(%rbp)
+  jne .Lret_double
+
+  mov 64(%rbp), %rax
+  movss %xmm0, (%rax)
+  jmp .Lret_void
+
+.Lret_double:
+  cmpl $3, 56(%rbp)
+  jne .Lret_sint64
+
+  mov 64(%rbp), %rax
+  movlpd %xmm0, (%rax)
+  jmp .Lret_void
+
+.Lret_sint64:
+   cmpl $12, 56(%rbp)
+   jne .Lret_void
+
+  mov 64(%rbp), %rcx
+  mov %rax, (%rcx)
+  jmp .Lret_void
+
+.Lret_void:
+ xor %rax, %rax
+
+ lea 16(%rbp), %rsp
+ pop %rbp
+ retq
+.ffi_call_win64_end:
diff --git a/lib/wrappers/libffi.nim b/lib/wrappers/libffi/libffi.nim
index 514ce024f..1b6130103 100644
--- a/lib/wrappers/libffi.nim
+++ b/lib/wrappers/libffi/libffi.nim
@@ -26,12 +26,36 @@
 
 {.deadCodeElim: on.}
 
-when defined(windows): 
-  const libffidll* = "libffi.dll"
-elif defined(macosx): 
-  const libffidll* = "libffi.dylib"
-else: 
-  const libffidll* = "libffi.so"
+when defined(windows):
+  # on Windows we don't use a DLL but instead embed libffi directly:
+  {.pragma: mylib, header: r"ffi.h".}
+
+  #{.compile: r"common\malloc_closure.c".}
+  {.compile: r"common\raw_api.c".}
+  when defined(vcc):
+    {.compile: r"msvc\ffi.c".}
+    {.compile: r"msvc\prep_cif.c".}
+    {.compile: r"msvc\win32.c".}
+    {.compile: r"msvc\types.c".}
+    when defined(cpu64):
+      {.compile: r"msvc\win64_asm.asm".}
+    else:
+      {.compile: r"msvc\win32_asm.asm".}
+  else:
+    {.compile: r"gcc\ffi.c".}
+    {.compile: r"gcc\prep_cif.c".}
+    {.compile: r"gcc\types.c".}
+    {.compile: r"gcc\closures.c".}
+    when defined(cpu64):
+      {.compile: r"gcc\ffi64.c".}
+      {.compile: r"gcc\win64_asm.S".}
+    else:
+      {.compile: r"gcc\win32_asm.S".}
+
+elif defined(macosx):
+  {.pragma: mylib, dynlib: "libffi.dylib".}
+else:
+  {.pragma: mylib, dynlib: "libffi.so".}
 
 type
   TArg* = int
@@ -88,19 +112,19 @@ type
     elements*: ptr ptr TType
 
 var
-  type_void* {.importc: "ffi_type_void", dynlib: libffidll.}: TType
-  type_uint8* {.importc: "ffi_type_uint8", dynlib: libffidll.}: TType
-  type_sint8* {.importc: "ffi_type_sint8", dynlib: libffidll.}: TType
-  type_uint16* {.importc: "ffi_type_uint16", dynlib: libffidll.}: TType
-  type_sint16* {.importc: "ffi_type_sint16", dynlib: libffidll.}: TType
-  type_uint32* {.importc: "ffi_type_uint32", dynlib: libffidll.}: TType
-  type_sint32* {.importc: "ffi_type_sint32", dynlib: libffidll.}: TType
-  type_uint64* {.importc: "ffi_type_uint64", dynlib: libffidll.}: TType
-  type_sint64* {.importc: "ffi_type_sint64", dynlib: libffidll.}: TType
-  type_float* {.importc: "ffi_type_float", dynlib: libffidll.}: TType
-  type_double* {.importc: "ffi_type_double", dynlib: libffidll.}: TType
-  type_pointer* {.importc: "ffi_type_pointer", dynlib: libffidll.}: TType
-  type_longdouble* {.importc: "ffi_type_longdouble", dynlib: libffidll.}: TType
+  type_void* {.importc: "ffi_type_void", mylib.}: TType
+  type_uint8* {.importc: "ffi_type_uint8", mylib.}: TType
+  type_sint8* {.importc: "ffi_type_sint8", mylib.}: TType
+  type_uint16* {.importc: "ffi_type_uint16", mylib.}: TType
+  type_sint16* {.importc: "ffi_type_sint16", mylib.}: TType
+  type_uint32* {.importc: "ffi_type_uint32", mylib.}: TType
+  type_sint32* {.importc: "ffi_type_sint32", mylib.}: TType
+  type_uint64* {.importc: "ffi_type_uint64", mylib.}: TType
+  type_sint64* {.importc: "ffi_type_sint64", mylib.}: TType
+  type_float* {.importc: "ffi_type_float", mylib.}: TType
+  type_double* {.importc: "ffi_type_double", mylib.}: TType
+  type_pointer* {.importc: "ffi_type_pointer", mylib.}: TType
+  type_longdouble* {.importc: "ffi_type_longdouble", mylib.}: TType
 
 type 
   Tstatus* {.size: sizeof(cint).} = enum 
@@ -119,20 +143,18 @@ type
     sint*: TSArg
 
 proc raw_call*(cif: var Tcif; fn: proc () {.cdecl.}; rvalue: pointer; 
-               avalue: ptr TRaw) {.cdecl, importc: "ffi_raw_call", 
-                                   dynlib: libffidll.}
+               avalue: ptr TRaw) {.cdecl, importc: "ffi_raw_call", mylib.}
 proc ptrarray_to_raw*(cif: var Tcif; args: ptr pointer; raw: ptr TRaw) {.cdecl, 
-    importc: "ffi_ptrarray_to_raw", dynlib: libffidll.}
+    importc: "ffi_ptrarray_to_raw", mylib.}
 proc raw_to_ptrarray*(cif: var Tcif; raw: ptr TRaw; args: ptr pointer) {.cdecl, 
-    importc: "ffi_raw_to_ptrarray", dynlib: libffidll.}
-proc raw_size*(cif: var Tcif): int {.cdecl, importc: "ffi_raw_size", 
-                                     dynlib: libffidll.}
+    importc: "ffi_raw_to_ptrarray", mylib.}
+proc raw_size*(cif: var Tcif): int {.cdecl, importc: "ffi_raw_size", mylib.}
 
 proc prep_cif*(cif: var Tcif; abi: TABI; nargs: cuint; rtype: ptr TType; 
                atypes: ptr ptr TType): TStatus {.cdecl, importc: "ffi_prep_cif", 
-    dynlib: libffidll.}
+    mylib.}
 proc call*(cif: var Tcif; fn: proc () {.cdecl.}; rvalue: pointer; 
-           avalue: ptr pointer) {.cdecl, importc: "ffi_call", dynlib: libffidll.}
+           avalue: ptr pointer) {.cdecl, importc: "ffi_call", mylib.}
 
 # the same with an easier interface:
 type
@@ -141,9 +163,9 @@ type
 
 proc prep_cif*(cif: var Tcif; abi: TABI; nargs: cuint; rtype: ptr TType; 
                atypes: TParamList): TStatus {.cdecl, importc: "ffi_prep_cif",
-    dynlib: libffidll.}
+    mylib.}
 proc call*(cif: var Tcif; fn, rvalue: pointer;
-           avalue: TArgList) {.cdecl, importc: "ffi_call", dynlib: libffidll.}
+           avalue: TArgList) {.cdecl, importc: "ffi_call", mylib.}
 
 # Useful for eliminating compiler warnings 
 ##define FFI_FN(f) ((void (*)(void))f)
diff --git a/lib/wrappers/libffi/msvc/ffi.c b/lib/wrappers/libffi/msvc/ffi.c
new file mode 100644
index 000000000..6e595e9fe
--- /dev/null
+++ b/lib/wrappers/libffi/msvc/ffi.c
@@ -0,0 +1,457 @@
+/* -----------------------------------------------------------------------
+   ffi.c - Copyright (c) 1996, 1998, 1999, 2001  Red Hat, Inc.
+           Copyright (c) 2002  Ranjit Mathew
+           Copyright (c) 2002  Bo Thorsen
+           Copyright (c) 2002  Roger Sayle
+   
+   x86 Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+/* ffi_prep_args is called by the assembly routine once stack space
+   has been allocated for the function's arguments */
+
+extern void Py_FatalError(const char *msg);
+
+/*@-exportheader@*/
+void ffi_prep_args(char *stack, extended_cif *ecif)
+/*@=exportheader@*/
+{
+  register unsigned int i;
+  register void **p_argv;
+  register char *argp;
+  register ffi_type **p_arg;
+
+  argp = stack;
+  if (ecif->cif->rtype->type == FFI_TYPE_STRUCT)
+    {
+      *(void **) argp = ecif->rvalue;
+      argp += sizeof(void *);
+    }
+
+  p_argv = ecif->avalue;
+
+  for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
+       i != 0;
+       i--, p_arg++)
+    {
+      size_t z;
+
+      /* Align if necessary */
+      if ((sizeof(void *) - 1) & (size_t) argp)
+	argp = (char *) ALIGN(argp, sizeof(void *));
+
+      z = (*p_arg)->size;
+      if (z < sizeof(int))
+	{
+	  z = sizeof(int);
+	  switch ((*p_arg)->type)
+	    {
+	    case FFI_TYPE_SINT8:
+	      *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv);
+	      break;
+
+	    case FFI_TYPE_UINT8:
+	      *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv);
+	      break;
+
+	    case FFI_TYPE_SINT16:
+	      *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv);
+	      break;
+
+	    case FFI_TYPE_UINT16:
+	      *(unsigned int *) argp = (unsigned int)*(UINT16 *)(* p_argv);
+	      break;
+
+	    case FFI_TYPE_SINT32:
+	      *(signed int *) argp = (signed int)*(SINT32 *)(* p_argv);
+	      break;
+
+	    case FFI_TYPE_UINT32:
+	      *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv);
+	      break;
+
+	    case FFI_TYPE_STRUCT:
+	      *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv);
+	      break;
+
+	    default:
+	      FFI_ASSERT(0);
+	    }
+	}
+      else
+	{
+	  memcpy(argp, *p_argv, z);
+	}
+      p_argv++;
+      argp += z;
+    }
+
+  if (argp >= stack && (unsigned)(argp - stack) > ecif->cif->bytes) 
+    {
+      Py_FatalError("FFI BUG: not enough stack space for arguments");
+    }
+  return;
+}
+
+/* Perform machine dependent cif processing */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+  /* Set the return type flag */
+  switch (cif->rtype->type)
+    {
+    case FFI_TYPE_VOID:
+    case FFI_TYPE_STRUCT:
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_DOUBLE:
+    case FFI_TYPE_LONGDOUBLE:
+      cif->flags = (unsigned) cif->rtype->type;
+      break;
+
+    case FFI_TYPE_UINT64:
+#ifdef _WIN64
+    case FFI_TYPE_POINTER:
+#endif
+      cif->flags = FFI_TYPE_SINT64;
+      break;
+
+    default:
+      cif->flags = FFI_TYPE_INT;
+      break;
+    }
+
+  return FFI_OK;
+}
+
+#ifdef _WIN32
+extern int
+ffi_call_x86(void (*)(char *, extended_cif *), 
+	     /*@out@*/ extended_cif *, 
+	     unsigned, unsigned, 
+	     /*@out@*/ unsigned *, 
+	     void (*fn)());
+#endif
+
+#ifdef _WIN64
+extern int
+ffi_call_AMD64(void (*)(char *, extended_cif *),
+		 /*@out@*/ extended_cif *,
+		 unsigned, unsigned,
+		 /*@out@*/ unsigned *,
+		 void (*fn)());
+#endif
+
+int
+ffi_call(/*@dependent@*/ ffi_cif *cif, 
+	 void (*fn)(), 
+	 /*@out@*/ void *rvalue, 
+	 /*@dependent@*/ void **avalue)
+{
+  extended_cif ecif;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+  
+  /* If the return value is a struct and we don't have a return	*/
+  /* value address then we need to make one		        */
+
+  if ((rvalue == NULL) && 
+      (cif->rtype->type == FFI_TYPE_STRUCT))
+    {
+      /*@-sysunrecog@*/
+      ecif.rvalue = alloca(cif->rtype->size);
+      /*@=sysunrecog@*/
+    }
+  else
+    ecif.rvalue = rvalue;
+    
+  
+  switch (cif->abi) 
+    {
+#if !defined(_WIN64)
+    case FFI_SYSV:
+    case FFI_STDCALL:
+      return ffi_call_x86(ffi_prep_args, &ecif, cif->bytes, 
+			  cif->flags, ecif.rvalue, fn);
+      break;
+#else
+    case FFI_SYSV:
+      /*@-usedef@*/
+      /* Function call needs at least 40 bytes stack size, on win64 AMD64 */
+      return ffi_call_AMD64(ffi_prep_args, &ecif, cif->bytes ? cif->bytes : 40,
+			   cif->flags, ecif.rvalue, fn);
+      /*@=usedef@*/
+      break;
+#endif
+
+    default:
+      FFI_ASSERT(0);
+      break;
+    }
+  return -1; /* theller: Hrm. */
+}
+
+
+/** private members **/
+
+static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
+					  void** args, ffi_cif* cif);
+/* This function is jumped to by the trampoline */
+
+#ifdef _WIN64
+void *
+#else
+static void __fastcall
+#endif
+ffi_closure_SYSV (ffi_closure *closure, int *argp)
+{
+  // this is our return value storage
+  long double    res;
+
+  // our various things...
+  ffi_cif       *cif;
+  void         **arg_area;
+  unsigned short rtype;
+  void          *resp = (void*)&res;
+  void *args = &argp[1];
+
+  cif         = closure->cif;
+  arg_area    = (void**) alloca (cif->nargs * sizeof (void*));  
+
+  /* this call will initialize ARG_AREA, such that each
+   * element in that array points to the corresponding 
+   * value on the stack; and if the function returns
+   * a structure, it will re-set RESP to point to the
+   * structure return address.  */
+
+  ffi_prep_incoming_args_SYSV(args, (void**)&resp, arg_area, cif);
+  
+  (closure->fun) (cif, resp, arg_area, closure->user_data);
+
+  rtype = cif->flags;
+
+#if defined(_WIN32) && !defined(_WIN64)
+#ifdef _MSC_VER
+  /* now, do a generic return based on the value of rtype */
+  if (rtype == FFI_TYPE_INT)
+    {
+	    _asm mov eax, resp ;
+	    _asm mov eax, [eax] ;
+    }
+  else if (rtype == FFI_TYPE_FLOAT)
+    {
+	    _asm mov eax, resp ;
+	    _asm fld DWORD PTR [eax] ;
+//      asm ("flds (%0)" : : "r" (resp) : "st" );
+    }
+  else if (rtype == FFI_TYPE_DOUBLE)
+    {
+	    _asm mov eax, resp ;
+	    _asm fld QWORD PTR [eax] ;
+//      asm ("fldl (%0)" : : "r" (resp) : "st", "st(1)" );
+    }
+  else if (rtype == FFI_TYPE_LONGDOUBLE)
+    {
+//      asm ("fldt (%0)" : : "r" (resp) : "st", "st(1)" );
+    }
+  else if (rtype == FFI_TYPE_SINT64)
+    {
+	    _asm mov edx, resp ;
+	    _asm mov eax, [edx] ;
+	    _asm mov edx, [edx + 4] ;
+//      asm ("movl 0(%0),%%eax;"
+//	   "movl 4(%0),%%edx" 
+//	   : : "r"(resp)
+//	   : "eax", "edx");
+    }
+#else
+  /* now, do a generic return based on the value of rtype */
+  if (rtype == FFI_TYPE_INT)
+    {
+      asm ("movl (%0),%%eax" : : "r" (resp) : "eax");
+    }
+  else if (rtype == FFI_TYPE_FLOAT)
+    {
+      asm ("flds (%0)" : : "r" (resp) : "st" );
+    }
+  else if (rtype == FFI_TYPE_DOUBLE)
+    {
+      asm ("fldl (%0)" : : "r" (resp) : "st", "st(1)" );
+    }
+  else if (rtype == FFI_TYPE_LONGDOUBLE)
+    {
+      asm ("fldt (%0)" : : "r" (resp) : "st", "st(1)" );
+    }
+  else if (rtype == FFI_TYPE_SINT64)
+    {
+      asm ("movl 0(%0),%%eax;"
+	   "movl 4(%0),%%edx" 
+	   : : "r"(resp)
+	   : "eax", "edx");
+    }
+#endif
+#endif
+
+#ifdef _WIN64
+  /* The result is returned in rax.  This does the right thing for
+     result types except for floats; we have to 'mov xmm0, rax' in the
+     caller to correct this.
+  */
+  return *(void **)resp;
+#endif
+}
+
+/*@-exportheader@*/
+static void 
+ffi_prep_incoming_args_SYSV(char *stack, void **rvalue,
+			    void **avalue, ffi_cif *cif)
+/*@=exportheader@*/
+{
+  register unsigned int i;
+  register void **p_argv;
+  register char *argp;
+  register ffi_type **p_arg;
+
+  argp = stack;
+
+  if ( cif->rtype->type == FFI_TYPE_STRUCT ) {
+    *rvalue = *(void **) argp;
+    argp += 4;
+  }
+
+  p_argv = avalue;
+
+  for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
+    {
+      size_t z;
+
+      /* Align if necessary */
+      if ((sizeof(char *) - 1) & (size_t) argp) {
+	argp = (char *) ALIGN(argp, sizeof(char*));
+      }
+
+      z = (*p_arg)->size;
+
+      /* because we're little endian, this is what it turns into.   */
+
+      *p_argv = (void*) argp;
+
+      p_argv++;
+      argp += z;
+    }
+  
+  return;
+}
+
+/* the cif must already be prep'ed */
+extern void ffi_closure_OUTER();
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+					  ffi_cif* cif,
+					  void (*fun)(ffi_cif*,void*,void**,void*),
+					  void *user_data,
+					  void *codeloc)
+{
+  short bytes;
+  char *tramp;
+#ifdef _WIN64
+  int mask = 0;
+#endif
+  FFI_ASSERT (cif->abi == FFI_SYSV);
+  
+  if (cif->abi == FFI_SYSV)
+    bytes = 0;
+#if !defined(_WIN64)
+  else if (cif->abi == FFI_STDCALL)
+    bytes = cif->bytes;
+#endif
+  else
+    return FFI_BAD_ABI;
+
+  tramp = &closure->tramp[0];
+
+#define BYTES(text) memcpy(tramp, text, sizeof(text)), tramp += sizeof(text)-1
+#define POINTER(x) *(void**)tramp = (void*)(x), tramp += sizeof(void*)
+#define SHORT(x) *(short*)tramp = x, tramp += sizeof(short)
+#define INT(x) *(int*)tramp = x, tramp += sizeof(int)
+
+#ifdef _WIN64
+  if (cif->nargs >= 1 &&
+      (cif->arg_types[0]->type == FFI_TYPE_FLOAT
+       || cif->arg_types[0]->type == FFI_TYPE_DOUBLE))
+    mask |= 1;
+  if (cif->nargs >= 2 &&
+      (cif->arg_types[1]->type == FFI_TYPE_FLOAT
+       || cif->arg_types[1]->type == FFI_TYPE_DOUBLE))
+    mask |= 2;
+  if (cif->nargs >= 3 &&
+      (cif->arg_types[2]->type == FFI_TYPE_FLOAT
+       || cif->arg_types[2]->type == FFI_TYPE_DOUBLE))
+    mask |= 4;
+  if (cif->nargs >= 4 &&
+      (cif->arg_types[3]->type == FFI_TYPE_FLOAT
+       || cif->arg_types[3]->type == FFI_TYPE_DOUBLE))
+    mask |= 8;
+
+  /* 41 BB ----         mov         r11d,mask */
+  BYTES("\x41\xBB"); INT(mask);
+
+  /* 48 B8 --------     mov         rax, closure			*/
+  BYTES("\x48\xB8"); POINTER(closure);
+
+  /* 49 BA --------     mov         r10, ffi_closure_OUTER */
+  BYTES("\x49\xBA"); POINTER(ffi_closure_OUTER);
+
+  /* 41 FF E2           jmp         r10 */
+  BYTES("\x41\xFF\xE2");
+
+#else
+
+  /* mov ecx, closure */
+  BYTES("\xb9"); POINTER(closure);
+
+  /* mov edx, esp */
+  BYTES("\x8b\xd4");
+
+  /* call ffi_closure_SYSV */
+  BYTES("\xe8"); POINTER((char*)&ffi_closure_SYSV - (tramp + 4));
+
+  /* ret bytes */
+  BYTES("\xc2");
+  SHORT(bytes);
+  
+#endif
+
+  if (tramp - &closure->tramp[0] > FFI_TRAMPOLINE_SIZE)
+    Py_FatalError("FFI_TRAMPOLINE_SIZE too small in " __FILE__);
+
+  closure->cif  = cif;
+  closure->user_data = user_data;
+  closure->fun  = fun;
+  return FFI_OK;
+}
diff --git a/lib/wrappers/libffi/msvc/prep_cif.c b/lib/wrappers/libffi/msvc/prep_cif.c
new file mode 100644
index 000000000..2650fa052
--- /dev/null
+++ b/lib/wrappers/libffi/msvc/prep_cif.c
@@ -0,0 +1,175 @@
+/* -----------------------------------------------------------------------
+   prep_cif.c - Copyright (c) 1996, 1998  Red Hat, Inc.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+
+
+/* Round up to FFI_SIZEOF_ARG. */
+
+#define STACK_ARG_SIZE(x) ALIGN(x, FFI_SIZEOF_ARG)
+
+/* Perform machine independent initialization of aggregate type
+   specifications. */
+
+static ffi_status initialize_aggregate(/*@out@*/ ffi_type *arg)
+{
+  ffi_type **ptr; 
+
+  FFI_ASSERT(arg != NULL);
+
+  /*@-usedef@*/
+
+  FFI_ASSERT(arg->elements != NULL);
+  FFI_ASSERT(arg->size == 0);
+  FFI_ASSERT(arg->alignment == 0);
+
+  ptr = &(arg->elements[0]);
+
+  while ((*ptr) != NULL)
+    {
+      if (((*ptr)->size == 0) && (initialize_aggregate((*ptr)) != FFI_OK))
+	return FFI_BAD_TYPEDEF;
+      
+      /* Perform a sanity check on the argument type */
+      FFI_ASSERT_VALID_TYPE(*ptr);
+
+      arg->size = ALIGN(arg->size, (*ptr)->alignment);
+      arg->size += (*ptr)->size;
+
+      arg->alignment = (arg->alignment > (*ptr)->alignment) ? 
+	arg->alignment : (*ptr)->alignment;
+
+      ptr++;
+    }
+
+  /* Structure size includes tail padding.  This is important for
+     structures that fit in one register on ABIs like the PowerPC64
+     Linux ABI that right justify small structs in a register.
+     It's also needed for nested structure layout, for example
+     struct A { long a; char b; }; struct B { struct A x; char y; };
+     should find y at an offset of 2*sizeof(long) and result in a
+     total size of 3*sizeof(long).  */
+  arg->size = ALIGN (arg->size, arg->alignment);
+
+  if (arg->size == 0)
+    return FFI_BAD_TYPEDEF;
+  else
+    return FFI_OK;
+
+  /*@=usedef@*/
+}
+
+/* Perform machine independent ffi_cif preparation, then call
+   machine dependent routine. */
+
+ffi_status ffi_prep_cif(/*@out@*/ /*@partial@*/ ffi_cif *cif, 
+			ffi_abi abi, unsigned int nargs, 
+			/*@dependent@*/ /*@out@*/ /*@partial@*/ ffi_type *rtype, 
+			/*@dependent@*/ ffi_type **atypes)
+{
+  unsigned bytes = 0;
+  unsigned int i;
+  ffi_type **ptr;
+
+  FFI_ASSERT(cif != NULL);
+  FFI_ASSERT((abi > FFI_FIRST_ABI) && (abi <= FFI_DEFAULT_ABI));
+
+  cif->abi = abi;
+  cif->arg_types = atypes;
+  cif->nargs = nargs;
+  cif->rtype = rtype;
+
+  cif->flags = 0;
+
+  /* Initialize the return type if necessary */
+  /*@-usedef@*/
+  if ((cif->rtype->size == 0) && (initialize_aggregate(cif->rtype) != FFI_OK))
+    return FFI_BAD_TYPEDEF;
+  /*@=usedef@*/
+
+  /* Perform a sanity check on the return type */
+  FFI_ASSERT_VALID_TYPE(cif->rtype);
+
+  /* x86-64 and s390 stack space allocation is handled in prep_machdep.  */
+#if !defined M68K && !defined __x86_64__ && !defined S390
+  /* Make space for the return structure pointer */
+  if (cif->rtype->type == FFI_TYPE_STRUCT
+      /* MSVC returns small structures in registers.  But we have a different
+      workaround: pretend int32 or int64 return type, and converting to
+      structure afterwards. */
+#ifdef SPARC
+      && (cif->abi != FFI_V9 || cif->rtype->size > 32)
+#endif
+      )
+    bytes = STACK_ARG_SIZE(sizeof(void*));
+#endif
+
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+    {
+
+      /* Initialize any uninitialized aggregate type definitions */
+      if (((*ptr)->size == 0) && (initialize_aggregate((*ptr)) != FFI_OK))
+	return FFI_BAD_TYPEDEF;
+
+      /* Perform a sanity check on the argument type, do this 
+	 check after the initialization.  */
+      FFI_ASSERT_VALID_TYPE(*ptr);
+
+#if !defined __x86_64__ && !defined S390
+#ifdef SPARC
+      if (((*ptr)->type == FFI_TYPE_STRUCT
+	   && ((*ptr)->size > 16 || cif->abi != FFI_V9))
+	  || ((*ptr)->type == FFI_TYPE_LONGDOUBLE
+	      && cif->abi != FFI_V9))
+	bytes += sizeof(void*);
+      else
+#endif
+	{
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+		/* Don't know if this is a libffi bug or not.  At least on
+		   Windows with MSVC, function call parameters are *not*
+		   aligned in the same way as structure fields are, they are
+		   only aligned in integer boundaries.
+
+		   This doesn't do any harm for cdecl functions and closures,
+		   since the caller cleans up the stack, but it is wrong for
+		   stdcall functions where the callee cleans.
+		*/
+
+	  /* Add any padding if necessary */
+	  if (((*ptr)->alignment - 1) & bytes)
+	    bytes = ALIGN(bytes, (*ptr)->alignment);
+	  
+#endif
+	  bytes += STACK_ARG_SIZE((*ptr)->size);
+	}
+#endif
+    }
+
+  cif->bytes = bytes;
+
+  /* Perform machine dependent cif processing */
+  return ffi_prep_cif_machdep(cif);
+}
diff --git a/lib/wrappers/libffi/msvc/types.c b/lib/wrappers/libffi/msvc/types.c
new file mode 100644
index 000000000..df32190d1
--- /dev/null
+++ b/lib/wrappers/libffi/msvc/types.c
@@ -0,0 +1,104 @@
+/* -----------------------------------------------------------------------
+   types.c - Copyright (c) 1996, 1998  Red Hat, Inc.
+   
+   Predefined ffi_types needed by libffi.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+/* Type definitions */
+
+#define FFI_INTEGRAL_TYPEDEF(n, s, a, t) ffi_type ffi_type_##n = { s, a, t, NULL }
+#define FFI_AGGREGATE_TYPEDEF(n, e) ffi_type ffi_type_##n = { 0, 0, FFI_TYPE_STRUCT, e }
+
+/* Size and alignment are fake here. They must not be 0. */
+FFI_INTEGRAL_TYPEDEF(void, 1, 1, FFI_TYPE_VOID);
+
+FFI_INTEGRAL_TYPEDEF(uint8, 1, 1, FFI_TYPE_UINT8);
+FFI_INTEGRAL_TYPEDEF(sint8, 1, 1, FFI_TYPE_SINT8);
+FFI_INTEGRAL_TYPEDEF(uint16, 2, 2, FFI_TYPE_UINT16);
+FFI_INTEGRAL_TYPEDEF(sint16, 2, 2, FFI_TYPE_SINT16);
+FFI_INTEGRAL_TYPEDEF(uint32, 4, 4, FFI_TYPE_UINT32);
+FFI_INTEGRAL_TYPEDEF(sint32, 4, 4, FFI_TYPE_SINT32);
+FFI_INTEGRAL_TYPEDEF(float, 4, 4, FFI_TYPE_FLOAT);
+
+#if defined ALPHA || defined SPARC64 || defined X86_64 || defined S390X \
+    || defined IA64
+
+FFI_INTEGRAL_TYPEDEF(pointer, 8, 8, FFI_TYPE_POINTER);
+
+#else
+
+FFI_INTEGRAL_TYPEDEF(pointer, 4, 4, FFI_TYPE_POINTER);
+
+#endif
+
+#if defined X86 || defined X86_WIN32 || defined ARM || defined M68K
+
+FFI_INTEGRAL_TYPEDEF(uint64, 8, 4, FFI_TYPE_UINT64);
+FFI_INTEGRAL_TYPEDEF(sint64, 8, 4, FFI_TYPE_SINT64);
+
+#elif defined SH
+
+FFI_INTEGRAL_TYPEDEF(uint64, 8, 4, FFI_TYPE_UINT64);
+FFI_INTEGRAL_TYPEDEF(sint64, 8, 4, FFI_TYPE_SINT64);
+
+#else
+
+FFI_INTEGRAL_TYPEDEF(uint64, 8, 8, FFI_TYPE_UINT64);
+FFI_INTEGRAL_TYPEDEF(sint64, 8, 8, FFI_TYPE_SINT64);
+
+#endif
+
+
+#if defined X86 || defined X86_WIN32 || defined M68K
+
+FFI_INTEGRAL_TYPEDEF(double, 8, 4, FFI_TYPE_DOUBLE);
+FFI_INTEGRAL_TYPEDEF(longdouble, 12, 4, FFI_TYPE_LONGDOUBLE);
+
+#elif defined ARM || defined SH || defined POWERPC_AIX || defined POWERPC_DARWIN
+
+FFI_INTEGRAL_TYPEDEF(double, 8, 4, FFI_TYPE_DOUBLE);
+FFI_INTEGRAL_TYPEDEF(longdouble, 8, 4, FFI_TYPE_LONGDOUBLE);
+
+#elif defined SPARC
+
+FFI_INTEGRAL_TYPEDEF(double, 8, 8, FFI_TYPE_DOUBLE);
+#ifdef SPARC64
+FFI_INTEGRAL_TYPEDEF(longdouble, 16, 16, FFI_TYPE_LONGDOUBLE);
+#else
+FFI_INTEGRAL_TYPEDEF(longdouble, 16, 8, FFI_TYPE_LONGDOUBLE);
+#endif
+
+#elif defined X86_64
+
+FFI_INTEGRAL_TYPEDEF(double, 8, 8, FFI_TYPE_DOUBLE);
+FFI_INTEGRAL_TYPEDEF(longdouble, 16, 16, FFI_TYPE_LONGDOUBLE);
+
+#else
+
+FFI_INTEGRAL_TYPEDEF(double, 8, 8, FFI_TYPE_DOUBLE);
+FFI_INTEGRAL_TYPEDEF(longdouble, 8, 8, FFI_TYPE_LONGDOUBLE);
+
+#endif
+
diff --git a/lib/wrappers/libffi/msvc/win32.c b/lib/wrappers/libffi/msvc/win32.c
new file mode 100644
index 000000000..d1149a85e
--- /dev/null
+++ b/lib/wrappers/libffi/msvc/win32.c
@@ -0,0 +1,162 @@
+/* -----------------------------------------------------------------------
+   win32.S - Copyright (c) 1996, 1998, 2001, 2002  Red Hat, Inc.
+	     Copyright (c) 2001  John Beniton
+	     Copyright (c) 2002  Ranjit Mathew
+			
+ 
+   X86 Foreign Function Interface
+ 
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+ 
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+ 
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+/* theller: almost verbatim translation from gas syntax to MSVC inline
+   assembler code. */
+
+/* theller: ffi_call_x86 now returns an integer - the difference of the stack
+   pointer before and after the function call.  If everything is ok, zero is
+   returned.  If stdcall functions are passed the wrong number of arguments,
+   the difference will be nonzero. */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+__declspec(naked) int
+ffi_call_x86(void (* prepfunc)(char *, extended_cif *), /* 8 */
+	     extended_cif *ecif, /* 12 */
+	     unsigned bytes, /* 16 */
+	     unsigned flags, /* 20 */
+	     unsigned *rvalue, /* 24 */
+	     void (*fn)()) /* 28 */
+{
+	_asm {
+		push ebp
+		mov ebp, esp
+
+		push esi // NEW: this register must be preserved across function calls
+// XXX SAVE ESP NOW!
+		mov esi, esp		// save stack pointer before the call
+
+// Make room for all of the new args.
+		mov ecx, [ebp+16]
+		sub esp, ecx		// sub esp, bytes
+		
+		mov eax, esp
+
+// Place all of the ffi_prep_args in position
+		push [ebp + 12] // ecif
+		push eax
+		call [ebp + 8] // prepfunc
+
+// Return stack to previous state and call the function
+		add esp, 8
+// FIXME: Align the stack to a 128-bit boundary to avoid
+// potential performance hits.
+		call [ebp + 28]
+
+// Load ecif->cif->abi
+		mov ecx, [ebp + 12]
+		mov ecx, [ecx]ecif.cif
+		mov ecx, [ecx]ecif.cif.abi
+		
+		cmp ecx, FFI_STDCALL
+		je noclean
+// STDCALL: Remove the space we pushed for the args
+		mov ecx, [ebp + 16]
+		add esp, ecx
+// CDECL: Caller has already cleaned the stack
+noclean:
+// Check that esp has the same value as before!
+		sub esi, esp
+
+// Load %ecx with the return type code
+		mov ecx, [ebp + 20]
+
+// If the return value pointer is NULL, assume no return value.
+/*
+  Intel asm is weird. We have to explicitely specify 'DWORD PTR' in the nexr instruction,
+  otherwise only one BYTE will be compared (instead of a DWORD)!
+ */
+		cmp DWORD PTR [ebp + 24], 0
+		jne sc_retint
+
+// Even if there is no space for the return value, we are
+// obliged to handle floating-point values.
+		cmp ecx, FFI_TYPE_FLOAT
+		jne sc_noretval
+//        fstp  %st(0)
+		fstp st(0)
+
+		jmp sc_epilogue
+
+sc_retint:
+		cmp ecx, FFI_TYPE_INT
+		jne sc_retfloat
+//        # Load %ecx with the pointer to storage for the return value
+		mov ecx, [ebp + 24]
+		mov [ecx + 0], eax
+		jmp sc_epilogue
+
+sc_retfloat:
+		cmp ecx, FFI_TYPE_FLOAT
+		jne sc_retdouble
+// Load %ecx with the pointer to storage for the return value
+		mov ecx, [ebp+24]
+//        fstps (%ecx)
+		fstp DWORD PTR [ecx]
+		jmp sc_epilogue
+
+sc_retdouble:
+		cmp ecx, FFI_TYPE_DOUBLE
+		jne sc_retlongdouble
+//        movl  24(%ebp),%ecx
+		mov ecx, [ebp+24]
+		fstp QWORD PTR [ecx]
+		jmp sc_epilogue
+
+		jmp sc_retlongdouble // avoid warning about unused label
+sc_retlongdouble:
+		cmp ecx, FFI_TYPE_LONGDOUBLE
+		jne sc_retint64
+// Load %ecx with the pointer to storage for the return value
+		mov ecx, [ebp+24]
+//        fstpt (%ecx)
+		fstp QWORD PTR [ecx] /* XXX ??? */
+		jmp sc_epilogue
+
+sc_retint64:
+		cmp ecx, FFI_TYPE_SINT64
+		jne sc_retstruct
+// Load %ecx with the pointer to storage for the return value
+		mov ecx, [ebp+24]
+		mov [ecx+0], eax
+		mov [ecx+4], edx
+
+sc_retstruct:
+// Nothing to do!
+
+sc_noretval:
+sc_epilogue:
+		mov eax, esi
+		pop esi // NEW restore: must be preserved across function calls
+		mov esp, ebp
+		pop ebp
+		ret
+	}
+}
diff --git a/lib/wrappers/libffi/msvc/win32_asm.asm b/lib/wrappers/libffi/msvc/win32_asm.asm
new file mode 100644
index 000000000..407185e6a
--- /dev/null
+++ b/lib/wrappers/libffi/msvc/win32_asm.asm
@@ -0,0 +1,470 @@
+/* -----------------------------------------------------------------------
+   win32.S - Copyright (c) 1996, 1998, 2001, 2002, 2009  Red Hat, Inc.
+	     Copyright (c) 2001  John Beniton
+	     Copyright (c) 2002  Ranjit Mathew
+	     Copyright (c) 2009  Daniel Witte
+			
+ 
+   X86 Foreign Function Interface
+ 
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+ 
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+ 
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   -----------------------------------------------------------------------
+   */
+ 
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+.386
+.MODEL FLAT, C
+
+EXTRN ffi_closure_SYSV_inner:NEAR
+
+_TEXT SEGMENT
+
+ffi_call_win32 PROC NEAR,
+    ffi_prep_args : NEAR PTR DWORD,
+    ecif          : NEAR PTR DWORD,
+    cif_abi       : DWORD,
+    cif_bytes     : DWORD,
+    cif_flags     : DWORD,
+    rvalue        : NEAR PTR DWORD,
+    fn            : NEAR PTR DWORD
+
+        ;; Make room for all of the new args.
+        mov  ecx, cif_bytes
+        sub  esp, ecx
+
+        mov  eax, esp
+
+        ;; Place all of the ffi_prep_args in position
+        push ecif
+        push eax
+        call ffi_prep_args
+
+        ;; Return stack to previous state and call the function
+        add  esp, 8
+
+	;; Handle thiscall and fastcall
+	cmp cif_abi, 3 ;; FFI_THISCALL
+	jz do_thiscall
+	cmp cif_abi, 4 ;; FFI_FASTCALL
+	jnz do_stdcall
+	mov ecx, DWORD PTR [esp]
+	mov edx, DWORD PTR [esp+4]
+	add esp, 8
+	jmp do_stdcall
+do_thiscall:
+	mov ecx, DWORD PTR [esp]
+	add esp, 4
+do_stdcall:
+        call fn
+
+        ;; cdecl:   we restore esp in the epilogue, so there's no need to
+        ;;          remove the space we pushed for the args.
+        ;; stdcall: the callee has already cleaned the stack.
+
+        ;; Load ecx with the return type code
+        mov  ecx, cif_flags
+
+        ;; If the return value pointer is NULL, assume no return value.
+        cmp  rvalue, 0
+        jne  ca_jumptable
+
+        ;; Even if there is no space for the return value, we are
+        ;; obliged to handle floating-point values.
+        cmp  ecx, FFI_TYPE_FLOAT
+        jne  ca_epilogue
+        fstp st(0)
+
+        jmp  ca_epilogue
+
+ca_jumptable:
+        jmp  [ca_jumpdata + 4 * ecx]
+ca_jumpdata:
+        ;; Do not insert anything here between label and jump table.
+        dd offset ca_epilogue       ;; FFI_TYPE_VOID
+        dd offset ca_retint         ;; FFI_TYPE_INT
+        dd offset ca_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset ca_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset ca_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset ca_retuint8       ;; FFI_TYPE_UINT8
+        dd offset ca_retsint8       ;; FFI_TYPE_SINT8
+        dd offset ca_retuint16      ;; FFI_TYPE_UINT16
+        dd offset ca_retsint16      ;; FFI_TYPE_SINT16
+        dd offset ca_retint         ;; FFI_TYPE_UINT32
+        dd offset ca_retint         ;; FFI_TYPE_SINT32
+        dd offset ca_retint64       ;; FFI_TYPE_UINT64
+        dd offset ca_retint64       ;; FFI_TYPE_SINT64
+        dd offset ca_epilogue       ;; FFI_TYPE_STRUCT
+        dd offset ca_retint         ;; FFI_TYPE_POINTER
+        dd offset ca_retstruct1b    ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset ca_retstruct2b    ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset ca_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+        dd offset ca_epilogue       ;; FFI_TYPE_MS_STRUCT
+
+        /* Sign/zero extend as appropriate.  */
+ca_retuint8:
+        movzx eax, al
+        jmp   ca_retint
+
+ca_retsint8:
+        movsx eax, al
+        jmp   ca_retint
+
+ca_retuint16:
+        movzx eax, ax
+        jmp   ca_retint
+
+ca_retsint16:
+        movsx eax, ax
+        jmp   ca_retint
+
+ca_retint:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], eax
+        jmp   ca_epilogue
+
+ca_retint64:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], eax
+        mov   [ecx + 4], edx
+        jmp   ca_epilogue
+
+ca_retfloat:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        fstp  DWORD PTR [ecx]
+        jmp   ca_epilogue
+
+ca_retdouble:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        fstp  QWORD PTR [ecx]
+        jmp   ca_epilogue
+
+ca_retlongdouble:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        fstp  TBYTE PTR [ecx]
+        jmp   ca_epilogue
+
+ca_retstruct1b:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], al
+        jmp   ca_epilogue
+
+ca_retstruct2b:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], ax
+        jmp   ca_epilogue
+
+ca_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_call_win32 ENDP
+
+ffi_closure_THISCALL PROC NEAR FORCEFRAME
+	sub	esp, 40
+	lea	edx, [ebp -24]
+	mov	[ebp - 12], edx	/* resp */
+	lea	edx, [ebp + 12]  /* account for stub return address on stack */
+	jmp	stub
+ffi_closure_THISCALL ENDP
+
+ffi_closure_SYSV PROC NEAR FORCEFRAME
+    ;; the ffi_closure ctx is passed in eax by the trampoline.
+
+        sub  esp, 40
+        lea  edx, [ebp - 24]
+        mov  [ebp - 12], edx         ;; resp
+        lea  edx, [ebp + 8]
+stub::
+        mov  [esp + 8], edx          ;; args
+        lea  edx, [ebp - 12]
+        mov  [esp + 4], edx          ;; &resp
+        mov  [esp], eax              ;; closure
+        call ffi_closure_SYSV_inner
+        mov  ecx, [ebp - 12]
+
+cs_jumptable:
+        jmp  [cs_jumpdata + 4 * eax]
+cs_jumpdata:
+        ;; Do not insert anything here between the label and jump table.
+        dd offset cs_epilogue       ;; FFI_TYPE_VOID
+        dd offset cs_retint         ;; FFI_TYPE_INT
+        dd offset cs_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset cs_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset cs_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset cs_retuint8       ;; FFI_TYPE_UINT8
+        dd offset cs_retsint8       ;; FFI_TYPE_SINT8
+        dd offset cs_retuint16      ;; FFI_TYPE_UINT16
+        dd offset cs_retsint16      ;; FFI_TYPE_SINT16
+        dd offset cs_retint         ;; FFI_TYPE_UINT32
+        dd offset cs_retint         ;; FFI_TYPE_SINT32
+        dd offset cs_retint64       ;; FFI_TYPE_UINT64
+        dd offset cs_retint64       ;; FFI_TYPE_SINT64
+        dd offset cs_retstruct      ;; FFI_TYPE_STRUCT
+        dd offset cs_retint         ;; FFI_TYPE_POINTER
+        dd offset cs_retsint8       ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset cs_retsint16      ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset cs_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+        dd offset cs_retmsstruct    ;; FFI_TYPE_MS_STRUCT
+
+cs_retuint8:
+        movzx eax, BYTE PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retsint8:
+        movsx eax, BYTE PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retuint16:
+        movzx eax, WORD PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retsint16:
+        movsx eax, WORD PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retint:
+        mov   eax, [ecx]
+        jmp   cs_epilogue
+
+cs_retint64:
+        mov   eax, [ecx + 0]
+        mov   edx, [ecx + 4]
+        jmp   cs_epilogue
+
+cs_retfloat:
+        fld   DWORD PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retdouble:
+        fld   QWORD PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retlongdouble:
+        fld   TBYTE PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retstruct:
+        ;; Caller expects us to pop struct return value pointer hidden arg.
+        ;; Epilogue code is autogenerated.
+        ret	4
+
+cs_retmsstruct:
+        ;; Caller expects us to return a pointer to the real return value.
+        mov   eax, ecx
+        ;; Caller doesn't expects us to pop struct return value pointer hidden arg.
+        jmp   cs_epilogue
+
+cs_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_closure_SYSV ENDP
+
+#if !FFI_NO_RAW_API
+
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+
+ffi_closure_raw_THISCALL PROC NEAR USES esi FORCEFRAME
+	sub esp, 36
+	mov  esi, [eax + RAW_CLOSURE_CIF_OFFSET]        ;; closure->cif
+	mov  edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET]  ;; closure->user_data
+	mov [esp + 12], edx
+	lea edx, [ebp + 12]
+	jmp stubraw
+ffi_closure_raw_THISCALL ENDP
+
+ffi_closure_raw_SYSV PROC NEAR USES esi FORCEFRAME
+    ;; the ffi_closure ctx is passed in eax by the trampoline.
+
+        sub  esp, 40
+        mov  esi, [eax + RAW_CLOSURE_CIF_OFFSET]        ;; closure->cif
+        mov  edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET]  ;; closure->user_data
+        mov  [esp + 12], edx                            ;; user_data
+        lea  edx, [ebp + 8]
+stubraw::
+        mov  [esp + 8], edx                             ;; raw_args
+        lea  edx, [ebp - 24]
+        mov  [esp + 4], edx                             ;; &res
+        mov  [esp], esi                                 ;; cif
+        call DWORD PTR [eax + RAW_CLOSURE_FUN_OFFSET]   ;; closure->fun
+        mov  eax, [esi + CIF_FLAGS_OFFSET]              ;; cif->flags
+        lea  ecx, [ebp - 24]
+
+cr_jumptable:
+        jmp  [cr_jumpdata + 4 * eax]
+cr_jumpdata:
+        ;; Do not insert anything here between the label and jump table.
+        dd offset cr_epilogue       ;; FFI_TYPE_VOID
+        dd offset cr_retint         ;; FFI_TYPE_INT
+        dd offset cr_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset cr_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset cr_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset cr_retuint8       ;; FFI_TYPE_UINT8
+        dd offset cr_retsint8       ;; FFI_TYPE_SINT8
+        dd offset cr_retuint16      ;; FFI_TYPE_UINT16
+        dd offset cr_retsint16      ;; FFI_TYPE_SINT16
+        dd offset cr_retint         ;; FFI_TYPE_UINT32
+        dd offset cr_retint         ;; FFI_TYPE_SINT32
+        dd offset cr_retint64       ;; FFI_TYPE_UINT64
+        dd offset cr_retint64       ;; FFI_TYPE_SINT64
+        dd offset cr_epilogue       ;; FFI_TYPE_STRUCT
+        dd offset cr_retint         ;; FFI_TYPE_POINTER
+        dd offset cr_retsint8       ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset cr_retsint16      ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset cr_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+        dd offset cr_epilogue       ;; FFI_TYPE_MS_STRUCT
+
+cr_retuint8:
+        movzx eax, BYTE PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retsint8:
+        movsx eax, BYTE PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retuint16:
+        movzx eax, WORD PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retsint16:
+        movsx eax, WORD PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retint:
+        mov   eax, [ecx]
+        jmp   cr_epilogue
+
+cr_retint64:
+        mov   eax, [ecx + 0]
+        mov   edx, [ecx + 4]
+        jmp   cr_epilogue
+
+cr_retfloat:
+        fld   DWORD PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retdouble:
+        fld   QWORD PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retlongdouble:
+        fld   TBYTE PTR [ecx]
+        jmp   cr_epilogue
+
+cr_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_closure_raw_SYSV ENDP
+
+#endif /* !FFI_NO_RAW_API */
+
+ffi_closure_STDCALL PROC NEAR FORCEFRAME
+    ;; the ffi_closure ctx is passed in eax by the trampoline.
+
+        sub  esp, 40
+        lea  edx, [ebp - 24]
+        mov  [ebp - 12], edx         ;; resp
+        lea  edx, [ebp + 12]         ;; account for stub return address on stack
+        mov  [esp + 8], edx          ;; args
+        lea  edx, [ebp - 12]
+        mov  [esp + 4], edx          ;; &resp
+        mov  [esp], eax              ;; closure
+        call ffi_closure_SYSV_inner
+        mov  ecx, [ebp - 12]
+
+cd_jumptable:
+        jmp  [cd_jumpdata + 4 * eax]
+cd_jumpdata:
+        ;; Do not insert anything here between the label and jump table.
+        dd offset cd_epilogue       ;; FFI_TYPE_VOID
+        dd offset cd_retint         ;; FFI_TYPE_INT
+        dd offset cd_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset cd_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset cd_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset cd_retuint8       ;; FFI_TYPE_UINT8
+        dd offset cd_retsint8       ;; FFI_TYPE_SINT8
+        dd offset cd_retuint16      ;; FFI_TYPE_UINT16
+        dd offset cd_retsint16      ;; FFI_TYPE_SINT16
+        dd offset cd_retint         ;; FFI_TYPE_UINT32
+        dd offset cd_retint         ;; FFI_TYPE_SINT32
+        dd offset cd_retint64       ;; FFI_TYPE_UINT64
+        dd offset cd_retint64       ;; FFI_TYPE_SINT64
+        dd offset cd_epilogue       ;; FFI_TYPE_STRUCT
+        dd offset cd_retint         ;; FFI_TYPE_POINTER
+        dd offset cd_retsint8       ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset cd_retsint16      ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset cd_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+
+cd_retuint8:
+        movzx eax, BYTE PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retsint8:
+        movsx eax, BYTE PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retuint16:
+        movzx eax, WORD PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retsint16:
+        movsx eax, WORD PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retint:
+        mov   eax, [ecx]
+        jmp   cd_epilogue
+
+cd_retint64:
+        mov   eax, [ecx + 0]
+        mov   edx, [ecx + 4]
+        jmp   cd_epilogue
+
+cd_retfloat:
+        fld   DWORD PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retdouble:
+        fld   QWORD PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retlongdouble:
+        fld   TBYTE PTR [ecx]
+        jmp   cd_epilogue
+
+cd_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_closure_STDCALL ENDP
+
+_TEXT ENDS
+END
diff --git a/lib/wrappers/libffi/msvc/win64_asm.asm b/lib/wrappers/libffi/msvc/win64_asm.asm
new file mode 100644
index 000000000..301188bc9
--- /dev/null
+++ b/lib/wrappers/libffi/msvc/win64_asm.asm
@@ -0,0 +1,156 @@
+PUBLIC	ffi_call_AMD64
+
+EXTRN	__chkstk:NEAR
+EXTRN	ffi_closure_SYSV:NEAR
+
+_TEXT	SEGMENT
+
+;;; ffi_closure_OUTER will be called with these registers set:
+;;;    rax points to 'closure'
+;;;    r11 contains a bit mask that specifies which of the
+;;;    first four parameters are float or double
+;;;
+;;; It must move the parameters passed in registers to their stack location,
+;;; call ffi_closure_SYSV for the actual work, then return the result.
+;;; 
+ffi_closure_OUTER PROC FRAME
+	;; save actual arguments to their stack space.
+	test	r11, 1
+	jne	first_is_float	
+	mov	QWORD PTR [rsp+8], rcx
+	jmp	second
+first_is_float:
+	movlpd	QWORD PTR [rsp+8], xmm0
+
+second:
+	test	r11, 2
+	jne	second_is_float	
+	mov	QWORD PTR [rsp+16], rdx
+	jmp	third
+second_is_float:
+	movlpd	QWORD PTR [rsp+16], xmm1
+
+third:
+	test	r11, 4
+	jne	third_is_float	
+	mov	QWORD PTR [rsp+24], r8
+	jmp	forth
+third_is_float:
+	movlpd	QWORD PTR [rsp+24], xmm2
+
+forth:
+	test	r11, 8
+	jne	forth_is_float	
+	mov	QWORD PTR [rsp+32], r9
+	jmp	done
+forth_is_float:
+	movlpd	QWORD PTR [rsp+32], xmm3
+
+done:
+.ALLOCSTACK 40
+	sub	rsp, 40
+.ENDPROLOG
+	mov	rcx, rax	; context is first parameter
+	mov	rdx, rsp	; stack is second parameter
+	add	rdx, 40		; correct our own area
+	mov	rax, ffi_closure_SYSV
+	call	rax		; call the real closure function
+	;; Here, code is missing that handles float return values
+	add	rsp, 40
+	movd	xmm0, rax	; In case the closure returned a float.
+	ret	0
+ffi_closure_OUTER ENDP
+
+
+;;; ffi_call_AMD64
+
+stack$ = 0
+prepfunc$ = 32
+ecif$ = 40
+bytes$ = 48
+flags$ = 56
+rvalue$ = 64
+fn$ = 72
+
+ffi_call_AMD64 PROC FRAME
+
+	mov	QWORD PTR [rsp+32], r9
+	mov	QWORD PTR [rsp+24], r8
+	mov	QWORD PTR [rsp+16], rdx
+	mov	QWORD PTR [rsp+8], rcx
+.PUSHREG rbp
+	push	rbp
+.ALLOCSTACK 48
+	sub	rsp, 48					; 00000030H
+.SETFRAME rbp, 32
+	lea	rbp, QWORD PTR [rsp+32]
+.ENDPROLOG
+
+	mov	eax, DWORD PTR bytes$[rbp]
+	add	rax, 15
+	and	rax, -16
+	call	__chkstk
+	sub	rsp, rax
+	lea	rax, QWORD PTR [rsp+32]
+	mov	QWORD PTR stack$[rbp], rax
+
+	mov	rdx, QWORD PTR ecif$[rbp]
+	mov	rcx, QWORD PTR stack$[rbp]
+	call	QWORD PTR prepfunc$[rbp]
+
+	mov	rsp, QWORD PTR stack$[rbp]
+
+	movlpd	xmm3, QWORD PTR [rsp+24]
+	movd	r9, xmm3
+
+	movlpd	xmm2, QWORD PTR [rsp+16]
+	movd	r8, xmm2
+
+	movlpd	xmm1, QWORD PTR [rsp+8]
+	movd	rdx, xmm1
+
+	movlpd	xmm0, QWORD PTR [rsp]
+	movd	rcx, xmm0
+
+	call	QWORD PTR fn$[rbp]
+ret_int$:
+ 	cmp	DWORD PTR flags$[rbp], 1 ; FFI_TYPE_INT
+ 	jne	ret_float$
+
+	mov	rcx, QWORD PTR rvalue$[rbp]
+	mov	DWORD PTR [rcx], eax
+	jmp	SHORT ret_nothing$
+
+ret_float$:
+ 	cmp	DWORD PTR flags$[rbp], 2 ; FFI_TYPE_FLOAT
+ 	jne	SHORT ret_double$
+
+ 	mov	rax, QWORD PTR rvalue$[rbp]
+ 	movlpd	QWORD PTR [rax], xmm0
+ 	jmp	SHORT ret_nothing$
+
+ret_double$:
+ 	cmp	DWORD PTR flags$[rbp], 3 ; FFI_TYPE_DOUBLE
+ 	jne	SHORT ret_int64$
+
+ 	mov	rax, QWORD PTR rvalue$[rbp]
+ 	movlpd	QWORD PTR [rax], xmm0
+ 	jmp	SHORT ret_nothing$
+
+ret_int64$:
+  	cmp	DWORD PTR flags$[rbp], 12 ; FFI_TYPE_SINT64
+  	jne	ret_nothing$
+
+ 	mov	rcx, QWORD PTR rvalue$[rbp]
+ 	mov	QWORD PTR [rcx], rax
+ 	jmp	SHORT ret_nothing$
+	
+ret_nothing$:
+	xor	eax, eax
+
+	lea	rsp, QWORD PTR [rbp+16]
+	pop	rbp
+	ret	0
+ffi_call_AMD64 ENDP
+_TEXT	ENDS
+END