author     Araq <rumpf_a@web.de>   2013-12-19 01:00:51 +0100
committer  Araq <rumpf_a@web.de>   2013-12-19 01:00:51 +0100
commit     e2a4d591e5f34685071985cb4070e96b7e053a1a (patch)
tree       faf5b13f0a1a1da6009e9095bd4d1993bb5eedb9 /lib/wrappers/libffi/gcc
parent     3d869d4decb336c1396a0bfe4db24ea9dcda043e (diff)
download   Nim-e2a4d591e5f34685071985cb4070e96b7e053a1a.tar.gz
added libffi for Windows support
Diffstat (limited to 'lib/wrappers/libffi/gcc')
-rw-r--r--  lib/wrappers/libffi/gcc/closures.c     627
-rw-r--r--  lib/wrappers/libffi/gcc/ffi.c          841
-rw-r--r--  lib/wrappers/libffi/gcc/ffi64.c        673
-rw-r--r--  lib/wrappers/libffi/gcc/prep_cif.c     237
-rw-r--r--  lib/wrappers/libffi/gcc/types.c         77
-rw-r--r--  lib/wrappers/libffi/gcc/win32_asm.asm  759
-rw-r--r--  lib/wrappers/libffi/gcc/win32_asm.s    736
-rw-r--r--  lib/wrappers/libffi/gcc/win64_asm.asm  467
-rw-r--r--  lib/wrappers/libffi/gcc/win64_asm.s    227
9 files changed, 4644 insertions, 0 deletions
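For orientation, here is a minimal sketch (not part of this commit) of the two public APIs the vendored files implement: ffi_prep_cif/ffi_call for forward calls (prep_cif.c, ffi.c, ffi64.c) and ffi_closure_alloc/ffi_prep_closure_loc for closures (closures.c plus the win32/win64 assembly thunks). Note that in this particular copy, ffi_closure_alloc has been patched down to a plain malloc, as the diff below shows.

/* Illustration only; not part of the vendored sources. */
#include <ffi.h>
#include <stdio.h>

/* Handler invoked through the closure trampoline: computes arg + 1.
   Integral results must be widened into an ffi_arg. */
static void handler(ffi_cif *cif, void *ret, void **args, void *userdata)
{
  (void) cif; (void) userdata;
  *(ffi_arg *) ret = *(int *) args[0] + 1;
}

int main(void)
{
  /* --- forward call: invoke puts() dynamically --- */
  ffi_cif cif;
  ffi_type *argt[1] = { &ffi_type_pointer };
  char *s = "hello from libffi";
  void *argv[1] = { &s };
  ffi_arg rc;

  if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, &ffi_type_sint, argt) == FFI_OK)
    ffi_call(&cif, FFI_FN(puts), &rc, argv);

  /* --- closure: synthesize an int (*)(int) at run time --- */
  ffi_cif ccif;
  ffi_type *cargt[1] = { &ffi_type_sint };
  void *code;
  ffi_closure *closure = ffi_closure_alloc(sizeof(ffi_closure), &code);

  if (closure
      && ffi_prep_cif(&ccif, FFI_DEFAULT_ABI, 1, &ffi_type_sint, cargt) == FFI_OK
      && ffi_prep_closure_loc(closure, &ccif, handler, NULL, code) == FFI_OK)
    printf("%d\n", ((int (*)(int)) code)(41));  /* prints 42 */

  ffi_closure_free(closure);
  return 0;
}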
diff --git a/lib/wrappers/libffi/gcc/closures.c b/lib/wrappers/libffi/gcc/closures.c new file mode 100644 index 000000000..c0ee06891 --- /dev/null +++ b/lib/wrappers/libffi/gcc/closures.c @@ -0,0 +1,627 @@ +/* ----------------------------------------------------------------------- + closures.c - Copyright (c) 2007, 2009, 2010 Red Hat, Inc. + Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc + Copyright (c) 2011 Plausible Labs Cooperative, Inc. + + Code to allocate and deallocate memory for closures. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#if defined __linux__ && !defined _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif + +#include <ffi.h> +#include <ffi_common.h> + +#if !FFI_MMAP_EXEC_WRIT && !FFI_EXEC_TRAMPOLINE_TABLE +# if __gnu_linux__ +/* This macro indicates it may be forbidden to map anonymous memory + with both write and execute permission. Code compiled when this + option is defined will attempt to map such pages once, but if it + fails, it falls back to creating a temporary file in a writable and + executable filesystem and mapping pages from it into separate + locations in the virtual memory space, one location writable and + another executable. */ +# define FFI_MMAP_EXEC_WRIT 1 +# define HAVE_MNTENT 1 +# endif +# if defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__) +/* Windows systems may have Data Execution Protection (DEP) enabled, + which requires the use of VirtualMalloc/VirtualFree to alloc/free + executable memory. */ +# define FFI_MMAP_EXEC_WRIT 1 +# endif +#endif + +#if FFI_MMAP_EXEC_WRIT && !defined FFI_MMAP_EXEC_SELINUX +# ifdef __linux__ +/* When defined to 1 check for SELinux and if SELinux is active, + don't attempt PROT_EXEC|PROT_WRITE mapping at all, as that + might cause audit messages. */ +# define FFI_MMAP_EXEC_SELINUX 1 +# endif +#endif + +#if FFI_CLOSURES + +# if FFI_EXEC_TRAMPOLINE_TABLE + +// Per-target implementation; It's unclear what can reasonable be shared +// between two OS/architecture implementations. + +# elif FFI_MMAP_EXEC_WRIT /* !FFI_EXEC_TRAMPOLINE_TABLE */ + +#define USE_LOCKS 1 +#define USE_DL_PREFIX 1 +#ifdef __GNUC__ +#ifndef USE_BUILTIN_FFS +#define USE_BUILTIN_FFS 1 +#endif +#endif + +/* We need to use mmap, not sbrk. */ +#define HAVE_MORECORE 0 + +/* We could, in theory, support mremap, but it wouldn't buy us anything. */ +#define HAVE_MREMAP 0 + +/* We have no use for this, so save some code and data. 
*/ +#define NO_MALLINFO 1 + +/* We need all allocations to be in regular segments, otherwise we + lose track of the corresponding code address. */ +#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T + +/* Don't allocate more than a page unless needed. */ +#define DEFAULT_GRANULARITY ((size_t)malloc_getpagesize) + +#if FFI_CLOSURE_TEST +/* Don't release single pages, to avoid a worst-case scenario of + continuously allocating and releasing single pages, but release + pairs of pages, which should do just as well given that allocations + are likely to be small. */ +#define DEFAULT_TRIM_THRESHOLD ((size_t)malloc_getpagesize) +#endif + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#ifndef _MSC_VER +#include <unistd.h> +#endif +#include <string.h> +#include <stdio.h> +#if !defined(X86_WIN32) && !defined(X86_WIN64) +#ifdef HAVE_MNTENT +#include <mntent.h> +#endif /* HAVE_MNTENT */ +#include <sys/param.h> +#include <pthread.h> + +/* We don't want sys/mman.h to be included after we redefine mmap and + dlmunmap. */ +#include <sys/mman.h> +#define LACKS_SYS_MMAN_H 1 + +#if FFI_MMAP_EXEC_SELINUX +#include <sys/statfs.h> +#include <stdlib.h> + +static int selinux_enabled = -1; + +static int +selinux_enabled_check (void) +{ + struct statfs sfs; + FILE *f; + char *buf = NULL; + size_t len = 0; + + if (statfs ("/selinux", &sfs) >= 0 + && (unsigned int) sfs.f_type == 0xf97cff8cU) + return 1; + f = fopen ("/proc/mounts", "r"); + if (f == NULL) + return 0; + while (getline (&buf, &len, f) >= 0) + { + char *p = strchr (buf, ' '); + if (p == NULL) + break; + p = strchr (p + 1, ' '); + if (p == NULL) + break; + if (strncmp (p + 1, "selinuxfs ", 10) == 0) + { + free (buf); + fclose (f); + return 1; + } + } + free (buf); + fclose (f); + return 0; +} + +#define is_selinux_enabled() (selinux_enabled >= 0 ? selinux_enabled \ + : (selinux_enabled = selinux_enabled_check ())) + +#else + +#define is_selinux_enabled() 0 + +#endif /* !FFI_MMAP_EXEC_SELINUX */ + +/* On PaX enable kernels that have MPROTECT enable we can't use PROT_EXEC. */ +#ifdef FFI_MMAP_EXEC_EMUTRAMP_PAX +#include <stdlib.h> + +static int emutramp_enabled = -1; + +static int +emutramp_enabled_check (void) +{ + if (getenv ("FFI_DISABLE_EMUTRAMP") == NULL) + return 1; + else + return 0; +} + +#define is_emutramp_enabled() (emutramp_enabled >= 0 ? emutramp_enabled \ + : (emutramp_enabled = emutramp_enabled_check ())) +#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */ + +#elif defined (__CYGWIN__) || defined(__INTERIX) + +#include <sys/mman.h> + +/* Cygwin is Linux-like, but not quite that Linux-like. */ +#define is_selinux_enabled() 0 + +#endif /* !defined(X86_WIN32) && !defined(X86_WIN64) */ + +#ifndef FFI_MMAP_EXEC_EMUTRAMP_PAX +#define is_emutramp_enabled() 0 +#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */ + +#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) +/* Use these for mmap and munmap within dlmalloc.c. */ +static void *dlmmap(void *, size_t, int, int, int, off_t); +static int dlmunmap(void *, size_t); +#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */ + +#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) + +/* A mutex used to synchronize access to *exec* variables in this file. 
*/ +static pthread_mutex_t open_temp_exec_file_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* A file descriptor of a temporary file from which we'll map + executable pages. */ +static int execfd = -1; + +/* The amount of space already allocated from the temporary file. */ +static size_t execsize = 0; + +/* Open a temporary file name, and immediately unlink it. */ +static int +open_temp_exec_file_name (char *name) +{ + int fd = mkstemp (name); + + if (fd != -1) + unlink (name); + + return fd; +} + +/* Open a temporary file in the named directory. */ +static int +open_temp_exec_file_dir (const char *dir) +{ + static const char suffix[] = "/ffiXXXXXX"; + int lendir = strlen (dir); + char *tempname = __builtin_alloca (lendir + sizeof (suffix)); + + if (!tempname) + return -1; + + memcpy (tempname, dir, lendir); + memcpy (tempname + lendir, suffix, sizeof (suffix)); + + return open_temp_exec_file_name (tempname); +} + +/* Open a temporary file in the directory in the named environment + variable. */ +static int +open_temp_exec_file_env (const char *envvar) +{ + const char *value = getenv (envvar); + + if (!value) + return -1; + + return open_temp_exec_file_dir (value); +} + +#ifdef HAVE_MNTENT +/* Open a temporary file in an executable and writable mount point + listed in the mounts file. Subsequent calls with the same mounts + keep searching for mount points in the same file. Providing NULL + as the mounts file closes the file. */ +static int +open_temp_exec_file_mnt (const char *mounts) +{ + static const char *last_mounts; + static FILE *last_mntent; + + if (mounts != last_mounts) + { + if (last_mntent) + endmntent (last_mntent); + + last_mounts = mounts; + + if (mounts) + last_mntent = setmntent (mounts, "r"); + else + last_mntent = NULL; + } + + if (!last_mntent) + return -1; + + for (;;) + { + int fd; + struct mntent mnt; + char buf[MAXPATHLEN * 3]; + + if (getmntent_r (last_mntent, &mnt, buf, sizeof (buf)) == NULL) + return -1; + + if (hasmntopt (&mnt, "ro") + || hasmntopt (&mnt, "noexec") + || access (mnt.mnt_dir, W_OK)) + continue; + + fd = open_temp_exec_file_dir (mnt.mnt_dir); + + if (fd != -1) + return fd; + } +} +#endif /* HAVE_MNTENT */ + +/* Instructions to look for a location to hold a temporary file that + can be mapped in for execution. */ +static struct +{ + int (*func)(const char *); + const char *arg; + int repeat; +} open_temp_exec_file_opts[] = { + { open_temp_exec_file_env, "TMPDIR", 0 }, + { open_temp_exec_file_dir, "/tmp", 0 }, + { open_temp_exec_file_dir, "/var/tmp", 0 }, + { open_temp_exec_file_dir, "/dev/shm", 0 }, + { open_temp_exec_file_env, "HOME", 0 }, +#ifdef HAVE_MNTENT + { open_temp_exec_file_mnt, "/etc/mtab", 1 }, + { open_temp_exec_file_mnt, "/proc/mounts", 1 }, +#endif /* HAVE_MNTENT */ +}; + +/* Current index into open_temp_exec_file_opts. */ +static int open_temp_exec_file_opts_idx = 0; + +/* Reset a current multi-call func, then advances to the next entry. + If we're at the last, go back to the first and return nonzero, + otherwise return zero. */ +static int +open_temp_exec_file_opts_next (void) +{ + if (open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat) + open_temp_exec_file_opts[open_temp_exec_file_opts_idx].func (NULL); + + open_temp_exec_file_opts_idx++; + if (open_temp_exec_file_opts_idx + == (sizeof (open_temp_exec_file_opts) + / sizeof (*open_temp_exec_file_opts))) + { + open_temp_exec_file_opts_idx = 0; + return 1; + } + + return 0; +} + +/* Return a file descriptor of a temporary zero-sized file in a + writable and exexutable filesystem. 
*/ +static int +open_temp_exec_file (void) +{ + int fd; + + do + { + fd = open_temp_exec_file_opts[open_temp_exec_file_opts_idx].func + (open_temp_exec_file_opts[open_temp_exec_file_opts_idx].arg); + + if (!open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat + || fd == -1) + { + if (open_temp_exec_file_opts_next ()) + break; + } + } + while (fd == -1); + + return fd; +} + +/* Map in a chunk of memory from the temporary exec file into separate + locations in the virtual memory address space, one writable and one + executable. Returns the address of the writable portion, after + storing an offset to the corresponding executable portion at the + last word of the requested chunk. */ +static void * +dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset) +{ + void *ptr; + + if (execfd == -1) + { + open_temp_exec_file_opts_idx = 0; + retry_open: + execfd = open_temp_exec_file (); + if (execfd == -1) + return MFAIL; + } + + offset = execsize; + + if (ftruncate (execfd, offset + length)) + return MFAIL; + + flags &= ~(MAP_PRIVATE | MAP_ANONYMOUS); + flags |= MAP_SHARED; + + ptr = mmap (NULL, length, (prot & ~PROT_WRITE) | PROT_EXEC, + flags, execfd, offset); + if (ptr == MFAIL) + { + if (!offset) + { + close (execfd); + goto retry_open; + } + ftruncate (execfd, offset); + return MFAIL; + } + else if (!offset + && open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat) + open_temp_exec_file_opts_next (); + + start = mmap (start, length, prot, flags, execfd, offset); + + if (start == MFAIL) + { + munmap (ptr, length); + ftruncate (execfd, offset); + return start; + } + + mmap_exec_offset ((char *)start, length) = (char*)ptr - (char*)start; + + execsize += length; + + return start; +} + +/* Map in a writable and executable chunk of memory if possible. + Failing that, fall back to dlmmap_locked. */ +static void * +dlmmap (void *start, size_t length, int prot, + int flags, int fd, off_t offset) +{ + void *ptr; + + assert (start == NULL && length % malloc_getpagesize == 0 + && prot == (PROT_READ | PROT_WRITE) + && flags == (MAP_PRIVATE | MAP_ANONYMOUS) + && fd == -1 && offset == 0); + +#if FFI_CLOSURE_TEST + printf ("mapping in %zi\n", length); +#endif + + if (execfd == -1 && is_emutramp_enabled ()) + { + ptr = mmap (start, length, prot & ~PROT_EXEC, flags, fd, offset); + return ptr; + } + + if (execfd == -1 && !is_selinux_enabled ()) + { + ptr = mmap (start, length, prot | PROT_EXEC, flags, fd, offset); + + if (ptr != MFAIL || (errno != EPERM && errno != EACCES)) + /* Cool, no need to mess with separate segments. */ + return ptr; + + /* If MREMAP_DUP is ever introduced and implemented, try mmap + with ((prot & ~PROT_WRITE) | PROT_EXEC) and mremap with + MREMAP_DUP and prot at this point. */ + } + + if (execsize == 0 || execfd == -1) + { + pthread_mutex_lock (&open_temp_exec_file_mutex); + ptr = dlmmap_locked (start, length, prot, flags, offset); + pthread_mutex_unlock (&open_temp_exec_file_mutex); + + return ptr; + } + + return dlmmap_locked (start, length, prot, flags, offset); +} + +/* Release memory at the given address, as well as the corresponding + executable page if it's separate. */ +static int +dlmunmap (void *start, size_t length) +{ + /* We don't bother decreasing execsize or truncating the file, since + we can't quite tell whether we're unmapping the end of the file. + We don't expect frequent deallocation anyway. If we did, we + could locate pages in the file by writing to the pages being + deallocated and checking that the file contents change. 
+ Yuck. */ + msegmentptr seg = segment_holding (gm, start); + void *code; + +#if FFI_CLOSURE_TEST + printf ("unmapping %zi\n", length); +#endif + + if (seg && (code = add_segment_exec_offset (start, seg)) != start) + { + int ret = munmap (code, length); + if (ret) + return ret; + } + + return munmap (start, length); +} + +#if FFI_CLOSURE_FREE_CODE +/* Return segment holding given code address. */ +static msegmentptr +segment_holding_code (mstate m, char* addr) +{ + msegmentptr sp = &m->seg; + for (;;) { + if (addr >= add_segment_exec_offset (sp->base, sp) + && addr < add_segment_exec_offset (sp->base, sp) + sp->size) + return sp; + if ((sp = sp->next) == 0) + return 0; + } +} +#endif + +#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */ + +/* Allocate a chunk of memory with the given size. Returns a pointer + to the writable address, and sets *CODE to the executable + corresponding virtual address. */ +void * +ffi_closure_alloc (size_t size, void **code) +{ + *code = malloc(size); + return *code; +#if 0 + void *ptr; + + if (!code) + return NULL; + + ptr = dlmalloc (size); + + if (ptr) + { + msegmentptr seg = segment_holding (gm, ptr); + + *code = add_segment_exec_offset (ptr, seg); + } + + return ptr; +#endif +} + +/* Release a chunk of memory allocated with ffi_closure_alloc. If + FFI_CLOSURE_FREE_CODE is nonzero, the given address can be the + writable or the executable address given. Otherwise, only the + writable address can be provided here. */ +void +ffi_closure_free (void *ptr) +{ +#if 0 +#if FFI_CLOSURE_FREE_CODE + msegmentptr seg = segment_holding_code(gm, ptr); + + if (seg) + ptr = sub_segment_exec_offset(ptr, seg); +#endif + + dlfree(ptr); +#endif + free(ptr); +} + + +#if FFI_CLOSURE_TEST +/* Do some internal sanity testing to make sure allocation and + deallocation of pages are working as intended. */ +int main () +{ + void *p[3]; +#define GET(idx, len) do { p[idx] = dlmalloc (len); printf ("allocated %zi for p[%i]\n", (len), (idx)); } while (0) +#define PUT(idx) do { printf ("freeing p[%i]\n", (idx)); dlfree (p[idx]); } while (0) + GET (0, malloc_getpagesize / 2); + GET (1, 2 * malloc_getpagesize - 64 * sizeof (void*)); + PUT (1); + GET (1, 2 * malloc_getpagesize); + GET (2, malloc_getpagesize / 2); + PUT (1); + PUT (0); + PUT (2); + return 0; +} +#endif /* FFI_CLOSURE_TEST */ +# else /* ! FFI_MMAP_EXEC_WRIT */ + +/* On many systems, memory returned by malloc is writable and + executable, so just use it. */ + +#include <stdlib.h> + +void * +ffi_closure_alloc (size_t size, void **code) +{ + if (!code) + return NULL; + + return *code = malloc (size); +} + +void +ffi_closure_free (void *ptr) +{ + free (ptr); +} + +# endif /* ! FFI_MMAP_EXEC_WRIT */ +#endif /* FFI_CLOSURES */ diff --git a/lib/wrappers/libffi/gcc/ffi.c b/lib/wrappers/libffi/gcc/ffi.c new file mode 100644 index 000000000..0600414d4 --- /dev/null +++ b/lib/wrappers/libffi/gcc/ffi.c @@ -0,0 +1,841 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc. + Copyright (c) 2002 Ranjit Mathew + Copyright (c) 2002 Bo Thorsen + Copyright (c) 2002 Roger Sayle + Copyright (C) 2008, 2010 Free Software Foundation, Inc. 
+ + x86 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#if !defined(__x86_64__) || defined(_WIN64) + +#ifdef _WIN64 +#include <windows.h> +#endif + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> + +/* ffi_prep_args is called by the assembly routine once stack space + has been allocated for the function's arguments */ + +void ffi_prep_args(char *stack, extended_cif *ecif) +{ + register unsigned int i; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; +#ifdef X86_WIN32 + size_t p_stack_args[2]; + void *p_stack_data[2]; + char *argp2 = stack; + int stack_args_count = 0; + int cabi = ecif->cif->abi; +#endif + + argp = stack; + + if ((ecif->cif->flags == FFI_TYPE_STRUCT + || ecif->cif->flags == FFI_TYPE_MS_STRUCT) +#ifdef X86_WIN64 + && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2 + && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8) +#endif + ) + { + *(void **) argp = ecif->rvalue; +#ifdef X86_WIN32 + /* For fastcall/thiscall this is first register-passed + argument. 
*/ + if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL) + { + p_stack_args[stack_args_count] = sizeof (void*); + p_stack_data[stack_args_count] = argp; + ++stack_args_count; + } +#endif + argp += sizeof(void*); + } + + p_argv = ecif->avalue; + + for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; + i != 0; + i--, p_arg++) + { + size_t z; + + /* Align if necessary */ + if ((sizeof(void*) - 1) & (size_t) argp) + argp = (char *) ALIGN(argp, sizeof(void*)); + + z = (*p_arg)->size; +#ifdef X86_WIN64 + if (z > sizeof(ffi_arg) + || ((*p_arg)->type == FFI_TYPE_STRUCT + && (z != 1 && z != 2 && z != 4 && z != 8)) +#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE) +#endif + ) + { + z = sizeof(ffi_arg); + *(void **)argp = *p_argv; + } + else if ((*p_arg)->type == FFI_TYPE_FLOAT) + { + memcpy(argp, *p_argv, z); + } + else +#endif + if (z < sizeof(ffi_arg)) + { + z = sizeof(ffi_arg); + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv); + break; + + case FFI_TYPE_UINT8: + *(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv); + break; + + case FFI_TYPE_SINT16: + *(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv); + break; + + case FFI_TYPE_UINT16: + *(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv); + break; + + case FFI_TYPE_SINT32: + *(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv); + break; + + case FFI_TYPE_UINT32: + *(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv); + break; + + case FFI_TYPE_STRUCT: + *(ffi_arg *) argp = *(ffi_arg *)(* p_argv); + break; + + default: + FFI_ASSERT(0); + } + } + else + { + memcpy(argp, *p_argv, z); + } + +#ifdef X86_WIN32 + /* For thiscall/fastcall convention register-passed arguments + are the first two none-floating-point arguments with a size + smaller or equal to sizeof (void*). */ + if ((cabi == FFI_THISCALL && stack_args_count < 1) + || (cabi == FFI_FASTCALL && stack_args_count < 2)) + { + if (z <= 4 + && ((*p_arg)->type != FFI_TYPE_FLOAT + && (*p_arg)->type != FFI_TYPE_STRUCT)) + { + p_stack_args[stack_args_count] = z; + p_stack_data[stack_args_count] = argp; + ++stack_args_count; + } + } +#endif + p_argv++; +#ifdef X86_WIN64 + argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); +#else + argp += z; +#endif + } + +#ifdef X86_WIN32 + /* We need to move the register-passed arguments for thiscall/fastcall + on top of stack, so that those can be moved to registers ecx/edx by + call-handler. */ + if (stack_args_count > 0) + { + size_t zz = (p_stack_args[0] + 3) & ~3; + char *h; + + /* Move first argument to top-stack position. */ + if (p_stack_data[0] != argp2) + { + h = alloca (zz + 1); + memcpy (h, p_stack_data[0], zz); + memmove (argp2 + zz, argp2, + (size_t) ((char *) p_stack_data[0] - (char*)argp2)); + memcpy (argp2, h, zz); + } + + argp2 += zz; + --stack_args_count; + if (zz > 4) + stack_args_count = 0; + + /* If we have a second argument, then move it on top + after the first one. 
*/ + if (stack_args_count > 0 && p_stack_data[1] != argp2) + { + zz = p_stack_args[1]; + zz = (zz + 3) & ~3; + h = alloca (zz + 1); + h = alloca (zz + 1); + memcpy (h, p_stack_data[1], zz); + memmove (argp2 + zz, argp2, (size_t) ((char*) p_stack_data[1] - (char*)argp2)); + memcpy (argp2, h, zz); + } + } +#endif + return; +} + +/* Perform machine dependent cif processing */ +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + unsigned int i; + ffi_type **ptr; + + /* Set the return type flag */ + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: + case FFI_TYPE_UINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT8: + case FFI_TYPE_SINT16: +#ifdef X86_WIN64 + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: +#endif + case FFI_TYPE_SINT64: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: +#ifndef X86_WIN64 +#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + case FFI_TYPE_LONGDOUBLE: +#endif +#endif + cif->flags = (unsigned) cif->rtype->type; + break; + + case FFI_TYPE_UINT64: +#ifdef X86_WIN64 + case FFI_TYPE_POINTER: +#endif + cif->flags = FFI_TYPE_SINT64; + break; + + case FFI_TYPE_STRUCT: +#ifndef X86 + if (cif->rtype->size == 1) + { + cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */ + } + else if (cif->rtype->size == 2) + { + cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */ + } + else if (cif->rtype->size == 4) + { +#ifdef X86_WIN64 + cif->flags = FFI_TYPE_SMALL_STRUCT_4B; +#else + cif->flags = FFI_TYPE_INT; /* same as int type */ +#endif + } + else if (cif->rtype->size == 8) + { + cif->flags = FFI_TYPE_SINT64; /* same as int64 type */ + } + else +#endif + { +#ifdef X86_WIN32 + if (cif->abi == FFI_MS_CDECL) + cif->flags = FFI_TYPE_MS_STRUCT; + else +#endif + cif->flags = FFI_TYPE_STRUCT; + /* allocate space for return value pointer */ + cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG); + } + break; + + default: +#ifdef X86_WIN64 + cif->flags = FFI_TYPE_SINT64; + break; + case FFI_TYPE_INT: + cif->flags = FFI_TYPE_SINT32; +#else + cif->flags = FFI_TYPE_INT; +#endif + break; + } + + for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) + { + if (((*ptr)->alignment - 1) & cif->bytes) + cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment); + cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG); + } + +#ifdef X86_WIN64 + /* ensure space for storing four registers */ + cif->bytes += 4 * sizeof(ffi_arg); +#endif + + cif->bytes = (cif->bytes + 15) & ~0xF; + + return FFI_OK; +} + +#ifdef X86_WIN64 +extern int +ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *, + unsigned, unsigned, unsigned *, void (*fn)(void)); +#elif defined(X86_WIN32) +extern void +ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *, + unsigned, unsigned, unsigned, unsigned *, void (*fn)(void)); +#else +extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, + unsigned, unsigned, unsigned *, void (*fn)(void)); +#endif + +void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + +#ifdef X86_WIN64 + if (rvalue == NULL + && cif->flags == FFI_TYPE_STRUCT + && cif->rtype->size != 1 && cif->rtype->size != 2 + && cif->rtype->size != 4 && cif->rtype->size != 8) + { + ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF); + } +#else + if (rvalue == NULL + && (cif->flags == FFI_TYPE_STRUCT + || cif->flags == FFI_TYPE_MS_STRUCT)) + { + ecif.rvalue = 
alloca(cif->rtype->size); + } +#endif + else + ecif.rvalue = rvalue; + + + switch (cif->abi) + { +#ifdef X86_WIN64 + case FFI_WIN64: + ffi_call_win64(ffi_prep_args, &ecif, cif->bytes, + cif->flags, ecif.rvalue, fn); + break; +#elif defined(X86_WIN32) + case FFI_SYSV: + case FFI_STDCALL: + case FFI_MS_CDECL: + ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags, + ecif.rvalue, fn); + break; + case FFI_THISCALL: + case FFI_FASTCALL: + { + unsigned int abi = cif->abi; + unsigned int i, passed_regs = 0; + + if (cif->flags == FFI_TYPE_STRUCT) + ++passed_regs; + + for (i=0; i < cif->nargs && passed_regs < 2;i++) + { + size_t sz; + + if (cif->arg_types[i]->type == FFI_TYPE_FLOAT + || cif->arg_types[i]->type == FFI_TYPE_STRUCT) + continue; + sz = (cif->arg_types[i]->size + 3) & ~3; + if (sz == 0 || sz > 4) + continue; + ++passed_regs; + } + if (passed_regs < 2 && abi == FFI_FASTCALL) + abi = FFI_THISCALL; + if (passed_regs < 1 && abi == FFI_THISCALL) + abi = FFI_STDCALL; + ffi_call_win32(ffi_prep_args, &ecif, abi, cif->bytes, cif->flags, + ecif.rvalue, fn); + } + break; +#else + case FFI_SYSV: + ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue, + fn); + break; +#endif + default: + FFI_ASSERT(0); + break; + } +} + + +/** private members **/ + +/* The following __attribute__((regparm(1))) decorations will have no effect + on MSVC or SUNPRO_C -- standard conventions apply. */ +static void ffi_prep_incoming_args_SYSV (char *stack, void **ret, + void** args, ffi_cif* cif); +void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *) + __attribute__ ((regparm(1))); +unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *) + __attribute__ ((regparm(1))); +void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *) + __attribute__ ((regparm(1))); +#ifdef X86_WIN32 +void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *) + __attribute__ ((regparm(1))); +void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *) + __attribute__ ((regparm(1))); +void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *) + __attribute__ ((regparm(1))); +#endif +#ifdef X86_WIN64 +void FFI_HIDDEN ffi_closure_win64 (ffi_closure *); +#endif + +/* This function is jumped to by the trampoline */ + +#ifdef X86_WIN64 +void * FFI_HIDDEN +ffi_closure_win64_inner (ffi_closure *closure, void *args) { + ffi_cif *cif; + void **arg_area; + void *result; + void *resp = &result; + + cif = closure->cif; + arg_area = (void**) alloca (cif->nargs * sizeof (void*)); + + /* this call will initialize ARG_AREA, such that each + * element in that array points to the corresponding + * value on the stack; and if the function returns + * a structure, it will change RESP to point to the + * structure return address. */ + + ffi_prep_incoming_args_SYSV(args, &resp, arg_area, cif); + + (closure->fun) (cif, resp, arg_area, closure->user_data); + + /* The result is returned in rax. This does the right thing for + result types except for floats; we have to 'mov xmm0, rax' in the + caller to correct this. + TODO: structure sizes of 3 5 6 7 are returned by reference, too!!! + */ + return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp; +} + +#else +unsigned int FFI_HIDDEN __attribute__ ((regparm(1))) +ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args) +{ + /* our various things... 
*/ + ffi_cif *cif; + void **arg_area; + + cif = closure->cif; + arg_area = (void**) alloca (cif->nargs * sizeof (void*)); + + /* this call will initialize ARG_AREA, such that each + * element in that array points to the corresponding + * value on the stack; and if the function returns + * a structure, it will change RESP to point to the + * structure return address. */ + + ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif); + + (closure->fun) (cif, *respp, arg_area, closure->user_data); + + return cif->flags; +} +#endif /* !X86_WIN64 */ + +static void +ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue, + ffi_cif *cif) +{ + register unsigned int i; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; + + argp = stack; + +#ifdef X86_WIN64 + if (cif->rtype->size > sizeof(ffi_arg) + || (cif->flags == FFI_TYPE_STRUCT + && (cif->rtype->size != 1 && cif->rtype->size != 2 + && cif->rtype->size != 4 && cif->rtype->size != 8))) { + *rvalue = *(void **) argp; + argp += sizeof(void *); + } +#else + if ( cif->flags == FFI_TYPE_STRUCT + || cif->flags == FFI_TYPE_MS_STRUCT ) { + *rvalue = *(void **) argp; + argp += sizeof(void *); + } +#endif + + p_argv = avalue; + + for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++) + { + size_t z; + + /* Align if necessary */ + if ((sizeof(void*) - 1) & (size_t) argp) { + argp = (char *) ALIGN(argp, sizeof(void*)); + } + +#ifdef X86_WIN64 + if ((*p_arg)->size > sizeof(ffi_arg) + || ((*p_arg)->type == FFI_TYPE_STRUCT + && ((*p_arg)->size != 1 && (*p_arg)->size != 2 + && (*p_arg)->size != 4 && (*p_arg)->size != 8))) + { + z = sizeof(void *); + *p_argv = *(void **)argp; + } + else +#endif + { + z = (*p_arg)->size; + + /* because we're little endian, this is what it turns into. */ + + *p_argv = (void*) argp; + } + + p_argv++; +#ifdef X86_WIN64 + argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); +#else + argp += z; +#endif + } + + return; +} + +#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + void* __fun = (void*)(FUN); \ + void* __ctx = (void*)(CTX); \ + *(unsigned char*) &__tramp[0] = 0x41; \ + *(unsigned char*) &__tramp[1] = 0xbb; \ + *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 */ \ + *(unsigned char*) &__tramp[6] = 0x48; \ + *(unsigned char*) &__tramp[7] = 0xb8; \ + *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax */ \ + *(unsigned char *) &__tramp[16] = 0x49; \ + *(unsigned char *) &__tramp[17] = 0xba; \ + *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 */ \ + *(unsigned char *) &__tramp[26] = 0x41; \ + *(unsigned char *) &__tramp[27] = 0xff; \ + *(unsigned char *) &__tramp[28] = 0xe2; /* jmp %r10 */ \ + } + +/* How to make a trampoline. Derived from gcc/config/i386/i386.c. 
*/ + +#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + unsigned int __fun = (unsigned int)(FUN); \ + unsigned int __ctx = (unsigned int)(CTX); \ + unsigned int __dis = __fun - (__ctx + 10); \ + *(unsigned char*) &__tramp[0] = 0xb8; \ + *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ + *(unsigned char *) &__tramp[5] = 0xe9; \ + *(unsigned int*) &__tramp[6] = __dis; /* jmp __fun */ \ + } + +#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + unsigned int __fun = (unsigned int)(FUN); \ + unsigned int __ctx = (unsigned int)(CTX); \ + unsigned int __dis = __fun - (__ctx + 49); \ + unsigned short __size = (unsigned short)(SIZE); \ + *(unsigned int *) &__tramp[0] = 0x8324048b; /* mov (%esp), %eax */ \ + *(unsigned int *) &__tramp[4] = 0x4c890cec; /* sub $12, %esp */ \ + *(unsigned int *) &__tramp[8] = 0x04890424; /* mov %ecx, 4(%esp) */ \ + *(unsigned char*) &__tramp[12] = 0x24; /* mov %eax, (%esp) */ \ + *(unsigned char*) &__tramp[13] = 0xb8; \ + *(unsigned int *) &__tramp[14] = __size; /* mov __size, %eax */ \ + *(unsigned int *) &__tramp[18] = 0x08244c8d; /* lea 8(%esp), %ecx */ \ + *(unsigned int *) &__tramp[22] = 0x4802e8c1; /* shr $2, %eax ; dec %eax */ \ + *(unsigned short*) &__tramp[26] = 0x0b74; /* jz 1f */ \ + *(unsigned int *) &__tramp[28] = 0x8908518b; /* 2b: mov 8(%ecx), %edx */ \ + *(unsigned int *) &__tramp[32] = 0x04c18311; /* mov %edx, (%ecx) ; add $4, %ecx */ \ + *(unsigned char*) &__tramp[36] = 0x48; /* dec %eax */ \ + *(unsigned short*) &__tramp[37] = 0xf575; /* jnz 2b ; 1f: */ \ + *(unsigned char*) &__tramp[39] = 0xb8; \ + *(unsigned int*) &__tramp[40] = __ctx; /* movl __ctx, %eax */ \ + *(unsigned char *) &__tramp[44] = 0xe8; \ + *(unsigned int*) &__tramp[45] = __dis; /* call __fun */ \ + *(unsigned char*) &__tramp[49] = 0xc2; /* ret */ \ + *(unsigned short*) &__tramp[50] = (__size + 8); /* ret (__size + 8) */ \ + } + +#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE) \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + unsigned int __fun = (unsigned int)(FUN); \ + unsigned int __ctx = (unsigned int)(CTX); \ + unsigned int __dis = __fun - (__ctx + 10); \ + unsigned short __size = (unsigned short)(SIZE); \ + *(unsigned char*) &__tramp[0] = 0xb8; \ + *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ + *(unsigned char *) &__tramp[5] = 0xe8; \ + *(unsigned int*) &__tramp[6] = __dis; /* call __fun */ \ + *(unsigned char *) &__tramp[10] = 0xc2; \ + *(unsigned short*) &__tramp[11] = __size; /* ret __size */ \ + } + +/* the cif must already be prep'ed */ + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ +#ifdef X86_WIN64 +#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE) +#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0) + if (cif->abi == FFI_WIN64) + { + int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3); + FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0], + &ffi_closure_win64, + codeloc, mask); + /* make sure we can execute here */ + } +#else + if (cif->abi == FFI_SYSV) + { + FFI_INIT_TRAMPOLINE (&closure->tramp[0], + &ffi_closure_SYSV, + (void*)codeloc); + } +#ifdef X86_WIN32 + else if (cif->abi == FFI_THISCALL) + { + FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], + &ffi_closure_THISCALL, + (void*)codeloc, + cif->bytes); + } + else if (cif->abi == 
FFI_STDCALL) + { + FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0], + &ffi_closure_STDCALL, + (void*)codeloc, cif->bytes); + } + else if (cif->abi == FFI_MS_CDECL) + { + FFI_INIT_TRAMPOLINE (&closure->tramp[0], + &ffi_closure_SYSV, + (void*)codeloc); + } +#endif /* X86_WIN32 */ +#endif /* !X86_WIN64 */ + else + { + return FFI_BAD_ABI; + } + + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + +/* ------- Native raw API support -------------------------------- */ + +#if !FFI_NO_RAW_API + +ffi_status +ffi_prep_raw_closure_loc (ffi_raw_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,ffi_raw*,void*), + void *user_data, + void *codeloc) +{ + int i; + + if (cif->abi != FFI_SYSV) { +#ifdef X86_WIN32 + if (cif->abi != FFI_THISCALL) +#endif + return FFI_BAD_ABI; + } + + /* we currently don't support certain kinds of arguments for raw + closures. This should be implemented by a separate assembly + language routine, since it would require argument processing, + something we don't do now for performance. */ + + for (i = cif->nargs-1; i >= 0; i--) + { + FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_STRUCT); + FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE); + } + +#ifdef X86_WIN32 + if (cif->abi == FFI_SYSV) + { +#endif + FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV, + codeloc); +#ifdef X86_WIN32 + } + else if (cif->abi == FFI_THISCALL) + { + FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL, + codeloc, cif->bytes); + } +#endif + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + +static void +ffi_prep_args_raw(char *stack, extended_cif *ecif) +{ + memcpy (stack, ecif->avalue, ecif->cif->bytes); +} + +/* we borrow this routine from libffi (it must be changed, though, to + * actually call the function passed in the first argument. as of + * libffi-1.20, this is not the case.) 
+ */ + +void +ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue) +{ + extended_cif ecif; + void **avalue = (void **)fake_avalue; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + + if (rvalue == NULL + && (cif->flags == FFI_TYPE_STRUCT + || cif->flags == FFI_TYPE_MS_STRUCT)) + { + ecif.rvalue = alloca(cif->rtype->size); + } + else + ecif.rvalue = rvalue; + + + switch (cif->abi) + { +#ifdef X86_WIN32 + case FFI_SYSV: + case FFI_STDCALL: + case FFI_MS_CDECL: + ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags, + ecif.rvalue, fn); + break; + case FFI_THISCALL: + case FFI_FASTCALL: + { + unsigned int abi = cif->abi; + unsigned int i, passed_regs = 0; + + if (cif->flags == FFI_TYPE_STRUCT) + ++passed_regs; + + for (i=0; i < cif->nargs && passed_regs < 2;i++) + { + size_t sz; + + if (cif->arg_types[i]->type == FFI_TYPE_FLOAT + || cif->arg_types[i]->type == FFI_TYPE_STRUCT) + continue; + sz = (cif->arg_types[i]->size + 3) & ~3; + if (sz == 0 || sz > 4) + continue; + ++passed_regs; + } + if (passed_regs < 2 && abi == FFI_FASTCALL) + cif->abi = abi = FFI_THISCALL; + if (passed_regs < 1 && abi == FFI_THISCALL) + cif->abi = abi = FFI_STDCALL; + ffi_call_win32(ffi_prep_args_raw, &ecif, abi, cif->bytes, cif->flags, + ecif.rvalue, fn); + } + break; +#else + case FFI_SYSV: + ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags, + ecif.rvalue, fn); + break; +#endif + default: + FFI_ASSERT(0); + break; + } +} + +#endif + +#endif /* !__x86_64__ || X86_WIN64 */ + diff --git a/lib/wrappers/libffi/gcc/ffi64.c b/lib/wrappers/libffi/gcc/ffi64.c new file mode 100644 index 000000000..2014af24c --- /dev/null +++ b/lib/wrappers/libffi/gcc/ffi64.c @@ -0,0 +1,673 @@ +/* ----------------------------------------------------------------------- + ffi64.c - Copyright (c) 2013 The Written Word, Inc. + Copyright (c) 2011 Anthony Green + Copyright (c) 2008, 2010 Red Hat, Inc. + Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de> + + x86-64 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> +#include <stdarg.h> + +#ifdef __x86_64__ + +#define MAX_GPR_REGS 6 +#define MAX_SSE_REGS 8 + +#if defined(__INTEL_COMPILER) +#define UINT128 __m128 +#else +#if defined(__SUNPRO_C) +#include <sunmedia_types.h> +#define UINT128 __m128i +#else +#define UINT128 __int128_t +#endif +#endif + +union big_int_union +{ + UINT32 i32; + UINT64 i64; + UINT128 i128; +}; + +struct register_args +{ + /* Registers for argument passing. */ + UINT64 gpr[MAX_GPR_REGS]; + union big_int_union sse[MAX_SSE_REGS]; +}; + +extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, + void *raddr, void (*fnaddr)(void), unsigned ssecount); + +/* All reference to register classes here is identical to the code in + gcc/config/i386/i386.c. Do *not* change one without the other. */ + +/* Register class used for passing given 64bit part of the argument. + These represent classes as documented by the PS ABI, with the + exception of SSESF, SSEDF classes, that are basically SSE class, + just gcc will use SF or DFmode move instead of DImode to avoid + reformatting penalties. + + Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves + whenever possible (upper half does contain padding). */ +enum x86_64_reg_class + { + X86_64_NO_CLASS, + X86_64_INTEGER_CLASS, + X86_64_INTEGERSI_CLASS, + X86_64_SSE_CLASS, + X86_64_SSESF_CLASS, + X86_64_SSEDF_CLASS, + X86_64_SSEUP_CLASS, + X86_64_X87_CLASS, + X86_64_X87UP_CLASS, + X86_64_COMPLEX_X87_CLASS, + X86_64_MEMORY_CLASS + }; + +#define MAX_CLASSES 4 + +#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS) + +/* x86-64 register passing implementation. See x86-64 ABI for details. Goal + of this code is to classify each 8bytes of incoming argument by the register + class and assign registers accordingly. */ + +/* Return the union class of CLASS1 and CLASS2. + See the x86-64 PS ABI for details. */ + +static enum x86_64_reg_class +merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) +{ + /* Rule #1: If both classes are equal, this is the resulting class. */ + if (class1 == class2) + return class1; + + /* Rule #2: If one of the classes is NO_CLASS, the resulting class is + the other class. */ + if (class1 == X86_64_NO_CLASS) + return class2; + if (class2 == X86_64_NO_CLASS) + return class1; + + /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ + if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) + return X86_64_MEMORY_CLASS; + + /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ + if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) + || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) + return X86_64_INTEGERSI_CLASS; + if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS + || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) + return X86_64_INTEGER_CLASS; + + /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, + MEMORY is used. */ + if (class1 == X86_64_X87_CLASS + || class1 == X86_64_X87UP_CLASS + || class1 == X86_64_COMPLEX_X87_CLASS + || class2 == X86_64_X87_CLASS + || class2 == X86_64_X87UP_CLASS + || class2 == X86_64_COMPLEX_X87_CLASS) + return X86_64_MEMORY_CLASS; + + /* Rule #6: Otherwise class SSE is used. */ + return X86_64_SSE_CLASS; +} + +/* Classify the argument of type TYPE and mode MODE. 
+ CLASSES will be filled by the register class used to pass each word + of the operand. The number of words is returned. In case the parameter + should be passed in memory, 0 is returned. As a special case for zero + sized containers, classes[0] will be NO_CLASS and 1 is returned. + + See the x86-64 PS ABI for details. +*/ +static int +classify_argument (ffi_type *type, enum x86_64_reg_class classes[], + size_t byte_offset) +{ + switch (type->type) + { + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_POINTER: + { + int size = byte_offset + type->size; + + if (size <= 4) + { + classes[0] = X86_64_INTEGERSI_CLASS; + return 1; + } + else if (size <= 8) + { + classes[0] = X86_64_INTEGER_CLASS; + return 1; + } + else if (size <= 12) + { + classes[0] = X86_64_INTEGER_CLASS; + classes[1] = X86_64_INTEGERSI_CLASS; + return 2; + } + else if (size <= 16) + { + classes[0] = classes[1] = X86_64_INTEGERSI_CLASS; + return 2; + } + else + FFI_ASSERT (0); + } + case FFI_TYPE_FLOAT: + if (!(byte_offset % 8)) + classes[0] = X86_64_SSESF_CLASS; + else + classes[0] = X86_64_SSE_CLASS; + return 1; + case FFI_TYPE_DOUBLE: + classes[0] = X86_64_SSEDF_CLASS; + return 1; + case FFI_TYPE_LONGDOUBLE: + classes[0] = X86_64_X87_CLASS; + classes[1] = X86_64_X87UP_CLASS; + return 2; + case FFI_TYPE_STRUCT: + { + const int UNITS_PER_WORD = 8; + int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + ffi_type **ptr; + int i; + enum x86_64_reg_class subclasses[MAX_CLASSES]; + + /* If the struct is larger than 32 bytes, pass it on the stack. */ + if (type->size > 32) + return 0; + + for (i = 0; i < words; i++) + classes[i] = X86_64_NO_CLASS; + + /* Zero sized arrays or structures are NO_CLASS. We return 0 to + signalize memory class, so handle it as special case. */ + if (!words) + { + classes[0] = X86_64_NO_CLASS; + return 1; + } + + /* Merge the fields of structure. */ + for (ptr = type->elements; *ptr != NULL; ptr++) + { + int num; + + byte_offset = ALIGN (byte_offset, (*ptr)->alignment); + + num = classify_argument (*ptr, subclasses, byte_offset % 8); + if (num == 0) + return 0; + for (i = 0; i < num; i++) + { + int pos = byte_offset / 8; + classes[i + pos] = + merge_classes (subclasses[i], classes[i + pos]); + } + + byte_offset += (*ptr)->size; + } + + if (words > 2) + { + /* When size > 16 bytes, if the first one isn't + X86_64_SSE_CLASS or any other ones aren't + X86_64_SSEUP_CLASS, everything should be passed in + memory. */ + if (classes[0] != X86_64_SSE_CLASS) + return 0; + + for (i = 1; i < words; i++) + if (classes[i] != X86_64_SSEUP_CLASS) + return 0; + } + + /* Final merger cleanup. */ + for (i = 0; i < words; i++) + { + /* If one class is MEMORY, everything should be passed in + memory. */ + if (classes[i] == X86_64_MEMORY_CLASS) + return 0; + + /* The X86_64_SSEUP_CLASS should be always preceded by + X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ + if (classes[i] == X86_64_SSEUP_CLASS + && classes[i - 1] != X86_64_SSE_CLASS + && classes[i - 1] != X86_64_SSEUP_CLASS) + { + /* The first one should never be X86_64_SSEUP_CLASS. */ + FFI_ASSERT (i != 0); + classes[i] = X86_64_SSE_CLASS; + } + + /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, + everything should be passed in memory. */ + if (classes[i] == X86_64_X87UP_CLASS + && (classes[i - 1] != X86_64_X87_CLASS)) + { + /* The first one should never be X86_64_X87UP_CLASS. 
*/ + FFI_ASSERT (i != 0); + return 0; + } + } + return words; + } + + default: + FFI_ASSERT(0); + } + return 0; /* Never reached. */ +} + +/* Examine the argument and return set number of register required in each + class. Return zero iff parameter should be passed in memory, otherwise + the number of registers. */ + +static int +examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES], + _Bool in_return, int *pngpr, int *pnsse) +{ + int i, n, ngpr, nsse; + + n = classify_argument (type, classes, 0); + if (n == 0) + return 0; + + ngpr = nsse = 0; + for (i = 0; i < n; ++i) + switch (classes[i]) + { + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + ngpr++; + break; + case X86_64_SSE_CLASS: + case X86_64_SSESF_CLASS: + case X86_64_SSEDF_CLASS: + nsse++; + break; + case X86_64_NO_CLASS: + case X86_64_SSEUP_CLASS: + break; + case X86_64_X87_CLASS: + case X86_64_X87UP_CLASS: + case X86_64_COMPLEX_X87_CLASS: + return in_return != 0; + default: + abort (); + } + + *pngpr = ngpr; + *pnsse = nsse; + + return n; +} + +/* Perform machine dependent cif processing. */ + +ffi_status +ffi_prep_cif_machdep (ffi_cif *cif) +{ + int gprcount, ssecount, i, avn, n, ngpr, nsse, flags; + enum x86_64_reg_class classes[MAX_CLASSES]; + size_t bytes; + + gprcount = ssecount = 0; + + flags = cif->rtype->type; + if (flags != FFI_TYPE_VOID) + { + n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); + if (n == 0) + { + /* The return value is passed in memory. A pointer to that + memory is the first argument. Allocate a register for it. */ + gprcount++; + /* We don't have to do anything in asm for the return. */ + flags = FFI_TYPE_VOID; + } + else if (flags == FFI_TYPE_STRUCT) + { + /* Mark which registers the result appears in. */ + _Bool sse0 = SSE_CLASS_P (classes[0]); + _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]); + if (sse0 && !sse1) + flags |= 1 << 8; + else if (!sse0 && sse1) + flags |= 1 << 9; + else if (sse0 && sse1) + flags |= 1 << 10; + /* Mark the true size of the structure. */ + flags |= cif->rtype->size << 12; + } + } + + /* Go over all arguments and determine the way they should be passed. + If it's in a register and there is space for it, let that be so. If + not, add it's size to the stack byte count. */ + for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++) + { + if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = cif->arg_types[i]->alignment; + + if (align < 8) + align = 8; + + bytes = ALIGN (bytes, align); + bytes += cif->arg_types[i]->size; + } + else + { + gprcount += ngpr; + ssecount += nsse; + } + } + if (ssecount) + flags |= 1 << 11; + cif->flags = flags; + cif->bytes = ALIGN (bytes, 8); + + return FFI_OK; +} + +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + enum x86_64_reg_class classes[MAX_CLASSES]; + char *stack, *argp; + ffi_type **arg_types; + int gprcount, ssecount, ngpr, nsse, i, avn; + _Bool ret_in_memory; + struct register_args *reg_args; + + /* Can't call 32-bit mode from 64-bit mode. */ + FFI_ASSERT (cif->abi == FFI_UNIX64); + + /* If the return value is a struct and we don't have a return value + address then we need to make one. Note the setting of flags to + VOID above in ffi_prep_cif_machdep. 
*/ + ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT + && (cif->flags & 0xff) == FFI_TYPE_VOID); + if (rvalue == NULL && ret_in_memory) + rvalue = alloca (cif->rtype->size); + + /* Allocate the space for the arguments, plus 4 words of temp space. */ + stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8); + reg_args = (struct register_args *) stack; + argp = stack + sizeof (struct register_args); + + gprcount = ssecount = 0; + + /* If the return value is passed in memory, add the pointer as the + first integer argument. */ + if (ret_in_memory) + reg_args->gpr[gprcount++] = (unsigned long) rvalue; + + avn = cif->nargs; + arg_types = cif->arg_types; + + for (i = 0; i < avn; ++i) + { + size_t size = arg_types[i]->size; + int n; + + n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); + if (n == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = arg_types[i]->alignment; + + /* Stack arguments are *always* at least 8 byte aligned. */ + if (align < 8) + align = 8; + + /* Pass this argument in memory. */ + argp = (void *) ALIGN (argp, align); + memcpy (argp, avalue[i], size); + argp += size; + } + else + { + /* The argument is passed entirely in registers. */ + char *a = (char *) avalue[i]; + int j; + + for (j = 0; j < n; j++, a += 8, size -= 8) + { + switch (classes[j]) + { + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + /* Sign-extend integer arguments passed in general + purpose registers, to cope with the fact that + LLVM incorrectly assumes that this will be done + (the x86-64 PS ABI does not specify this). */ + switch (arg_types[i]->type) + { + case FFI_TYPE_SINT8: + *(SINT64 *)®_args->gpr[gprcount] = (SINT64) *((SINT8 *) a); + break; + case FFI_TYPE_SINT16: + *(SINT64 *)®_args->gpr[gprcount] = (SINT64) *((SINT16 *) a); + break; + case FFI_TYPE_SINT32: + *(SINT64 *)®_args->gpr[gprcount] = (SINT64) *((SINT32 *) a); + break; + default: + reg_args->gpr[gprcount] = 0; + memcpy (®_args->gpr[gprcount], a, size < 8 ? size : 8); + } + gprcount++; + break; + case X86_64_SSE_CLASS: + case X86_64_SSEDF_CLASS: + reg_args->sse[ssecount++].i64 = *(UINT64 *) a; + break; + case X86_64_SSESF_CLASS: + reg_args->sse[ssecount++].i32 = *(UINT32 *) a; + break; + default: + abort(); + } + } + } + } + + ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args), + cif->flags, rvalue, fn, ssecount); +} + + +extern void ffi_closure_unix64(void); + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ + volatile unsigned short *tramp; + + /* Sanity check on the cif ABI. */ + { + int abi = cif->abi; + if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI))) + return FFI_BAD_ABI; + } + + tramp = (volatile unsigned short *) &closure->tramp[0]; + + tramp[0] = 0xbb49; /* mov <code>, %r11 */ + *((unsigned long long * volatile) &tramp[1]) + = (unsigned long) ffi_closure_unix64; + tramp[5] = 0xba49; /* mov <data>, %r10 */ + *((unsigned long long * volatile) &tramp[6]) + = (unsigned long) codeloc; + + /* Set the carry bit iff the function uses any sse registers. + This is clc or stc, together with the first byte of the jmp. */ + tramp[10] = cif->flags & (1 << 11) ? 
0x49f9 : 0x49f8; + + tramp[11] = 0xe3ff; /* jmp *%r11 */ + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +int +ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue, + struct register_args *reg_args, char *argp) +{ + ffi_cif *cif; + void **avalue; + ffi_type **arg_types; + long i, avn; + int gprcount, ssecount, ngpr, nsse; + int ret; + + cif = closure->cif; + avalue = alloca(cif->nargs * sizeof(void *)); + gprcount = ssecount = 0; + + ret = cif->rtype->type; + if (ret != FFI_TYPE_VOID) + { + enum x86_64_reg_class classes[MAX_CLASSES]; + int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); + if (n == 0) + { + /* The return value goes in memory. Arrange for the closure + return value to go directly back to the original caller. */ + rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++]; + /* We don't have to do anything in asm for the return. */ + ret = FFI_TYPE_VOID; + } + else if (ret == FFI_TYPE_STRUCT && n == 2) + { + /* Mark which register the second word of the structure goes in. */ + _Bool sse0 = SSE_CLASS_P (classes[0]); + _Bool sse1 = SSE_CLASS_P (classes[1]); + if (!sse0 && sse1) + ret |= 1 << 8; + else if (sse0 && !sse1) + ret |= 1 << 9; + } + } + + avn = cif->nargs; + arg_types = cif->arg_types; + + for (i = 0; i < avn; ++i) + { + enum x86_64_reg_class classes[MAX_CLASSES]; + int n; + + n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); + if (n == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = arg_types[i]->alignment; + + /* Stack arguments are *always* at least 8 byte aligned. */ + if (align < 8) + align = 8; + + /* Pass this argument in memory. */ + argp = (void *) ALIGN (argp, align); + avalue[i] = argp; + argp += arg_types[i]->size; + } + /* If the argument is in a single register, or two consecutive + integer registers, then we can use that address directly. */ + else if (n == 1 + || (n == 2 && !(SSE_CLASS_P (classes[0]) + || SSE_CLASS_P (classes[1])))) + { + /* The argument is in a single register. */ + if (SSE_CLASS_P (classes[0])) + { + avalue[i] = ®_args->sse[ssecount]; + ssecount += n; + } + else + { + avalue[i] = ®_args->gpr[gprcount]; + gprcount += n; + } + } + /* Otherwise, allocate space to make them consecutive. */ + else + { + char *a = alloca (16); + int j; + + avalue[i] = a; + for (j = 0; j < n; j++, a += 8) + { + if (SSE_CLASS_P (classes[j])) + memcpy (a, ®_args->sse[ssecount++], 8); + else + memcpy (a, ®_args->gpr[gprcount++], 8); + } + } + } + + /* Invoke the closure. */ + closure->fun (cif, rvalue, avalue, closure->user_data); + + /* Tell assembly how to perform return type promotions. */ + return ret; +} + +#endif /* __x86_64__ */ diff --git a/lib/wrappers/libffi/gcc/prep_cif.c b/lib/wrappers/libffi/gcc/prep_cif.c new file mode 100644 index 000000000..e8ec5cf1e --- /dev/null +++ b/lib/wrappers/libffi/gcc/prep_cif.c @@ -0,0 +1,237 @@ +/* ----------------------------------------------------------------------- + prep_cif.c - Copyright (c) 2011, 2012 Anthony Green + Copyright (c) 1996, 1998, 2007 Red Hat, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> +#include <stdlib.h> + +/* Round up to FFI_SIZEOF_ARG. */ + +#define STACK_ARG_SIZE(x) ALIGN(x, FFI_SIZEOF_ARG) + +/* Perform machine independent initialization of aggregate type + specifications. */ + +static ffi_status initialize_aggregate(ffi_type *arg) +{ + ffi_type **ptr; + + if (UNLIKELY(arg == NULL || arg->elements == NULL)) + return FFI_BAD_TYPEDEF; + + arg->size = 0; + arg->alignment = 0; + + ptr = &(arg->elements[0]); + + if (UNLIKELY(ptr == 0)) + return FFI_BAD_TYPEDEF; + + while ((*ptr) != NULL) + { + if (UNLIKELY(((*ptr)->size == 0) + && (initialize_aggregate((*ptr)) != FFI_OK))) + return FFI_BAD_TYPEDEF; + + /* Perform a sanity check on the argument type */ + FFI_ASSERT_VALID_TYPE(*ptr); + + arg->size = ALIGN(arg->size, (*ptr)->alignment); + arg->size += (*ptr)->size; + + arg->alignment = (arg->alignment > (*ptr)->alignment) ? + arg->alignment : (*ptr)->alignment; + + ptr++; + } + + /* Structure size includes tail padding. This is important for + structures that fit in one register on ABIs like the PowerPC64 + Linux ABI that right justify small structs in a register. + It's also needed for nested structure layout, for example + struct A { long a; char b; }; struct B { struct A x; char y; }; + should find y at an offset of 2*sizeof(long) and result in a + total size of 3*sizeof(long). */ + arg->size = ALIGN (arg->size, arg->alignment); + + if (arg->size == 0) + return FFI_BAD_TYPEDEF; + else + return FFI_OK; +} + +#ifndef __CRIS__ +/* The CRIS ABI specifies structure elements to have byte + alignment only, so it completely overrides this functions, + which assumes "natural" alignment and padding. */ + +/* Perform machine independent ffi_cif preparation, then call + machine dependent routine. */ + +/* For non variadic functions isvariadic should be 0 and + nfixedargs==ntotalargs. + + For variadic calls, isvariadic should be 1 and nfixedargs + and ntotalargs set as appropriate. nfixedargs must always be >=1 */ + + +ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi, + unsigned int isvariadic, + unsigned int nfixedargs, + unsigned int ntotalargs, + ffi_type *rtype, ffi_type **atypes) +{ + unsigned bytes = 0; + unsigned int i; + ffi_type **ptr; + + FFI_ASSERT(cif != NULL); + FFI_ASSERT((!isvariadic) || (nfixedargs >= 1)); + FFI_ASSERT(nfixedargs <= ntotalargs); + +#ifndef X86_WIN32 + if (! 
(abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)) + return FFI_BAD_ABI; +#else + if (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI || abi == FFI_THISCALL)) + return FFI_BAD_ABI; +#endif + + cif->abi = abi; + cif->arg_types = atypes; + cif->nargs = ntotalargs; + cif->rtype = rtype; + + cif->flags = 0; + + /* Initialize the return type if necessary */ + if ((cif->rtype->size == 0) && (initialize_aggregate(cif->rtype) != FFI_OK)) + return FFI_BAD_TYPEDEF; + + /* Perform a sanity check on the return type */ + FFI_ASSERT_VALID_TYPE(cif->rtype); + + /* x86, x86-64 and s390 stack space allocation is handled in prep_machdep. */ +#if !defined M68K && !defined X86_ANY && !defined S390 && !defined PA + /* Make space for the return structure pointer */ + if (cif->rtype->type == FFI_TYPE_STRUCT +#ifdef SPARC + && (cif->abi != FFI_V9 || cif->rtype->size > 32) +#endif +#ifdef TILE + && (cif->rtype->size > 10 * FFI_SIZEOF_ARG) +#endif +#ifdef XTENSA + && (cif->rtype->size > 16) +#endif + + ) + bytes = STACK_ARG_SIZE(sizeof(void*)); +#endif + + for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) + { + + /* Initialize any uninitialized aggregate type definitions */ + if (((*ptr)->size == 0) && (initialize_aggregate((*ptr)) != FFI_OK)) + return FFI_BAD_TYPEDEF; + + /* Perform a sanity check on the argument type, do this + check after the initialization. */ + FFI_ASSERT_VALID_TYPE(*ptr); + +#if !defined X86_ANY && !defined S390 && !defined PA +#ifdef SPARC + if (((*ptr)->type == FFI_TYPE_STRUCT + && ((*ptr)->size > 16 || cif->abi != FFI_V9)) + || ((*ptr)->type == FFI_TYPE_LONGDOUBLE + && cif->abi != FFI_V9)) + bytes += sizeof(void*); + else +#endif + { + /* Add any padding if necessary */ + if (((*ptr)->alignment - 1) & bytes) + bytes = ALIGN(bytes, (*ptr)->alignment); + +#ifdef TILE + if (bytes < 10 * FFI_SIZEOF_ARG && + bytes + STACK_ARG_SIZE((*ptr)->size) > 10 * FFI_SIZEOF_ARG) + { + /* An argument is never split between the 10 parameter + registers and the stack. */ + bytes = 10 * FFI_SIZEOF_ARG; + } +#endif +#ifdef XTENSA + if (bytes <= 6*4 && bytes + STACK_ARG_SIZE((*ptr)->size) > 6*4) + bytes = 6*4; +#endif + + bytes += STACK_ARG_SIZE((*ptr)->size); + } +#endif + } + + cif->bytes = bytes; + + /* Perform machine dependent cif processing */ +#ifdef FFI_TARGET_SPECIFIC_VARIADIC + if (isvariadic) + return ffi_prep_cif_machdep_var(cif, nfixedargs, ntotalargs); +#endif + + return ffi_prep_cif_machdep(cif); +} +#endif /* not __CRIS__ */ + +ffi_status ffi_prep_cif(ffi_cif *cif, ffi_abi abi, unsigned int nargs, + ffi_type *rtype, ffi_type **atypes) +{ + return ffi_prep_cif_core(cif, abi, 0, nargs, nargs, rtype, atypes); +} + +ffi_status ffi_prep_cif_var(ffi_cif *cif, + ffi_abi abi, + unsigned int nfixedargs, + unsigned int ntotalargs, + ffi_type *rtype, + ffi_type **atypes) +{ + return ffi_prep_cif_core(cif, abi, 1, nfixedargs, ntotalargs, rtype, atypes); +} + +#if FFI_CLOSURES + +ffi_status +ffi_prep_closure (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data) +{ + return ffi_prep_closure_loc (closure, cif, fun, user_data, closure); +} + +#endif diff --git a/lib/wrappers/libffi/gcc/types.c b/lib/wrappers/libffi/gcc/types.c new file mode 100644 index 000000000..0a11eb0fb --- /dev/null +++ b/lib/wrappers/libffi/gcc/types.c @@ -0,0 +1,77 @@ +/* ----------------------------------------------------------------------- + types.c - Copyright (c) 1996, 1998 Red Hat, Inc. + + Predefined ffi_types needed by libffi. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +/* Hide the basic type definitions from the header file, so that we + can redefine them here as "const". */ +#define LIBFFI_HIDE_BASIC_TYPES + +#include <ffi.h> +#include <ffi_common.h> + +/* Type definitions */ + +#define FFI_TYPEDEF(name, type, id) \ +struct struct_align_##name { \ + char c; \ + type x; \ +}; \ +const ffi_type ffi_type_##name = { \ + sizeof(type), \ + offsetof(struct struct_align_##name, x), \ + id, NULL \ +} + +/* Size and alignment are fake here. They must not be 0. */ +const ffi_type ffi_type_void = { + 1, 1, FFI_TYPE_VOID, NULL +}; + +FFI_TYPEDEF(uint8, UINT8, FFI_TYPE_UINT8); +FFI_TYPEDEF(sint8, SINT8, FFI_TYPE_SINT8); +FFI_TYPEDEF(uint16, UINT16, FFI_TYPE_UINT16); +FFI_TYPEDEF(sint16, SINT16, FFI_TYPE_SINT16); +FFI_TYPEDEF(uint32, UINT32, FFI_TYPE_UINT32); +FFI_TYPEDEF(sint32, SINT32, FFI_TYPE_SINT32); +FFI_TYPEDEF(uint64, UINT64, FFI_TYPE_UINT64); +FFI_TYPEDEF(sint64, SINT64, FFI_TYPE_SINT64); + +FFI_TYPEDEF(pointer, void*, FFI_TYPE_POINTER); + +FFI_TYPEDEF(float, float, FFI_TYPE_FLOAT); +FFI_TYPEDEF(double, double, FFI_TYPE_DOUBLE); + +#ifdef __alpha__ +/* Even if we're not configured to default to 128-bit long double, + maintain binary compatibility, as -mlong-double-128 can be used + at any time. */ +/* Validate the hard-coded number below. */ +# if defined(__LONG_DOUBLE_128__) && FFI_TYPE_LONGDOUBLE != 4 +# error FFI_TYPE_LONGDOUBLE out of date +# endif +const ffi_type ffi_type_longdouble = { 16, 16, 4, NULL }; +#elif FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE +FFI_TYPEDEF(longdouble, long double, FFI_TYPE_LONGDOUBLE); +#endif diff --git a/lib/wrappers/libffi/gcc/win32_asm.asm b/lib/wrappers/libffi/gcc/win32_asm.asm new file mode 100644 index 000000000..ce3c4f3f3 --- /dev/null +++ b/lib/wrappers/libffi/gcc/win32_asm.asm @@ -0,0 +1,759 @@ +/* ----------------------------------------------------------------------- + win32.S - Copyright (c) 1996, 1998, 2001, 2002, 2009 Red Hat, Inc. 
+ Copyright (c) 2001 John Beniton + Copyright (c) 2002 Ranjit Mathew + Copyright (c) 2009 Daniel Witte + + + X86 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- + */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#include <ffitarget.h> + + .text + + // This assumes we are using gas. + .balign 16 + .globl _ffi_call_win32 +#ifndef __OS2__ + .def _ffi_call_win32; .scl 2; .type 32; .endef +#endif +_ffi_call_win32: +.LFB1: + pushl %ebp +.LCFI0: + movl %esp,%ebp +.LCFI1: + // Make room for all of the new args. + movl 20(%ebp),%ecx + subl %ecx,%esp + + movl %esp,%eax + + // Place all of the ffi_prep_args in position + pushl 12(%ebp) + pushl %eax + call *8(%ebp) + + // Return stack to previous state and call the function + addl $8,%esp + + // Handle fastcall and thiscall + cmpl $3, 16(%ebp) // FFI_THISCALL + jz .do_thiscall + cmpl $4, 16(%ebp) // FFI_FASTCALL + jnz .do_fncall + movl (%esp), %ecx + movl 4(%esp), %edx + addl $8, %esp + jmp .do_fncall +.do_thiscall: + movl (%esp), %ecx + addl $4, %esp + +.do_fncall: + + // FIXME: Align the stack to a 128-bit boundary to avoid + // potential performance hits. + + call *32(%ebp) + + // stdcall functions pop arguments off the stack themselves + + // Load %ecx with the return type code + movl 24(%ebp),%ecx + + // If the return value pointer is NULL, assume no return value. + cmpl $0,28(%ebp) + jne 0f + + // Even if there is no space for the return value, we are + // obliged to handle floating-point values. + cmpl $FFI_TYPE_FLOAT,%ecx + jne .Lnoretval + fstp %st(0) + + jmp .Lepilogue + +0: + call 1f + // Do not insert anything here between the call and the jump table. 
+.Lstore_table: + .long .Lnoretval /* FFI_TYPE_VOID */ + .long .Lretint /* FFI_TYPE_INT */ + .long .Lretfloat /* FFI_TYPE_FLOAT */ + .long .Lretdouble /* FFI_TYPE_DOUBLE */ + .long .Lretlongdouble /* FFI_TYPE_LONGDOUBLE */ + .long .Lretuint8 /* FFI_TYPE_UINT8 */ + .long .Lretsint8 /* FFI_TYPE_SINT8 */ + .long .Lretuint16 /* FFI_TYPE_UINT16 */ + .long .Lretsint16 /* FFI_TYPE_SINT16 */ + .long .Lretint /* FFI_TYPE_UINT32 */ + .long .Lretint /* FFI_TYPE_SINT32 */ + .long .Lretint64 /* FFI_TYPE_UINT64 */ + .long .Lretint64 /* FFI_TYPE_SINT64 */ + .long .Lretstruct /* FFI_TYPE_STRUCT */ + .long .Lretint /* FFI_TYPE_POINTER */ + .long .Lretstruct1b /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lretstruct2b /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lretstruct4b /* FFI_TYPE_SMALL_STRUCT_4B */ + .long .Lretstruct /* FFI_TYPE_MS_STRUCT */ +1: + add %ecx, %ecx + add %ecx, %ecx + add (%esp),%ecx + add $4, %esp + jmp *(%ecx) + + /* Sign/zero extend as appropriate. */ +.Lretsint8: + movsbl %al, %eax + jmp .Lretint + +.Lretsint16: + movswl %ax, %eax + jmp .Lretint + +.Lretuint8: + movzbl %al, %eax + jmp .Lretint + +.Lretuint16: + movzwl %ax, %eax + jmp .Lretint + +.Lretint: + // Load %ecx with the pointer to storage for the return value + movl 28(%ebp),%ecx + movl %eax,0(%ecx) + jmp .Lepilogue + +.Lretfloat: + // Load %ecx with the pointer to storage for the return value + movl 28(%ebp),%ecx + fstps (%ecx) + jmp .Lepilogue + +.Lretdouble: + // Load %ecx with the pointer to storage for the return value + movl 28(%ebp),%ecx + fstpl (%ecx) + jmp .Lepilogue + +.Lretlongdouble: + // Load %ecx with the pointer to storage for the return value + movl 28(%ebp),%ecx + fstpt (%ecx) + jmp .Lepilogue + +.Lretint64: + // Load %ecx with the pointer to storage for the return value + movl 28(%ebp),%ecx + movl %eax,0(%ecx) + movl %edx,4(%ecx) + jmp .Lepilogue + +.Lretstruct1b: + // Load %ecx with the pointer to storage for the return value + movl 28(%ebp),%ecx + movb %al,0(%ecx) + jmp .Lepilogue + +.Lretstruct2b: + // Load %ecx with the pointer to storage for the return value + movl 28(%ebp),%ecx + movw %ax,0(%ecx) + jmp .Lepilogue + +.Lretstruct4b: + // Load %ecx with the pointer to storage for the return value + movl 28(%ebp),%ecx + movl %eax,0(%ecx) + jmp .Lepilogue + +.Lretstruct: + // Nothing to do! + +.Lnoretval: +.Lepilogue: + movl %ebp,%esp + popl %ebp + ret +.ffi_call_win32_end: + .balign 16 + .globl _ffi_closure_THISCALL +#ifndef __OS2__ + .def _ffi_closure_THISCALL; .scl 2; .type 32; .endef +#endif +_ffi_closure_THISCALL: + pushl %ebp + movl %esp, %ebp + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 12(%ebp), %edx /* account for stub return address on stack */ + jmp .stub +.LFE1: + + // This assumes we are using gas. + .balign 16 + .globl _ffi_closure_SYSV +#ifndef __OS2__ + .def _ffi_closure_SYSV; .scl 2; .type 32; .endef +#endif +_ffi_closure_SYSV: +.LFB3: + pushl %ebp +.LCFI4: + movl %esp, %ebp +.LCFI5: + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 8(%ebp), %edx +.stub: + movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ + leal -12(%ebp), %edx + movl %edx, (%esp) /* &resp */ + call _ffi_closure_SYSV_inner + movl -12(%ebp), %ecx + +0: + call 1f + // Do not insert anything here between the call and the jump table. 
+.Lcls_store_table: + .long .Lcls_noretval /* FFI_TYPE_VOID */ + .long .Lcls_retint /* FFI_TYPE_INT */ + .long .Lcls_retfloat /* FFI_TYPE_FLOAT */ + .long .Lcls_retdouble /* FFI_TYPE_DOUBLE */ + .long .Lcls_retldouble /* FFI_TYPE_LONGDOUBLE */ + .long .Lcls_retuint8 /* FFI_TYPE_UINT8 */ + .long .Lcls_retsint8 /* FFI_TYPE_SINT8 */ + .long .Lcls_retuint16 /* FFI_TYPE_UINT16 */ + .long .Lcls_retsint16 /* FFI_TYPE_SINT16 */ + .long .Lcls_retint /* FFI_TYPE_UINT32 */ + .long .Lcls_retint /* FFI_TYPE_SINT32 */ + .long .Lcls_retllong /* FFI_TYPE_UINT64 */ + .long .Lcls_retllong /* FFI_TYPE_SINT64 */ + .long .Lcls_retstruct /* FFI_TYPE_STRUCT */ + .long .Lcls_retint /* FFI_TYPE_POINTER */ + .long .Lcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */ + .long .Lcls_retmsstruct /* FFI_TYPE_MS_STRUCT */ + +1: + add %eax, %eax + add %eax, %eax + add (%esp),%eax + add $4, %esp + jmp *(%eax) + + /* Sign/zero extend as appropriate. */ +.Lcls_retsint8: + movsbl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retsint16: + movswl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retuint8: + movzbl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retuint16: + movzwl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retint: + movl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retfloat: + flds (%ecx) + jmp .Lcls_epilogue + +.Lcls_retdouble: + fldl (%ecx) + jmp .Lcls_epilogue + +.Lcls_retldouble: + fldt (%ecx) + jmp .Lcls_epilogue + +.Lcls_retllong: + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lcls_epilogue + +.Lcls_retstruct1: + movsbl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retstruct2: + movswl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retstruct4: + movl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retstruct: + // Caller expects us to pop struct return value pointer hidden arg. + movl %ebp, %esp + popl %ebp + ret $0x4 + +.Lcls_retmsstruct: + // Caller expects us to return a pointer to the real return value. + mov %ecx, %eax + // Caller doesn't expects us to pop struct return value pointer hidden arg. + jmp .Lcls_epilogue + +.Lcls_noretval: +.Lcls_epilogue: + movl %ebp, %esp + popl %ebp + ret +.ffi_closure_SYSV_end: +.LFE3: + +#if !FFI_NO_RAW_API + +#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) +#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) +#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) +#define CIF_FLAGS_OFFSET 20 + .balign 16 + .globl _ffi_closure_raw_THISCALL +#ifndef __OS2__ + .def _ffi_closure_raw_THISCALL; .scl 2; .type 32; .endef +#endif +_ffi_closure_raw_THISCALL: + pushl %ebp + movl %esp, %ebp + pushl %esi + subl $36, %esp + movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ + movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ + movl %edx, 12(%esp) /* user_data */ + leal 12(%ebp), %edx /* __builtin_dwarf_cfa () */ + jmp .stubraw + // This assumes we are using gas. 
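Every return path in these win32 stubs goes through the same dispatch idiom: "call 1f" pushes the address of the word that immediately follows it, which is the jump table itself, so the handler for return type N lives at table + 4*N. That is also why the source warns against inserting anything between the call and the table. A hedged C rendering of the add/add/add/jmp sequence (dispatch_ret and ret_handler are illustrative names; the 4-byte slot size assumes this 32-bit target; ffi_call_win32 indexes through %ecx, the closures through %eax):

typedef void (*ret_handler) (void);

/* add %ecx,%ecx ; add %ecx,%ecx  -> index *= 4 (one 32-bit slot per type)
   add (%esp),%ecx ; add $4,%esp  -> + table address pushed by "call 1f"
   jmp *(%ecx)                    -> tail-jump to the selected handler   */
static void dispatch_ret (const ret_handler *table, unsigned rtype)
{
  table[rtype] ();   /* e.g. the FFI_TYPE_FLOAT slot pops the x87 result */
}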
+ .balign 16 + .globl _ffi_closure_raw_SYSV +#ifndef __OS2__ + .def _ffi_closure_raw_SYSV; .scl 2; .type 32; .endef +#endif +_ffi_closure_raw_SYSV: +.LFB4: + pushl %ebp +.LCFI6: + movl %esp, %ebp +.LCFI7: + pushl %esi +.LCFI8: + subl $36, %esp + movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ + movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ + movl %edx, 12(%esp) /* user_data */ + leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ +.stubraw: + movl %edx, 8(%esp) /* raw_args */ + leal -24(%ebp), %edx + movl %edx, 4(%esp) /* &res */ + movl %esi, (%esp) /* cif */ + call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ + movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ +0: + call 1f + // Do not insert anything here between the call and the jump table. +.Lrcls_store_table: + .long .Lrcls_noretval /* FFI_TYPE_VOID */ + .long .Lrcls_retint /* FFI_TYPE_INT */ + .long .Lrcls_retfloat /* FFI_TYPE_FLOAT */ + .long .Lrcls_retdouble /* FFI_TYPE_DOUBLE */ + .long .Lrcls_retldouble /* FFI_TYPE_LONGDOUBLE */ + .long .Lrcls_retuint8 /* FFI_TYPE_UINT8 */ + .long .Lrcls_retsint8 /* FFI_TYPE_SINT8 */ + .long .Lrcls_retuint16 /* FFI_TYPE_UINT16 */ + .long .Lrcls_retsint16 /* FFI_TYPE_SINT16 */ + .long .Lrcls_retint /* FFI_TYPE_UINT32 */ + .long .Lrcls_retint /* FFI_TYPE_SINT32 */ + .long .Lrcls_retllong /* FFI_TYPE_UINT64 */ + .long .Lrcls_retllong /* FFI_TYPE_SINT64 */ + .long .Lrcls_retstruct /* FFI_TYPE_STRUCT */ + .long .Lrcls_retint /* FFI_TYPE_POINTER */ + .long .Lrcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lrcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lrcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */ + .long .Lrcls_retstruct /* FFI_TYPE_MS_STRUCT */ +1: + add %eax, %eax + add %eax, %eax + add (%esp),%eax + add $4, %esp + jmp *(%eax) + + /* Sign/zero extend as appropriate. */ +.Lrcls_retsint8: + movsbl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retsint16: + movswl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retuint8: + movzbl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retuint16: + movzwl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retint: + movl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retfloat: + flds -24(%ebp) + jmp .Lrcls_epilogue + +.Lrcls_retdouble: + fldl -24(%ebp) + jmp .Lrcls_epilogue + +.Lrcls_retldouble: + fldt -24(%ebp) + jmp .Lrcls_epilogue + +.Lrcls_retllong: + movl -24(%ebp), %eax + movl -20(%ebp), %edx + jmp .Lrcls_epilogue + +.Lrcls_retstruct1: + movsbl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retstruct2: + movswl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retstruct4: + movl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retstruct: + // Nothing to do! + +.Lrcls_noretval: +.Lrcls_epilogue: + addl $36, %esp + popl %esi + popl %ebp + ret +.ffi_closure_raw_SYSV_end: +.LFE4: + +#endif /* !FFI_NO_RAW_API */ + + // This assumes we are using gas. + .balign 16 + .globl _ffi_closure_STDCALL +#ifndef __OS2__ + .def _ffi_closure_STDCALL; .scl 2; .type 32; .endef +#endif +_ffi_closure_STDCALL: +.LFB5: + pushl %ebp +.LCFI9: + movl %esp, %ebp +.LCFI10: + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 12(%ebp), %edx /* account for stub return address on stack */ + movl %edx, 4(%esp) /* args */ + leal -12(%ebp), %edx + movl %edx, (%esp) /* &resp */ + call _ffi_closure_SYSV_inner + movl -12(%ebp), %ecx +0: + call 1f + // Do not insert anything here between the call and the jump table. 
+.Lscls_store_table: + .long .Lscls_noretval /* FFI_TYPE_VOID */ + .long .Lscls_retint /* FFI_TYPE_INT */ + .long .Lscls_retfloat /* FFI_TYPE_FLOAT */ + .long .Lscls_retdouble /* FFI_TYPE_DOUBLE */ + .long .Lscls_retldouble /* FFI_TYPE_LONGDOUBLE */ + .long .Lscls_retuint8 /* FFI_TYPE_UINT8 */ + .long .Lscls_retsint8 /* FFI_TYPE_SINT8 */ + .long .Lscls_retuint16 /* FFI_TYPE_UINT16 */ + .long .Lscls_retsint16 /* FFI_TYPE_SINT16 */ + .long .Lscls_retint /* FFI_TYPE_UINT32 */ + .long .Lscls_retint /* FFI_TYPE_SINT32 */ + .long .Lscls_retllong /* FFI_TYPE_UINT64 */ + .long .Lscls_retllong /* FFI_TYPE_SINT64 */ + .long .Lscls_retstruct /* FFI_TYPE_STRUCT */ + .long .Lscls_retint /* FFI_TYPE_POINTER */ + .long .Lscls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lscls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lscls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */ +1: + add %eax, %eax + add %eax, %eax + add (%esp),%eax + add $4, %esp + jmp *(%eax) + + /* Sign/zero extend as appropriate. */ +.Lscls_retsint8: + movsbl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retsint16: + movswl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retuint8: + movzbl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retuint16: + movzwl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retint: + movl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retfloat: + flds (%ecx) + jmp .Lscls_epilogue + +.Lscls_retdouble: + fldl (%ecx) + jmp .Lscls_epilogue + +.Lscls_retldouble: + fldt (%ecx) + jmp .Lscls_epilogue + +.Lscls_retllong: + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lscls_epilogue + +.Lscls_retstruct1: + movsbl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retstruct2: + movswl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retstruct4: + movl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retstruct: + // Nothing to do! + +.Lscls_noretval: +.Lscls_epilogue: + movl %ebp, %esp + popl %ebp + ret +.ffi_closure_STDCALL_end: +.LFE5: + +#ifndef __OS2__ + .section .eh_frame,"w" +#endif +.Lframe1: +.LSCIE1: + .long .LECIE1-.LASCIE1 /* Length of Common Information Entry */ +.LASCIE1: + .long 0x0 /* CIE Identifier Tag */ + .byte 0x1 /* CIE Version */ +#ifdef __PIC__ + .ascii "zR\0" /* CIE Augmentation */ +#else + .ascii "\0" /* CIE Augmentation */ +#endif + .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */ + .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */ + .byte 0x8 /* CIE RA Column */ +#ifdef __PIC__ + .byte 0x1 /* .uleb128 0x1; Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ +#endif + .byte 0xc /* DW_CFA_def_cfa CFA = r4 + 4 = 4(%esp) */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x88 /* DW_CFA_offset, column 0x8 %eip at CFA + 1 * -4 */ + .byte 0x1 /* .uleb128 0x1 */ + .align 4 +.LECIE1: + +.LSFDE1: + .long .LEFDE1-.LASFDE1 /* FDE Length */ +.LASFDE1: + .long .LASFDE1-.Lframe1 /* FDE CIE offset */ +#if defined __PIC__ && defined HAVE_AS_X86_PCREL + .long .LFB1-. /* FDE initial location */ +#else + .long .LFB1 +#endif + .long .LFE1-.LFB1 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + /* DW_CFA_xxx CFI instructions go here. 
*/ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI0-.LFB1 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI1-.LCFI0 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ + + /* End of DW_CFA_xxx CFI instructions. */ + .align 4 +.LEFDE1: + + +.LSFDE3: + .long .LEFDE3-.LASFDE3 /* FDE Length */ +.LASFDE3: + .long .LASFDE3-.Lframe1 /* FDE CIE offset */ +#if defined __PIC__ && defined HAVE_AS_X86_PCREL + .long .LFB3-. /* FDE initial location */ +#else + .long .LFB3 +#endif + .long .LFE3-.LFB3 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + /* DW_CFA_xxx CFI instructions go here. */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI4-.LFB3 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI5-.LCFI4 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ + + /* End of DW_CFA_xxx CFI instructions. */ + .align 4 +.LEFDE3: + +#if !FFI_NO_RAW_API + +.LSFDE4: + .long .LEFDE4-.LASFDE4 /* FDE Length */ +.LASFDE4: + .long .LASFDE4-.Lframe1 /* FDE CIE offset */ +#if defined __PIC__ && defined HAVE_AS_X86_PCREL + .long .LFB4-. /* FDE initial location */ +#else + .long .LFB4 +#endif + .long .LFE4-.LFB4 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + /* DW_CFA_xxx CFI instructions go here. */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI6-.LFB4 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI7-.LCFI6 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI8-.LCFI7 + .byte 0x86 /* DW_CFA_offset, column 0x6 %esi at CFA + 3 * -4 */ + .byte 0x3 /* .uleb128 0x3 */ + + /* End of DW_CFA_xxx CFI instructions. */ + .align 4 +.LEFDE4: + +#endif /* !FFI_NO_RAW_API */ + +.LSFDE5: + .long .LEFDE5-.LASFDE5 /* FDE Length */ +.LASFDE5: + .long .LASFDE5-.Lframe1 /* FDE CIE offset */ +#if defined __PIC__ && defined HAVE_AS_X86_PCREL + .long .LFB5-. /* FDE initial location */ +#else + .long .LFB5 +#endif + .long .LFE5-.LFB5 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + /* DW_CFA_xxx CFI instructions go here. */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI9-.LFB5 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI10-.LCFI9 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ + + /* End of DW_CFA_xxx CFI instructions. 
*/ + .align 4 +.LEFDE5: diff --git a/lib/wrappers/libffi/gcc/win32_asm.s b/lib/wrappers/libffi/gcc/win32_asm.s new file mode 100644 index 000000000..7a3e7f16c --- /dev/null +++ b/lib/wrappers/libffi/gcc/win32_asm.s @@ -0,0 +1,736 @@ +# 1 "gcc\\win32_asm.asm" +# 1 "<command-line>" +# 1 "gcc\\win32_asm.asm" +# 33 "gcc\\win32_asm.asm" +# 1 "common/fficonfig.h" 1 +# 34 "gcc\\win32_asm.asm" 2 +# 1 "common/ffi.h" 1 +# 63 "common/ffi.h" +# 1 "common/ffitarget.h" 1 +# 64 "common/ffi.h" 2 +# 35 "gcc\\win32_asm.asm" 2 + + + .text + + + .balign 16 + .globl _ffi_call_win32 + + .def _ffi_call_win32; .scl 2; .type 32; .endef + +_ffi_call_win32: +.LFB1: + pushl %ebp +.LCFI0: + movl %esp,%ebp +.LCFI1: + + movl 20(%ebp),%ecx + subl %ecx,%esp + + movl %esp,%eax + + + pushl 12(%ebp) + pushl %eax + call *8(%ebp) + + + addl $8,%esp + + + cmpl $3, 16(%ebp) + jz .do_thiscall + cmpl $4, 16(%ebp) + jnz .do_fncall + movl (%esp), %ecx + movl 4(%esp), %edx + addl $8, %esp + jmp .do_fncall +.do_thiscall: + movl (%esp), %ecx + addl $4, %esp + +.do_fncall: + + + + + call *32(%ebp) + + + + + movl 24(%ebp),%ecx + + + cmpl $0,28(%ebp) + jne 0f + + + + cmpl $2,%ecx + jne .Lnoretval + fstp %st(0) + + jmp .Lepilogue + +0: + call 1f + +.Lstore_table: + .long .Lnoretval + .long .Lretint + .long .Lretfloat + .long .Lretdouble + .long .Lretlongdouble + .long .Lretuint8 + .long .Lretsint8 + .long .Lretuint16 + .long .Lretsint16 + .long .Lretint + .long .Lretint + .long .Lretint64 + .long .Lretint64 + .long .Lretstruct + .long .Lretint + .long .Lretstruct1b + .long .Lretstruct2b + .long .Lretstruct4b + .long .Lretstruct +1: + add %ecx, %ecx + add %ecx, %ecx + add (%esp),%ecx + add $4, %esp + jmp *(%ecx) + + +.Lretsint8: + movsbl %al, %eax + jmp .Lretint + +.Lretsint16: + movswl %ax, %eax + jmp .Lretint + +.Lretuint8: + movzbl %al, %eax + jmp .Lretint + +.Lretuint16: + movzwl %ax, %eax + jmp .Lretint + +.Lretint: + + movl 28(%ebp),%ecx + movl %eax,0(%ecx) + jmp .Lepilogue + +.Lretfloat: + + movl 28(%ebp),%ecx + fstps (%ecx) + jmp .Lepilogue + +.Lretdouble: + + movl 28(%ebp),%ecx + fstpl (%ecx) + jmp .Lepilogue + +.Lretlongdouble: + + movl 28(%ebp),%ecx + fstpt (%ecx) + jmp .Lepilogue + +.Lretint64: + + movl 28(%ebp),%ecx + movl %eax,0(%ecx) + movl %edx,4(%ecx) + jmp .Lepilogue + +.Lretstruct1b: + + movl 28(%ebp),%ecx + movb %al,0(%ecx) + jmp .Lepilogue + +.Lretstruct2b: + + movl 28(%ebp),%ecx + movw %ax,0(%ecx) + jmp .Lepilogue + +.Lretstruct4b: + + movl 28(%ebp),%ecx + movl %eax,0(%ecx) + jmp .Lepilogue + +.Lretstruct: + + +.Lnoretval: +.Lepilogue: + movl %ebp,%esp + popl %ebp + ret +.ffi_call_win32_end: + .balign 16 + .globl _ffi_closure_THISCALL + + .def _ffi_closure_THISCALL; .scl 2; .type 32; .endef + +_ffi_closure_THISCALL: + pushl %ebp + movl %esp, %ebp + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) + leal 12(%ebp), %edx + jmp .stub +.LFE1: + + + .balign 16 + .globl _ffi_closure_SYSV + + .def _ffi_closure_SYSV; .scl 2; .type 32; .endef + +_ffi_closure_SYSV: +.LFB3: + pushl %ebp +.LCFI4: + movl %esp, %ebp +.LCFI5: + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) + leal 8(%ebp), %edx +.stub: + movl %edx, 4(%esp) + leal -12(%ebp), %edx + movl %edx, (%esp) + call _ffi_closure_SYSV_inner + movl -12(%ebp), %ecx + +0: + call 1f + +.Lcls_store_table: + .long .Lcls_noretval + .long .Lcls_retint + .long .Lcls_retfloat + .long .Lcls_retdouble + .long .Lcls_retldouble + .long .Lcls_retuint8 + .long .Lcls_retsint8 + .long .Lcls_retuint16 + .long .Lcls_retsint16 + .long .Lcls_retint + .long .Lcls_retint + 
.long .Lcls_retllong + .long .Lcls_retllong + .long .Lcls_retstruct + .long .Lcls_retint + .long .Lcls_retstruct1 + .long .Lcls_retstruct2 + .long .Lcls_retstruct4 + .long .Lcls_retmsstruct + +1: + add %eax, %eax + add %eax, %eax + add (%esp),%eax + add $4, %esp + jmp *(%eax) + + +.Lcls_retsint8: + movsbl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retsint16: + movswl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retuint8: + movzbl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retuint16: + movzwl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retint: + movl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retfloat: + flds (%ecx) + jmp .Lcls_epilogue + +.Lcls_retdouble: + fldl (%ecx) + jmp .Lcls_epilogue + +.Lcls_retldouble: + fldt (%ecx) + jmp .Lcls_epilogue + +.Lcls_retllong: + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lcls_epilogue + +.Lcls_retstruct1: + movsbl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retstruct2: + movswl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retstruct4: + movl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retstruct: + + movl %ebp, %esp + popl %ebp + ret $0x4 + +.Lcls_retmsstruct: + + mov %ecx, %eax + + jmp .Lcls_epilogue + +.Lcls_noretval: +.Lcls_epilogue: + movl %ebp, %esp + popl %ebp + ret +.ffi_closure_SYSV_end: +.LFE3: + + + + + + + + .balign 16 + .globl _ffi_closure_raw_THISCALL + + .def _ffi_closure_raw_THISCALL; .scl 2; .type 32; .endef + +_ffi_closure_raw_THISCALL: + pushl %ebp + movl %esp, %ebp + pushl %esi + subl $36, %esp + movl ((52 + 3) & ~3)(%eax), %esi + movl ((((52 + 3) & ~3) + 4) + 4)(%eax), %edx + movl %edx, 12(%esp) + leal 12(%ebp), %edx + jmp .stubraw + + .balign 16 + .globl _ffi_closure_raw_SYSV + + .def _ffi_closure_raw_SYSV; .scl 2; .type 32; .endef + +_ffi_closure_raw_SYSV: +.LFB4: + pushl %ebp +.LCFI6: + movl %esp, %ebp +.LCFI7: + pushl %esi +.LCFI8: + subl $36, %esp + movl ((52 + 3) & ~3)(%eax), %esi + movl ((((52 + 3) & ~3) + 4) + 4)(%eax), %edx + movl %edx, 12(%esp) + leal 8(%ebp), %edx +.stubraw: + movl %edx, 8(%esp) + leal -24(%ebp), %edx + movl %edx, 4(%esp) + movl %esi, (%esp) + call *(((52 + 3) & ~3) + 4)(%eax) + movl 20(%esi), %eax +0: + call 1f + +.Lrcls_store_table: + .long .Lrcls_noretval + .long .Lrcls_retint + .long .Lrcls_retfloat + .long .Lrcls_retdouble + .long .Lrcls_retldouble + .long .Lrcls_retuint8 + .long .Lrcls_retsint8 + .long .Lrcls_retuint16 + .long .Lrcls_retsint16 + .long .Lrcls_retint + .long .Lrcls_retint + .long .Lrcls_retllong + .long .Lrcls_retllong + .long .Lrcls_retstruct + .long .Lrcls_retint + .long .Lrcls_retstruct1 + .long .Lrcls_retstruct2 + .long .Lrcls_retstruct4 + .long .Lrcls_retstruct +1: + add %eax, %eax + add %eax, %eax + add (%esp),%eax + add $4, %esp + jmp *(%eax) + + +.Lrcls_retsint8: + movsbl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retsint16: + movswl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retuint8: + movzbl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retuint16: + movzwl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retint: + movl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retfloat: + flds -24(%ebp) + jmp .Lrcls_epilogue + +.Lrcls_retdouble: + fldl -24(%ebp) + jmp .Lrcls_epilogue + +.Lrcls_retldouble: + fldt -24(%ebp) + jmp .Lrcls_epilogue + +.Lrcls_retllong: + movl -24(%ebp), %eax + movl -20(%ebp), %edx + jmp .Lrcls_epilogue + +.Lrcls_retstruct1: + movsbl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retstruct2: + movswl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retstruct4: + movl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retstruct: + + +.Lrcls_noretval: 
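The bare arithmetic in the raw-closure loads above, "((52 + 3) & ~3)(%eax)" and friends, is the preprocessed expansion of the RAW_CLOSURE_* offset macros defined in win32_asm.asm, with FFI_TRAMPOLINE_SIZE evidently 52 in this build. The same computation worked out as a C sketch:

enum {
  RAW_CLOSURE_CIF_OFFSET       = (52 + 3) & ~3,               /* == 52, closure->cif       */
  RAW_CLOSURE_FUN_OFFSET       = RAW_CLOSURE_CIF_OFFSET + 4,  /* == 56, closure->fun       */
  RAW_CLOSURE_USER_DATA_OFFSET = RAW_CLOSURE_FUN_OFFSET + 4,  /* == 60, closure->user_data */
  CIF_FLAGS_OFFSET             = 20                           /* cif->flags, read via %esi */
};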
+.Lrcls_epilogue: + addl $36, %esp + popl %esi + popl %ebp + ret +.ffi_closure_raw_SYSV_end: +.LFE4: + + + + + .balign 16 + .globl _ffi_closure_STDCALL + + .def _ffi_closure_STDCALL; .scl 2; .type 32; .endef + +_ffi_closure_STDCALL: +.LFB5: + pushl %ebp +.LCFI9: + movl %esp, %ebp +.LCFI10: + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) + leal 12(%ebp), %edx + movl %edx, 4(%esp) + leal -12(%ebp), %edx + movl %edx, (%esp) + call _ffi_closure_SYSV_inner + movl -12(%ebp), %ecx +0: + call 1f + +.Lscls_store_table: + .long .Lscls_noretval + .long .Lscls_retint + .long .Lscls_retfloat + .long .Lscls_retdouble + .long .Lscls_retldouble + .long .Lscls_retuint8 + .long .Lscls_retsint8 + .long .Lscls_retuint16 + .long .Lscls_retsint16 + .long .Lscls_retint + .long .Lscls_retint + .long .Lscls_retllong + .long .Lscls_retllong + .long .Lscls_retstruct + .long .Lscls_retint + .long .Lscls_retstruct1 + .long .Lscls_retstruct2 + .long .Lscls_retstruct4 +1: + add %eax, %eax + add %eax, %eax + add (%esp),%eax + add $4, %esp + jmp *(%eax) + + +.Lscls_retsint8: + movsbl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retsint16: + movswl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retuint8: + movzbl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retuint16: + movzwl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retint: + movl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retfloat: + flds (%ecx) + jmp .Lscls_epilogue + +.Lscls_retdouble: + fldl (%ecx) + jmp .Lscls_epilogue + +.Lscls_retldouble: + fldt (%ecx) + jmp .Lscls_epilogue + +.Lscls_retllong: + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lscls_epilogue + +.Lscls_retstruct1: + movsbl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retstruct2: + movswl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retstruct4: + movl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retstruct: + + +.Lscls_noretval: +.Lscls_epilogue: + movl %ebp, %esp + popl %ebp + ret +.ffi_closure_STDCALL_end: +.LFE5: + + + .section .eh_frame,"w" + +.Lframe1: +.LSCIE1: + .long .LECIE1-.LASCIE1 +.LASCIE1: + .long 0x0 + .byte 0x1 + + + + .ascii "\0" + + .byte 0x1 + .byte 0x7c + .byte 0x8 + + + + + .byte 0xc + .byte 0x4 + .byte 0x4 + .byte 0x88 + .byte 0x1 + .align 4 +.LECIE1: + +.LSFDE1: + .long .LEFDE1-.LASFDE1 +.LASFDE1: + .long .LASFDE1-.Lframe1 + + + + .long .LFB1 + + .long .LFE1-.LFB1 + + + + + + .byte 0x4 + .long .LCFI0-.LFB1 + .byte 0xe + .byte 0x8 + .byte 0x85 + .byte 0x2 + + .byte 0x4 + .long .LCFI1-.LCFI0 + .byte 0xd + .byte 0x5 + + + .align 4 +.LEFDE1: + + +.LSFDE3: + .long .LEFDE3-.LASFDE3 +.LASFDE3: + .long .LASFDE3-.Lframe1 + + + + .long .LFB3 + + .long .LFE3-.LFB3 + + + + + + .byte 0x4 + .long .LCFI4-.LFB3 + .byte 0xe + .byte 0x8 + .byte 0x85 + .byte 0x2 + + .byte 0x4 + .long .LCFI5-.LCFI4 + .byte 0xd + .byte 0x5 + + + .align 4 +.LEFDE3: + + + +.LSFDE4: + .long .LEFDE4-.LASFDE4 +.LASFDE4: + .long .LASFDE4-.Lframe1 + + + + .long .LFB4 + + .long .LFE4-.LFB4 + + + + + + .byte 0x4 + .long .LCFI6-.LFB4 + .byte 0xe + .byte 0x8 + .byte 0x85 + .byte 0x2 + + .byte 0x4 + .long .LCFI7-.LCFI6 + .byte 0xd + .byte 0x5 + + .byte 0x4 + .long .LCFI8-.LCFI7 + .byte 0x86 + .byte 0x3 + + + .align 4 +.LEFDE4: + + + +.LSFDE5: + .long .LEFDE5-.LASFDE5 +.LASFDE5: + .long .LASFDE5-.Lframe1 + + + + .long .LFB5 + + .long .LFE5-.LFB5 + + + + + + .byte 0x4 + .long .LCFI9-.LFB5 + .byte 0xe + .byte 0x8 + .byte 0x85 + .byte 0x2 + + .byte 0x4 + .long .LCFI10-.LCFI9 + .byte 0xd + .byte 0x5 + + + .align 4 +.LEFDE5: diff --git a/lib/wrappers/libffi/gcc/win64_asm.asm b/lib/wrappers/libffi/gcc/win64_asm.asm new 
file mode 100644 index 000000000..1dc98f99a --- /dev/null +++ b/lib/wrappers/libffi/gcc/win64_asm.asm @@ -0,0 +1,467 @@ +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +/* Constants for ffi_call_win64 */ +#define STACK 0 +#define PREP_ARGS_FN 32 +#define ECIF 40 +#define CIF_BYTES 48 +#define CIF_FLAGS 56 +#define RVALUE 64 +#define FN 72 + +/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *), + extended_cif *ecif, unsigned bytes, unsigned flags, + unsigned *rvalue, void (*fn)()); + */ + +#ifdef _MSC_VER +PUBLIC ffi_call_win64 + +EXTRN __chkstk:NEAR +EXTRN ffi_closure_win64_inner:NEAR + +_TEXT SEGMENT + +;;; ffi_closure_win64 will be called with these registers set: +;;; rax points to 'closure' +;;; r11 contains a bit mask that specifies which of the +;;; first four parameters are float or double +;;; +;;; It must move the parameters passed in registers to their stack location, +;;; call ffi_closure_win64_inner for the actual work, then return the result. +;;; +ffi_closure_win64 PROC FRAME + ;; copy register arguments onto stack + test r11, 1 + jne first_is_float + mov QWORD PTR [rsp+8], rcx + jmp second +first_is_float: + movlpd QWORD PTR [rsp+8], xmm0 + +second: + test r11, 2 + jne second_is_float + mov QWORD PTR [rsp+16], rdx + jmp third +second_is_float: + movlpd QWORD PTR [rsp+16], xmm1 + +third: + test r11, 4 + jne third_is_float + mov QWORD PTR [rsp+24], r8 + jmp fourth +third_is_float: + movlpd QWORD PTR [rsp+24], xmm2 + +fourth: + test r11, 8 + jne fourth_is_float + mov QWORD PTR [rsp+32], r9 + jmp done +fourth_is_float: + movlpd QWORD PTR [rsp+32], xmm3 + +done: + .ALLOCSTACK 40 + sub rsp, 40 + .ENDPROLOG + mov rcx, rax ; context is first parameter + mov rdx, rsp ; stack is second parameter + add rdx, 48 ; point to start of arguments + mov rax, ffi_closure_win64_inner + call rax ; call the real closure function + add rsp, 40 + movd xmm0, rax ; If the closure returned a float, + ; ffi_closure_win64_inner wrote it to rax + ret 0 +ffi_closure_win64 ENDP + +ffi_call_win64 PROC FRAME + ;; copy registers onto stack + mov QWORD PTR [rsp+32], r9 + mov QWORD PTR [rsp+24], r8 + mov QWORD PTR [rsp+16], rdx + mov QWORD PTR [rsp+8], rcx + .PUSHREG rbp + push rbp + .ALLOCSTACK 48 + sub rsp, 48 ; 00000030H + .SETFRAME rbp, 32 + lea rbp, QWORD PTR [rsp+32] + .ENDPROLOG + + mov eax, DWORD PTR CIF_BYTES[rbp] + add rax, 15 + and rax, -16 + call __chkstk + sub rsp, rax + lea rax, QWORD PTR [rsp+32] + mov QWORD PTR STACK[rbp], rax + + mov rdx, QWORD PTR ECIF[rbp] + mov rcx, QWORD PTR STACK[rbp] + call QWORD PTR PREP_ARGS_FN[rbp] + + mov rsp, QWORD PTR STACK[rbp] + + movlpd xmm3, QWORD PTR [rsp+24] + movd r9, xmm3 + + movlpd xmm2, QWORD PTR [rsp+16] + movd r8, xmm2 + + movlpd xmm1, QWORD PTR [rsp+8] + movd rdx, xmm1 + + movlpd xmm0, QWORD PTR [rsp] + movd rcx, xmm0 + + call QWORD PTR FN[rbp] +ret_struct4b$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B + jne ret_struct2b$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov DWORD PTR [rcx], eax + jmp ret_void$ + +ret_struct2b$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B + jne ret_struct1b$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov WORD PTR [rcx], ax + jmp ret_void$ + +ret_struct1b$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B + jne ret_uint8$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov BYTE PTR [rcx], al + jmp ret_void$ + +ret_uint8$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8 + jne ret_sint8$ + + mov rcx, QWORD PTR RVALUE[rbp] + movzx rax, al + mov QWORD PTR [rcx], rax + jmp ret_void$ + 
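Each ret_*$ block in this function compares CIF_FLAGS against one FFI_TYPE_* code and widens the raw register result into the caller's 8-byte rvalue slot. A compact C sketch of that post-call fixup (widen_result is an illustrative name; the numeric codes follow the jump-table order used throughout this diff, e.g. 5 for FFI_TYPE_UINT8):

#include <stdint.h>

static void widen_result (unsigned cif_flags, uint64_t rax, void *rvalue)
{
  switch (cif_flags)
    {
    case 5:  *(uint64_t *) rvalue = (uint8_t)  rax; break;  /* movzx: ret_uint8$  */
    case 6:  *(int64_t *)  rvalue = (int8_t)   rax; break;  /* movsx: ret_sint8$  */
    case 7:  *(uint64_t *) rvalue = (uint16_t) rax; break;  /* movzx: ret_uint16$ */
    case 8:  *(int64_t *)  rvalue = (int16_t)  rax; break;  /* movsx: ret_sint16$ */
    default: break;  /* 32/64-bit, float and small-struct cases handled likewise */
    }
}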
+ret_sint8$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8 + jne ret_uint16$ + + mov rcx, QWORD PTR RVALUE[rbp] + movsx rax, al + mov QWORD PTR [rcx], rax + jmp ret_void$ + +ret_uint16$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16 + jne ret_sint16$ + + mov rcx, QWORD PTR RVALUE[rbp] + movzx rax, ax + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_sint16$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16 + jne ret_uint32$ + + mov rcx, QWORD PTR RVALUE[rbp] + movsx rax, ax + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_uint32$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32 + jne ret_sint32$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov eax, eax + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_sint32$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32 + jne ret_float$ + + mov rcx, QWORD PTR RVALUE[rbp] + cdqe + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_float$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT + jne SHORT ret_double$ + + mov rax, QWORD PTR RVALUE[rbp] + movss DWORD PTR [rax], xmm0 + jmp SHORT ret_void$ + +ret_double$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE + jne SHORT ret_sint64$ + + mov rax, QWORD PTR RVALUE[rbp] + movlpd QWORD PTR [rax], xmm0 + jmp SHORT ret_void$ + +ret_sint64$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64 + jne ret_void$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_void$: + xor rax, rax + + lea rsp, QWORD PTR [rbp+16] + pop rbp + ret 0 +ffi_call_win64 ENDP +_TEXT ENDS +END + +#else + +#ifdef SYMBOL_UNDERSCORE +#define SYMBOL_NAME(name) _##name +#else +#define SYMBOL_NAME(name) name +#endif + +.text + +.extern SYMBOL_NAME(ffi_closure_win64_inner) + +// ffi_closure_win64 will be called with these registers set: +// rax points to 'closure' +// r11 contains a bit mask that specifies which of the +// first four parameters are float or double +// // It must move the parameters passed in registers to their stack location, +// call ffi_closure_win64_inner for the actual work, then return the result. 
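The r11 bit mask described here is tested bit-by-bit below ("test $1, %r11" and so on). One plausible way a caller-side helper could derive it, sketched in C against the public ffi_type layout (win64_float_mask is an illustrative name, not libffi API; win64 passes the first four arguments in rcx/rdx/r8/r9 or xmm0-xmm3):

#include <ffi.h>

static unsigned win64_float_mask (ffi_type **arg_types, unsigned nargs)
{
  unsigned mask = 0, i;
  /* Bit i set means argument i is float/double and arrives in xmm[i]. */
  for (i = 0; i < nargs && i < 4; i++)
    if (arg_types[i]->type == FFI_TYPE_FLOAT
        || arg_types[i]->type == FFI_TYPE_DOUBLE)
      mask |= 1u << i;
  return mask;
}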
+// + .balign 16 + .globl SYMBOL_NAME(ffi_closure_win64) +SYMBOL_NAME(ffi_closure_win64): + // copy register arguments onto stack + test $1,%r11 + jne .Lfirst_is_float + mov %rcx, 8(%rsp) + jmp .Lsecond +.Lfirst_is_float: + movlpd %xmm0, 8(%rsp) + +.Lsecond: + test $2, %r11 + jne .Lsecond_is_float + mov %rdx, 16(%rsp) + jmp .Lthird +.Lsecond_is_float: + movlpd %xmm1, 16(%rsp) + +.Lthird: + test $4, %r11 + jne .Lthird_is_float + mov %r8,24(%rsp) + jmp .Lfourth +.Lthird_is_float: + movlpd %xmm2, 24(%rsp) + +.Lfourth: + test $8, %r11 + jne .Lfourth_is_float + mov %r9, 32(%rsp) + jmp .Ldone +.Lfourth_is_float: + movlpd %xmm3, 32(%rsp) + +.Ldone: +// ALLOCSTACK 40 + sub $40, %rsp +// ENDPROLOG + mov %rax, %rcx // context is first parameter + mov %rsp, %rdx // stack is second parameter + add $48, %rdx // point to start of arguments + mov $SYMBOL_NAME(ffi_closure_win64_inner), %rax + callq *%rax // call the real closure function + add $40, %rsp + movq %rax, %xmm0 // If the closure returned a float, + // ffi_closure_win64_inner wrote it to rax + retq +.ffi_closure_win64_end: + + .balign 16 + .globl SYMBOL_NAME(ffi_call_win64) +SYMBOL_NAME(ffi_call_win64): + // copy registers onto stack + mov %r9,32(%rsp) + mov %r8,24(%rsp) + mov %rdx,16(%rsp) + mov %rcx,8(%rsp) + // PUSHREG rbp + push %rbp + // ALLOCSTACK 48 + sub $48,%rsp + // SETFRAME rbp, 32 + lea 32(%rsp),%rbp + // ENDPROLOG + + mov CIF_BYTES(%rbp),%eax + add $15, %rax + and $-16, %rax + cmpq $0x1000, %rax + jb Lch_done +Lch_probe: + subq $0x1000,%rsp + orl $0x0, (%rsp) + subq $0x1000,%rax + cmpq $0x1000,%rax + ja Lch_probe +Lch_done: + subq %rax, %rsp + orl $0x0, (%rsp) + lea 32(%rsp), %rax + mov %rax, STACK(%rbp) + + mov ECIF(%rbp), %rdx + mov STACK(%rbp), %rcx + callq *PREP_ARGS_FN(%rbp) + + mov STACK(%rbp), %rsp + + movlpd 24(%rsp), %xmm3 + movd %xmm3, %r9 + + movlpd 16(%rsp), %xmm2 + movd %xmm2, %r8 + + movlpd 8(%rsp), %xmm1 + movd %xmm1, %rdx + + movlpd (%rsp), %xmm0 + movd %xmm0, %rcx + + callq *FN(%rbp) +.Lret_struct4b: + cmpl $FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp) + jne .Lret_struct2b + + mov RVALUE(%rbp), %rcx + mov %eax, (%rcx) + jmp .Lret_void + +.Lret_struct2b: + cmpl $FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp) + jne .Lret_struct1b + + mov RVALUE(%rbp), %rcx + mov %ax, (%rcx) + jmp .Lret_void + +.Lret_struct1b: + cmpl $FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp) + jne .Lret_uint8 + + mov RVALUE(%rbp), %rcx + mov %al, (%rcx) + jmp .Lret_void + +.Lret_uint8: + cmpl $FFI_TYPE_UINT8, CIF_FLAGS(%rbp) + jne .Lret_sint8 + + mov RVALUE(%rbp), %rcx + movzbq %al, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_sint8: + cmpl $FFI_TYPE_SINT8, CIF_FLAGS(%rbp) + jne .Lret_uint16 + + mov RVALUE(%rbp), %rcx + movsbq %al, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_uint16: + cmpl $FFI_TYPE_UINT16, CIF_FLAGS(%rbp) + jne .Lret_sint16 + + mov RVALUE(%rbp), %rcx + movzwq %ax, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_sint16: + cmpl $FFI_TYPE_SINT16, CIF_FLAGS(%rbp) + jne .Lret_uint32 + + mov RVALUE(%rbp), %rcx + movswq %ax, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_uint32: + cmpl $FFI_TYPE_UINT32, CIF_FLAGS(%rbp) + jne .Lret_sint32 + + mov RVALUE(%rbp), %rcx + movl %eax, %eax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_sint32: + cmpl $FFI_TYPE_SINT32, CIF_FLAGS(%rbp) + jne .Lret_float + + mov RVALUE(%rbp), %rcx + cltq + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_float: + cmpl $FFI_TYPE_FLOAT, CIF_FLAGS(%rbp) + jne .Lret_double + + mov RVALUE(%rbp), %rax + movss %xmm0, (%rax) + jmp .Lret_void + +.Lret_double: + cmpl 
$FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp) + jne .Lret_sint64 + + mov RVALUE(%rbp), %rax + movlpd %xmm0, (%rax) + jmp .Lret_void + +.Lret_sint64: + cmpl $FFI_TYPE_SINT64, CIF_FLAGS(%rbp) + jne .Lret_void + + mov RVALUE(%rbp), %rcx + mov %rax, (%rcx) + jmp .Lret_void + +.Lret_void: + xor %rax, %rax + + lea 16(%rbp), %rsp + pop %rbp + retq +.ffi_call_win64_end: +#endif /* !_MSC_VER */ + diff --git a/lib/wrappers/libffi/gcc/win64_asm.s b/lib/wrappers/libffi/gcc/win64_asm.s new file mode 100644 index 000000000..f2c2df10d --- /dev/null +++ b/lib/wrappers/libffi/gcc/win64_asm.s @@ -0,0 +1,227 @@ +# 1 "gcc\\win64_asm.asm" +# 1 "<command-line>" +# 1 "gcc\\win64_asm.asm" + +# 1 "common/fficonfig.h" 1 +# 3 "gcc\\win64_asm.asm" 2 +# 1 "common/ffi.h" 1 +# 63 "common/ffi.h" +# 1 "common/ffitarget.h" 1 +# 64 "common/ffi.h" 2 +# 4 "gcc\\win64_asm.asm" 2 +# 244 "gcc\\win64_asm.asm" +.text + +.extern ffi_closure_win64_inner +# 255 "gcc\\win64_asm.asm" + .balign 16 + .globl ffi_closure_win64 +ffi_closure_win64: + + test $1,%r11 + jne .Lfirst_is_float + mov %rcx, 8(%rsp) + jmp .Lsecond +.Lfirst_is_float: + movlpd %xmm0, 8(%rsp) + +.Lsecond: + test $2, %r11 + jne .Lsecond_is_float + mov %rdx, 16(%rsp) + jmp .Lthird +.Lsecond_is_float: + movlpd %xmm1, 16(%rsp) + +.Lthird: + test $4, %r11 + jne .Lthird_is_float + mov %r8,24(%rsp) + jmp .Lfourth +.Lthird_is_float: + movlpd %xmm2, 24(%rsp) + +.Lfourth: + test $8, %r11 + jne .Lfourth_is_float + mov %r9, 32(%rsp) + jmp .Ldone +.Lfourth_is_float: + movlpd %xmm3, 32(%rsp) + +.Ldone: + + sub $40, %rsp + + mov %rax, %rcx + mov %rsp, %rdx + add $48, %rdx + mov $SYMBOL_NAME(ffi_closure_win64_inner), %rax + callq *%rax + add $40, %rsp + movq %rax, %xmm0 + + retq +.ffi_closure_win64_end: + + .balign 16 + .globl ffi_call_win64 +ffi_call_win64: + + mov %r9,32(%rsp) + mov %r8,24(%rsp) + mov %rdx,16(%rsp) + mov %rcx,8(%rsp) + + push %rbp + + sub $48,%rsp + + lea 32(%rsp),%rbp + + + mov 48(%rbp),%eax + add $15, %rax + and $-16, %rax + cmpq $0x1000, %rax + jb Lch_done +Lch_probe: + subq $0x1000,%rsp + orl $0x0, (%rsp) + subq $0x1000,%rax + cmpq $0x1000,%rax + ja Lch_probe +Lch_done: + subq %rax, %rsp + orl $0x0, (%rsp) + lea 32(%rsp), %rax + mov %rax, 0(%rbp) + + mov 40(%rbp), %rdx + mov 0(%rbp), %rcx + callq *32(%rbp) + + mov 0(%rbp), %rsp + + movlpd 24(%rsp), %xmm3 + movd %xmm3, %r9 + + movlpd 16(%rsp), %xmm2 + movd %xmm2, %r8 + + movlpd 8(%rsp), %xmm1 + movd %xmm1, %rdx + + movlpd (%rsp), %xmm0 + movd %xmm0, %rcx + + callq *72(%rbp) +.Lret_struct4b: + cmpl $FFI_TYPE_SMALL_STRUCT_4B, 56(%rbp) + jne .Lret_struct2b + + mov 64(%rbp), %rcx + mov %eax, (%rcx) + jmp .Lret_void + +.Lret_struct2b: + cmpl $FFI_TYPE_SMALL_STRUCT_2B, 56(%rbp) + jne .Lret_struct1b + + mov 64(%rbp), %rcx + mov %ax, (%rcx) + jmp .Lret_void + +.Lret_struct1b: + cmpl $FFI_TYPE_SMALL_STRUCT_1B, 56(%rbp) + jne .Lret_uint8 + + mov 64(%rbp), %rcx + mov %al, (%rcx) + jmp .Lret_void + +.Lret_uint8: + cmpl $FFI_TYPE_UINT8, 56(%rbp) + jne .Lret_sint8 + + mov 64(%rbp), %rcx + movzbq %al, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_sint8: + cmpl $FFI_TYPE_SINT8, 56(%rbp) + jne .Lret_uint16 + + mov 64(%rbp), %rcx + movsbq %al, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_uint16: + cmpl $FFI_TYPE_UINT16, 56(%rbp) + jne .Lret_sint16 + + mov 64(%rbp), %rcx + movzwq %ax, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_sint16: + cmpl $FFI_TYPE_SINT16, 56(%rbp) + jne .Lret_uint32 + + mov 64(%rbp), %rcx + movswq %ax, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_uint32: + cmpl $9, 56(%rbp) + jne .Lret_sint32 + + 
        mov 64(%rbp), %rcx
+        movl %eax, %eax
+        movq %rax, (%rcx)
+        jmp .Lret_void
+
+.Lret_sint32:
+        cmpl $10, 56(%rbp)
+        jne .Lret_float
+
+        mov 64(%rbp), %rcx
+        cltq
+        movq %rax, (%rcx)
+        jmp .Lret_void
+
+.Lret_float:
+        cmpl $2, 56(%rbp)
+        jne .Lret_double
+
+        mov 64(%rbp), %rax
+        movss %xmm0, (%rax)
+        jmp .Lret_void
+
+.Lret_double:
+        cmpl $3, 56(%rbp)
+        jne .Lret_sint64
+
+        mov 64(%rbp), %rax
+        movlpd %xmm0, (%rax)
+        jmp .Lret_void
+
+.Lret_sint64:
+        cmpl $12, 56(%rbp)
+        jne .Lret_void
+
+        mov 64(%rbp), %rcx
+        mov %rax, (%rcx)
+        jmp .Lret_void
+
+.Lret_void:
+        xor %rax, %rax
+
+        lea 16(%rbp), %rsp
+        pop %rbp
+        retq
+.ffi_call_win64_end:
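The Lch_probe loop near the top of the GAS ffi_call_win64 replaces MSVC's __chkstk, which the MASM build above calls with the byte count in rax: Windows commits stack memory one guard page at a time, so any allocation larger than 4 KB must touch every page in order, top down. The same control flow in C, purely as illustration (probe_pages is a hypothetical name, and real probing must stay in assembly, since C gives no handle on %rsp):

#include <stddef.h>

static void probe_pages (volatile char *sp, size_t bytes)
{
  /* subq $0x1000,%rsp ; orl $0x0,(%rsp) ; repeat while a page or more remains */
  while (bytes >= 0x1000)
    {
      sp -= 0x1000;
      *sp |= 0;          /* fault in the next guard page */
      bytes -= 0x1000;
    }
  sp -= bytes;           /* final subq %rax,%rsp */
  *sp |= 0;              /* and one last touch   */
}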