summary refs log tree commit diff stats
path: root/tinyc/i386-asm.c
diff options
context:
space:
mode:
Diffstat (limited to 'tinyc/i386-asm.c')
-rw-r--r--tinyc/i386-asm.c1714
1 files changed, 0 insertions, 1714 deletions
diff --git a/tinyc/i386-asm.c b/tinyc/i386-asm.c
deleted file mode 100644
index 2e184974e..000000000
--- a/tinyc/i386-asm.c
+++ /dev/null
@@ -1,1714 +0,0 @@
-/*
- *  i386 specific functions for TCC assembler
- *
- *  Copyright (c) 2001, 2002 Fabrice Bellard
- *  Copyright (c) 2009 Frédéric Feret (x86_64 support)
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "tcc.h"
-
-#define MAX_OPERANDS 3
-
-#define TOK_ASM_first TOK_ASM_clc
-#define TOK_ASM_last TOK_ASM_emms
-#define TOK_ASM_alllast TOK_ASM_subps
-
-#define OPC_B          0x01  /* only used with OPC_WL */
-#define OPC_WL         0x02  /* accepts w, l or no suffix */
-#define OPC_BWL        (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
-#define OPC_REG        0x04 /* register is added to opcode */
-#define OPC_MODRM      0x08 /* modrm encoding */
-
-#define OPCT_MASK      0x70
-#define OPC_FWAIT      0x10 /* add fwait opcode */
-#define OPC_SHIFT      0x20 /* shift opcodes */
-#define OPC_ARITH      0x30 /* arithmetic opcodes */
-#define OPC_FARITH     0x40 /* FPU arithmetic opcodes */
-#define OPC_TEST       0x50 /* test opcodes */
-#define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i))
-
-#define OPC_0F        0x100 /* Is secondary map (0x0f prefix) */
-#define OPC_48        0x200 /* Always has REX prefix */
-#ifdef TCC_TARGET_X86_64
-# define OPC_WLQ     0x1000  /* accepts w, l, q or no suffix */
-# define OPC_BWLQ    (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
-# define OPC_WLX     OPC_WLQ
-# define OPC_BWLX    OPC_BWLQ
-#else
-# define OPC_WLX     OPC_WL
-# define OPC_BWLX    OPC_BWL
-#endif
-
-#define OPC_GROUP_SHIFT 13
-
-/* in order to compress the operand type, we use specific operands and
-   we or only with EA  */
-enum {
-    OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
-    OPT_REG16,  /* warning: value is hardcoded from TOK_ASM_xxx */
-    OPT_REG32,  /* warning: value is hardcoded from TOK_ASM_xxx */
-#ifdef TCC_TARGET_X86_64
-    OPT_REG64,  /* warning: value is hardcoded from TOK_ASM_xxx */
-#endif
-    OPT_MMX,    /* warning: value is hardcoded from TOK_ASM_xxx */
-    OPT_SSE,    /* warning: value is hardcoded from TOK_ASM_xxx */
-    OPT_CR,     /* warning: value is hardcoded from TOK_ASM_xxx */
-    OPT_TR,     /* warning: value is hardcoded from TOK_ASM_xxx */
-    OPT_DB,     /* warning: value is hardcoded from TOK_ASM_xxx */
-    OPT_SEG,
-    OPT_ST,
-#ifdef TCC_TARGET_X86_64
-    OPT_REG8_LOW, /* %spl,%bpl,%sil,%dil, encoded like ah,ch,dh,bh, but
-		     with REX prefix, not used in insn templates */
-#endif
-    OPT_IM8,
-    OPT_IM8S,
-    OPT_IM16,
-    OPT_IM32,
-#ifdef TCC_TARGET_X86_64
-    OPT_IM64,
-#endif
-    OPT_EAX,    /* %al, %ax, %eax or %rax register */
-    OPT_ST0,    /* %st(0) register */
-    OPT_CL,     /* %cl register */
-    OPT_DX,     /* %dx register */
-    OPT_ADDR,   /* OP_EA with only offset */
-    OPT_INDIR,  /* *(expr) */
-    /* composite types */
-    OPT_COMPOSITE_FIRST,
-    OPT_IM,     /* IM8 | IM16 | IM32 */
-    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
-    OPT_REGW,   /* REG16 | REG32 | REG64 */
-    OPT_IMW,    /* IM16 | IM32 */
-    OPT_MMXSSE, /* MMX | SSE */
-    OPT_DISP,   /* Like OPT_ADDR, but emitted as displacement (for jumps) */
-    OPT_DISP8,  /* Like OPT_ADDR, but only 8bit (short jumps) */
-    /* can be ored with any OPT_xxx */
-    OPT_EA = 0x80
-};
-
-#define OP_REG8   (1 << OPT_REG8)
-#define OP_REG16  (1 << OPT_REG16)
-#define OP_REG32  (1 << OPT_REG32)
-#define OP_MMX    (1 << OPT_MMX)
-#define OP_SSE    (1 << OPT_SSE)
-#define OP_CR     (1 << OPT_CR)
-#define OP_TR     (1 << OPT_TR)
-#define OP_DB     (1 << OPT_DB)
-#define OP_SEG    (1 << OPT_SEG)
-#define OP_ST     (1 << OPT_ST)
-#define OP_IM8    (1 << OPT_IM8)
-#define OP_IM8S   (1 << OPT_IM8S)
-#define OP_IM16   (1 << OPT_IM16)
-#define OP_IM32   (1 << OPT_IM32)
-#define OP_EAX    (1 << OPT_EAX)
-#define OP_ST0    (1 << OPT_ST0)
-#define OP_CL     (1 << OPT_CL)
-#define OP_DX     (1 << OPT_DX)
-#define OP_ADDR   (1 << OPT_ADDR)
-#define OP_INDIR  (1 << OPT_INDIR)
-#ifdef TCC_TARGET_X86_64
-# define OP_REG64 (1 << OPT_REG64)
-# define OP_REG8_LOW (1 << OPT_REG8_LOW)
-# define OP_IM64  (1 << OPT_IM64)
-# define OP_EA32  (OP_EA << 1)
-#else
-# define OP_REG64 0
-# define OP_REG8_LOW 0
-# define OP_IM64  0
-# define OP_EA32  0
-#endif
-
-#define OP_EA     0x40000000
-#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)
-
-#ifdef TCC_TARGET_X86_64
-# define TREG_XAX   TREG_RAX
-# define TREG_XCX   TREG_RCX
-# define TREG_XDX   TREG_RDX
-#else
-# define TREG_XAX   TREG_EAX
-# define TREG_XCX   TREG_ECX
-# define TREG_XDX   TREG_EDX
-#endif
-
-typedef struct ASMInstr {
-    uint16_t sym;
-    uint16_t opcode;
-    uint16_t instr_type;
-    uint8_t nb_ops;
-    uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
-} ASMInstr;
-
-typedef struct Operand {
-    uint32_t type;
-    int8_t  reg; /* register, -1 if none */
-    int8_t  reg2; /* second register, -1 if none */
-    uint8_t shift;
-    ExprValue e;
-} Operand;
-
-static const uint8_t reg_to_size[9] = {
-/*
-    [OP_REG8] = 0,
-    [OP_REG16] = 1,
-    [OP_REG32] = 2,
-#ifdef TCC_TARGET_X86_64
-    [OP_REG64] = 3,
-#endif
-*/
-    0, 0, 1, 0, 2, 0, 0, 0, 3
-};
-
-#define NB_TEST_OPCODES 30
-
-static const uint8_t test_bits[NB_TEST_OPCODES] = {
- 0x00, /* o */
- 0x01, /* no */
- 0x02, /* b */
- 0x02, /* c */
- 0x02, /* nae */
- 0x03, /* nb */
- 0x03, /* nc */
- 0x03, /* ae */
- 0x04, /* e */
- 0x04, /* z */
- 0x05, /* ne */
- 0x05, /* nz */
- 0x06, /* be */
- 0x06, /* na */
- 0x07, /* nbe */
- 0x07, /* a */
- 0x08, /* s */
- 0x09, /* ns */
- 0x0a, /* p */
- 0x0a, /* pe */
- 0x0b, /* np */
- 0x0b, /* po */
- 0x0c, /* l */
- 0x0c, /* nge */
- 0x0d, /* nl */
- 0x0d, /* ge */
- 0x0e, /* le */
- 0x0e, /* ng */
- 0x0f, /* nle */
- 0x0f, /* g */
-};
-
-static const uint8_t segment_prefixes[] = {
- 0x26, /* es */
- 0x2e, /* cs */
- 0x36, /* ss */
- 0x3e, /* ds */
- 0x64, /* fs */
- 0x65  /* gs */
-};
-
-static const ASMInstr asm_instrs[] = {
-#define ALT(x) x
-/* This removes a 0x0f in the second byte */
-#define O(o) ((uint64_t) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o)))
-/* This constructs instr_type from opcode, type and group.  */
-#define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? OPC_0F : 0))
-#define DEF_ASM_OP0(name, opcode)
-#define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0, { 0 } },
-#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }},
-#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }},
-#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }},
-#ifdef TCC_TARGET_X86_64
-# include "x86_64-asm.h"
-#else
-# include "i386-asm.h"
-#endif
-    /* last operation */
-    { 0, },
-};
-
-static const uint16_t op0_codes[] = {
-#define ALT(x)
-#define DEF_ASM_OP0(x, opcode) opcode,
-#define DEF_ASM_OP0L(name, opcode, group, instr_type)
-#define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
-#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
-#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
-#ifdef TCC_TARGET_X86_64
-# include "x86_64-asm.h"
-#else
-# include "i386-asm.h"
-#endif
-};
-
-static inline int get_reg_shift(TCCState *s1)
-{
-    int shift, v;
-    v = asm_int_expr(s1);
-    switch(v) {
-    case 1:
-        shift = 0;
-        break;
-    case 2:
-        shift = 1;
-        break;
-    case 4:
-        shift = 2;
-        break;
-    case 8:
-        shift = 3;
-        break;
-    default:
-        expect("1, 2, 4 or 8 constant");
-        shift = 0;
-        break;
-    }
-    return shift;
-}
-
-#ifdef TCC_TARGET_X86_64
-static int asm_parse_numeric_reg(int t, unsigned int *type)
-{
-    int reg = -1;
-    if (t >= TOK_IDENT && t < tok_ident) {
-	const char *s = table_ident[t - TOK_IDENT]->str;
-	char c;
-	*type = OP_REG64;
-	if (*s == 'c') {
-	    s++;
-	    *type = OP_CR;
-	}
-	if (*s++ != 'r')
-	  return -1;
-	/* Don't allow leading '0'.  */
-	if ((c = *s++) >= '1' && c <= '9')
-	  reg = c - '0';
-	else
-	  return -1;
-	if ((c = *s) >= '0' && c <= '5')
-	  s++, reg = reg * 10 + c - '0';
-	if (reg > 15)
-	  return -1;
-	if ((c = *s) == 0)
-	  ;
-	else if (*type != OP_REG64)
-	  return -1;
-	else if (c == 'b' && !s[1])
-	  *type = OP_REG8;
-	else if (c == 'w' && !s[1])
-	  *type = OP_REG16;
-	else if (c == 'd' && !s[1])
-	  *type = OP_REG32;
-	else
-	  return -1;
-    }
-    return reg;
-}
-#endif
-
-static int asm_parse_reg(unsigned int *type)
-{
-    int reg = 0;
-    *type = 0;
-    if (tok != '%')
-        goto error_32;
-    next();
-    if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
-        reg = tok - TOK_ASM_eax;
-	*type = OP_REG32;
-#ifdef TCC_TARGET_X86_64
-    } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
-        reg = tok - TOK_ASM_rax;
-	*type = OP_REG64;
-    } else if (tok == TOK_ASM_rip) {
-        reg = -2; /* Probably should use different escape code. */
-	*type = OP_REG64;
-    } else if ((reg = asm_parse_numeric_reg(tok, type)) >= 0
-	       && (*type == OP_REG32 || *type == OP_REG64)) {
-	;
-#endif
-    } else {
-    error_32:
-        expect("register");
-    }
-    next();
-    return reg;
-}
-
-static void parse_operand(TCCState *s1, Operand *op)
-{
-    ExprValue e;
-    int reg, indir;
-    const char *p;
-
-    indir = 0;
-    if (tok == '*') {
-        next();
-        indir = OP_INDIR;
-    }
-
-    if (tok == '%') {
-        next();
-        if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
-            reg = tok - TOK_ASM_al;
-            op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
-            op->reg = reg & 7;
-            if ((op->type & OP_REG) && op->reg == TREG_XAX)
-                op->type |= OP_EAX;
-            else if (op->type == OP_REG8 && op->reg == TREG_XCX)
-                op->type |= OP_CL;
-            else if (op->type == OP_REG16 && op->reg == TREG_XDX)
-                op->type |= OP_DX;
-        } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
-            op->type = OP_DB;
-            op->reg = tok - TOK_ASM_dr0;
-        } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
-            op->type = OP_SEG;
-            op->reg = tok - TOK_ASM_es;
-        } else if (tok == TOK_ASM_st) {
-            op->type = OP_ST;
-            op->reg = 0;
-            next();
-            if (tok == '(') {
-                next();
-                if (tok != TOK_PPNUM)
-                    goto reg_error;
-                p = tokc.str.data;
-                reg = p[0] - '0';
-                if ((unsigned)reg >= 8 || p[1] != '\0')
-                    goto reg_error;
-                op->reg = reg;
-                next();
-                skip(')');
-            }
-            if (op->reg == 0)
-                op->type |= OP_ST0;
-            goto no_skip;
-#ifdef TCC_TARGET_X86_64
-	} else if (tok >= TOK_ASM_spl && tok <= TOK_ASM_dil) {
-	    op->type = OP_REG8 | OP_REG8_LOW;
-	    op->reg = 4 + tok - TOK_ASM_spl;
-        } else if ((op->reg = asm_parse_numeric_reg(tok, &op->type)) >= 0) {
-	    ;
-#endif
-        } else {
-        reg_error:
-            tcc_error("unknown register %%%s", get_tok_str(tok, &tokc));
-        }
-        next();
-    no_skip: ;
-    } else if (tok == '$') {
-        /* constant value */
-        next();
-        asm_expr(s1, &e);
-        op->type = OP_IM32;
-        op->e = e;
-        if (!op->e.sym) {
-            if (op->e.v == (uint8_t)op->e.v)
-                op->type |= OP_IM8;
-            if (op->e.v == (int8_t)op->e.v)
-                op->type |= OP_IM8S;
-            if (op->e.v == (uint16_t)op->e.v)
-                op->type |= OP_IM16;
-#ifdef TCC_TARGET_X86_64
-            if (op->e.v != (int32_t)op->e.v && op->e.v != (uint32_t)op->e.v)
-                op->type = OP_IM64;
-#endif
-        }
-    } else {
-        /* address(reg,reg2,shift) with all variants */
-        op->type = OP_EA;
-        op->reg = -1;
-        op->reg2 = -1;
-        op->shift = 0;
-        if (tok != '(') {
-            asm_expr(s1, &e);
-            op->e = e;
-        } else {
-            next();
-            if (tok == '%') {
-                unget_tok('(');
-                op->e.v = 0;
-                op->e.sym = NULL;
-            } else {
-                /* bracketed offset expression */
-                asm_expr(s1, &e);
-                if (tok != ')')
-                    expect(")");
-                next();
-                op->e.v = e.v;
-                op->e.sym = e.sym;
-            }
-	    op->e.pcrel = 0;
-        }
-        if (tok == '(') {
-	    unsigned int type = 0;
-            next();
-            if (tok != ',') {
-                op->reg = asm_parse_reg(&type);
-            }
-            if (tok == ',') {
-                next();
-                if (tok != ',') {
-                    op->reg2 = asm_parse_reg(&type);
-                }
-                if (tok == ',') {
-                    next();
-                    op->shift = get_reg_shift(s1);
-                }
-            }
-	    if (type & OP_REG32)
-	        op->type |= OP_EA32;
-            skip(')');
-        }
-        if (op->reg == -1 && op->reg2 == -1)
-            op->type |= OP_ADDR;
-    }
-    op->type |= indir;
-}
-
-/* XXX: unify with C code output ? */
-ST_FUNC void gen_expr32(ExprValue *pe)
-{
-    if (pe->pcrel)
-        /* If PC-relative, always set VT_SYM, even without symbol,
-	   so as to force a relocation to be emitted.  */
-	gen_addrpc32(VT_SYM, pe->sym, pe->v);
-    else
-	gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
-}
-
-#ifdef TCC_TARGET_X86_64
-ST_FUNC void gen_expr64(ExprValue *pe)
-{
-    gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
-}
-#endif
-
-/* XXX: unify with C code output ? */
-static void gen_disp32(ExprValue *pe)
-{
-    Sym *sym = pe->sym;
-    if (sym && sym->r == cur_text_section->sh_num) {
-        /* same section: we can output an absolute value. Note
-           that the TCC compiler behaves differently here because
-           it always outputs a relocation to ease (future) code
-           elimination in the linker */
-        gen_le32(pe->v + sym->jnext - ind - 4);
-    } else {
-        if (sym && sym->type.t == VT_VOID) {
-            sym->type.t = VT_FUNC;
-            sym->type.ref = NULL;
-        }
-        gen_addrpc32(VT_SYM, sym, pe->v);
-    }
-}
-
-/* generate the modrm operand */
-static inline int asm_modrm(int reg, Operand *op)
-{
-    int mod, reg1, reg2, sib_reg1;
-
-    if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
-        g(0xc0 + (reg << 3) + op->reg);
-    } else if (op->reg == -1 && op->reg2 == -1) {
-        /* displacement only */
-#ifdef TCC_TARGET_X86_64
-	g(0x04 + (reg << 3));
-	g(0x25);
-#else
-	g(0x05 + (reg << 3));
-#endif
-	gen_expr32(&op->e);
-#ifdef TCC_TARGET_X86_64
-    } else if (op->reg == -2) {
-        ExprValue *pe = &op->e;
-        g(0x05 + (reg << 3));
-        gen_addrpc32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
-        return ind;
-#endif
-    } else {
-        sib_reg1 = op->reg;
-        /* fist compute displacement encoding */
-        if (sib_reg1 == -1) {
-            sib_reg1 = 5;
-            mod = 0x00;
-        } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
-            mod = 0x00;
-        } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
-            mod = 0x40;
-        } else {
-            mod = 0x80;
-        }
-        /* compute if sib byte needed */
-        reg1 = op->reg;
-        if (op->reg2 != -1)
-            reg1 = 4;
-        g(mod + (reg << 3) + reg1);
-        if (reg1 == 4) {
-            /* add sib byte */
-            reg2 = op->reg2;
-            if (reg2 == -1)
-                reg2 = 4; /* indicate no index */
-            g((op->shift << 6) + (reg2 << 3) + sib_reg1);
-        }
-        /* add offset */
-        if (mod == 0x40) {
-            g(op->e.v);
-        } else if (mod == 0x80 || op->reg == -1) {
-	    gen_expr32(&op->e);
-        }
-    }
-    return 0;
-}
-
-#ifdef TCC_TARGET_X86_64
-#define REX_W 0x48
-#define REX_R 0x44
-#define REX_X 0x42
-#define REX_B 0x41
-
-static void asm_rex(int width64, Operand *ops, int nb_ops, int *op_type,
-		    int regi, int rmi)
-{
-  unsigned char rex = width64 ? 0x48 : 0;
-  int saw_high_8bit = 0;
-  int i;
-  if (rmi == -1) {
-      /* No mod/rm byte, but we might have a register op nevertheless
-         (we will add it to the opcode later).  */
-      for(i = 0; i < nb_ops; i++) {
-	  if (op_type[i] & (OP_REG | OP_ST)) {
-	      if (ops[i].reg >= 8) {
-		  rex |= REX_B;
-		  ops[i].reg -= 8;
-	      } else if (ops[i].type & OP_REG8_LOW)
-		  rex |= 0x40;
-	      else if (ops[i].type & OP_REG8 && ops[i].reg >= 4)
-		  /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
-		  saw_high_8bit = ops[i].reg;
-	      break;
-	  }
-      }
-  } else {
-      if (regi != -1) {
-	  if (ops[regi].reg >= 8) {
-	      rex |= REX_R;
-	      ops[regi].reg -= 8;
-	  } else if (ops[regi].type & OP_REG8_LOW)
-	      rex |= 0x40;
-	  else if (ops[regi].type & OP_REG8 && ops[regi].reg >= 4)
-	      /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
-	      saw_high_8bit = ops[regi].reg;
-      }
-      if (ops[rmi].type & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_EA)) {
-	  if (ops[rmi].reg >= 8) {
-	      rex |= REX_B;
-	      ops[rmi].reg -= 8;
-	  } else if (ops[rmi].type & OP_REG8_LOW)
-	      rex |= 0x40;
-	  else if (ops[rmi].type & OP_REG8 && ops[rmi].reg >= 4)
-	      /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
-	      saw_high_8bit = ops[rmi].reg;
-      }
-      if (ops[rmi].type & OP_EA && ops[rmi].reg2 >= 8) {
-	  rex |= REX_X;
-	  ops[rmi].reg2 -= 8;
-      }
-  }
-  if (rex) {
-      if (saw_high_8bit)
-	  tcc_error("can't encode register %%%ch when REX prefix is required",
-		    "acdb"[saw_high_8bit-4]);
-      g(rex);
-  }
-}
-#endif
-
-static void maybe_print_stats (void)
-{
-  static int already = 1;
-  if (!already)
-    /* print stats about opcodes */
-    {
-        const struct ASMInstr *pa;
-        int freq[4];
-        int op_vals[500];
-        int nb_op_vals, i, j;
-
-	already = 1;
-        nb_op_vals = 0;
-        memset(freq, 0, sizeof(freq));
-        for(pa = asm_instrs; pa->sym != 0; pa++) {
-            freq[pa->nb_ops]++;
-            //for(i=0;i<pa->nb_ops;i++) {
-                for(j=0;j<nb_op_vals;j++) {
-                    //if (pa->op_type[i] == op_vals[j])
-                    if (pa->instr_type == op_vals[j])
-                        goto found;
-                }
-                //op_vals[nb_op_vals++] = pa->op_type[i];
-                op_vals[nb_op_vals++] = pa->instr_type;
-            found: ;
-            //}
-        }
-        for(i=0;i<nb_op_vals;i++) {
-            int v = op_vals[i];
-            //if ((v & (v - 1)) != 0)
-                printf("%3d: %08x\n", i, v);
-        }
-        printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n",
-               (int)sizeof(asm_instrs),
-	       (int)sizeof(asm_instrs) / (int)sizeof(ASMInstr),
-               freq[0], freq[1], freq[2], freq[3]);
-    }
-}
-
-ST_FUNC void asm_opcode(TCCState *s1, int opcode)
-{
-    const ASMInstr *pa;
-    int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc;
-    int nb_ops, s;
-    Operand ops[MAX_OPERANDS], *pop;
-    int op_type[3]; /* decoded op type */
-    int alltypes;   /* OR of all operand types */
-    int autosize;
-    int p66;
-#ifdef TCC_TARGET_X86_64
-    int rex64;
-#endif
-
-    maybe_print_stats();
-    /* force synthetic ';' after prefix instruction, so we can handle */
-    /* one-line things like "rep stosb" instead of only "rep\nstosb" */
-    if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
-        unget_tok(';');
-
-    /* get operands */
-    pop = ops;
-    nb_ops = 0;
-    seg_prefix = 0;
-    alltypes = 0;
-    for(;;) {
-        if (tok == ';' || tok == TOK_LINEFEED)
-            break;
-        if (nb_ops >= MAX_OPERANDS) {
-            tcc_error("incorrect number of operands");
-        }
-        parse_operand(s1, pop);
-        if (tok == ':') {
-           if (pop->type != OP_SEG || seg_prefix)
-               tcc_error("incorrect prefix");
-           seg_prefix = segment_prefixes[pop->reg];
-           next();
-           parse_operand(s1, pop);
-           if (!(pop->type & OP_EA)) {
-               tcc_error("segment prefix must be followed by memory reference");
-           }
-        }
-        pop++;
-        nb_ops++;
-        if (tok != ',')
-            break;
-        next();
-    }
-
-    s = 0; /* avoid warning */
-
-    /* optimize matching by using a lookup table (no hashing is needed
-       !) */
-    for(pa = asm_instrs; pa->sym != 0; pa++) {
-	int it = pa->instr_type & OPCT_MASK;
-        s = 0;
-        if (it == OPC_FARITH) {
-            v = opcode - pa->sym;
-            if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
-                continue;
-        } else if (it == OPC_ARITH) {
-            if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
-                continue;
-            s = (opcode - pa->sym) % NBWLX;
-	    if ((pa->instr_type & OPC_BWLX) == OPC_WLX)
-	      {
-		/* We need to reject the xxxb opcodes that we accepted above.
-		   Note that pa->sym for WLX opcodes is the 'w' token,
-		   to get the 'b' token subtract one.  */
-		if (((opcode - pa->sym + 1) % NBWLX) == 0)
-		    continue;
-	        s++;
-	      }
-        } else if (it == OPC_SHIFT) {
-            if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
-                continue;
-            s = (opcode - pa->sym) % NBWLX;
-        } else if (it == OPC_TEST) {
-            if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
-                continue;
-	    /* cmovxx is a test opcode but accepts multiple sizes.
-	       TCC doesn't accept the suffixed mnemonic, instead we 
-	       simply force size autodetection always.  */
-	    if (pa->instr_type & OPC_WLX)
-	        s = NBWLX - 1;
-        } else if (pa->instr_type & OPC_B) {
-#ifdef TCC_TARGET_X86_64
-	    /* Some instructions don't have the full size but only
-	       bwl form.  insb e.g. */
-	    if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
-		&& !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
-	        continue;
-#endif
-            if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
-                continue;
-            s = opcode - pa->sym;
-        } else if (pa->instr_type & OPC_WLX) {
-            if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
-                continue;
-            s = opcode - pa->sym + 1;
-        } else {
-            if (pa->sym != opcode)
-                continue;
-        }
-        if (pa->nb_ops != nb_ops)
-            continue;
-#ifdef TCC_TARGET_X86_64
-	/* Special case for moves.  Selecting the IM64->REG64 form
-	   should only be done if we really have an >32bit imm64, and that
-	   is hardcoded.  Ignore it here.  */
-	if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
-	    && (ops[1].type & OP_REG) == OP_REG64
-	    && !(pa->instr_type & OPC_0F))
-	    continue;
-#endif
-        /* now decode and check each operand */
-	alltypes = 0;
-        for(i = 0; i < nb_ops; i++) {
-            int op1, op2;
-            op1 = pa->op_type[i];
-            op2 = op1 & 0x1f;
-            switch(op2) {
-            case OPT_IM:
-                v = OP_IM8 | OP_IM16 | OP_IM32;
-                break;
-            case OPT_REG:
-                v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
-                break;
-            case OPT_REGW:
-                v = OP_REG16 | OP_REG32 | OP_REG64;
-                break;
-            case OPT_IMW:
-                v = OP_IM16 | OP_IM32;
-                break;
-	    case OPT_MMXSSE:
-		v = OP_MMX | OP_SSE;
-		break;
-	    case OPT_DISP:
-	    case OPT_DISP8:
-		v = OP_ADDR;
-		break;
-            default:
-                v = 1 << op2;
-                break;
-            }
-            if (op1 & OPT_EA)
-                v |= OP_EA;
-	    op_type[i] = v;
-            if ((ops[i].type & v) == 0)
-                goto next;
-	    alltypes |= ops[i].type;
-        }
-        /* all is matching ! */
-        break;
-    next: ;
-    }
-    if (pa->sym == 0) {
-        if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
-            int b;
-            b = op0_codes[opcode - TOK_ASM_first];
-            if (b & 0xff00) 
-                g(b >> 8);
-            g(b);
-            return;
-        } else if (opcode <= TOK_ASM_alllast) {
-            tcc_error("bad operand with opcode '%s'",
-                  get_tok_str(opcode, NULL));
-        } else {
-            tcc_error("unknown opcode '%s'",
-                  get_tok_str(opcode, NULL));
-        }
-    }
-    /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
-    autosize = NBWLX-1;
-#ifdef TCC_TARGET_X86_64
-    /* XXX the autosize should rather be zero, to not have to adjust this
-       all the time.  */
-    if ((pa->instr_type & OPC_BWLQ) == OPC_B)
-        autosize = NBWLX-2;
-#endif
-    if (s == autosize) {
-	/* Check for register operands providing hints about the size.
-	   Start from the end, i.e. destination operands.  This matters
-	   only for opcodes accepting different sized registers, lar and lsl
-	   are such opcodes.  */
-        for(i = nb_ops - 1; s == autosize && i >= 0; i--) {
-            if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
-                s = reg_to_size[ops[i].type & OP_REG];
-        }
-        if (s == autosize) {
-            if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
-                (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
-                s = 2;
-	    else if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
-		     (ops[0].type & OP_EA))
-	        s = NBWLX - 2;
-            else
-                tcc_error("cannot infer opcode suffix");
-        }
-    }
-
-#ifdef TCC_TARGET_X86_64
-    /* Generate addr32 prefix if needed */
-    for(i = 0; i < nb_ops; i++) {
-        if (ops[i].type & OP_EA32) {
-	    g(0x67);
-	    break;
-        }
-    }
-#endif
-    /* generate data16 prefix if needed */
-    p66 = 0;
-    if (s == 1)
-        p66 = 1;
-    else {
-	/* accepting mmx+sse in all operands --> needs 0x66 to
-	   switch to sse mode.  Accepting only sse in an operand --> is
-	   already SSE insn and needs 0x66/f2/f3 handling.  */
-        for (i = 0; i < nb_ops; i++)
-            if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
-	        && ops[i].type & OP_SSE)
-	        p66 = 1;
-    }
-    if (p66)
-        g(0x66);
-#ifdef TCC_TARGET_X86_64
-    rex64 = 0;
-    if (pa->instr_type & OPC_48)
-        rex64 = 1;
-    else if (s == 3 || (alltypes & OP_REG64)) {
-        /* generate REX prefix */
-	int default64 = 0;
-	for(i = 0; i < nb_ops; i++) {
-	    if (op_type[i] == OP_REG64 && pa->opcode != 0xb8) {
-		/* If only 64bit regs are accepted in one operand
-		   this is a default64 instruction without need for
-		   REX prefixes, except for movabs(0xb8).  */
-		default64 = 1;
-		break;
-	    }
-	}
-	/* XXX find better encoding for the default64 instructions.  */
-        if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
-	      && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
-	      && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
-	      && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
-	      && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
-	    && !default64)
-            rex64 = 1;
-    }
-#endif
-
-    /* now generates the operation */
-    if (OPCT_IS(pa->instr_type, OPC_FWAIT))
-        g(0x9b);
-    if (seg_prefix)
-        g(seg_prefix);
-
-    v = pa->opcode;
-    if (pa->instr_type & OPC_0F)
-        v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff);
-    if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
-        /* kludge for imul $im, %reg */
-        nb_ops = 3;
-        ops[2] = ops[1];
-        op_type[2] = op_type[1];
-    } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
-        v--; /* int $3 case */
-        nb_ops = 0;
-    } else if ((v == 0x06 || v == 0x07)) {
-        if (ops[0].reg >= 4) {
-            /* push/pop %fs or %gs */
-            v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
-        } else {
-            v += ops[0].reg << 3;
-        }
-        nb_ops = 0;
-    } else if (v <= 0x05) {
-        /* arith case */
-        v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
-    } else if ((pa->instr_type & (OPCT_MASK | OPC_MODRM)) == OPC_FARITH) {
-        /* fpu arith case */
-        v += ((opcode - pa->sym) / 6) << 3;
-    }
-
-    /* search which operand will be used for modrm */
-    modrm_index = -1;
-    modreg_index = -1;
-    if (pa->instr_type & OPC_MODRM) {
-	if (!nb_ops) {
-	    /* A modrm opcode without operands is a special case (e.g. mfence).
-	       It has a group and acts as if there's an register operand 0
-	       (ax).  */
-	    i = 0;
-	    ops[i].type = OP_REG;
-	    ops[i].reg = 0;
-	    goto modrm_found;
-	}
-        /* first look for an ea operand */
-        for(i = 0;i < nb_ops; i++) {
-            if (op_type[i] & OP_EA)
-                goto modrm_found;
-        }
-        /* then if not found, a register or indirection (shift instructions) */
-        for(i = 0;i < nb_ops; i++) {
-            if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
-                goto modrm_found;
-        }
-#ifdef ASM_DEBUG
-        tcc_error("bad op table");
-#endif
-    modrm_found:
-        modrm_index = i;
-        /* if a register is used in another operand then it is
-           used instead of group */
-        for(i = 0;i < nb_ops; i++) {
-            int t = op_type[i];
-            if (i != modrm_index &&
-                (t & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
-                modreg_index = i;
-                break;
-            }
-        }
-    }
-#ifdef TCC_TARGET_X86_64
-    asm_rex (rex64, ops, nb_ops, op_type, modreg_index, modrm_index);
-#endif
-
-    if (pa->instr_type & OPC_REG) {
-        /* mov $im, %reg case */
-        if (v == 0xb0 && s >= 1)
-            v += 7;
-        for(i = 0; i < nb_ops; i++) {
-            if (op_type[i] & (OP_REG | OP_ST)) {
-                v += ops[i].reg;
-                break;
-            }
-        }
-    }
-    if (pa->instr_type & OPC_B)
-        v += s >= 1;
-    if (nb_ops == 1 && pa->op_type[0] == OPT_DISP8) {
-        Sym *sym;
-        int jmp_disp;
-
-        /* see if we can really generate the jump with a byte offset */
-        sym = ops[0].e.sym;
-        if (!sym)
-            goto no_short_jump;
-        if (sym->r != cur_text_section->sh_num)
-            goto no_short_jump;
-        jmp_disp = ops[0].e.v + sym->jnext - ind - 2 - (v >= 0xff);
-        if (jmp_disp == (int8_t)jmp_disp) {
-            /* OK to generate jump */
-	    ops[0].e.sym = 0;
-            ops[0].e.v = jmp_disp;
-	    op_type[0] = OP_IM8S;
-        } else {
-        no_short_jump:
-	    /* long jump will be allowed. need to modify the
-	       opcode slightly */
-	    if (v == 0xeb) /* jmp */
-	        v = 0xe9;
-	    else if (v == 0x70) /* jcc */
-	        v += 0x0f10;
-	    else
-	        tcc_error("invalid displacement");
-        }
-    }
-    if (OPCT_IS(pa->instr_type, OPC_TEST))
-        v += test_bits[opcode - pa->sym];
-    op1 = v >> 16;
-    if (op1)
-        g(op1);
-    op1 = (v >> 8) & 0xff;
-    if (op1)
-        g(op1);
-    g(v);
-
-    if (OPCT_IS(pa->instr_type, OPC_SHIFT)) {
-        reg = (opcode - pa->sym) / NBWLX;
-        if (reg == 6)
-            reg = 7;
-    } else if (OPCT_IS(pa->instr_type, OPC_ARITH)) {
-        reg = (opcode - pa->sym) / NBWLX;
-    } else if (OPCT_IS(pa->instr_type, OPC_FARITH)) {
-        reg = (opcode - pa->sym) / 6;
-    } else {
-        reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
-    }
-
-    pc = 0;
-    if (pa->instr_type & OPC_MODRM) {
-        /* if a register is used in another operand then it is
-           used instead of group */
-	if (modreg_index >= 0)
-	    reg = ops[modreg_index].reg;
-        pc = asm_modrm(reg, &ops[modrm_index]);
-    }
-
-    /* emit constants */
-#ifndef TCC_TARGET_X86_64
-    if (!(pa->instr_type & OPC_0F)
-	&& (pa->opcode == 0x9a || pa->opcode == 0xea)) {
-        /* ljmp or lcall kludge */
-	gen_expr32(&ops[1].e);
-        if (ops[0].e.sym)
-            tcc_error("cannot relocate");
-        gen_le16(ops[0].e.v);
-        return;
-    }
-#endif
-    for(i = 0;i < nb_ops; i++) {
-        v = op_type[i];
-        if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
-            /* if multiple sizes are given it means we must look
-               at the op size */
-            if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
-                if (s == 0)
-                    v = OP_IM8;
-                else if (s == 1)
-                    v = OP_IM16;
-                else if (s == 2 || (v & OP_IM64) == 0)
-                    v = OP_IM32;
-                else
-                    v = OP_IM64;
-            }
-
-            if ((v & (OP_IM8 | OP_IM8S | OP_IM16)) && ops[i].e.sym)
-                tcc_error("cannot relocate");
-
-            if (v & (OP_IM8 | OP_IM8S)) {
-                g(ops[i].e.v);
-            } else if (v & OP_IM16) {
-                gen_le16(ops[i].e.v);
-#ifdef TCC_TARGET_X86_64
-            } else if (v & OP_IM64) {
-                gen_expr64(&ops[i].e);
-#endif
-	    } else if (pa->op_type[i] == OPT_DISP || pa->op_type[i] == OPT_DISP8) {
-                gen_disp32(&ops[i].e);
-            } else {
-                gen_expr32(&ops[i].e);
-            }
-        }
-    }
-
-    /* after immediate operands, adjust pc-relative address */
-    if (pc)
-        add32le(cur_text_section->data + pc - 4, pc - ind);
-}
-
-/* Return the allocation priority of the constraint string STR: the
-   largest priority value among its constraint letters (operands with the
-   lowest value are allocated first).  A letter that is not recognized is
-   reported through tcc_error(). */
-static inline int constraint_priority(const char *str)
-{
-    int highest = 0;
-    int level, ch;
-
-    for (; (ch = *str) != '\0'; str++) {
-        switch (ch) {
-        case 'A':
-            level = 0;
-            break;
-        case 'a': case 'b': case 'c': case 'd':
-        case 'S': case 'D':
-            /* constraints naming one specific register */
-            level = 1;
-            break;
-        case 'q':
-            level = 2;
-            break;
-        case 'r': case 'R': case 'p':
-            level = 3;
-            break;
-        case 'N': case 'M': case 'I': case 'e':
-        case 'i': case 'm': case 'g':
-            level = 4;
-            break;
-        default:
-            tcc_error("unknown constraint '%c'", ch);
-            level = 0;
-            break;
-        }
-        /* keep the largest value seen so far */
-        highest = level > highest ? level : highest;
-    }
-    return highest;
-}
-
-/* Return a pointer to the first character of P that is not one of the
-   constraint modifier characters '=', '&', '+' or '%'. */
-static const char *skip_constraint_modifiers(const char *p)
-{
-    for (;;) {
-        switch (*p) {
-        case '=':
-        case '&':
-        case '+':
-        case '%':
-            p++;
-            continue;
-        default:
-            return p;
-        }
-    }
-}
-
-/* If token T is an identifier whose spelling starts with '%', hand the
-   rest of the name and then a '%' token to unget_tok(), parse the result
-   with parse_operand() and return the register number when the operand
-   is an OP_REG.  Return -1 in every other case (only the register number
-   is returned, not its type). */
-ST_FUNC int asm_parse_regvar (int t)
-{
-    Operand parsed;
-    const char *name;
-
-    if (t < TOK_IDENT)
-        return -1;
-    name = table_ident[t - TOK_IDENT]->str;
-    if (name[0] != '%')
-        return -1;
-
-    unget_tok(tok_alloc(name + 1, strlen(name) - 1)->tok);
-    unget_tok('%');
-    parse_operand(tcc_state, &parsed);
-
-    /* Accept only integer regs for now.  */
-    return (parsed.type & OP_REG) ? parsed.reg : -1;
-}
-
-#define REG_OUT_MASK 0x01
-#define REG_IN_MASK  0x02
-
-#define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
-/* Decide, for every inline asm operand, which register (if any) it lives
-   in.  Operands are first classified: a constraint that, after its
-   modifiers, starts with a digit or '[' is a reference to another operand
-   (priority 5); an operand whose value is VT_LOCAL with a symbol whose
-   r field selects a register below VT_CONST is pinned to that register
-   (priority 1; the error text below calls this an "asm regvar"); all
-   others get constraint_priority().  Operands are then visited in
-   ascending priority order and each constraint letter is tried in turn
-   until one can be satisfied.  Registers flagged in clobber_regs, and
-   registers 4 and 5, are treated as already taken.  On return *pout_reg
-   holds a register not marked REG_OUT_MASK if some register operand is
-   VT_LLOCAL without is_memory set, and -1 otherwise.  Invalid or
-   unsatisfiable constraints are reported through tcc_error(). */
-ST_FUNC void asm_compute_constraints(ASMOperand *operands,
-                                    int nb_operands, int nb_outputs,
-                                    const uint8_t *clobber_regs,
-                                    int *pout_reg)
-{
-    ASMOperand *op;
-    int sorted_op[MAX_ASM_OPERANDS];
-    int i, j, k, p1, p2, tmp, reg, c, reg_mask;
-    const char *str;
-    uint8_t regs_allocated[NB_ASM_REGS];
-
-    /* reset the per-operand fields that the passes below fill in */
-    for(i=0;i<nb_operands;i++) {
-        op = &operands[i];
-        op->input_index = -1;
-        op->ref_index = -1;
-        op->reg = -1;
-        op->is_memory = 0;
-        op->is_rw = 0;
-    }
-    /* compute constraint priority and evaluate references to output
-       constraints in input constraints */
-    for(i=0;i<nb_operands;i++) {
-        op = &operands[i];
-        str = op->constraint;
-        str = skip_constraint_modifiers(str);
-        if (isnum(*str) || *str == '[') {
-            /* this is a reference to another constraint: it must name
-               an earlier operand and is only allowed on inputs; the
-               referenced operand records the link in input_index and
-               may be referenced only once */
-            k = find_constraint(operands, nb_operands, str, NULL);
-            if ((unsigned)k >= i || i < nb_outputs)
-                tcc_error("invalid reference in constraint %d ('%s')",
-                      i, str);
-            op->ref_index = k;
-            if (operands[k].input_index >= 0)
-                tcc_error("cannot reference twice the same operand");
-            operands[k].input_index = i;
-            op->priority = 5;
-	} else if ((op->vt->r & VT_VALMASK) == VT_LOCAL
-		   && op->vt->sym
-		   && (reg = op->vt->sym->r & VT_VALMASK) < VT_CONST) {
-	    op->priority = 1;
-	    op->reg = reg;
-        } else {
-            op->priority = constraint_priority(str);
-        }
-    }
-
-    /* sort operands according to their priority (lowest value first);
-       sorted_op[] holds operand indexes, operands[] itself is not moved */
-    for(i=0;i<nb_operands;i++)
-        sorted_op[i] = i;
-    for(i=0;i<nb_operands - 1;i++) {
-        for(j=i+1;j<nb_operands;j++) {
-            p1 = operands[sorted_op[i]].priority;
-            p2 = operands[sorted_op[j]].priority;
-            if (p2 < p1) {
-                tmp = sorted_op[i];
-                sorted_op[i] = sorted_op[j];
-                sorted_op[j] = tmp;
-            }
-        }
-    }
-
-    for(i = 0;i < NB_ASM_REGS; i++) {
-        if (clobber_regs[i])
-            regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
-        else
-            regs_allocated[i] = 0;
-    }
-    /* esp cannot be used */
-    regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
-    /* ebp cannot be used yet */
-    regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;
-
-    /* allocate registers and generate corresponding asm moves */
-    for(i=0;i<nb_operands;i++) {
-        j = sorted_op[i];
-        op = &operands[j];
-        str = op->constraint;
-        /* no need to allocate references */
-        if (op->ref_index >= 0)
-            continue;
-        /* select if register is used for output, input or both
-           (an operand referenced by an input occupies its register on
-           both sides) */
-        if (op->input_index >= 0) {
-            reg_mask = REG_IN_MASK | REG_OUT_MASK;
-        } else if (j < nb_outputs) {
-            reg_mask = REG_OUT_MASK;
-        } else {
-            reg_mask = REG_IN_MASK;
-        }
-	if (op->reg >= 0) {
-	    if (is_reg_allocated(op->reg))
-	        tcc_error("asm regvar requests register that's taken already");
-	    reg = op->reg;
-	    goto reg_found;
-	}
-    try_next:
-        c = *str++;
-        switch(c) {
-        case '=':
-            goto try_next;
-        case '+':
-            op->is_rw = 1;
-            /* FALL THRU */
-        case '&':
-            if (j >= nb_outputs)
-                tcc_error("'%c' modifier can only be applied to outputs", c);
-            reg_mask = REG_IN_MASK | REG_OUT_MASK;
-            goto try_next;
-        case 'A':
-            /* allocate both eax and edx */
-            if (is_reg_allocated(TREG_XAX) ||
-                is_reg_allocated(TREG_XDX))
-                goto try_next;
-            op->is_llong = 1;
-            op->reg = TREG_XAX;
-            regs_allocated[TREG_XAX] |= reg_mask;
-            regs_allocated[TREG_XDX] |= reg_mask;
-            break;
-        case 'a':
-            reg = TREG_XAX;
-            goto alloc_reg;
-        case 'b':
-            reg = 3;
-            goto alloc_reg;
-        case 'c':
-            reg = TREG_XCX;
-            goto alloc_reg;
-        case 'd':
-            reg = TREG_XDX;
-            goto alloc_reg;
-        case 'S':
-            reg = 6;
-            goto alloc_reg;
-        case 'D':
-            reg = 7;
-        alloc_reg:
-            if (is_reg_allocated(reg))
-                goto try_next;
-            goto reg_found;
-        case 'q':
-            /* eax, ebx, ecx or edx */
-            for(reg = 0; reg < 4; reg++) {
-                if (!is_reg_allocated(reg))
-                    goto reg_found;
-            }
-            goto try_next;
-        case 'r':
-	case 'R':
-	case 'p': /* A general address, for x86(64) any register is acceptable*/
-            /* any general register */
-            for(reg = 0; reg < 8; reg++) {
-                if (!is_reg_allocated(reg))
-                    goto reg_found;
-            }
-            goto try_next;
-        reg_found:
-            /* now we can reload in the register */
-            op->is_llong = 0;
-            op->reg = reg;
-            regs_allocated[reg] |= reg_mask;
-            break;
-	case 'e':
-        case 'i':
-            if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
-                goto try_next;
-            break;
-        case 'I':
-        case 'N':
-        case 'M':
-            if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
-                goto try_next;
-            break;
-        case 'm':
-        case 'g':
-            /* nothing special to do because the operand is already in
-               memory, except if the pointer itself is stored in a
-               memory variable (VT_LLOCAL case) */
-            /* XXX: fix constant case */
-            /* if it is a reference to a memory zone, it must lie
-               in a register, so we reserve the register in the
-               input registers and a load will be generated
-               later */
-            if (j < nb_outputs || c == 'm') {
-                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
-                    /* any general register */
-                    for(reg = 0; reg < 8; reg++) {
-                        if (!(regs_allocated[reg] & REG_IN_MASK))
-                            goto reg_found1;
-                    }
-                    goto try_next;
-                reg_found1:
-                    /* now we can reload in the register */
-                    regs_allocated[reg] |= REG_IN_MASK;
-                    op->reg = reg;
-                    op->is_memory = 1;
-                }
-            }
-            break;
-        default:
-            tcc_error("asm constraint %d ('%s') could not be satisfied",
-                  j, op->constraint);
-            break;
-        }
-        /* if a reference is present for that operand, we assign it too */
-        if (op->input_index >= 0) {
-            operands[op->input_index].reg = op->reg;
-            operands[op->input_index].is_llong = op->is_llong;
-        }
-    }
-
-    /* compute out_reg. It is used to store output registers to memory
-       locations referenced by pointers (VT_LLOCAL case) */
-    *pout_reg = -1;
-    for(i=0;i<nb_operands;i++) {
-        op = &operands[i];
-        if (op->reg >= 0 &&
-            (op->vt->r & VT_VALMASK) == VT_LLOCAL  &&
-            !op->is_memory) {
-            for(reg = 0; reg < 8; reg++) {
-                if (!(regs_allocated[reg] & REG_OUT_MASK))
-                    goto reg_found2;
-            }
-            tcc_error("could not find free output register for reloading");
-        reg_found2:
-            *pout_reg = reg;
-            break;
-        }
-    }
-
-    /* print sorted constraints */
-#ifdef ASM_DEBUG
-    for(i=0;i<nb_operands;i++) {
-        j = sorted_op[i];
-        op = &operands[j];
-        printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
-               j,
-               op->id ? get_tok_str(op->id, NULL) : "",
-               op->constraint,
-               op->vt->r,
-               op->reg);
-    }
-    if (*pout_reg >= 0)
-        printf("out_reg=%d\n", *pout_reg);
-#endif
-}
-
-/* Append to ADD_STR the assembler text for the operand value SV.
-   MODIFIER is the operand modifier character taken from the asm
-   template; the ones handled here are:
-     'c', 'P'  constant printed without the leading '$'
-     'n'       like 'c', but the constant is negated before printing
-     'b'       byte register (al family, registers 0..3 only)
-     'h'       byte register (ah family, registers 0..3 only)
-     'w', 'k'  16-bit / 32-bit register
-     'q'       64-bit register (x86_64 only)
-   Any other value keeps the default formatting.  Four operand shapes are
-   distinguished from sv->r: constant (optionally with a symbol),
-   VT_LOCAL frame slot, register-indirect lvalue, and plain register.  A
-   register number at or above VT_CONST in the last two shapes is
-   reported through tcc_error(). */
-ST_FUNC void subst_asm_operand(CString *add_str,
-                              SValue *sv, int modifier)
-{
-    int r, reg, size, val;
-    char buf[64];
-
-    r = sv->r;
-    if ((r & VT_VALMASK) == VT_CONST) {
-        if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' &&
-            modifier != 'P')
-            cstr_ccat(add_str, '$');
-        if (r & VT_SYM) {
-            const char *name = get_tok_str(sv->sym->v, NULL);
-            if (sv->sym->v >= SYM_FIRST_ANOM) {
-                /* In case of anonymous symbols ("L.42", used
-                   for static data labels) we can't find them
-                   in the C symbol table when later looking up
-                   this name.  So enter them now into the asm label
-                   list when we still know the symbol.  */
-                get_asm_sym(tok_alloc(name, strlen(name))->tok, sv->sym);
-            }
-            cstr_cat(add_str, name, -1);
-            /* a symbol with a zero (32-bit) offset is printed bare */
-            if ((uint32_t)sv->c.i == 0)
-                goto no_offset;
-            cstr_ccat(add_str, '+');
-        }
-        val = sv->c.i;
-        if (modifier == 'n')
-            val = -val;
-        /* print VAL rather than sv->c.i so that the 'n' negation above
-           actually reaches the output */
-        snprintf(buf, sizeof(buf), "%d", val);
-        cstr_cat(add_str, buf, -1);
-    no_offset:;
-#ifdef TCC_TARGET_X86_64
-        if (r & VT_LVAL)
-            cstr_cat(add_str, "(%rip)", -1);
-#endif
-    } else if ((r & VT_VALMASK) == VT_LOCAL) {
-        /* frame slot: offset relative to the frame pointer register */
-#ifdef TCC_TARGET_X86_64
-        snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i);
-#else
-        snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
-#endif
-        cstr_cat(add_str, buf, -1);
-    } else if (r & VT_LVAL) {
-        /* memory addressed through a register */
-        reg = r & VT_VALMASK;
-        if (reg >= VT_CONST)
-            tcc_error("internal compiler error");
-        snprintf(buf, sizeof(buf), "(%%%s)",
-#ifdef TCC_TARGET_X86_64
-                 get_tok_str(TOK_ASM_rax + reg, NULL)
-#else
-                 get_tok_str(TOK_ASM_eax + reg, NULL)
-#endif
-                 );
-        cstr_cat(add_str, buf, -1);
-    } else {
-        /* register case */
-        reg = r & VT_VALMASK;
-        if (reg >= VT_CONST)
-            tcc_error("internal compiler error");
-
-        /* choose register operand size from the value's basic type */
-        if ((sv->type.t & VT_BTYPE) == VT_BYTE ||
-            (sv->type.t & VT_BTYPE) == VT_BOOL)
-            size = 1;
-        else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
-            size = 2;
-#ifdef TCC_TARGET_X86_64
-        else if ((sv->type.t & VT_BTYPE) == VT_LLONG ||
-                 (sv->type.t & VT_BTYPE) == VT_PTR)
-            size = 8;
-#endif
-        else
-            size = 4;
-        /* registers 4 and up are not given a byte name by default */
-        if (size == 1 && reg >= 4)
-            size = 4;
-
-        /* an explicit modifier overrides the size derived from the type;
-           -1 stands for the ah-family byte registers */
-        if (modifier == 'b') {
-            if (reg >= 4)
-                tcc_error("cannot use byte register");
-            size = 1;
-        } else if (modifier == 'h') {
-            if (reg >= 4)
-                tcc_error("cannot use byte register");
-            size = -1;
-        } else if (modifier == 'w') {
-            size = 2;
-        } else if (modifier == 'k') {
-            size = 4;
-#ifdef TCC_TARGET_X86_64
-        } else if (modifier == 'q') {
-            size = 8;
-#endif
-        }
-
-        /* turn the register number into the token of its name */
-        switch(size) {
-        case -1:
-            reg = TOK_ASM_ah + reg;
-            break;
-        case 1:
-            reg = TOK_ASM_al + reg;
-            break;
-        case 2:
-            reg = TOK_ASM_ax + reg;
-            break;
-#ifdef TCC_TARGET_X86_64
-        case 8:
-            reg = TOK_ASM_rax + reg;
-            break;
-#endif
-        default:
-            reg = TOK_ASM_eax + reg;
-            break;
-        }
-        snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
-        cstr_cat(add_str, buf, -1);
-    }
-}
-
-/* Emit the code that surrounds an asm statement.  With IS_OUTPUT zero
-   this is the prolog: emit the register save bytes for the entries of
-   reg_saved[] that are in use, then load() the operands into their
-   registers.  With IS_OUTPUT non-zero this is the epilog: store() the
-   output registers back, then emit the register restore bytes walking
-   reg_saved[] backwards.  OUT_REG is the register used to reach outputs
-   whose value is VT_LLOCAL and not flagged is_memory. */
-ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
-                         int nb_outputs, int is_output,
-                         uint8_t *clobber_regs,
-                         int out_reg)
-{
-    uint8_t used[NB_ASM_REGS];
-    ASMOperand *cur;
-    SValue tmp;
-    int n, r;
-
-    /* Strictly speaking %Xbp and %Xsp should be included in the
-       call-preserved registers, but currently it doesn't matter.  */
-#ifdef TCC_TARGET_X86_64
-#ifdef TCC_TARGET_PE
-    static uint8_t reg_saved[] = { 3, 6, 7, 12, 13, 14, 15 };
-#else
-    static uint8_t reg_saved[] = { 3, 12, 13, 14, 15 };
-#endif
-#else
-    static uint8_t reg_saved[] = { 3, 6, 7 };
-#endif
-    const int nb_saved = sizeof(reg_saved) / sizeof(reg_saved[0]);
-
-    /* a register counts as used when it is clobbered or holds an operand */
-    memcpy(used, clobber_regs, sizeof(used));
-    for (n = 0; n < nb_operands; n++) {
-        if (operands[n].reg >= 0)
-            used[operands[n].reg] = 1;
-    }
-
-    if (is_output) {
-        /* epilog, part 1: write the outputs back */
-        for (n = 0; n < nb_outputs; n++) {
-            cur = &operands[n];
-            if (cur->reg < 0)
-                continue;
-            if ((cur->vt->r & VT_VALMASK) != VT_LLOCAL) {
-                store(cur->reg, cur->vt);
-                if (cur->is_llong) {
-                    /* second half lives in TREG_XDX, 4 bytes further */
-                    tmp = *cur->vt;
-                    tmp.c.i += 4;
-                    store(TREG_XDX, &tmp);
-                }
-                continue;
-            }
-            if (cur->is_memory)
-                continue;
-            /* load a VT_LOCAL / VT_PTR view of the operand into out_reg,
-               then store the result with out_reg as the value register */
-            tmp = *cur->vt;
-            tmp.r = (tmp.r & ~VT_VALMASK) | VT_LOCAL;
-            tmp.type.t = VT_PTR;
-            load(out_reg, &tmp);
-
-            tmp = *cur->vt;
-            tmp.r = (tmp.r & ~VT_VALMASK) | out_reg;
-            store(cur->reg, &tmp);
-        }
-
-        /* epilog, part 2: register restore code, last saved first */
-        for (n = nb_saved - 1; n >= 0; n--) {
-            r = reg_saved[n];
-            if (!used[r])
-                continue;
-            if (r >= 8) {
-                g(0x41);
-                r -= 8;
-            }
-            g(0x58 + r);
-        }
-        return;
-    }
-
-    /* prolog, part 1: register save code */
-    for (n = 0; n < nb_saved; n++) {
-        r = reg_saved[n];
-        if (!used[r])
-            continue;
-        if (r >= 8) {
-            g(0x41);
-            r -= 8;
-        }
-        g(0x50 + r);
-    }
-
-    /* prolog, part 2: bring the operands into their registers */
-    for (n = 0; n < nb_operands; n++) {
-        cur = &operands[n];
-        if (cur->reg < 0)
-            continue;
-        if ((cur->vt->r & VT_VALMASK) == VT_LLOCAL && cur->is_memory) {
-            /* memory reference case (for both input and output cases) */
-            tmp = *cur->vt;
-            tmp.r = (tmp.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL;
-            tmp.type.t = VT_PTR;
-            load(cur->reg, &tmp);
-        } else if (n >= nb_outputs || cur->is_rw) {
-            /* inputs and read-write outputs need their value loaded */
-            load(cur->reg, cur->vt);
-            if (cur->is_llong) {
-                tmp = *cur->vt;
-                tmp.c.i += 4;
-                load(TREG_XDX, &tmp);
-            }
-        }
-    }
-}
-
-/* Flag the register named by STR in CLOBBER_REGS.  The names "memory",
-   "cc" and "flags" are accepted and ignored; a name that is not a
-   recognized register is reported through tcc_error(). */
-ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
-{
-    int regno;
-#ifdef TCC_TARGET_X86_64
-    unsigned int type;
-#endif
-
-    if (strcmp(str, "memory") == 0 ||
-        strcmp(str, "cc") == 0 ||
-        strcmp(str, "flags") == 0)
-        return;
-
-    /* start from the token number of the name, then rebase it onto the
-       first token of whichever register family it falls in */
-    regno = tok_alloc(str, strlen(str))->tok;
-    if (regno >= TOK_ASM_eax && regno <= TOK_ASM_edi) {
-        regno -= TOK_ASM_eax;
-    } else if (regno >= TOK_ASM_ax && regno <= TOK_ASM_di) {
-        regno -= TOK_ASM_ax;
-#ifdef TCC_TARGET_X86_64
-    } else if (regno >= TOK_ASM_rax && regno <= TOK_ASM_rdi) {
-        regno -= TOK_ASM_rax;
-    } else if ((regno = asm_parse_numeric_reg(regno, &type)) >= 0) {
-        ;
-#endif
-    } else {
-        tcc_error("invalid clobber register '%s'", str);
-    }
-    clobber_regs[regno] = 1;
-}