diff options
author | Dmitry Atamanov <data-man@users.noreply.github.com> | 2017-10-28 10:25:56 +0300 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2017-10-28 09:25:56 +0200 |
commit | d2c7d391c8b69a6a590a2f702ed58bea033f6325 (patch) | |
tree | c74a1b46e1166ddb87453ddc49cea84e1baaa5ab /tinyc/i386-asm.c | |
parent | 9c00f6decd4453a4233450a60ccef05b20e9f24a (diff) | |
download | Nim-d2c7d391c8b69a6a590a2f702ed58bea033f6325.tar.gz |
TinyC upgrade (#6593)
Diffstat (limited to 'tinyc/i386-asm.c')
-rw-r--r-- | tinyc/i386-asm.c | 1145 |
1 files changed, 824 insertions, 321 deletions
diff --git a/tinyc/i386-asm.c b/tinyc/i386-asm.c index 12ff8f2ba..2e184974e 100644 --- a/tinyc/i386-asm.c +++ b/tinyc/i386-asm.c @@ -1,7 +1,8 @@ /* * i386 specific functions for TCC assembler - * + * * Copyright (c) 2001, 2002 Fabrice Bellard + * Copyright (c) 2009 Frédéric Feret (x86_64 support) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -18,66 +19,88 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include "tcc.h" + #define MAX_OPERANDS 3 -typedef struct ASMInstr { - uint16_t sym; - uint16_t opcode; - uint16_t instr_type; -#define OPC_JMP 0x01 /* jmp operand */ -#define OPC_B 0x02 /* only used zith OPC_WL */ -#define OPC_WL 0x04 /* accepts w, l or no suffix */ -#define OPC_BWL (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */ -#define OPC_REG 0x08 /* register is added to opcode */ -#define OPC_MODRM 0x10 /* modrm encoding */ -#define OPC_FWAIT 0x20 /* add fwait opcode */ -#define OPC_TEST 0x40 /* test opcodes */ -#define OPC_SHIFT 0x80 /* shift opcodes */ -#define OPC_D16 0x0100 /* generate data16 prefix */ -#define OPC_ARITH 0x0200 /* arithmetic opcodes */ -#define OPC_SHORTJMP 0x0400 /* short jmp operand */ -#define OPC_FARITH 0x0800 /* FPU arithmetic opcodes */ +#define TOK_ASM_first TOK_ASM_clc +#define TOK_ASM_last TOK_ASM_emms +#define TOK_ASM_alllast TOK_ASM_subps + +#define OPC_B 0x01 /* only used with OPC_WL */ +#define OPC_WL 0x02 /* accepts w, l or no suffix */ +#define OPC_BWL (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */ +#define OPC_REG 0x04 /* register is added to opcode */ +#define OPC_MODRM 0x08 /* modrm encoding */ + +#define OPCT_MASK 0x70 +#define OPC_FWAIT 0x10 /* add fwait opcode */ +#define OPC_SHIFT 0x20 /* shift opcodes */ +#define OPC_ARITH 0x30 /* arithmetic opcodes */ +#define OPC_FARITH 0x40 /* FPU arithmetic opcodes */ +#define OPC_TEST 0x50 /* test opcodes */ +#define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i)) + +#define OPC_0F 0x100 /* Is secondary map (0x0f prefix) */ +#define OPC_48 0x200 /* Always has REX prefix */ +#ifdef TCC_TARGET_X86_64 +# define OPC_WLQ 0x1000 /* accepts w, l, q or no suffix */ +# define OPC_BWLQ (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */ +# define OPC_WLX OPC_WLQ +# define OPC_BWLX OPC_BWLQ +#else +# define OPC_WLX OPC_WL +# define OPC_BWLX OPC_BWL +#endif + #define OPC_GROUP_SHIFT 13 /* in order to compress the operand type, we use specific operands and - we or only with EA */ -#define OPT_REG8 0 /* warning: value is hardcoded from TOK_ASM_xxx */ -#define OPT_REG16 1 /* warning: value is hardcoded from TOK_ASM_xxx */ -#define OPT_REG32 2 /* warning: value is hardcoded from TOK_ASM_xxx */ -#define OPT_MMX 3 /* warning: value is hardcoded from TOK_ASM_xxx */ -#define OPT_SSE 4 /* warning: value is hardcoded from TOK_ASM_xxx */ -#define OPT_CR 5 /* warning: value is hardcoded from TOK_ASM_xxx */ -#define OPT_TR 6 /* warning: value is hardcoded from TOK_ASM_xxx */ -#define OPT_DB 7 /* warning: value is hardcoded from TOK_ASM_xxx */ -#define OPT_SEG 8 -#define OPT_ST 9 -#define OPT_IM8 10 -#define OPT_IM8S 11 -#define OPT_IM16 12 -#define OPT_IM32 13 -#define OPT_EAX 14 /* %al, %ax or %eax register */ -#define OPT_ST0 15 /* %st(0) register */ -#define OPT_CL 16 /* %cl register */ -#define OPT_DX 17 /* %dx register */ -#define OPT_ADDR 18 /* OP_EA with only offset */ -#define OPT_INDIR 19 /* *(expr) */ - -/* composite types */ -#define OPT_COMPOSITE_FIRST 20 -#define OPT_IM 20 /* IM8 | IM16 | IM32 */ -#define OPT_REG 21 /* REG8 | REG16 | REG32 */ -#define OPT_REGW 22 /* REG16 | REG32 */ -#define OPT_IMW 23 /* IM16 | IM32 */ - -/* can be ored with any OPT_xxx */ -#define OPT_EA 0x80 - - uint8_t nb_ops; - uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */ -} ASMInstr; + we or only with EA */ +enum { + OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */ + OPT_REG16, /* warning: value is hardcoded from TOK_ASM_xxx */ + OPT_REG32, /* warning: value is hardcoded from TOK_ASM_xxx */ +#ifdef TCC_TARGET_X86_64 + OPT_REG64, /* warning: value is hardcoded from TOK_ASM_xxx */ +#endif + OPT_MMX, /* warning: value is hardcoded from TOK_ASM_xxx */ + OPT_SSE, /* warning: value is hardcoded from TOK_ASM_xxx */ + OPT_CR, /* warning: value is hardcoded from TOK_ASM_xxx */ + OPT_TR, /* warning: value is hardcoded from TOK_ASM_xxx */ + OPT_DB, /* warning: value is hardcoded from TOK_ASM_xxx */ + OPT_SEG, + OPT_ST, +#ifdef TCC_TARGET_X86_64 + OPT_REG8_LOW, /* %spl,%bpl,%sil,%dil, encoded like ah,ch,dh,bh, but + with REX prefix, not used in insn templates */ +#endif + OPT_IM8, + OPT_IM8S, + OPT_IM16, + OPT_IM32, +#ifdef TCC_TARGET_X86_64 + OPT_IM64, +#endif + OPT_EAX, /* %al, %ax, %eax or %rax register */ + OPT_ST0, /* %st(0) register */ + OPT_CL, /* %cl register */ + OPT_DX, /* %dx register */ + OPT_ADDR, /* OP_EA with only offset */ + OPT_INDIR, /* *(expr) */ + /* composite types */ + OPT_COMPOSITE_FIRST, + OPT_IM, /* IM8 | IM16 | IM32 */ + OPT_REG, /* REG8 | REG16 | REG32 | REG64 */ + OPT_REGW, /* REG16 | REG32 | REG64 */ + OPT_IMW, /* IM16 | IM32 */ + OPT_MMXSSE, /* MMX | SSE */ + OPT_DISP, /* Like OPT_ADDR, but emitted as displacement (for jumps) */ + OPT_DISP8, /* Like OPT_ADDR, but only 8bit (short jumps) */ + /* can be ored with any OPT_xxx */ + OPT_EA = 0x80 +}; -typedef struct Operand { - uint32_t type; #define OP_REG8 (1 << OPT_REG8) #define OP_REG16 (1 << OPT_REG16) #define OP_REG32 (1 << OPT_REG32) @@ -98,26 +121,58 @@ typedef struct Operand { #define OP_DX (1 << OPT_DX) #define OP_ADDR (1 << OPT_ADDR) #define OP_INDIR (1 << OPT_INDIR) +#ifdef TCC_TARGET_X86_64 +# define OP_REG64 (1 << OPT_REG64) +# define OP_REG8_LOW (1 << OPT_REG8_LOW) +# define OP_IM64 (1 << OPT_IM64) +# define OP_EA32 (OP_EA << 1) +#else +# define OP_REG64 0 +# define OP_REG8_LOW 0 +# define OP_IM64 0 +# define OP_EA32 0 +#endif #define OP_EA 0x40000000 -#define OP_REG (OP_REG8 | OP_REG16 | OP_REG32) -#define OP_IM OP_IM32 +#define OP_REG (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64) + +#ifdef TCC_TARGET_X86_64 +# define TREG_XAX TREG_RAX +# define TREG_XCX TREG_RCX +# define TREG_XDX TREG_RDX +#else +# define TREG_XAX TREG_EAX +# define TREG_XCX TREG_ECX +# define TREG_XDX TREG_EDX +#endif + +typedef struct ASMInstr { + uint16_t sym; + uint16_t opcode; + uint16_t instr_type; + uint8_t nb_ops; + uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */ +} ASMInstr; + +typedef struct Operand { + uint32_t type; int8_t reg; /* register, -1 if none */ int8_t reg2; /* second register, -1 if none */ uint8_t shift; ExprValue e; } Operand; -static const uint8_t reg_to_size[5] = { +static const uint8_t reg_to_size[9] = { /* [OP_REG8] = 0, [OP_REG16] = 1, [OP_REG32] = 2, +#ifdef TCC_TARGET_X86_64 + [OP_REG64] = 3, +#endif */ - 0, 0, 1, 0, 2 + 0, 0, 1, 0, 2, 0, 0, 0, 3 }; - -#define WORD_PREFIX_OPCODE 0x66 #define NB_TEST_OPCODES 30 @@ -165,13 +220,20 @@ static const uint8_t segment_prefixes[] = { static const ASMInstr asm_instrs[] = { #define ALT(x) x +/* This removes a 0x0f in the second byte */ +#define O(o) ((uint64_t) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o))) +/* This constructs instr_type from opcode, type and group. */ +#define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? OPC_0F : 0)) #define DEF_ASM_OP0(name, opcode) -#define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 0 }, -#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 1, { op0 }}, -#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 2, { op0, op1 }}, -#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 3, { op0, op1, op2 }}, -#include "i386-asm.h" - +#define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0, { 0 } }, +#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }}, +#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }}, +#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }}, +#ifdef TCC_TARGET_X86_64 +# include "x86_64-asm.h" +#else +# include "i386-asm.h" +#endif /* last operation */ { 0, }, }; @@ -183,13 +245,16 @@ static const uint16_t op0_codes[] = { #define DEF_ASM_OP1(name, opcode, group, instr_type, op0) #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) -#include "i386-asm.h" +#ifdef TCC_TARGET_X86_64 +# include "x86_64-asm.h" +#else +# include "i386-asm.h" +#endif }; static inline int get_reg_shift(TCCState *s1) { int shift, v; - v = asm_int_expr(s1); switch(v) { case 1: @@ -212,21 +277,73 @@ static inline int get_reg_shift(TCCState *s1) return shift; } -static int asm_parse_reg(void) +#ifdef TCC_TARGET_X86_64 +static int asm_parse_numeric_reg(int t, unsigned int *type) { - int reg; + int reg = -1; + if (t >= TOK_IDENT && t < tok_ident) { + const char *s = table_ident[t - TOK_IDENT]->str; + char c; + *type = OP_REG64; + if (*s == 'c') { + s++; + *type = OP_CR; + } + if (*s++ != 'r') + return -1; + /* Don't allow leading '0'. */ + if ((c = *s++) >= '1' && c <= '9') + reg = c - '0'; + else + return -1; + if ((c = *s) >= '0' && c <= '5') + s++, reg = reg * 10 + c - '0'; + if (reg > 15) + return -1; + if ((c = *s) == 0) + ; + else if (*type != OP_REG64) + return -1; + else if (c == 'b' && !s[1]) + *type = OP_REG8; + else if (c == 'w' && !s[1]) + *type = OP_REG16; + else if (c == 'd' && !s[1]) + *type = OP_REG32; + else + return -1; + } + return reg; +} +#endif + +static int asm_parse_reg(unsigned int *type) +{ + int reg = 0; + *type = 0; if (tok != '%') goto error_32; next(); if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) { reg = tok - TOK_ASM_eax; - next(); - return reg; + *type = OP_REG32; +#ifdef TCC_TARGET_X86_64 + } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) { + reg = tok - TOK_ASM_rax; + *type = OP_REG64; + } else if (tok == TOK_ASM_rip) { + reg = -2; /* Probably should use different escape code. */ + *type = OP_REG64; + } else if ((reg = asm_parse_numeric_reg(tok, type)) >= 0 + && (*type == OP_REG32 || *type == OP_REG64)) { + ; +#endif } else { error_32: - expect("32 bit register"); - return 0; + expect("register"); } + next(); + return reg; } static void parse_operand(TCCState *s1, Operand *op) @@ -247,11 +364,11 @@ static void parse_operand(TCCState *s1, Operand *op) reg = tok - TOK_ASM_al; op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */ op->reg = reg & 7; - if ((op->type & OP_REG) && op->reg == TREG_EAX) + if ((op->type & OP_REG) && op->reg == TREG_XAX) op->type |= OP_EAX; - else if (op->type == OP_REG8 && op->reg == TREG_ECX) + else if (op->type == OP_REG8 && op->reg == TREG_XCX) op->type |= OP_CL; - else if (op->type == OP_REG16 && op->reg == TREG_EDX) + else if (op->type == OP_REG16 && op->reg == TREG_XDX) op->type |= OP_DX; } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) { op->type = OP_DB; @@ -267,7 +384,7 @@ static void parse_operand(TCCState *s1, Operand *op) next(); if (tok != TOK_PPNUM) goto reg_error; - p = tokc.cstr->data; + p = tokc.str.data; reg = p[0] - '0'; if ((unsigned)reg >= 8 || p[1] != '\0') goto reg_error; @@ -278,9 +395,16 @@ static void parse_operand(TCCState *s1, Operand *op) if (op->reg == 0) op->type |= OP_ST0; goto no_skip; +#ifdef TCC_TARGET_X86_64 + } else if (tok >= TOK_ASM_spl && tok <= TOK_ASM_dil) { + op->type = OP_REG8 | OP_REG8_LOW; + op->reg = 4 + tok - TOK_ASM_spl; + } else if ((op->reg = asm_parse_numeric_reg(tok, &op->type)) >= 0) { + ; +#endif } else { reg_error: - error("unknown register"); + tcc_error("unknown register %%%s", get_tok_str(tok, &tokc)); } next(); no_skip: ; @@ -289,8 +413,7 @@ static void parse_operand(TCCState *s1, Operand *op) next(); asm_expr(s1, &e); op->type = OP_IM32; - op->e.v = e.v; - op->e.sym = e.sym; + op->e = e; if (!op->e.sym) { if (op->e.v == (uint8_t)op->e.v) op->type |= OP_IM8; @@ -298,6 +421,10 @@ static void parse_operand(TCCState *s1, Operand *op) op->type |= OP_IM8S; if (op->e.v == (uint16_t)op->e.v) op->type |= OP_IM16; +#ifdef TCC_TARGET_X86_64 + if (op->e.v != (int32_t)op->e.v && op->e.v != (uint32_t)op->e.v) + op->type = OP_IM64; +#endif } } else { /* address(reg,reg2,shift) with all variants */ @@ -307,27 +434,42 @@ static void parse_operand(TCCState *s1, Operand *op) op->shift = 0; if (tok != '(') { asm_expr(s1, &e); - op->e.v = e.v; - op->e.sym = e.sym; + op->e = e; } else { - op->e.v = 0; - op->e.sym = NULL; + next(); + if (tok == '%') { + unget_tok('('); + op->e.v = 0; + op->e.sym = NULL; + } else { + /* bracketed offset expression */ + asm_expr(s1, &e); + if (tok != ')') + expect(")"); + next(); + op->e.v = e.v; + op->e.sym = e.sym; + } + op->e.pcrel = 0; } if (tok == '(') { + unsigned int type = 0; next(); if (tok != ',') { - op->reg = asm_parse_reg(); + op->reg = asm_parse_reg(&type); } if (tok == ',') { next(); if (tok != ',') { - op->reg2 = asm_parse_reg(); - } + op->reg2 = asm_parse_reg(&type); + } if (tok == ',') { next(); op->shift = get_reg_shift(s1); } } + if (type & OP_REG32) + op->type |= OP_EA32; skip(')'); } if (op->reg == -1 && op->reg2 == -1) @@ -337,46 +479,44 @@ static void parse_operand(TCCState *s1, Operand *op) } /* XXX: unify with C code output ? */ -static void gen_expr32(ExprValue *pe) +ST_FUNC void gen_expr32(ExprValue *pe) +{ + if (pe->pcrel) + /* If PC-relative, always set VT_SYM, even without symbol, + so as to force a relocation to be emitted. */ + gen_addrpc32(VT_SYM, pe->sym, pe->v); + else + gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v); +} + +#ifdef TCC_TARGET_X86_64 +ST_FUNC void gen_expr64(ExprValue *pe) { - if (pe->sym) - greloc(cur_text_section, pe->sym, ind, R_386_32); - gen_le32(pe->v); + gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v); } +#endif /* XXX: unify with C code output ? */ static void gen_disp32(ExprValue *pe) { - Sym *sym; - sym = pe->sym; - if (sym) { - if (sym->r == cur_text_section->sh_num) { - /* same section: we can output an absolute value. Note - that the TCC compiler behaves differently here because - it always outputs a relocation to ease (future) code - elimination in the linker */ - gen_le32(pe->v + (long)sym->next - ind - 4); - } else { - greloc(cur_text_section, sym, ind, R_386_PC32); - gen_le32(pe->v - 4); - } + Sym *sym = pe->sym; + if (sym && sym->r == cur_text_section->sh_num) { + /* same section: we can output an absolute value. Note + that the TCC compiler behaves differently here because + it always outputs a relocation to ease (future) code + elimination in the linker */ + gen_le32(pe->v + sym->jnext - ind - 4); } else { - /* put an empty PC32 relocation */ - put_elf_reloc(symtab_section, cur_text_section, - ind, R_386_PC32, 0); - gen_le32(pe->v - 4); + if (sym && sym->type.t == VT_VOID) { + sym->type.t = VT_FUNC; + sym->type.ref = NULL; + } + gen_addrpc32(VT_SYM, sym, pe->v); } } - -static void gen_le16(int v) -{ - g(v); - g(v >> 8); -} - /* generate the modrm operand */ -static inline void asm_modrm(int reg, Operand *op) +static inline int asm_modrm(int reg, Operand *op) { int mod, reg1, reg2, sib_reg1; @@ -384,8 +524,20 @@ static inline void asm_modrm(int reg, Operand *op) g(0xc0 + (reg << 3) + op->reg); } else if (op->reg == -1 && op->reg2 == -1) { /* displacement only */ +#ifdef TCC_TARGET_X86_64 + g(0x04 + (reg << 3)); + g(0x25); +#else + g(0x05 + (reg << 3)); +#endif + gen_expr32(&op->e); +#ifdef TCC_TARGET_X86_64 + } else if (op->reg == -2) { + ExprValue *pe = &op->e; g(0x05 + (reg << 3)); - gen_expr32(&op->e); + gen_addrpc32(pe->sym ? VT_SYM : 0, pe->sym, pe->v); + return ind; +#endif } else { sib_reg1 = op->reg; /* fist compute displacement encoding */ @@ -411,44 +563,158 @@ static inline void asm_modrm(int reg, Operand *op) reg2 = 4; /* indicate no index */ g((op->shift << 6) + (reg2 << 3) + sib_reg1); } - /* add offset */ if (mod == 0x40) { g(op->e.v); } else if (mod == 0x80 || op->reg == -1) { - gen_expr32(&op->e); + gen_expr32(&op->e); + } + } + return 0; +} + +#ifdef TCC_TARGET_X86_64 +#define REX_W 0x48 +#define REX_R 0x44 +#define REX_X 0x42 +#define REX_B 0x41 + +static void asm_rex(int width64, Operand *ops, int nb_ops, int *op_type, + int regi, int rmi) +{ + unsigned char rex = width64 ? 0x48 : 0; + int saw_high_8bit = 0; + int i; + if (rmi == -1) { + /* No mod/rm byte, but we might have a register op nevertheless + (we will add it to the opcode later). */ + for(i = 0; i < nb_ops; i++) { + if (op_type[i] & (OP_REG | OP_ST)) { + if (ops[i].reg >= 8) { + rex |= REX_B; + ops[i].reg -= 8; + } else if (ops[i].type & OP_REG8_LOW) + rex |= 0x40; + else if (ops[i].type & OP_REG8 && ops[i].reg >= 4) + /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */ + saw_high_8bit = ops[i].reg; + break; + } + } + } else { + if (regi != -1) { + if (ops[regi].reg >= 8) { + rex |= REX_R; + ops[regi].reg -= 8; + } else if (ops[regi].type & OP_REG8_LOW) + rex |= 0x40; + else if (ops[regi].type & OP_REG8 && ops[regi].reg >= 4) + /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */ + saw_high_8bit = ops[regi].reg; + } + if (ops[rmi].type & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_EA)) { + if (ops[rmi].reg >= 8) { + rex |= REX_B; + ops[rmi].reg -= 8; + } else if (ops[rmi].type & OP_REG8_LOW) + rex |= 0x40; + else if (ops[rmi].type & OP_REG8 && ops[rmi].reg >= 4) + /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */ + saw_high_8bit = ops[rmi].reg; + } + if (ops[rmi].type & OP_EA && ops[rmi].reg2 >= 8) { + rex |= REX_X; + ops[rmi].reg2 -= 8; + } + } + if (rex) { + if (saw_high_8bit) + tcc_error("can't encode register %%%ch when REX prefix is required", + "acdb"[saw_high_8bit-4]); + g(rex); + } +} +#endif + +static void maybe_print_stats (void) +{ + static int already = 1; + if (!already) + /* print stats about opcodes */ + { + const struct ASMInstr *pa; + int freq[4]; + int op_vals[500]; + int nb_op_vals, i, j; + + already = 1; + nb_op_vals = 0; + memset(freq, 0, sizeof(freq)); + for(pa = asm_instrs; pa->sym != 0; pa++) { + freq[pa->nb_ops]++; + //for(i=0;i<pa->nb_ops;i++) { + for(j=0;j<nb_op_vals;j++) { + //if (pa->op_type[i] == op_vals[j]) + if (pa->instr_type == op_vals[j]) + goto found; + } + //op_vals[nb_op_vals++] = pa->op_type[i]; + op_vals[nb_op_vals++] = pa->instr_type; + found: ; + //} + } + for(i=0;i<nb_op_vals;i++) { + int v = op_vals[i]; + //if ((v & (v - 1)) != 0) + printf("%3d: %08x\n", i, v); } + printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n", + (int)sizeof(asm_instrs), + (int)sizeof(asm_instrs) / (int)sizeof(ASMInstr), + freq[0], freq[1], freq[2], freq[3]); } } -static void asm_opcode(TCCState *s1, int opcode) +ST_FUNC void asm_opcode(TCCState *s1, int opcode) { const ASMInstr *pa; - int i, modrm_index, reg, v, op1, is_short_jmp, seg_prefix; - int nb_ops, s, ss; + int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc; + int nb_ops, s; Operand ops[MAX_OPERANDS], *pop; int op_type[3]; /* decoded op type */ + int alltypes; /* OR of all operand types */ + int autosize; + int p66; +#ifdef TCC_TARGET_X86_64 + int rex64; +#endif + + maybe_print_stats(); + /* force synthetic ';' after prefix instruction, so we can handle */ + /* one-line things like "rep stosb" instead of only "rep\nstosb" */ + if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz) + unget_tok(';'); /* get operands */ pop = ops; nb_ops = 0; seg_prefix = 0; + alltypes = 0; for(;;) { if (tok == ';' || tok == TOK_LINEFEED) break; if (nb_ops >= MAX_OPERANDS) { - error("incorrect number of operands"); + tcc_error("incorrect number of operands"); } parse_operand(s1, pop); if (tok == ':') { - if (pop->type != OP_SEG || seg_prefix) { - error("incorrect prefix"); - } + if (pop->type != OP_SEG || seg_prefix) + tcc_error("incorrect prefix"); seg_prefix = segment_prefixes[pop->reg]; next(); parse_operand(s1, pop); if (!(pop->type & OP_EA)) { - error("segment prefix must be followed by memory reference"); + tcc_error("segment prefix must be followed by memory reference"); } } pop++; @@ -458,35 +724,55 @@ static void asm_opcode(TCCState *s1, int opcode) next(); } - is_short_jmp = 0; s = 0; /* avoid warning */ - + /* optimize matching by using a lookup table (no hashing is needed !) */ for(pa = asm_instrs; pa->sym != 0; pa++) { + int it = pa->instr_type & OPCT_MASK; s = 0; - if (pa->instr_type & OPC_FARITH) { + if (it == OPC_FARITH) { v = opcode - pa->sym; if (!((unsigned)v < 8 * 6 && (v % 6) == 0)) continue; - } else if (pa->instr_type & OPC_ARITH) { - if (!(opcode >= pa->sym && opcode < pa->sym + 8 * 4)) + } else if (it == OPC_ARITH) { + if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX)) continue; - goto compute_size; - } else if (pa->instr_type & OPC_SHIFT) { - if (!(opcode >= pa->sym && opcode < pa->sym + 7 * 4)) + s = (opcode - pa->sym) % NBWLX; + if ((pa->instr_type & OPC_BWLX) == OPC_WLX) + { + /* We need to reject the xxxb opcodes that we accepted above. + Note that pa->sym for WLX opcodes is the 'w' token, + to get the 'b' token subtract one. */ + if (((opcode - pa->sym + 1) % NBWLX) == 0) + continue; + s++; + } + } else if (it == OPC_SHIFT) { + if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX)) continue; - goto compute_size; - } else if (pa->instr_type & OPC_TEST) { + s = (opcode - pa->sym) % NBWLX; + } else if (it == OPC_TEST) { if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES)) continue; + /* cmovxx is a test opcode but accepts multiple sizes. + TCC doesn't accept the suffixed mnemonic, instead we + simply force size autodetection always. */ + if (pa->instr_type & OPC_WLX) + s = NBWLX - 1; } else if (pa->instr_type & OPC_B) { - if (!(opcode >= pa->sym && opcode <= pa->sym + 3)) +#ifdef TCC_TARGET_X86_64 + /* Some instructions don't have the full size but only + bwl form. insb e.g. */ + if ((pa->instr_type & OPC_WLQ) != OPC_WLQ + && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1)) + continue; +#endif + if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX)) continue; - compute_size: - s = (opcode - pa->sym) & 3; - } else if (pa->instr_type & OPC_WL) { - if (!(opcode >= pa->sym && opcode <= pa->sym + 2)) + s = opcode - pa->sym; + } else if (pa->instr_type & OPC_WLX) { + if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1)) continue; s = opcode - pa->sym + 1; } else { @@ -495,7 +781,17 @@ static void asm_opcode(TCCState *s1, int opcode) } if (pa->nb_ops != nb_ops) continue; +#ifdef TCC_TARGET_X86_64 + /* Special case for moves. Selecting the IM64->REG64 form + should only be done if we really have an >32bit imm64, and that + is hardcoded. Ignore it here. */ + if (pa->opcode == 0xb0 && ops[0].type != OP_IM64 + && (ops[1].type & OP_REG) == OP_REG64 + && !(pa->instr_type & OPC_0F)) + continue; +#endif /* now decode and check each operand */ + alltypes = 0; for(i = 0; i < nb_ops; i++) { int op1, op2; op1 = pa->op_type[i]; @@ -505,73 +801,146 @@ static void asm_opcode(TCCState *s1, int opcode) v = OP_IM8 | OP_IM16 | OP_IM32; break; case OPT_REG: - v = OP_REG8 | OP_REG16 | OP_REG32; + v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64; break; case OPT_REGW: - v = OP_REG16 | OP_REG32; + v = OP_REG16 | OP_REG32 | OP_REG64; break; case OPT_IMW: v = OP_IM16 | OP_IM32; break; + case OPT_MMXSSE: + v = OP_MMX | OP_SSE; + break; + case OPT_DISP: + case OPT_DISP8: + v = OP_ADDR; + break; default: v = 1 << op2; break; } if (op1 & OPT_EA) v |= OP_EA; - op_type[i] = v; + op_type[i] = v; if ((ops[i].type & v) == 0) goto next; + alltypes |= ops[i].type; } /* all is matching ! */ break; next: ; } if (pa->sym == 0) { - if (opcode >= TOK_ASM_pusha && opcode <= TOK_ASM_emms) { + if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) { int b; - b = op0_codes[opcode - TOK_ASM_pusha]; + b = op0_codes[opcode - TOK_ASM_first]; if (b & 0xff00) g(b >> 8); g(b); return; + } else if (opcode <= TOK_ASM_alllast) { + tcc_error("bad operand with opcode '%s'", + get_tok_str(opcode, NULL)); } else { - error("unknown opcode '%s'", + tcc_error("unknown opcode '%s'", get_tok_str(opcode, NULL)); } } /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */ - if (s == 3) { - for(i = 0; s == 3 && i < nb_ops; i++) { + autosize = NBWLX-1; +#ifdef TCC_TARGET_X86_64 + /* XXX the autosize should rather be zero, to not have to adjust this + all the time. */ + if ((pa->instr_type & OPC_BWLQ) == OPC_B) + autosize = NBWLX-2; +#endif + if (s == autosize) { + /* Check for register operands providing hints about the size. + Start from the end, i.e. destination operands. This matters + only for opcodes accepting different sized registers, lar and lsl + are such opcodes. */ + for(i = nb_ops - 1; s == autosize && i >= 0; i--) { if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX))) s = reg_to_size[ops[i].type & OP_REG]; } - if (s == 3) { - if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) && + if (s == autosize) { + if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) && (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32))) s = 2; + else if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) && + (ops[0].type & OP_EA)) + s = NBWLX - 2; else - error("cannot infer opcode suffix"); + tcc_error("cannot infer opcode suffix"); } } +#ifdef TCC_TARGET_X86_64 + /* Generate addr32 prefix if needed */ + for(i = 0; i < nb_ops; i++) { + if (ops[i].type & OP_EA32) { + g(0x67); + break; + } + } +#endif /* generate data16 prefix if needed */ - ss = s; - if (s == 1 || (pa->instr_type & OPC_D16)) - g(WORD_PREFIX_OPCODE); - else if (s == 2) - s = 1; + p66 = 0; + if (s == 1) + p66 = 1; + else { + /* accepting mmx+sse in all operands --> needs 0x66 to + switch to sse mode. Accepting only sse in an operand --> is + already SSE insn and needs 0x66/f2/f3 handling. */ + for (i = 0; i < nb_ops; i++) + if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE) + && ops[i].type & OP_SSE) + p66 = 1; + } + if (p66) + g(0x66); +#ifdef TCC_TARGET_X86_64 + rex64 = 0; + if (pa->instr_type & OPC_48) + rex64 = 1; + else if (s == 3 || (alltypes & OP_REG64)) { + /* generate REX prefix */ + int default64 = 0; + for(i = 0; i < nb_ops; i++) { + if (op_type[i] == OP_REG64 && pa->opcode != 0xb8) { + /* If only 64bit regs are accepted in one operand + this is a default64 instruction without need for + REX prefixes, except for movabs(0xb8). */ + default64 = 1; + break; + } + } + /* XXX find better encoding for the default64 instructions. */ + if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop + && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl + && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw + && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq + && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp)) + && !default64) + rex64 = 1; + } +#endif + /* now generates the operation */ - if (pa->instr_type & OPC_FWAIT) + if (OPCT_IS(pa->instr_type, OPC_FWAIT)) g(0x9b); if (seg_prefix) g(seg_prefix); v = pa->opcode; - if (v == 0x69 || v == 0x69) { + if (pa->instr_type & OPC_0F) + v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff); + if ((v == 0x69 || v == 0x6b) && nb_ops == 2) { /* kludge for imul $im, %reg */ nb_ops = 3; ops[2] = ops[1]; + op_type[2] = op_type[1]; } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) { v--; /* int $3 case */ nb_ops = 0; @@ -585,27 +954,69 @@ static void asm_opcode(TCCState *s1, int opcode) nb_ops = 0; } else if (v <= 0x05) { /* arith case */ - v += ((opcode - TOK_ASM_addb) >> 2) << 3; - } else if ((pa->instr_type & (OPC_FARITH | OPC_MODRM)) == OPC_FARITH) { + v += ((opcode - TOK_ASM_addb) / NBWLX) << 3; + } else if ((pa->instr_type & (OPCT_MASK | OPC_MODRM)) == OPC_FARITH) { /* fpu arith case */ v += ((opcode - pa->sym) / 6) << 3; } + + /* search which operand will be used for modrm */ + modrm_index = -1; + modreg_index = -1; + if (pa->instr_type & OPC_MODRM) { + if (!nb_ops) { + /* A modrm opcode without operands is a special case (e.g. mfence). + It has a group and acts as if there's an register operand 0 + (ax). */ + i = 0; + ops[i].type = OP_REG; + ops[i].reg = 0; + goto modrm_found; + } + /* first look for an ea operand */ + for(i = 0;i < nb_ops; i++) { + if (op_type[i] & OP_EA) + goto modrm_found; + } + /* then if not found, a register or indirection (shift instructions) */ + for(i = 0;i < nb_ops; i++) { + if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR)) + goto modrm_found; + } +#ifdef ASM_DEBUG + tcc_error("bad op table"); +#endif + modrm_found: + modrm_index = i; + /* if a register is used in another operand then it is + used instead of group */ + for(i = 0;i < nb_ops; i++) { + int t = op_type[i]; + if (i != modrm_index && + (t & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) { + modreg_index = i; + break; + } + } + } +#ifdef TCC_TARGET_X86_64 + asm_rex (rex64, ops, nb_ops, op_type, modreg_index, modrm_index); +#endif + if (pa->instr_type & OPC_REG) { + /* mov $im, %reg case */ + if (v == 0xb0 && s >= 1) + v += 7; for(i = 0; i < nb_ops; i++) { if (op_type[i] & (OP_REG | OP_ST)) { v += ops[i].reg; break; } } - /* mov $im, %reg case */ - if (pa->opcode == 0xb0 && s >= 1) - v += 7; } if (pa->instr_type & OPC_B) - v += s; - if (pa->instr_type & OPC_TEST) - v += test_bits[opcode - pa->sym]; - if (pa->instr_type & OPC_SHORTJMP) { + v += s >= 1; + if (nb_ops == 1 && pa->op_type[0] == OPT_DISP8) { Sym *sym; int jmp_disp; @@ -615,122 +1026,106 @@ static void asm_opcode(TCCState *s1, int opcode) goto no_short_jump; if (sym->r != cur_text_section->sh_num) goto no_short_jump; - jmp_disp = ops[0].e.v + (long)sym->next - ind - 2; + jmp_disp = ops[0].e.v + sym->jnext - ind - 2 - (v >= 0xff); if (jmp_disp == (int8_t)jmp_disp) { /* OK to generate jump */ - is_short_jmp = 1; + ops[0].e.sym = 0; ops[0].e.v = jmp_disp; + op_type[0] = OP_IM8S; } else { no_short_jump: - if (pa->instr_type & OPC_JMP) { - /* long jump will be allowed. need to modify the - opcode slightly */ - if (v == 0xeb) - v = 0xe9; - else - v += 0x0f10; - } else { - error("invalid displacement"); - } + /* long jump will be allowed. need to modify the + opcode slightly */ + if (v == 0xeb) /* jmp */ + v = 0xe9; + else if (v == 0x70) /* jcc */ + v += 0x0f10; + else + tcc_error("invalid displacement"); } } - op1 = v >> 8; + if (OPCT_IS(pa->instr_type, OPC_TEST)) + v += test_bits[opcode - pa->sym]; + op1 = v >> 16; + if (op1) + g(op1); + op1 = (v >> 8) & 0xff; if (op1) g(op1); g(v); - - /* search which operand will used for modrm */ - modrm_index = 0; - if (pa->instr_type & OPC_SHIFT) { - reg = (opcode - pa->sym) >> 2; + + if (OPCT_IS(pa->instr_type, OPC_SHIFT)) { + reg = (opcode - pa->sym) / NBWLX; if (reg == 6) reg = 7; - } else if (pa->instr_type & OPC_ARITH) { - reg = (opcode - pa->sym) >> 2; - } else if (pa->instr_type & OPC_FARITH) { + } else if (OPCT_IS(pa->instr_type, OPC_ARITH)) { + reg = (opcode - pa->sym) / NBWLX; + } else if (OPCT_IS(pa->instr_type, OPC_FARITH)) { reg = (opcode - pa->sym) / 6; } else { reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7; } + + pc = 0; if (pa->instr_type & OPC_MODRM) { - /* first look for an ea operand */ - for(i = 0;i < nb_ops; i++) { - if (op_type[i] & OP_EA) - goto modrm_found; - } - /* then if not found, a register or indirection (shift instructions) */ - for(i = 0;i < nb_ops; i++) { - if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR)) - goto modrm_found; - } -#ifdef ASM_DEBUG - error("bad op table"); -#endif - modrm_found: - modrm_index = i; /* if a register is used in another operand then it is used instead of group */ - for(i = 0;i < nb_ops; i++) { - v = op_type[i]; - if (i != modrm_index && - (v & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) { - reg = ops[i].reg; - break; - } - } - - asm_modrm(reg, &ops[modrm_index]); + if (modreg_index >= 0) + reg = ops[modreg_index].reg; + pc = asm_modrm(reg, &ops[modrm_index]); } /* emit constants */ - if (pa->opcode == 0x9a || pa->opcode == 0xea) { +#ifndef TCC_TARGET_X86_64 + if (!(pa->instr_type & OPC_0F) + && (pa->opcode == 0x9a || pa->opcode == 0xea)) { /* ljmp or lcall kludge */ - gen_expr32(&ops[1].e); + gen_expr32(&ops[1].e); if (ops[0].e.sym) - error("cannot relocate"); + tcc_error("cannot relocate"); gen_le16(ops[0].e.v); - } else { - for(i = 0;i < nb_ops; i++) { - v = op_type[i]; - if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM8S | OP_ADDR)) { - /* if multiple sizes are given it means we must look - at the op size */ - if (v == (OP_IM8 | OP_IM16 | OP_IM32) || - v == (OP_IM16 | OP_IM32)) { - if (ss == 0) - v = OP_IM8; - else if (ss == 1) - v = OP_IM16; - else - v = OP_IM32; - } - if (v & (OP_IM8 | OP_IM8S)) { - if (ops[i].e.sym) - goto error_relocate; - g(ops[i].e.v); - } else if (v & OP_IM16) { - if (ops[i].e.sym) { - error_relocate: - error("cannot relocate"); - } - gen_le16(ops[i].e.v); - } else { - if (pa->instr_type & (OPC_JMP | OPC_SHORTJMP)) { - if (is_short_jmp) - g(ops[i].e.v); - else - gen_disp32(&ops[i].e); - } else { - gen_expr32(&ops[i].e); - } - } + return; + } +#endif + for(i = 0;i < nb_ops; i++) { + v = op_type[i]; + if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) { + /* if multiple sizes are given it means we must look + at the op size */ + if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) { + if (s == 0) + v = OP_IM8; + else if (s == 1) + v = OP_IM16; + else if (s == 2 || (v & OP_IM64) == 0) + v = OP_IM32; + else + v = OP_IM64; + } + + if ((v & (OP_IM8 | OP_IM8S | OP_IM16)) && ops[i].e.sym) + tcc_error("cannot relocate"); + + if (v & (OP_IM8 | OP_IM8S)) { + g(ops[i].e.v); + } else if (v & OP_IM16) { + gen_le16(ops[i].e.v); +#ifdef TCC_TARGET_X86_64 + } else if (v & OP_IM64) { + gen_expr64(&ops[i].e); +#endif + } else if (pa->op_type[i] == OPT_DISP || pa->op_type[i] == OPT_DISP8) { + gen_disp32(&ops[i].e); + } else { + gen_expr32(&ops[i].e); } } } -} -#define NB_SAVED_REGS 3 -#define NB_ASM_REGS 8 + /* after immediate operands, adjust pc-relative address */ + if (pc) + add32le(cur_text_section->data + pc - 4, pc - ind); +} /* return the constraint priority (we allocate first the lowest numbered constraints) */ @@ -761,18 +1156,21 @@ static inline int constraint_priority(const char *str) pr = 2; break; case 'r': + case 'R': + case 'p': pr = 3; break; case 'N': case 'M': case 'I': + case 'e': case 'i': case 'm': case 'g': pr = 4; break; default: - error("unknown constraint '%c'", c); + tcc_error("unknown constraint '%c'", c); pr = 0; } if (pr > priority) @@ -788,13 +1186,35 @@ static const char *skip_constraint_modifiers(const char *p) return p; } +/* If T (a token) is of the form "%reg" returns the register + number and type, otherwise return -1. */ +ST_FUNC int asm_parse_regvar (int t) +{ + const char *s; + Operand op; + if (t < TOK_IDENT) + return -1; + s = table_ident[t - TOK_IDENT]->str; + if (s[0] != '%') + return -1; + t = tok_alloc(s+1, strlen(s)-1)->tok; + unget_tok(t); + unget_tok('%'); + parse_operand(tcc_state, &op); + /* Accept only integer regs for now. */ + if (op.type & OP_REG) + return op.reg; + else + return -1; +} + #define REG_OUT_MASK 0x01 #define REG_IN_MASK 0x02 #define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask) -static void asm_compute_constraints(ASMOperand *operands, - int nb_operands, int nb_outputs, +ST_FUNC void asm_compute_constraints(ASMOperand *operands, + int nb_operands, int nb_outputs, const uint8_t *clobber_regs, int *pout_reg) { @@ -803,7 +1223,7 @@ static void asm_compute_constraints(ASMOperand *operands, int i, j, k, p1, p2, tmp, reg, c, reg_mask; const char *str; uint8_t regs_allocated[NB_ASM_REGS]; - + /* init fields */ for(i=0;i<nb_operands;i++) { op = &operands[i]; @@ -823,24 +1243,29 @@ static void asm_compute_constraints(ASMOperand *operands, /* this is a reference to another constraint */ k = find_constraint(operands, nb_operands, str, NULL); if ((unsigned)k >= i || i < nb_outputs) - error("invalid reference in constraint %d ('%s')", + tcc_error("invalid reference in constraint %d ('%s')", i, str); op->ref_index = k; if (operands[k].input_index >= 0) - error("cannot reference twice the same operand"); + tcc_error("cannot reference twice the same operand"); operands[k].input_index = i; op->priority = 5; + } else if ((op->vt->r & VT_VALMASK) == VT_LOCAL + && op->vt->sym + && (reg = op->vt->sym->r & VT_VALMASK) < VT_CONST) { + op->priority = 1; + op->reg = reg; } else { op->priority = constraint_priority(str); } } - + /* sort operands according to their priority */ for(i=0;i<nb_operands;i++) sorted_op[i] = i; for(i=0;i<nb_operands - 1;i++) { for(j=i+1;j<nb_operands;j++) { - p1 = operands[sorted_op[i]].priority; + p1 = operands[sorted_op[i]].priority; p2 = operands[sorted_op[j]].priority; if (p2 < p1) { tmp = sorted_op[i]; @@ -857,9 +1282,9 @@ static void asm_compute_constraints(ASMOperand *operands, regs_allocated[i] = 0; } /* esp cannot be used */ - regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK; + regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK; /* ebp cannot be used yet */ - regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK; + regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK; /* allocate registers and generate corresponding asm moves */ for(i=0;i<nb_operands;i++) { @@ -877,6 +1302,12 @@ static void asm_compute_constraints(ASMOperand *operands, } else { reg_mask = REG_IN_MASK; } + if (op->reg >= 0) { + if (is_reg_allocated(op->reg)) + tcc_error("asm regvar requests register that's taken already"); + reg = op->reg; + goto reg_found; + } try_next: c = *str++; switch(c) { @@ -887,30 +1318,30 @@ static void asm_compute_constraints(ASMOperand *operands, /* FALL THRU */ case '&': if (j >= nb_outputs) - error("'%c' modifier can only be applied to outputs", c); + tcc_error("'%c' modifier can only be applied to outputs", c); reg_mask = REG_IN_MASK | REG_OUT_MASK; goto try_next; case 'A': /* allocate both eax and edx */ - if (is_reg_allocated(TREG_EAX) || - is_reg_allocated(TREG_EDX)) + if (is_reg_allocated(TREG_XAX) || + is_reg_allocated(TREG_XDX)) goto try_next; op->is_llong = 1; - op->reg = TREG_EAX; - regs_allocated[TREG_EAX] |= reg_mask; - regs_allocated[TREG_EDX] |= reg_mask; + op->reg = TREG_XAX; + regs_allocated[TREG_XAX] |= reg_mask; + regs_allocated[TREG_XDX] |= reg_mask; break; case 'a': - reg = TREG_EAX; + reg = TREG_XAX; goto alloc_reg; case 'b': reg = 3; goto alloc_reg; case 'c': - reg = TREG_ECX; + reg = TREG_XCX; goto alloc_reg; case 'd': - reg = TREG_EDX; + reg = TREG_XDX; goto alloc_reg; case 'S': reg = 6; @@ -929,6 +1360,8 @@ static void asm_compute_constraints(ASMOperand *operands, } goto try_next; case 'r': + case 'R': + case 'p': /* A general address, for x86(64) any register is acceptable*/ /* any general register */ for(reg = 0; reg < 8; reg++) { if (!is_reg_allocated(reg)) @@ -941,6 +1374,7 @@ static void asm_compute_constraints(ASMOperand *operands, op->reg = reg; regs_allocated[reg] |= reg_mask; break; + case 'e': case 'i': if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST)) goto try_next; @@ -978,7 +1412,7 @@ static void asm_compute_constraints(ASMOperand *operands, } break; default: - error("asm constraint %d ('%s') could not be satisfied", + tcc_error("asm constraint %d ('%s') could not be satisfied", j, op->constraint); break; } @@ -988,34 +1422,34 @@ static void asm_compute_constraints(ASMOperand *operands, operands[op->input_index].is_llong = op->is_llong; } } - + /* compute out_reg. It is used to store outputs registers to memory locations references by pointers (VT_LLOCAL case) */ *pout_reg = -1; for(i=0;i<nb_operands;i++) { op = &operands[i]; - if (op->reg >= 0 && + if (op->reg >= 0 && (op->vt->r & VT_VALMASK) == VT_LLOCAL && !op->is_memory) { for(reg = 0; reg < 8; reg++) { if (!(regs_allocated[reg] & REG_OUT_MASK)) goto reg_found2; } - error("could not find free output register for reloading"); + tcc_error("could not find free output register for reloading"); reg_found2: *pout_reg = reg; break; } } - + /* print sorted constraints */ #ifdef ASM_DEBUG for(i=0;i<nb_operands;i++) { j = sorted_op[i]; op = &operands[j]; - printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n", - j, - op->id ? get_tok_str(op->id, NULL) : "", + printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n", + j, + op->id ? get_tok_str(op->id, NULL) : "", op->constraint, op->vt->r, op->reg); @@ -1025,7 +1459,7 @@ static void asm_compute_constraints(ASMOperand *operands, #endif } -static void subst_asm_operand(CString *add_str, +ST_FUNC void subst_asm_operand(CString *add_str, SValue *sv, int modifier) { int r, reg, size, val; @@ -1033,42 +1467,70 @@ static void subst_asm_operand(CString *add_str, r = sv->r; if ((r & VT_VALMASK) == VT_CONST) { - if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n') + if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' && + modifier != 'P') cstr_ccat(add_str, '$'); if (r & VT_SYM) { - cstr_cat(add_str, get_tok_str(sv->sym->v, NULL)); - if (sv->c.i != 0) { - cstr_ccat(add_str, '+'); - } else { - return; - } + const char *name = get_tok_str(sv->sym->v, NULL); + if (sv->sym->v >= SYM_FIRST_ANOM) { + /* In case of anonymous symbols ("L.42", used + for static data labels) we can't find them + in the C symbol table when later looking up + this name. So enter them now into the asm label + list when we still know the symbol. */ + get_asm_sym(tok_alloc(name, strlen(name))->tok, sv->sym); + } + cstr_cat(add_str, name, -1); + if ((uint32_t)sv->c.i == 0) + goto no_offset; + cstr_ccat(add_str, '+'); } val = sv->c.i; if (modifier == 'n') val = -val; - snprintf(buf, sizeof(buf), "%d", sv->c.i); - cstr_cat(add_str, buf); + snprintf(buf, sizeof(buf), "%d", (int)sv->c.i); + cstr_cat(add_str, buf, -1); + no_offset:; +#ifdef TCC_TARGET_X86_64 + if (r & VT_LVAL) + cstr_cat(add_str, "(%rip)", -1); +#endif } else if ((r & VT_VALMASK) == VT_LOCAL) { - snprintf(buf, sizeof(buf), "%d(%%ebp)", sv->c.i); - cstr_cat(add_str, buf); +#ifdef TCC_TARGET_X86_64 + snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i); +#else + snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i); +#endif + cstr_cat(add_str, buf, -1); } else if (r & VT_LVAL) { reg = r & VT_VALMASK; if (reg >= VT_CONST) - error("internal compiler error"); - snprintf(buf, sizeof(buf), "(%%%s)", - get_tok_str(TOK_ASM_eax + reg, NULL)); - cstr_cat(add_str, buf); + tcc_error("internal compiler error"); + snprintf(buf, sizeof(buf), "(%%%s)", +#ifdef TCC_TARGET_X86_64 + get_tok_str(TOK_ASM_rax + reg, NULL) +#else + get_tok_str(TOK_ASM_eax + reg, NULL) +#endif + ); + cstr_cat(add_str, buf, -1); } else { /* register case */ reg = r & VT_VALMASK; if (reg >= VT_CONST) - error("internal compiler error"); + tcc_error("internal compiler error"); /* choose register operand size */ - if ((sv->type.t & VT_BTYPE) == VT_BYTE) + if ((sv->type.t & VT_BTYPE) == VT_BYTE || + (sv->type.t & VT_BTYPE) == VT_BOOL) size = 1; else if ((sv->type.t & VT_BTYPE) == VT_SHORT) size = 2; +#ifdef TCC_TARGET_X86_64 + else if ((sv->type.t & VT_BTYPE) == VT_LLONG || + (sv->type.t & VT_BTYPE) == VT_PTR) + size = 8; +#endif else size = 4; if (size == 1 && reg >= 4) @@ -1076,14 +1538,20 @@ static void subst_asm_operand(CString *add_str, if (modifier == 'b') { if (reg >= 4) - error("cannot use byte register"); + tcc_error("cannot use byte register"); size = 1; } else if (modifier == 'h') { if (reg >= 4) - error("cannot use byte register"); + tcc_error("cannot use byte register"); size = -1; } else if (modifier == 'w') { size = 2; + } else if (modifier == 'k') { + size = 4; +#ifdef TCC_TARGET_X86_64 + } else if (modifier == 'q') { + size = 8; +#endif } switch(size) { @@ -1099,14 +1567,19 @@ static void subst_asm_operand(CString *add_str, default: reg = TOK_ASM_eax + reg; break; +#ifdef TCC_TARGET_X86_64 + case 8: + reg = TOK_ASM_rax + reg; + break; +#endif } snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL)); - cstr_cat(add_str, buf); + cstr_cat(add_str, buf, -1); } } /* generate prolog and epilog code for asm statement */ -static void asm_gen_code(ASMOperand *operands, int nb_operands, +ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands, int nb_outputs, int is_output, uint8_t *clobber_regs, int out_reg) @@ -1114,7 +1587,18 @@ static void asm_gen_code(ASMOperand *operands, int nb_operands, uint8_t regs_allocated[NB_ASM_REGS]; ASMOperand *op; int i, reg; - static uint8_t reg_saved[NB_SAVED_REGS] = { 3, 6, 7 }; + + /* Strictly speaking %Xbp and %Xsp should be included in the + call-preserved registers, but currently it doesn't matter. */ +#ifdef TCC_TARGET_X86_64 +#ifdef TCC_TARGET_PE + static uint8_t reg_saved[] = { 3, 6, 7, 12, 13, 14, 15 }; +#else + static uint8_t reg_saved[] = { 3, 12, 13, 14, 15 }; +#endif +#else + static uint8_t reg_saved[] = { 3, 6, 7 }; +#endif /* mark all used registers */ memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated)); @@ -1125,10 +1609,13 @@ static void asm_gen_code(ASMOperand *operands, int nb_operands, } if (!is_output) { /* generate reg save code */ - for(i = 0; i < NB_SAVED_REGS; i++) { + for(i = 0; i < sizeof(reg_saved)/sizeof(reg_saved[0]); i++) { reg = reg_saved[i]; - if (regs_allocated[reg]) + if (regs_allocated[reg]) { + if (reg >= 8) + g(0x41), reg-=8; g(0x50 + reg); + } } /* generate load code */ @@ -1141,7 +1628,8 @@ static void asm_gen_code(ASMOperand *operands, int nb_operands, output cases) */ SValue sv; sv = *op->vt; - sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL; + sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL; + sv.type.t = VT_PTR; load(op->reg, &sv); } else if (i >= nb_outputs || op->is_rw) { /* load value in register */ @@ -1149,8 +1637,8 @@ static void asm_gen_code(ASMOperand *operands, int nb_operands, if (op->is_llong) { SValue sv; sv = *op->vt; - sv.c.ul += 4; - load(TREG_EDX, &sv); + sv.c.i += 4; + load(TREG_XDX, &sv); } } } @@ -1165,8 +1653,10 @@ static void asm_gen_code(ASMOperand *operands, int nb_operands, SValue sv; sv = *op->vt; sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL; + sv.type.t = VT_PTR; load(out_reg, &sv); + sv = *op->vt; sv.r = (sv.r & ~VT_VALMASK) | out_reg; store(op->reg, &sv); } @@ -1175,28 +1665,35 @@ static void asm_gen_code(ASMOperand *operands, int nb_operands, if (op->is_llong) { SValue sv; sv = *op->vt; - sv.c.ul += 4; - store(TREG_EDX, &sv); + sv.c.i += 4; + store(TREG_XDX, &sv); } } } } /* generate reg restore code */ - for(i = NB_SAVED_REGS - 1; i >= 0; i--) { + for(i = sizeof(reg_saved)/sizeof(reg_saved[0]) - 1; i >= 0; i--) { reg = reg_saved[i]; - if (regs_allocated[reg]) + if (regs_allocated[reg]) { + if (reg >= 8) + g(0x41), reg-=8; g(0x58 + reg); + } } } } -static void asm_clobber(uint8_t *clobber_regs, const char *str) +ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str) { int reg; TokenSym *ts; +#ifdef TCC_TARGET_X86_64 + unsigned int type; +#endif - if (!strcmp(str, "memory") || - !strcmp(str, "cc")) + if (!strcmp(str, "memory") || + !strcmp(str, "cc") || + !strcmp(str, "flags")) return; ts = tok_alloc(str, strlen(str)); reg = ts->tok; @@ -1204,8 +1701,14 @@ static void asm_clobber(uint8_t *clobber_regs, const char *str) reg -= TOK_ASM_eax; } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) { reg -= TOK_ASM_ax; +#ifdef TCC_TARGET_X86_64 + } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) { + reg -= TOK_ASM_rax; + } else if ((reg = asm_parse_numeric_reg(reg, &type)) >= 0) { + ; +#endif } else { - error("invalid clobber register '%s'", str); + tcc_error("invalid clobber register '%s'", str); } clobber_regs[reg] = 1; } |