Diffstat (limited to 'tcg')
-rw-r--r--  tcg/README                |   23
-rw-r--r--  tcg/aarch64/tcg-target.c  | 1274
-rw-r--r--  tcg/aarch64/tcg-target.h  |   37
-rw-r--r--  tcg/arm/tcg-target.c      |  204
-rw-r--r--  tcg/arm/tcg-target.h      |    5
-rw-r--r--  tcg/i386/tcg-target.c     |  131
-rw-r--r--  tcg/i386/tcg-target.h     |    5
-rw-r--r--  tcg/ia64/tcg-target.c     |  721
-rw-r--r--  tcg/ia64/tcg-target.h     |    9
-rw-r--r--  tcg/mips/tcg-target.c     | 1942
-rw-r--r--  tcg/mips/tcg-target.h     |   20
-rw-r--r--  tcg/optimize.c            |  380
-rw-r--r--  tcg/ppc/tcg-target.c      | 3211
-rw-r--r--  tcg/ppc/tcg-target.h      |  117
-rw-r--r--  tcg/ppc64/tcg-target.c    | 2210
-rw-r--r--  tcg/ppc64/tcg-target.h    |  132
-rw-r--r--  tcg/s390/tcg-target.c     |  889
-rw-r--r--  tcg/s390/tcg-target.h     |    5
-rw-r--r--  tcg/sparc/tcg-target.c    | 1005
-rw-r--r--  tcg/sparc/tcg-target.h    |   22
-rw-r--r--  tcg/tcg-be-ldst.h         |    6
-rw-r--r--  tcg/tcg-op.h              |  283
-rw-r--r--  tcg/tcg-opc.h             |  124
-rw-r--r--  tcg/tcg-runtime.h         |   30
-rw-r--r--  tcg/tcg.c                 |  559
-rw-r--r--  tcg/tcg.h                 |  111
-rw-r--r--  tcg/tci/tcg-target.c      |  131
-rw-r--r--  tcg/tci/tcg-target.h      |   12
28 files changed, 5846 insertions, 7752 deletions
diff --git a/tcg/README b/tcg/README
index f1782123b..a550ff176 100644
--- a/tcg/README
+++ b/tcg/README
@@ -36,6 +36,12 @@ or a memory location which is stored in a register outside QEMU TBs
 A TCG "basic block" corresponds to a list of instructions terminated
 by a branch instruction.
 
+An operation with "undefined behavior" may result in a crash.
+
+An operation with "unspecified behavior" shall not crash. However,
+the result may be one of several possibilities so may be considered
+an "undefined result".
+
 3) Intermediate representation
 
 3.1) Introduction
@@ -239,23 +245,25 @@ t0=t1|~t2
 
 * shl_i32/i64 t0, t1, t2
 
-t0=t1 << t2. Undefined behavior if t2 < 0 or t2 >= 32 (resp 64)
+t0=t1 << t2. Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
 
 * shr_i32/i64 t0, t1, t2
 
-t0=t1 >> t2 (unsigned). Undefined behavior if t2 < 0 or t2 >= 32 (resp 64)
+t0=t1 >> t2 (unsigned). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
 
 * sar_i32/i64 t0, t1, t2
 
-t0=t1 >> t2 (signed). Undefined behavior if t2 < 0 or t2 >= 32 (resp 64)
+t0=t1 >> t2 (signed). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
 
 * rotl_i32/i64 t0, t1, t2
 
-Rotation of t2 bits to the left. Undefined behavior if t2 < 0 or t2 >= 32 (resp 64)
+Rotation of t2 bits to the left.
+Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
 
 * rotr_i32/i64 t0, t1, t2
 
-Rotation of t2 bits to the right. Undefined behavior if t2 < 0 or t2 >= 32 (resp 64)
+Rotation of t2 bits to the right.
+Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
 
 ********* Misc
 
@@ -306,6 +314,11 @@ This operation would be equivalent to
 
   dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00)
 
+* trunc_shr_i32 t0, t1, pos
+
+For 64-bit hosts only, right shift the 64-bit input T1 by POS and
+truncate to 32-bit output T0. Depending on the host, this may be
+a simple mov/shift, or may require additional canonicalization.
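
[Editorial aside, not part of the patch: the semantics introduced in the README hunk above are easy to state in plain C. The sketch below is illustrative only; the function names are hypothetical and this is not how TCG itself lowers these ops.]

    #include <stdint.h>

    /* trunc_shr_i32 t0, t1, pos: shift the 64-bit input right by pos and
       keep the low 32 bits.  On a 64-bit host this is at most a shift plus
       whatever canonicalization of 32-bit values that host requires. */
    static inline uint32_t trunc_shr_i32_ref(uint64_t t1, unsigned pos)
    {
        return (uint32_t)(t1 >> pos);
    }

    /* "Unspecified behavior" for an out-of-range shift count means some
       result must still be produced (no crash).  A typical outcome is that
       the host's shift instruction uses the count modulo the operand width,
       as sketched here for shl_i32. */
    static inline uint32_t shl_i32_ref(uint32_t t1, uint32_t t2)
    {
        return t1 << (t2 & 31);
    }
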
********* Conditional moves diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c index 661a5af81..56dae66a3 100644 --- a/tcg/aarch64/tcg-target.c +++ b/tcg/aarch64/tcg-target.c @@ -23,34 +23,26 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7", "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15", "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23", - "%x24", "%x25", "%x26", "%x27", "%x28", - "%fp", /* frame pointer */ - "%lr", /* link register */ - "%sp", /* stack pointer */ + "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp", }; #endif /* NDEBUG */ -#ifdef TARGET_WORDS_BIGENDIAN - #define TCG_LDST_BSWAP 1 -#else - #define TCG_LDST_BSWAP 0 -#endif - static const int tcg_target_reg_alloc_order[] = { TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */ - TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, TCG_REG_X12, - TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, + TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, + TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, TCG_REG_X16, TCG_REG_X17, - TCG_REG_X18, TCG_REG_X19, /* will not use these, see tcg_target_init */ - TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, - TCG_REG_X8, /* will not use, see tcg_target_init */ + /* X18 reserved by system */ + /* X19 reserved for AREG0 */ + /* X29 reserved as fp */ + /* X30 reserved as temporary */ }; static const int tcg_target_call_iarg_regs[8] = { @@ -61,60 +53,53 @@ static const int tcg_target_call_oarg_regs[1] = { TCG_REG_X0 }; -#define TCG_REG_TMP TCG_REG_X8 +#define TCG_REG_TMP TCG_REG_X30 #ifndef CONFIG_SOFTMMU -# if defined(CONFIG_USE_GUEST_BASE) -# define TCG_REG_GUEST_BASE TCG_REG_X28 +# ifdef CONFIG_USE_GUEST_BASE +# define TCG_REG_GUEST_BASE TCG_REG_X28 # else -# define TCG_REG_GUEST_BASE TCG_REG_XZR +# define TCG_REG_GUEST_BASE TCG_REG_XZR # endif #endif -static inline void reloc_pc26(void *code_ptr, intptr_t target) +static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) { - intptr_t offset = (target - (intptr_t)code_ptr) / 4; + ptrdiff_t offset = target - code_ptr; + assert(offset == sextract64(offset, 0, 26)); /* read instruction, mask away previous PC_REL26 parameter contents, set the proper offset, then write back the instruction. */ - uint32_t insn = *(uint32_t *)code_ptr; - insn = deposit32(insn, 0, 26, offset); - *(uint32_t *)code_ptr = insn; + *code_ptr = deposit32(*code_ptr, 0, 26, offset); } -static inline void reloc_pc19(void *code_ptr, intptr_t target) +static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) { - intptr_t offset = (target - (intptr_t)code_ptr) / 4; - /* read instruction, mask away previous PC_REL19 parameter contents, - set the proper offset, then write back the instruction. 
*/ - uint32_t insn = *(uint32_t *)code_ptr; - insn = deposit32(insn, 5, 19, offset); - *(uint32_t *)code_ptr = insn; + ptrdiff_t offset = target - code_ptr; + assert(offset == sextract64(offset, 0, 19)); + *code_ptr = deposit32(*code_ptr, 5, 19, offset); } -static inline void patch_reloc(uint8_t *code_ptr, int type, +static inline void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { - value += addend; - + assert(addend == 0); switch (type) { case R_AARCH64_JUMP26: case R_AARCH64_CALL26: - reloc_pc26(code_ptr, value); + reloc_pc26(code_ptr, (tcg_insn_unit *)value); break; case R_AARCH64_CONDBR19: - reloc_pc19(code_ptr, value); + reloc_pc19(code_ptr, (tcg_insn_unit *)value); break; - default: tcg_abort(); } } -#define TCG_CT_CONST_IS32 0x100 -#define TCG_CT_CONST_AIMM 0x200 -#define TCG_CT_CONST_LIMM 0x400 -#define TCG_CT_CONST_ZERO 0x800 -#define TCG_CT_CONST_MONE 0x1000 +#define TCG_CT_CONST_AIMM 0x100 +#define TCG_CT_CONST_LIMM 0x200 +#define TCG_CT_CONST_ZERO 0x400 +#define TCG_CT_CONST_MONE 0x800 /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, @@ -139,9 +124,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3); #endif break; - case 'w': /* The operand should be considered 32-bit. */ - ct->ct |= TCG_CT_CONST_IS32; - break; case 'A': /* Valid for arithmetic immediate (positive or negative). */ ct->ct |= TCG_CT_CONST_AIMM; break; @@ -188,7 +170,7 @@ static inline bool is_limm(uint64_t val) return (val & (val - 1)) == 0; } -static int tcg_target_const_match(tcg_target_long val, +static int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct) { int ct = arg_ct->ct; @@ -196,7 +178,7 @@ static int tcg_target_const_match(tcg_target_long val, if (ct & TCG_CT_CONST) { return 1; } - if (ct & TCG_CT_CONST_IS32) { + if (type == TCG_TYPE_I32) { val = (int32_t)val; } if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { @@ -250,19 +232,12 @@ static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { [TCG_COND_LEU] = COND_LS, }; -/* opcodes for LDR / STR instructions with base + simm9 addressing */ -enum aarch64_ldst_op_data { /* size of the data moved */ - LDST_8 = 0x38, - LDST_16 = 0x78, - LDST_32 = 0xb8, - LDST_64 = 0xf8, -}; -enum aarch64_ldst_op_type { /* type of operation */ - LDST_ST = 0x0, /* store */ - LDST_LD = 0x4, /* load */ - LDST_LD_S_X = 0x8, /* load and sign-extend into Xt */ - LDST_LD_S_W = 0xc, /* load and sign-extend into Wt */ -}; +typedef enum { + LDST_ST = 0, /* store */ + LDST_LD = 1, /* load */ + LDST_LD_S_X = 2, /* load and sign-extend into Xt */ + LDST_LD_S_W = 3, /* load and sign-extend into Wt */ +} AArch64LdstType; /* We encode the format of the insn into the beginning of the name, so that we can have the preprocessor help "typecheck" the insn vs the output @@ -270,6 +245,48 @@ enum aarch64_ldst_op_type { /* type of operation */ use the section number of the architecture reference manual in which the instruction group is described. */ typedef enum { + /* Compare and branch (immediate). */ + I3201_CBZ = 0x34000000, + I3201_CBNZ = 0x35000000, + + /* Conditional branch (immediate). */ + I3202_B_C = 0x54000000, + + /* Unconditional branch (immediate). */ + I3206_B = 0x14000000, + I3206_BL = 0x94000000, + + /* Unconditional branch (register). */ + I3207_BR = 0xd61f0000, + I3207_BLR = 0xd63f0000, + I3207_RET = 0xd65f0000, + + /* Load/store register. 
Described here as 3.3.12, but the helper + that emits them can transform to 3.3.10 or 3.3.13. */ + I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, + I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, + I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, + I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, + + I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, + I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, + I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, + I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, + + I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, + I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, + + I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, + I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, + I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, + + I3312_TO_I3310 = 0x00206800, + I3312_TO_I3313 = 0x01000000, + + /* Load/store register pair instructions. */ + I3314_LDP = 0x28400000, + I3314_STP = 0x28000000, + /* Add/subtract immediate instructions. */ I3401_ADDI = 0x11000000, I3401_ADDSI = 0x31000000, @@ -294,6 +311,10 @@ typedef enum { I3405_MOVZ = 0x52800000, I3405_MOVK = 0x72800000, + /* PC relative addressing instructions. */ + I3406_ADR = 0x10000000, + I3406_ADRP = 0x90000000, + /* Add/subtract shifted register instructions (without a shift). */ I3502_ADD = 0x0b000000, I3502_ADDS = 0x2b000000, @@ -311,6 +332,11 @@ typedef enum { I3506_CSEL = 0x1a800000, I3506_CSINC = 0x1a800400, + /* Data-processing (1 source) instructions. */ + I3507_REV16 = 0x5ac00400, + I3507_REV32 = 0x5ac00800, + I3507_REV64 = 0x5ac00c00, + /* Data-processing (2 source) instructions. */ I3508_LSLV = 0x1ac02000, I3508_LSRV = 0x1ac02400, @@ -335,87 +361,51 @@ typedef enum { I3510_ANDS = 0x6a000000, } AArch64Insn; -static inline enum aarch64_ldst_op_data -aarch64_ldst_get_data(TCGOpcode tcg_op) +static inline uint32_t tcg_in32(TCGContext *s) { - switch (tcg_op) { - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - return LDST_8; - - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - return LDST_16; - - case INDEX_op_ld_i32: - case INDEX_op_st_i32: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_st32_i64: - return LDST_32; + uint32_t v = *(uint32_t *)s->code_ptr; + return v; +} - case INDEX_op_ld_i64: - case INDEX_op_st_i64: - return LDST_64; +/* Emit an opcode with "type-checking" of the format. */ +#define tcg_out_insn(S, FMT, OP, ...) 
\ + glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) - default: - tcg_abort(); - } +static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rt, int imm19) +{ + tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); } -static inline enum aarch64_ldst_op_type -aarch64_ldst_get_type(TCGOpcode tcg_op) +static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, + TCGCond c, int imm19) { - switch (tcg_op) { - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return LDST_ST; - - case INDEX_op_ld8u_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - case INDEX_op_ld_i64: - return LDST_LD; - - case INDEX_op_ld8s_i32: - case INDEX_op_ld16s_i32: - return LDST_LD_S_W; - - case INDEX_op_ld8s_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32s_i64: - return LDST_LD_S_X; + tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); +} - default: - tcg_abort(); - } +static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) +{ + tcg_out32(s, insn | (imm26 & 0x03ffffff)); } -static inline uint32_t tcg_in32(TCGContext *s) +static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) { - uint32_t v = *(uint32_t *)s->code_ptr; - return v; + tcg_out32(s, insn | rn << 5); } -/* Emit an opcode with "type-checking" of the format. */ -#define tcg_out_insn(S, FMT, OP, ...) \ - glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) +static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, + TCGReg r1, TCGReg r2, TCGReg rn, + tcg_target_long ofs, bool pre, bool w) +{ + insn |= 1u << 31; /* ext */ + insn |= pre << 24; + insn |= w << 23; + + assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); + insn |= (ofs & (0x7f << 3)) << (15 - 3); + + tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); +} static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, TCGReg rd, TCGReg rn, uint64_t aimm) @@ -457,6 +447,12 @@ static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); } +static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, + TCGReg rd, int64_t disp) +{ + tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); +} + /* This function is for both 3.5.2 (Add/Subtract shifted register), for the rare occasion when we actually want to supply a shift amount. */ static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn, @@ -487,32 +483,37 @@ static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext, | tcg_cond_to_aarch64[c] << 12); } +static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn) +{ + tcg_out32(s, insn | ext << 31 | rn << 5 | rd); +} + static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext, TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra) { tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd); } +static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, + TCGReg rd, TCGReg base, TCGReg regoff) +{ + /* Note the AArch64Insn constants above are for C3.3.12. Adjust. 
*/ + tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | base << 5 | rd); +} + -static inline void tcg_out_ldst_9(TCGContext *s, - enum aarch64_ldst_op_data op_data, - enum aarch64_ldst_op_type op_type, - TCGReg rd, TCGReg rn, tcg_target_long offset) +static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, + TCGReg rd, TCGReg rn, intptr_t offset) { - /* use LDUR with BASE register with 9bit signed unscaled offset */ - tcg_out32(s, op_data << 24 | op_type << 20 - | (offset & 0x1ff) << 12 | rn << 5 | rd); + tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd); } -/* tcg_out_ldst_12 expects a scaled unsigned immediate offset */ -static inline void tcg_out_ldst_12(TCGContext *s, - enum aarch64_ldst_op_data op_data, - enum aarch64_ldst_op_type op_type, - TCGReg rd, TCGReg rn, - tcg_target_ulong scaled_uimm) +static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, + TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) { - tcg_out32(s, (op_data | 1) << 24 - | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd); + /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ + tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd); } /* Register to register move using ORR (shifted register with no shift). */ @@ -527,89 +528,177 @@ static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); } +/* This function is used for the Logical (immediate) instruction group. + The value of LIMM must satisfy IS_LIMM. See the comment above about + only supporting simplified logical immediates. */ +static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, uint64_t limm) +{ + unsigned h, l, r, c; + + assert(is_limm(limm)); + + h = clz64(limm); + l = ctz64(limm); + if (l == 0) { + r = 0; /* form 0....01....1 */ + c = ctz64(~limm) - 1; + if (h == 0) { + r = clz64(~limm); /* form 1..10..01..1 */ + c += r; + } + } else { + r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ + c = r - h - 1; + } + if (ext == TCG_TYPE_I32) { + r &= 31; + c &= 31; + } + + tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); +} + static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, tcg_target_long value) { AArch64Insn insn; - - if (type == TCG_TYPE_I32) { + int i, wantinv, shift; + tcg_target_long svalue = value; + tcg_target_long ivalue = ~value; + tcg_target_long imask; + + /* For 32-bit values, discard potential garbage in value. For 64-bit + values within [2**31, 2**32-1], we can create smaller sequences by + interpreting this as a negative 32-bit number, while ensuring that + the high 32 bits are cleared by setting SF=0. */ + if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { + svalue = (int32_t)value; value = (uint32_t)value; + ivalue = (uint32_t)ivalue; + type = TCG_TYPE_I32; + } + + /* Speed things up by handling the common case of small positive + and negative values specially. */ + if ((value & ~0xffffull) == 0) { + tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); + return; + } else if ((ivalue & ~0xffffull) == 0) { + tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); + return; + } + + /* Check for bitfield immediates. For the benefit of 32-bit quantities, + use the sign-extended value. That lets us match rotated values such + as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ + if (is_limm(svalue)) { + tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); + return; + } + + /* Look for host pointer values within 4G of the PC. 
This happens + often when loading pointers to QEMU's own data structures. */ + if (type == TCG_TYPE_I64) { + tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12); + if (disp == sextract64(disp, 0, 21)) { + tcg_out_insn(s, 3406, ADRP, rd, disp); + if (value & 0xfff) { + tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); + } + return; + } } - /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the - first MOVZ with the half-word immediate skipping the zeros, with a shift - (LSL) equal to this number. Then all next instructions use MOVKs. - Zero the processed half-word in the value, continue until empty. - We build the final result 16bits at a time with up to 4 instructions, - but do not emit instructions for 16bit zero holes. */ + /* Would it take fewer insns to begin with MOVN? For the value and its + inverse, count the number of 16-bit lanes that are 0. */ + for (i = wantinv = imask = 0; i < 64; i += 16) { + tcg_target_long mask = 0xffffull << i; + if ((value & mask) == 0) { + wantinv -= 1; + } + if ((ivalue & mask) == 0) { + wantinv += 1; + imask |= mask; + } + } + + /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */ insn = I3405_MOVZ; - do { - unsigned shift = ctz64(value) & (63 & -16); - tcg_out_insn_3405(s, insn, shift >= 32, rd, value >> shift, shift); + if (wantinv > 0) { + value = ivalue; + insn = I3405_MOVN; + } + + /* Find the lowest lane that is not 0x0000. */ + shift = ctz64(value) & (63 & -16); + tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift); + + if (wantinv > 0) { + /* Re-invert the value, so MOVK sees non-inverted bits. */ + value = ~value; + /* Clear out all the 0xffff lanes. */ + value ^= imask; + } + /* Clear out the lane that we just set. */ + value &= ~(0xffffUL << shift); + + /* Iterate until all lanes have been set, and thus cleared from VALUE. */ + while (value) { + shift = ctz64(value) & (63 & -16); + tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift); value &= ~(0xffffUL << shift); - insn = I3405_MOVK; - } while (value); + } } -static inline void tcg_out_ldst_r(TCGContext *s, - enum aarch64_ldst_op_data op_data, - enum aarch64_ldst_op_type op_type, - TCGReg rd, TCGReg base, TCGReg regoff) -{ - /* load from memory to register using base + 64bit register offset */ - /* using f.e. STR Wt, [Xn, Xm] 0xb8600800|(regoff << 16)|(base << 5)|rd */ - /* the 0x6000 is for the "no extend field" */ - tcg_out32(s, 0x00206800 - | op_data << 24 | op_type << 20 | regoff << 16 | base << 5 | rd); -} +/* Define something more legible for general use. 
*/ +#define tcg_out_ldst_r tcg_out_insn_3310 -/* solve the whole ldst problem */ -static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data, - enum aarch64_ldst_op_type type, - TCGReg rd, TCGReg rn, tcg_target_long offset) +static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, + TCGReg rd, TCGReg rn, intptr_t offset) { - if (offset >= -256 && offset < 256) { - tcg_out_ldst_9(s, data, type, rd, rn, offset); - return; - } + TCGMemOp size = (uint32_t)insn >> 30; - if (offset >= 256) { - /* if the offset is naturally aligned and in range, - then we can use the scaled uimm12 encoding */ - unsigned int s_bits = data >> 6; - if (!(offset & ((1 << s_bits) - 1))) { - tcg_target_ulong scaled_uimm = offset >> s_bits; - if (scaled_uimm <= 0xfff) { - tcg_out_ldst_12(s, data, type, rd, rn, scaled_uimm); - return; - } + /* If the offset is naturally aligned and in range, then we can + use the scaled uimm12 encoding */ + if (offset >= 0 && !(offset & ((1 << size) - 1))) { + uintptr_t scaled_uimm = offset >> size; + if (scaled_uimm <= 0xfff) { + tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); + return; } } - /* worst-case scenario, move offset to temp register, use reg offset */ + /* Small signed offsets can use the unscaled encoding. */ + if (offset >= -256 && offset < 256) { + tcg_out_insn_3312(s, insn, rd, rn, offset); + return; + } + + /* Worst-case scenario, move offset to temp register, use reg offset. */ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); - tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP); + tcg_out_ldst_r(s, insn, rd, rn, TCG_REG_TMP); } static inline void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { if (ret != arg) { - tcg_out_movr(s, type == TCG_TYPE_I64, ret, arg); + tcg_out_movr(s, type, ret, arg); } } static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, intptr_t arg2) { - tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_LD, + tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX, arg, arg1, arg2); } static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, intptr_t arg2) { - tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_ST, + tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX, arg, arg1, arg2); } @@ -699,91 +788,52 @@ static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, } } -static inline void tcg_out_goto(TCGContext *s, intptr_t target) +static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) { - intptr_t offset = (target - (intptr_t)s->code_ptr) / 4; - - if (offset < -0x02000000 || offset >= 0x02000000) { - /* out of 26bit range */ - tcg_abort(); - } - - tcg_out32(s, 0x14000000 | (offset & 0x03ffffff)); + ptrdiff_t offset = target - s->code_ptr; + assert(offset == sextract64(offset, 0, 26)); + tcg_out_insn(s, 3206, B, offset); } static inline void tcg_out_goto_noaddr(TCGContext *s) { - /* We pay attention here to not modify the branch target by - reading from the buffer. This ensure that caches and memory are - kept coherent during retranslation. - Mask away possible garbage in the high bits for the first translation, - while keeping the offset bits for retranslation. */ - uint32_t insn; - insn = (tcg_in32(s) & 0x03ffffff) | 0x14000000; - tcg_out32(s, insn); + /* We pay attention here to not modify the branch target by reading from + the buffer. This ensure that caches and memory are kept coherent during + retranslation. 
Mask away possible garbage in the high bits for the + first translation, while keeping the offset bits for retranslation. */ + uint32_t old = tcg_in32(s); + tcg_out_insn(s, 3206, B, old); } static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c) { - /* see comments in tcg_out_goto_noaddr */ - uint32_t insn; - insn = tcg_in32(s) & (0x07ffff << 5); - insn |= 0x54000000 | tcg_cond_to_aarch64[c]; - tcg_out32(s, insn); -} - -static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c, intptr_t target) -{ - intptr_t offset = (target - (intptr_t)s->code_ptr) / 4; - - if (offset < -0x40000 || offset >= 0x40000) { - /* out of 19bit range */ - tcg_abort(); - } - - offset &= 0x7ffff; - tcg_out32(s, 0x54000000 | tcg_cond_to_aarch64[c] | offset << 5); + /* See comments in tcg_out_goto_noaddr. */ + uint32_t old = tcg_in32(s) >> 5; + tcg_out_insn(s, 3202, B_C, c, old); } static inline void tcg_out_callr(TCGContext *s, TCGReg reg) { - tcg_out32(s, 0xd63f0000 | reg << 5); + tcg_out_insn(s, 3207, BLR, reg); } -static inline void tcg_out_gotor(TCGContext *s, TCGReg reg) +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target) { - tcg_out32(s, 0xd61f0000 | reg << 5); -} - -static inline void tcg_out_call(TCGContext *s, intptr_t target) -{ - intptr_t offset = (target - (intptr_t)s->code_ptr) / 4; - - if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit rng */ - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target); - tcg_out_callr(s, TCG_REG_TMP); + ptrdiff_t offset = target - s->code_ptr; + if (offset == sextract64(offset, 0, 26)) { + tcg_out_insn(s, 3206, BL, offset); } else { - tcg_out32(s, 0x94000000 | (offset & 0x03ffffff)); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); + tcg_out_callr(s, TCG_REG_TMP); } } -static inline void tcg_out_ret(TCGContext *s) -{ - /* emit RET { LR } */ - tcg_out32(s, 0xd65f03c0); -} - void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) { - intptr_t target = addr; - intptr_t offset = (target - (intptr_t)jmp_addr) / 4; - - if (offset < -0x02000000 || offset >= 0x02000000) { - /* out of 26bit range */ - tcg_abort(); - } + tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr; + tcg_insn_unit *target = (tcg_insn_unit *)addr; - patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0); + reloc_pc26(code_ptr, target); flush_icache_range(jmp_addr, jmp_addr + 4); } @@ -795,52 +845,69 @@ static inline void tcg_out_goto_label(TCGContext *s, int label_index) tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0); tcg_out_goto_noaddr(s); } else { - tcg_out_goto(s, l->u.value); + tcg_out_goto(s, l->u.value_ptr); } } -static inline void tcg_out_goto_label_cond(TCGContext *s, - TCGCond c, int label_index) +static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a, + TCGArg b, bool b_const, int label) { - TCGLabel *l = &s->labels[label_index]; + TCGLabel *l = &s->labels[label]; + intptr_t offset; + bool need_cmp; + + if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { + need_cmp = false; + } else { + need_cmp = true; + tcg_out_cmp(s, ext, a, b, b_const); + } if (!l->has_value) { - tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label_index, 0); - tcg_out_goto_cond_noaddr(s, c); + tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label, 0); + offset = tcg_in32(s) >> 5; } else { - tcg_out_goto_cond(s, c, l->u.value); + offset = l->u.value_ptr - s->code_ptr; + assert(offset == sextract64(offset, 0, 19)); + } + + if (need_cmp) { + tcg_out_insn(s, 3202, B_C, c, offset); + } else 
if (c == TCG_COND_EQ) { + tcg_out_insn(s, 3201, CBZ, ext, a, offset); + } else { + tcg_out_insn(s, 3201, CBNZ, ext, a, offset); } } -static inline void tcg_out_rev(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rm) +static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn) { - /* using REV 0x5ac00800 */ - unsigned int base = ext ? 0xdac00c00 : 0x5ac00800; - tcg_out32(s, base | rm << 5 | rd); + tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn); } -static inline void tcg_out_rev16(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rm) +static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn) { - /* using REV16 0x5ac00400 */ - unsigned int base = ext ? 0xdac00400 : 0x5ac00400; - tcg_out32(s, base | rm << 5 | rd); + tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn); } -static inline void tcg_out_sxt(TCGContext *s, TCGType ext, int s_bits, +static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn) +{ + tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn); +} + +static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits, TCGReg rd, TCGReg rn) { /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ - int bits = 8 * (1 << s_bits) - 1; + int bits = (8 << s_bits) - 1; tcg_out_sbfm(s, ext, rd, rn, 0, bits); } -static inline void tcg_out_uxt(TCGContext *s, int s_bits, +static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits, TCGReg rd, TCGReg rn) { /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ - int bits = 8 * (1 << s_bits) - 1; + int bits = (8 << s_bits) - 1; tcg_out_ubfm(s, 0, rd, rn, 0, bits); } @@ -854,37 +921,6 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, } } -/* This function is used for the Logical (immediate) instruction group. - The value of LIMM must satisfy IS_LIMM. See the comment above about - only supporting simplified logical immediates. 
*/ -static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, uint64_t limm) -{ - unsigned h, l, r, c; - - assert(is_limm(limm)); - - h = clz64(limm); - l = ctz64(limm); - if (l == 0) { - r = 0; /* form 0....01....1 */ - c = ctz64(~limm) - 1; - if (h == 0) { - r = clz64(~limm); /* form 1..10..01..1 */ - c += r; - } - } else { - r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ - c = r - h - 1; - } - if (ext == TCG_TYPE_I32) { - r &= 31; - c &= 31; - } - - tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); -} - static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl, TCGReg rh, TCGReg al, TCGReg ah, tcg_target_long bl, tcg_target_long bh, @@ -921,71 +957,84 @@ static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl, } tcg_out_insn_3503(s, insn, ext, rh, ah, bh); - if (rl != orig_rl) { - tcg_out_movr(s, ext, orig_rl, rl); - } + tcg_out_mov(s, ext, orig_rl, rl); } #ifdef CONFIG_SOFTMMU /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * int mmu_idx, uintptr_t ra) */ -static const void * const qemu_ld_helpers[4] = { - helper_ret_ldub_mmu, - helper_ret_lduw_mmu, - helper_ret_ldul_mmu, - helper_ret_ldq_mmu, +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, }; /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, * uintxx_t val, int mmu_idx, uintptr_t ra) */ -static const void * const qemu_st_helpers[4] = { - helper_ret_stb_mmu, - helper_ret_stw_mmu, - helper_ret_stl_mmu, - helper_ret_stq_mmu, +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, }; +static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target) +{ + ptrdiff_t offset = tcg_pcrel_diff(s, target); + assert(offset == sextract64(offset, 0, 21)); + tcg_out_insn(s, 3406, ADR, rd, offset); +} + static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr); + TCGMemOp opc = lb->opc; + TCGMemOp size = opc & MO_SIZE; - tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0); - tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg); + reloc_pc19(lb->label_ptr[0], s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0); + tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X3, (tcg_target_long)lb->raddr); - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, - (tcg_target_long)qemu_ld_helpers[lb->opc & 3]); - tcg_out_callr(s, TCG_REG_TMP); - if (lb->opc & 0x04) { - tcg_out_sxt(s, 1, lb->opc & 3, lb->datalo_reg, TCG_REG_X0); + tcg_out_adr(s, TCG_REG_X3, lb->raddr); + tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]); + if (opc & MO_SIGN) { + tcg_out_sxt(s, TCG_TYPE_I64, size, lb->datalo_reg, TCG_REG_X0); } else { - tcg_out_movr(s, 1, lb->datalo_reg, TCG_REG_X0); + tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); } - tcg_out_goto(s, (intptr_t)lb->raddr); + tcg_out_goto(s, lb->raddr); } static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - reloc_pc19(lb->label_ptr[0], 
(intptr_t)s->code_ptr); + TCGMemOp opc = lb->opc; + TCGMemOp size = opc & MO_SIZE; + + reloc_pc19(lb->label_ptr[0], s->code_ptr); - tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0); - tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg); - tcg_out_movr(s, 1, TCG_REG_X2, lb->datalo_reg); + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0); + tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); + tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X4, (intptr_t)lb->raddr); - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, - (intptr_t)qemu_st_helpers[lb->opc & 3]); - tcg_out_callr(s, TCG_REG_TMP); - tcg_out_goto(s, (tcg_target_long)lb->raddr); + tcg_out_adr(s, TCG_REG_X4, lb->raddr); + tcg_out_call(s, qemu_st_helpers[opc]); + tcg_out_goto(s, lb->raddr); } -static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc, +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, TCGReg data_reg, TCGReg addr_reg, - int mem_index, - uint8_t *raddr, uint8_t *label_ptr) + int mem_index, tcg_insn_unit *raddr, + tcg_insn_unit *label_ptr) { TCGLabelQemuLdst *label = new_ldst_label(s); @@ -1002,96 +1051,109 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc, slow path for the failure case, which will be patched later when finalizing the slow path. Generated code returns the host addend in X1, clobbers X0,X2,X3,TMP. */ -static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, - int s_bits, uint8_t **label_ptr, int mem_index, int is_read) +static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits, + tcg_insn_unit **label_ptr, int mem_index, + bool is_read) { TCGReg base = TCG_AREG0; int tlb_offset = is_read ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); + /* Extract the TLB index from the address into X0. X0<CPU_TLB_BITS:0> = addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */ - tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg, + tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg, TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS); + /* Store the page mask part of the address and the low s_bits into X3. Later this allows checking for equality and alignment at the same time. X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */ tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + /* Add any "high bits" from the tlb offset to the env address into X2, to take advantage of the LSL12 form of the ADDI instruction. X2 = env + (tlb_offset & 0xfff000) */ - tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base, - tlb_offset & 0xfff000); + if (tlb_offset & 0xfff000) { + tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base, + tlb_offset & 0xfff000); + base = TCG_REG_X2; + } + /* Merge the tlb index contribution into X2. X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */ - tcg_out_insn(s, 3502S, ADD_LSL, 1, TCG_REG_X2, TCG_REG_X2, + tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base, TCG_REG_X0, CPU_TLB_ENTRY_BITS); + /* Merge "low bits" from tlb offset, load the tlb comparator into X0. X0 = load [X2 + (tlb_offset & 0x000fff)] */ - tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32, - LDST_LD, TCG_REG_X0, TCG_REG_X2, - (tlb_offset & 0xfff)); + tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? 
I3312_LDRW : I3312_LDRX, + TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff); + /* Load the tlb addend. Do that early to avoid stalling. X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */ - tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2, + tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2, (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) - (is_read ? offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write))); + /* Perform the address comparison. */ tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0); - *label_ptr = s->code_ptr; + /* If not equal, we jump to the slow path. */ + *label_ptr = s->code_ptr; tcg_out_goto_cond_noaddr(s, TCG_COND_NE); } #endif /* CONFIG_SOFTMMU */ -static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r, - TCGReg addr_r, TCGReg off_r) +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, + TCGReg data_r, TCGReg addr_r, TCGReg off_r) { - switch (opc) { - case 0: - tcg_out_ldst_r(s, LDST_8, LDST_LD, data_r, addr_r, off_r); + const TCGMemOp bswap = memop & MO_BSWAP; + + switch (memop & MO_SSIZE) { + case MO_UB: + tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, off_r); break; - case 0 | 4: - tcg_out_ldst_r(s, LDST_8, LDST_LD_S_X, data_r, addr_r, off_r); + case MO_SB: + tcg_out_ldst_r(s, I3312_LDRSBX, data_r, addr_r, off_r); break; - case 1: - tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r); - if (TCG_LDST_BSWAP) { - tcg_out_rev16(s, 0, data_r, data_r); + case MO_UW: + tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, off_r); + if (bswap) { + tcg_out_rev16(s, data_r, data_r); } break; - case 1 | 4: - if (TCG_LDST_BSWAP) { - tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r); - tcg_out_rev16(s, 0, data_r, data_r); - tcg_out_sxt(s, 1, 1, data_r, data_r); + case MO_SW: + if (bswap) { + tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, off_r); + tcg_out_rev16(s, data_r, data_r); + tcg_out_sxt(s, TCG_TYPE_I64, MO_16, data_r, data_r); } else { - tcg_out_ldst_r(s, LDST_16, LDST_LD_S_X, data_r, addr_r, off_r); + tcg_out_ldst_r(s, I3312_LDRSHX, data_r, addr_r, off_r); } break; - case 2: - tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r); - if (TCG_LDST_BSWAP) { - tcg_out_rev(s, 0, data_r, data_r); + case MO_UL: + tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, off_r); + if (bswap) { + tcg_out_rev32(s, data_r, data_r); } break; - case 2 | 4: - if (TCG_LDST_BSWAP) { - tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r); - tcg_out_rev(s, 0, data_r, data_r); - tcg_out_sxt(s, 1, 2, data_r, data_r); + case MO_SL: + if (bswap) { + tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, off_r); + tcg_out_rev32(s, data_r, data_r); + tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r); } else { - tcg_out_ldst_r(s, LDST_32, LDST_LD_S_X, data_r, addr_r, off_r); + tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, off_r); } break; - case 3: - tcg_out_ldst_r(s, LDST_64, LDST_LD, data_r, addr_r, off_r); - if (TCG_LDST_BSWAP) { - tcg_out_rev(s, 1, data_r, data_r); + case MO_Q: + tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, off_r); + if (bswap) { + tcg_out_rev64(s, data_r, data_r); } break; default: @@ -1099,140 +1161,76 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r, } } -static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data_r, - TCGReg addr_r, TCGReg off_r) +static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop, + TCGReg data_r, TCGReg addr_r, TCGReg off_r) { - switch (opc) { - case 0: - tcg_out_ldst_r(s, LDST_8, LDST_ST, data_r, addr_r, off_r); + 
const TCGMemOp bswap = memop & MO_BSWAP; + + switch (memop & MO_SIZE) { + case MO_8: + tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, off_r); break; - case 1: - if (TCG_LDST_BSWAP) { - tcg_out_rev16(s, 0, TCG_REG_TMP, data_r); - tcg_out_ldst_r(s, LDST_16, LDST_ST, TCG_REG_TMP, addr_r, off_r); - } else { - tcg_out_ldst_r(s, LDST_16, LDST_ST, data_r, addr_r, off_r); + case MO_16: + if (bswap && data_r != TCG_REG_XZR) { + tcg_out_rev16(s, TCG_REG_TMP, data_r); + data_r = TCG_REG_TMP; } + tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, off_r); break; - case 2: - if (TCG_LDST_BSWAP) { - tcg_out_rev(s, 0, TCG_REG_TMP, data_r); - tcg_out_ldst_r(s, LDST_32, LDST_ST, TCG_REG_TMP, addr_r, off_r); - } else { - tcg_out_ldst_r(s, LDST_32, LDST_ST, data_r, addr_r, off_r); + case MO_32: + if (bswap && data_r != TCG_REG_XZR) { + tcg_out_rev32(s, TCG_REG_TMP, data_r); + data_r = TCG_REG_TMP; } + tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, off_r); break; - case 3: - if (TCG_LDST_BSWAP) { - tcg_out_rev(s, 1, TCG_REG_TMP, data_r); - tcg_out_ldst_r(s, LDST_64, LDST_ST, TCG_REG_TMP, addr_r, off_r); - } else { - tcg_out_ldst_r(s, LDST_64, LDST_ST, data_r, addr_r, off_r); + case MO_64: + if (bswap && data_r != TCG_REG_XZR) { + tcg_out_rev64(s, TCG_REG_TMP, data_r); + data_r = TCG_REG_TMP; } + tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, off_r); break; default: tcg_abort(); } } -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOp memop, int mem_index) { - TCGReg addr_reg, data_reg; #ifdef CONFIG_SOFTMMU - int mem_index, s_bits; - uint8_t *label_ptr; -#endif - data_reg = args[0]; - addr_reg = args[1]; + TCGMemOp s_bits = memop & MO_SIZE; + tcg_insn_unit *label_ptr; -#ifdef CONFIG_SOFTMMU - mem_index = args[2]; - s_bits = opc & 3; tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1); - tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_X1); - add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg, + tcg_out_qemu_ld_direct(s, memop, data_reg, addr_reg, TCG_REG_X1); + add_qemu_ldst_label(s, true, memop, data_reg, addr_reg, mem_index, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ - tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, + tcg_out_qemu_ld_direct(s, memop, data_reg, addr_reg, GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR); #endif /* CONFIG_SOFTMMU */ } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOp memop, int mem_index) { - TCGReg addr_reg, data_reg; #ifdef CONFIG_SOFTMMU - int mem_index, s_bits; - uint8_t *label_ptr; -#endif - data_reg = args[0]; - addr_reg = args[1]; - -#ifdef CONFIG_SOFTMMU - mem_index = args[2]; - s_bits = opc & 3; + TCGMemOp s_bits = memop & MO_SIZE; + tcg_insn_unit *label_ptr; tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0); - tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_X1); - add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg, + tcg_out_qemu_st_direct(s, memop, data_reg, addr_reg, TCG_REG_X1); + add_qemu_ldst_label(s, false, memop, data_reg, addr_reg, mem_index, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ - tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, + tcg_out_qemu_st_direct(s, memop, data_reg, addr_reg, GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR); #endif /* CONFIG_SOFTMMU */ } -static uint8_t *tb_ret_addr; - -/* callee stack use example: - stp x29, x30, [sp,#-32]! 
- mov x29, sp - stp x1, x2, [sp,#16] - ... - ldp x1, x2, [sp,#16] - ldp x29, x30, [sp],#32 - ret -*/ - -/* push r1 and r2, and alloc stack space for a total of - alloc_n elements (1 element=16 bytes, must be between 1 and 31. */ -static inline void tcg_out_push_pair(TCGContext *s, TCGReg addr, - TCGReg r1, TCGReg r2, int alloc_n) -{ - /* using indexed scaled simm7 STP 0x28800000 | (ext) | 0x01000000 (pre-idx) - | alloc_n * (-1) << 16 | r2 << 10 | addr << 5 | r1 */ - assert(alloc_n > 0 && alloc_n < 0x20); - alloc_n = (-alloc_n) & 0x3f; - tcg_out32(s, 0xa9800000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1); -} - -/* dealloc stack space for a total of alloc_n elements and pop r1, r2. */ -static inline void tcg_out_pop_pair(TCGContext *s, TCGReg addr, - TCGReg r1, TCGReg r2, int alloc_n) -{ - /* using indexed scaled simm7 LDP 0x28c00000 | (ext) | nothing (post-idx) - | alloc_n << 16 | r2 << 10 | addr << 5 | r1 */ - assert(alloc_n > 0 && alloc_n < 0x20); - tcg_out32(s, 0xa8c00000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1); -} - -static inline void tcg_out_store_pair(TCGContext *s, TCGReg addr, - TCGReg r1, TCGReg r2, int idx) -{ - /* using register pair offset simm7 STP 0x29000000 | (ext) - | idx << 16 | r2 << 10 | addr << 5 | r1 */ - assert(idx > 0 && idx < 0x20); - tcg_out32(s, 0xa9000000 | idx << 16 | r2 << 10 | addr << 5 | r1); -} - -static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr, - TCGReg r1, TCGReg r2, int idx) -{ - /* using register pair offset simm7 LDP 0x29400000 | (ext) - | idx << 16 | r2 << 10 | addr << 5 | r1 */ - assert(idx > 0 && idx < 0x20); - tcg_out32(s, 0xa9400000 | idx << 16 | r2 << 10 | addr << 5 | r1); -} +static tcg_insn_unit *tb_ret_addr; static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1255,7 +1253,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_exit_tb: tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); - tcg_out_goto(s, (intptr_t)tb_ret_addr); + tcg_out_goto(s, tb_ret_addr); break; case INDEX_op_goto_tb: @@ -1263,46 +1261,62 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, #error "USE_DIRECT_JUMP required for aarch64" #endif assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */ - s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf; + s->tb_jmp_offset[a0] = tcg_current_code_size(s); /* actual branch destination will be patched by aarch64_tb_set_jmp_target later, beware retranslation. 
*/ tcg_out_goto_noaddr(s); - s->tb_next_offset[a0] = s->code_ptr - s->code_buf; - break; - - case INDEX_op_call: - if (const_args[0]) { - tcg_out_call(s, a0); - } else { - tcg_out_callr(s, a0); - } + s->tb_next_offset[a0] = tcg_current_code_size(s); break; case INDEX_op_br: tcg_out_goto_label(s, a0); break; - case INDEX_op_ld_i32: - case INDEX_op_ld_i64: - case INDEX_op_st_i32: - case INDEX_op_st_i64: case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: case INDEX_op_ld8u_i64: + tcg_out_ldst(s, I3312_LDRB, a0, a1, a2); + break; + case INDEX_op_ld8s_i32: + tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2); + break; case INDEX_op_ld8s_i64: + tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2); + break; + case INDEX_op_ld16u_i32: case INDEX_op_ld16u_i64: + tcg_out_ldst(s, I3312_LDRH, a0, a1, a2); + break; + case INDEX_op_ld16s_i32: + tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2); + break; case INDEX_op_ld16s_i64: + tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2); + break; + case INDEX_op_ld_i32: case INDEX_op_ld32u_i64: + tcg_out_ldst(s, I3312_LDRW, a0, a1, a2); + break; case INDEX_op_ld32s_i64: + tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2); + break; + case INDEX_op_ld_i64: + tcg_out_ldst(s, I3312_LDRX, a0, a1, a2); + break; + case INDEX_op_st8_i32: case INDEX_op_st8_i64: + tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2); + break; case INDEX_op_st16_i32: case INDEX_op_st16_i64: + tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2); + break; + case INDEX_op_st_i32: case INDEX_op_st32_i64: - tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc), - a0, a1, a2); + tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2); + break; + case INDEX_op_st_i64: + tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2); break; case INDEX_op_add_i32: @@ -1478,8 +1492,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, a1 = (int32_t)a1; /* FALLTHRU */ case INDEX_op_brcond_i64: - tcg_out_cmp(s, ext, a0, a1, const_args[1]); - tcg_out_goto_label_cond(s, a2, args[3]); + tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], args[3]); break; case INDEX_op_setcond_i32: @@ -1500,77 +1513,48 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); break; - case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld(s, args, 0 | 0); - break; - case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, args, 4 | 0); - break; - case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld(s, args, 0 | 1); - break; - case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, args, 4 | 1); - break; - case INDEX_op_qemu_ld32u: - tcg_out_qemu_ld(s, args, 0 | 2); - break; - case INDEX_op_qemu_ld32s: - tcg_out_qemu_ld(s, args, 4 | 2); - break; - case INDEX_op_qemu_ld32: - tcg_out_qemu_ld(s, args, 0 | 2); - break; - case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, 0 | 3); - break; - case INDEX_op_qemu_st8: - tcg_out_qemu_st(s, args, 0); + case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, a0, a1, a2, args[3]); break; - case INDEX_op_qemu_st16: - tcg_out_qemu_st(s, args, 1); - break; - case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, args, 2); - break; - case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, 3); + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, REG0(0), a1, a2, args[3]); break; - case INDEX_op_bswap32_i64: - /* Despite the _i64, this is a 32-bit bswap. 
*/ - ext = 0; - /* FALLTHRU */ case INDEX_op_bswap64_i64: + tcg_out_rev64(s, a0, a1); + break; + case INDEX_op_bswap32_i64: case INDEX_op_bswap32_i32: - tcg_out_rev(s, ext, a0, a1); + tcg_out_rev32(s, a0, a1); break; case INDEX_op_bswap16_i64: case INDEX_op_bswap16_i32: - tcg_out_rev16(s, 0, a0, a1); + tcg_out_rev16(s, a0, a1); break; case INDEX_op_ext8s_i64: case INDEX_op_ext8s_i32: - tcg_out_sxt(s, ext, 0, a0, a1); + tcg_out_sxt(s, ext, MO_8, a0, a1); break; case INDEX_op_ext16s_i64: case INDEX_op_ext16s_i32: - tcg_out_sxt(s, ext, 1, a0, a1); + tcg_out_sxt(s, ext, MO_16, a0, a1); break; case INDEX_op_ext32s_i64: - tcg_out_sxt(s, 1, 2, a0, a1); + tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1); break; case INDEX_op_ext8u_i64: case INDEX_op_ext8u_i32: - tcg_out_uxt(s, 0, a0, a1); + tcg_out_uxt(s, MO_8, a0, a1); break; case INDEX_op_ext16u_i64: case INDEX_op_ext16u_i32: - tcg_out_uxt(s, 1, a0, a1); + tcg_out_uxt(s, MO_16, a0, a1); break; case INDEX_op_ext32u_i64: - tcg_out_movr(s, 0, a0, a1); + tcg_out_movr(s, TCG_TYPE_I32, a0, a1); break; case INDEX_op_deposit_i64: @@ -1604,13 +1588,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: - case INDEX_op_mov_i32: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ case INDEX_op_movi_i64: - case INDEX_op_movi_i32: - /* Always implemented with tcg_out_mov/i, never with tcg_out_op. */ + case INDEX_op_call: /* Always emitted via tcg_out_call. */ default: - /* Opcode not implemented. */ tcg_abort(); } @@ -1620,15 +1603,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, static const TCGTargetOpDef aarch64_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, - { INDEX_op_call, { "ri" } }, { INDEX_op_br, { } }, - { INDEX_op_mov_i32, { "r", "r" } }, - { INDEX_op_mov_i64, { "r", "r" } }, - - { INDEX_op_movi_i32, { "r" } }, - { INDEX_op_movi_i64, { "r" } }, - { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, { INDEX_op_ld16u_i32, { "r", "r" } }, @@ -1642,17 +1618,17 @@ static const TCGTargetOpDef aarch64_op_defs[] = { { INDEX_op_ld32s_i64, { "r", "r" } }, { INDEX_op_ld_i64, { "r", "r" } }, - { INDEX_op_st8_i32, { "r", "r" } }, - { INDEX_op_st16_i32, { "r", "r" } }, - { INDEX_op_st_i32, { "r", "r" } }, - { INDEX_op_st8_i64, { "r", "r" } }, - { INDEX_op_st16_i64, { "r", "r" } }, - { INDEX_op_st32_i64, { "r", "r" } }, - { INDEX_op_st_i64, { "r", "r" } }, + { INDEX_op_st8_i32, { "rZ", "r" } }, + { INDEX_op_st16_i32, { "rZ", "r" } }, + { INDEX_op_st_i32, { "rZ", "r" } }, + { INDEX_op_st8_i64, { "rZ", "r" } }, + { INDEX_op_st16_i64, { "rZ", "r" } }, + { INDEX_op_st32_i64, { "rZ", "r" } }, + { INDEX_op_st_i64, { "rZ", "r" } }, - { INDEX_op_add_i32, { "r", "r", "rwA" } }, + { INDEX_op_add_i32, { "r", "r", "rA" } }, { INDEX_op_add_i64, { "r", "r", "rA" } }, - { INDEX_op_sub_i32, { "r", "r", "rwA" } }, + { INDEX_op_sub_i32, { "r", "r", "rA" } }, { INDEX_op_sub_i64, { "r", "r", "rA" } }, { INDEX_op_mul_i32, { "r", "r", "r" } }, { INDEX_op_mul_i64, { "r", "r", "r" } }, @@ -1664,17 +1640,17 @@ static const TCGTargetOpDef aarch64_op_defs[] = { { INDEX_op_rem_i64, { "r", "r", "r" } }, { INDEX_op_remu_i32, { "r", "r", "r" } }, { INDEX_op_remu_i64, { "r", "r", "r" } }, - { INDEX_op_and_i32, { "r", "r", "rwL" } }, + { INDEX_op_and_i32, { "r", "r", "rL" } }, { INDEX_op_and_i64, { "r", "r", "rL" } }, - { INDEX_op_or_i32, { "r", "r", "rwL" } }, + { INDEX_op_or_i32, { 
"r", "r", "rL" } }, { INDEX_op_or_i64, { "r", "r", "rL" } }, - { INDEX_op_xor_i32, { "r", "r", "rwL" } }, + { INDEX_op_xor_i32, { "r", "r", "rL" } }, { INDEX_op_xor_i64, { "r", "r", "rL" } }, - { INDEX_op_andc_i32, { "r", "r", "rwL" } }, + { INDEX_op_andc_i32, { "r", "r", "rL" } }, { INDEX_op_andc_i64, { "r", "r", "rL" } }, - { INDEX_op_orc_i32, { "r", "r", "rwL" } }, + { INDEX_op_orc_i32, { "r", "r", "rL" } }, { INDEX_op_orc_i64, { "r", "r", "rL" } }, - { INDEX_op_eqv_i32, { "r", "r", "rwL" } }, + { INDEX_op_eqv_i32, { "r", "r", "rL" } }, { INDEX_op_eqv_i64, { "r", "r", "rL" } }, { INDEX_op_neg_i32, { "r", "r" } }, @@ -1693,27 +1669,17 @@ static const TCGTargetOpDef aarch64_op_defs[] = { { INDEX_op_rotl_i64, { "r", "r", "ri" } }, { INDEX_op_rotr_i64, { "r", "r", "ri" } }, - { INDEX_op_brcond_i32, { "r", "rwA" } }, + { INDEX_op_brcond_i32, { "r", "rA" } }, { INDEX_op_brcond_i64, { "r", "rA" } }, - { INDEX_op_setcond_i32, { "r", "r", "rwA" } }, + { INDEX_op_setcond_i32, { "r", "r", "rA" } }, { INDEX_op_setcond_i64, { "r", "r", "rA" } }, - { INDEX_op_movcond_i32, { "r", "r", "rwA", "rZ", "rZ" } }, + { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } }, { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } }, - { INDEX_op_qemu_ld8u, { "r", "l" } }, - { INDEX_op_qemu_ld8s, { "r", "l" } }, - { INDEX_op_qemu_ld16u, { "r", "l" } }, - { INDEX_op_qemu_ld16s, { "r", "l" } }, - { INDEX_op_qemu_ld32u, { "r", "l" } }, - { INDEX_op_qemu_ld32s, { "r", "l" } }, - - { INDEX_op_qemu_ld32, { "r", "l" } }, - { INDEX_op_qemu_ld64, { "r", "l" } }, - - { INDEX_op_qemu_st8, { "l", "l" } }, - { INDEX_op_qemu_st16, { "l", "l" } }, - { INDEX_op_qemu_st32, { "l", "l" } }, - { INDEX_op_qemu_st64, { "l", "l" } }, + { INDEX_op_qemu_ld_i32, { "r", "l" } }, + { INDEX_op_qemu_ld_i64, { "r", "l" } }, + { INDEX_op_qemu_st_i32, { "lZ", "l" } }, + { INDEX_op_qemu_st_i64, { "lZ", "l" } }, { INDEX_op_bswap16_i32, { "r", "r" } }, { INDEX_op_bswap32_i32, { "r", "r" } }, @@ -1736,9 +1702,9 @@ static const TCGTargetOpDef aarch64_op_defs[] = { { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } }, + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, - { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } }, + { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, { INDEX_op_muluh_i64, { "r", "r", "r" } }, @@ -1762,7 +1728,7 @@ static void tcg_target_init(TCGContext *s) (1 << TCG_REG_X12) | (1 << TCG_REG_X13) | (1 << TCG_REG_X14) | (1 << TCG_REG_X15) | (1 << TCG_REG_X16) | (1 << TCG_REG_X17) | - (1 << TCG_REG_X18)); + (1 << TCG_REG_X18) | (1 << TCG_REG_X30)); tcg_regset_clear(s->reserved_regs); tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); @@ -1773,40 +1739,44 @@ static void tcg_target_init(TCGContext *s) tcg_add_target_add_op_defs(aarch64_op_defs); } +/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ +#define PUSH_SIZE ((30 - 19 + 1) * 8) + +#define FRAME_SIZE \ + ((PUSH_SIZE \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & ~(TCG_TARGET_STACK_ALIGN - 1)) + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +/* We're expecting to use a single ADDI insn. 
*/ +QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); + static void tcg_target_qemu_prologue(TCGContext *s) { - /* NB: frame sizes are in 16 byte stack units! */ - int frame_size_callee_saved, frame_size_tcg_locals; TCGReg r; - /* save pairs (FP, LR) and (X19, X20) .. (X27, X28) */ - frame_size_callee_saved = (1) + (TCG_REG_X28 - TCG_REG_X19) / 2 + 1; - - /* frame size requirement for TCG local variables */ - frame_size_tcg_locals = TCG_STATIC_CALL_ARGS_SIZE - + CPU_TEMP_BUF_NLONGS * sizeof(long) - + (TCG_TARGET_STACK_ALIGN - 1); - frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1); - frame_size_tcg_locals /= TCG_TARGET_STACK_ALIGN; + /* Push (FP, LR) and allocate space for all saved registers. */ + tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, + TCG_REG_SP, -PUSH_SIZE, 1, 1); - /* push (FP, LR) and update sp */ - tcg_out_push_pair(s, TCG_REG_SP, - TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved); + /* Set up frame pointer for canonical unwinding. */ + tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); - /* FP -> callee_saved */ - tcg_out_movr_sp(s, 1, TCG_REG_FP, TCG_REG_SP); - - /* store callee-preserved regs x19..x28 using FP -> callee_saved */ + /* Store callee-preserved regs x19..x28. */ for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { - int idx = (r - TCG_REG_X19) / 2 + 1; - tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx); + int ofs = (r - TCG_REG_X19 + 2) * 8; + tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); } /* Make stack space for TCG locals. */ tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, - frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN); + FRAME_SIZE - PUSH_SIZE); - /* inform TCG about how to find TCG locals with register, offset, size */ + /* Inform TCG about how to find TCG locals with register, offset, size. */ tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, CPU_TEMP_BUF_NLONGS * sizeof(long)); @@ -1818,23 +1788,67 @@ static void tcg_target_qemu_prologue(TCGContext *s) #endif tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tcg_out_gotor(s, tcg_target_call_iarg_regs[1]); + tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); tb_ret_addr = s->code_ptr; /* Remove TCG locals stack space. */ tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, - frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN); + FRAME_SIZE - PUSH_SIZE); - /* restore registers x19..x28. - FP must be preserved, so it still points to callee_saved area */ + /* Restore registers x19..x28. */ for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { - int idx = (r - TCG_REG_X19) / 2 + 1; - tcg_out_load_pair(s, TCG_REG_FP, r, r + 1, idx); + int ofs = (r - TCG_REG_X19 + 2) * 8; + tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); } - /* pop (FP, LR), restore SP to previous frame, return */ - tcg_out_pop_pair(s, TCG_REG_SP, - TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved); - tcg_out_ret(s); + /* Pop (FP, LR), restore SP to previous frame. 
*/ + tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, + TCG_REG_SP, PUSH_SIZE, 0, 1); + tcg_out_insn(s, 3207, RET, TCG_REG_LR); +} + +typedef struct { + DebugFrameHeader h; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[24]; +} DebugFrame; + +#define ELF_HOST_MACHINE EM_AARCH64 + +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 0x78, /* sleb128 -8 */ + .h.cie.return_column = TCG_REG_LR, + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { + 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ + 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ + 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ + 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ + 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ + 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ + 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ + 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ + 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ + 0x80 + 19, 10, /* DW_CFA_offset, x1p, -80 */ + 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ + 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 988983ed5..60c7493ac 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -13,21 +13,27 @@ #ifndef TCG_TARGET_AARCH64 #define TCG_TARGET_AARCH64 1 -#undef TCG_TARGET_WORDS_BIGENDIAN +#define TCG_TARGET_INSN_UNIT_SIZE 4 #undef TCG_TARGET_STACK_GROWSUP typedef enum { - TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, TCG_REG_X4, - TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, TCG_REG_X8, TCG_REG_X9, - TCG_REG_X10, TCG_REG_X11, TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, - TCG_REG_X15, TCG_REG_X16, TCG_REG_X17, TCG_REG_X18, TCG_REG_X19, - TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, TCG_REG_X24, - TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, TCG_REG_X28, - TCG_REG_FP, /* frame pointer */ - TCG_REG_LR, /* link register */ - TCG_REG_SP, /* stack pointer or zero register */ - TCG_REG_XZR = TCG_REG_SP /* same register number */ - /* program counter is not directly accessible! */ + TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, + TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, + TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, + TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, + TCG_REG_X16, TCG_REG_X17, TCG_REG_X18, TCG_REG_X19, + TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, + TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, + TCG_REG_X28, TCG_REG_X29, TCG_REG_X30, + + /* X31 is either the stack pointer or zero, depending on context. */ + TCG_REG_SP = 31, + TCG_REG_XZR = 31, + + /* Aliases. 
*/ + TCG_REG_FP = TCG_REG_X29, + TCG_REG_LR = TCG_REG_X30, + TCG_AREG0 = TCG_REG_X19, } TCGReg; #define TCG_TARGET_NB_REGS 32 @@ -63,6 +69,7 @@ typedef enum { #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_trunc_shr_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 @@ -92,12 +99,6 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1 -enum { - TCG_AREG0 = TCG_REG_X19, -}; - -#define TCG_TARGET_HAS_new_ldst 0 - static inline void flush_icache_range(uintptr_t start, uintptr_t stop) { __builtin___clear_cache((char *)start, (char *)stop); diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index a65fc658e..e40301c78 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -115,36 +115,18 @@ static const int tcg_target_call_oarg_regs[2] = { #define TCG_REG_TMP TCG_REG_R12 -static inline void reloc_abs32(void *code_ptr, intptr_t target) +static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) { - *(uint32_t *) code_ptr = target; + ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2; + *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); } -static inline void reloc_pc24(void *code_ptr, intptr_t target) -{ - uint32_t offset = ((target - ((intptr_t)code_ptr + 8)) >> 2); - - *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & ~0xffffff) - | (offset & 0xffffff); -} - -static void patch_reloc(uint8_t *code_ptr, int type, +static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { - switch (type) { - case R_ARM_ABS32: - reloc_abs32(code_ptr, value); - break; - - case R_ARM_CALL: - case R_ARM_JUMP24: - default: - tcg_abort(); - - case R_ARM_PC24: - reloc_pc24(code_ptr, value); - break; - } + assert(type == R_ARM_PC24); + assert(addend == 0); + reloc_pc24(code_ptr, (tcg_insn_unit *)value); } #define TCG_CT_CONST_ARM 0x100 @@ -261,7 +243,7 @@ static inline int check_fit_imm(uint32_t imm) * mov operand2: values represented with x << (2 * y), x < 0x100 * add, sub, eor...: ditto */ -static inline int tcg_target_const_match(tcg_target_long val, +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct) { int ct; @@ -379,20 +361,18 @@ static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset) static inline void tcg_out_b_noaddr(TCGContext *s, int cond) { - /* We pay attention here to not modify the branch target by skipping - the corresponding bytes. This ensure that caches and memory are + /* We pay attention here to not modify the branch target by masking + the corresponding bytes. This ensure that caches and memory are kept coherent during retranslation. */ - s->code_ptr += 3; - tcg_out8(s, (cond << 4) | 0x0a); + tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a)); } static inline void tcg_out_bl_noaddr(TCGContext *s, int cond) { - /* We pay attention here to not modify the branch target by skipping - the corresponding bytes. This ensure that caches and memory are + /* We pay attention here to not modify the branch target by masking + the corresponding bytes. This ensure that caches and memory are kept coherent during retranslation. 
*/ - s->code_ptr += 3; - tcg_out8(s, (cond << 4) | 0x0b); + tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b)); } static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset) @@ -1010,20 +990,21 @@ static inline void tcg_out_st8(TCGContext *s, int cond, * with the code buffer limited to 16MB we wouldn't need the long case. * But we also use it for the tail-call to the qemu_ld/st helpers, which does. */ -static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr) +static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr) { - int32_t disp = addr - (tcg_target_long) s->code_ptr; + intptr_t addri = (intptr_t)addr; + ptrdiff_t disp = tcg_pcrel_diff(s, addr); - if ((addr & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) { + if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) { tcg_out_b(s, cond, disp); return; } - tcg_out_movi32(s, cond, TCG_REG_TMP, addr); + tcg_out_movi32(s, cond, TCG_REG_TMP, addri); if (use_armv5t_instructions) { tcg_out_bx(s, cond, TCG_REG_TMP); } else { - if (addr & 1) { + if (addri & 1) { tcg_abort(); } tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP); @@ -1032,39 +1013,28 @@ static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr) /* The call case is mostly used for helpers - so it's not unreasonable * for them to be beyond branch range */ -static inline void tcg_out_call(TCGContext *s, uint32_t addr) +static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr) { - int32_t val; + intptr_t addri = (intptr_t)addr; + ptrdiff_t disp = tcg_pcrel_diff(s, addr); - val = addr - (tcg_target_long) s->code_ptr; - if (val - 8 < 0x02000000 && val - 8 >= -0x02000000) { - if (addr & 1) { + if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) { + if (addri & 1) { /* Use BLX if the target is in Thumb mode */ if (!use_armv5t_instructions) { tcg_abort(); } - tcg_out_blx_imm(s, val); + tcg_out_blx_imm(s, disp); } else { - tcg_out_bl(s, COND_AL, val); + tcg_out_bl(s, COND_AL, disp); } } else if (use_armv7_instructions) { - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addr); + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); tcg_out_blx(s, COND_AL, TCG_REG_TMP); } else { tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); - tcg_out32(s, addr); - } -} - -static inline void tcg_out_callr(TCGContext *s, int cond, int arg) -{ - if (use_armv5t_instructions) { - tcg_out_blx(s, cond, arg); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0, - TCG_REG_PC, SHIFT_IMM_LSL(0)); - tcg_out_bx(s, cond, arg); + tcg_out32(s, addri); } } @@ -1073,9 +1043,9 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index) TCGLabel *l = &s->labels[label_index]; if (l->has_value) { - tcg_out_goto(s, cond, l->u.value); + tcg_out_goto(s, cond, l->u.value_ptr); } else { - tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, label_index, 31337); + tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, label_index, 0); tcg_out_b_noaddr(s, cond); } } @@ -1084,7 +1054,7 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index) /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * int mmu_idx, uintptr_t ra) */ -static const void * const qemu_ld_helpers[16] = { +static void * const qemu_ld_helpers[16] = { [MO_UB] = helper_ret_ldub_mmu, [MO_SB] = helper_ret_ldsb_mmu, @@ -1104,7 +1074,7 @@ static const void * const qemu_ld_helpers[16] = { /* helper signature: helper_ret_st_mmu(CPUState *env, 
target_ulong addr, * uintxx_t val, int mmu_idx, uintptr_t ra) */ -static const void * const qemu_st_helpers[16] = { +static void * const qemu_st_helpers[16] = { [MO_UB] = helper_ret_stb_mmu, [MO_LEUW] = helper_le_stw_mmu, [MO_LEUL] = helper_le_stl_mmu, @@ -1253,10 +1223,10 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, /* Record the context of a call to the out of line helper code for the slow path for a load or store, so that we can later generate the correct helper code. */ -static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc, +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addrhi, int mem_index, - uint8_t *raddr, uint8_t *label_ptr) + tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) { TCGLabelQemuLdst *label = new_ldst_label(s); @@ -1275,9 +1245,9 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { TCGReg argreg, datalo, datahi; TCGMemOp opc = lb->opc; - uintptr_t func; + void *func; - reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr); + reloc_pc24(lb->label_ptr[0], s->code_ptr); argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); if (TARGET_LONG_BITS == 64) { @@ -1292,9 +1262,9 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) icache usage. For pre-armv6, use the signed helpers since we do not have a single insn sign-extend. */ if (use_armv6_instructions) { - func = (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN]; + func = qemu_ld_helpers[opc & ~MO_SIGN]; } else { - func = (uintptr_t)qemu_ld_helpers[opc]; + func = qemu_ld_helpers[opc]; if (opc & MO_SIGN) { opc = MO_UL; } @@ -1328,7 +1298,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) break; } - tcg_out_goto(s, COND_AL, (tcg_target_long)lb->raddr); + tcg_out_goto(s, COND_AL, lb->raddr); } static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) @@ -1336,7 +1306,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) TCGReg argreg, datalo, datahi; TCGMemOp opc = lb->opc; - reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr); + reloc_pc24(lb->label_ptr[0], s->code_ptr); argreg = TCG_REG_R0; argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); @@ -1368,7 +1338,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); /* Tail-call to the helper, which will return to the fast path. 
*/ - tcg_out_goto(s, COND_AL, (uintptr_t)qemu_st_helpers[opc]); + tcg_out_goto(s, COND_AL, qemu_st_helpers[opc]); } #endif /* SOFTMMU */ @@ -1499,7 +1469,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) #ifdef CONFIG_SOFTMMU int mem_index; TCGReg addend; - uint8_t *label_ptr; + tcg_insn_unit *label_ptr; #endif datalo = *args++; @@ -1519,7 +1489,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend); - add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi, + add_qemu_ldst_label(s, true, opc, datalo, datahi, addrlo, addrhi, mem_index, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ if (GUEST_BASE) { @@ -1628,7 +1598,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #ifdef CONFIG_SOFTMMU int mem_index; TCGReg addend; - uint8_t *label_ptr; + tcg_insn_unit *label_ptr; #endif datalo = *args++; @@ -1647,7 +1617,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) label_ptr = s->code_ptr; tcg_out_bl_noaddr(s, COND_NE); - add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi, + add_qemu_ldst_label(s, false, opc, datalo, datahi, addrlo, addrhi, mem_index, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ if (GUEST_BASE) { @@ -1660,7 +1630,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #endif } -static uint8_t *tb_ret_addr; +static tcg_insn_unit *tb_ret_addr; static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) @@ -1670,51 +1640,21 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_exit_tb: - if (use_armv7_instructions || check_fit_imm(args[0])) { - tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); - tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr); - } else { - uint8_t *ld_ptr = s->code_ptr; - tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0); - tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr); - *ld_ptr = (uint8_t) (s->code_ptr - ld_ptr) - 8; - tcg_out32(s, args[0]); - } + tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); + tcg_out_goto(s, COND_AL, tb_ret_addr); break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { /* Direct jump method */ -#if defined(USE_DIRECT_JUMP) - s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); tcg_out_b_noaddr(s, COND_AL); -#else - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); - s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; - tcg_out32(s, 0); -#endif } else { /* Indirect jump method */ -#if 1 - c = (int) (s->tb_next + args[0]) - ((int) s->code_ptr + 8); - if (c > 0xfff || c < -0xfff) { - tcg_out_movi32(s, COND_AL, TCG_REG_R0, - (tcg_target_long) (s->tb_next + args[0])); - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0); - } else - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, c); -#else - tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0); - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0); - tcg_out32(s, (tcg_target_long) (s->tb_next + args[0])); -#endif + intptr_t ptr = (intptr_t)(s->tb_next + args[0]); + tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff); } - s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; - break; - case INDEX_op_call: - if (const_args[0]) - tcg_out_call(s, args[0]); - else - tcg_out_callr(s, COND_AL, args[0]); + s->tb_next_offset[args[0]] = 
tcg_current_code_size(s); break; case INDEX_op_br: tcg_out_goto_label(s, COND_AL, args[0]); @@ -1745,13 +1685,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_st32(s, COND_AL, args[0], args[1], args[2]); break; - case INDEX_op_mov_i32: - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, - args[0], 0, args[1], SHIFT_IMM_LSL(0)); - break; - case INDEX_op_movi_i32: - tcg_out_movi32(s, COND_AL, args[0], args[1]); - break; case INDEX_op_movcond_i32: /* Constraints mean that v2 is always in the same register as dest, * so we only need to do "if condition passed, move v1 to dest". @@ -1967,6 +1900,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]); break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_call: /* Always emitted via tcg_out_call. */ default: tcg_abort(); } @@ -1975,12 +1911,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, - { INDEX_op_call, { "ri" } }, { INDEX_op_br, { } }, - { INDEX_op_mov_i32, { "r", "r" } }, - { INDEX_op_movi_i32, { "r" } }, - { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, { INDEX_op_ld16u_i32, { "r", "r" } }, @@ -2145,8 +2077,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) } typedef struct { - DebugFrameCIE cie; - DebugFrameFDEHeader fde; + DebugFrameHeader h; uint8_t fde_def_cfa[4]; uint8_t fde_reg_ofs[18]; } DebugFrame; @@ -2156,16 +2087,16 @@ typedef struct { /* We're expecting a 2 byte uleb128 encoded value. */ QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); -static DebugFrame debug_frame = { - .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .cie.id = -1, - .cie.version = 1, - .cie.code_align = 1, - .cie.data_align = 0x7c, /* sleb128 -4 */ - .cie.return_column = 14, +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 0x7c, /* sleb128 -4 */ + .h.cie.return_column = 14, /* Total FDE size does not include the "len" member. */ - .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), .fde_def_cfa = { 12, 13, /* DW_CFA_def_cfa sp, ... 
*/ @@ -2188,8 +2119,5 @@ static DebugFrame debug_frame = { void tcg_register_jit(void *buf, size_t buf_size) { - debug_frame.fde.func_start = (tcg_target_long) buf; - debug_frame.fde.func_len = buf_size; - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 3746b6e29..1c719e286 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -25,8 +25,8 @@ #ifndef TCG_TARGET_ARM #define TCG_TARGET_ARM 1 -#undef TCG_TARGET_WORDS_BIGENDIAN #undef TCG_TARGET_STACK_GROWSUP +#define TCG_TARGET_INSN_UNIT_SIZE 4 typedef enum { TCG_REG_R0 = 0, @@ -79,14 +79,13 @@ extern bool use_idiv_instructions; #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i32 use_idiv_instructions #define TCG_TARGET_HAS_rem_i32 0 -#define TCG_TARGET_HAS_new_ldst 1 - extern bool tcg_target_deposit_valid(int ofs, int len); #define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index f832282d1..4133dcf1a 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -139,9 +139,9 @@ static bool have_bmi2; # define have_bmi2 0 #endif -static uint8_t *tb_ret_addr; +static tcg_insn_unit *tb_ret_addr; -static void patch_reloc(uint8_t *code_ptr, int type, +static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { value += addend; @@ -151,14 +151,14 @@ static void patch_reloc(uint8_t *code_ptr, int type, if (value != (int32_t)value) { tcg_abort(); } - *(uint32_t *)code_ptr = value; + tcg_patch32(code_ptr, value); break; case R_386_PC8: value -= (uintptr_t)code_ptr; if (value != (int8_t)value) { tcg_abort(); } - *(uint8_t *)code_ptr = value; + tcg_patch8(code_ptr, value); break; default: tcg_abort(); @@ -257,7 +257,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) } /* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct) { int ct = arg_ct->ct; @@ -859,7 +859,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small) TCGLabel *l = &s->labels[label_index]; if (l->has_value) { - val = l->u.value - (intptr_t)s->code_ptr; + val = tcg_pcrel_diff(s, l->u.value_ptr); val1 = val - 2; if ((int8_t)val1 == val1) { if (opc == -1) { @@ -1099,26 +1099,26 @@ static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest, } #endif -static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest) +static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest) { - intptr_t disp = dest - (intptr_t)s->code_ptr - 5; + intptr_t disp = tcg_pcrel_diff(s, dest) - 5; if (disp == (int32_t)disp) { tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0); tcg_out32(s, disp); } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest); tcg_out_modrm(s, OPC_GRP5, call ? 
EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10); } } -static inline void tcg_out_calli(TCGContext *s, uintptr_t dest) +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) { tcg_out_branch(s, 1, dest); } -static void tcg_out_jmp(TCGContext *s, uintptr_t dest) +static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest) { tcg_out_branch(s, 0, dest); } @@ -1127,7 +1127,7 @@ static void tcg_out_jmp(TCGContext *s, uintptr_t dest) /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * int mmu_idx, uintptr_t ra) */ -static const void * const qemu_ld_helpers[16] = { +static void * const qemu_ld_helpers[16] = { [MO_UB] = helper_ret_ldub_mmu, [MO_LEUW] = helper_le_lduw_mmu, [MO_LEUL] = helper_le_ldul_mmu, @@ -1140,7 +1140,7 @@ static const void * const qemu_ld_helpers[16] = { /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, * uintxx_t val, int mmu_idx, uintptr_t ra) */ -static const void * const qemu_st_helpers[16] = { +static void * const qemu_st_helpers[16] = { [MO_UB] = helper_ret_stb_mmu, [MO_LEUW] = helper_le_stw_mmu, [MO_LEUL] = helper_le_stl_mmu, @@ -1173,7 +1173,7 @@ static const void * const qemu_st_helpers[16] = { static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, int mem_index, TCGMemOp s_bits, - uint8_t **label_ptr, int which) + tcg_insn_unit **label_ptr, int which) { const TCGReg r0 = TCG_REG_L0; const TCGReg r1 = TCG_REG_L1; @@ -1244,11 +1244,11 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, * Record the context of a call to the out of line helper code for the slow path * for a load or store, so that we can later generate the correct helper code */ -static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc, +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addrhi, - int mem_index, uint8_t *raddr, - uint8_t **label_ptr) + int mem_index, tcg_insn_unit *raddr, + tcg_insn_unit **label_ptr) { TCGLabelQemuLdst *label = new_ldst_label(s); @@ -1273,12 +1273,12 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOp opc = l->opc; TCGReg data_reg; - uint8_t **label_ptr = &l->label_ptr[0]; + tcg_insn_unit **label_ptr = &l->label_ptr[0]; /* resolve label address */ - *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4); + tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4); + tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); } if (TCG_TARGET_REG_BITS == 32) { @@ -1308,7 +1308,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) (uintptr_t)l->raddr); } - tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN]); + tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]); data_reg = l->datalo_reg; switch (opc & MO_SSIZE) { @@ -1346,7 +1346,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } /* Jump to the code corresponding to next IR of qemu_st */ - tcg_out_jmp(s, (uintptr_t)l->raddr); + tcg_out_jmp(s, l->raddr); } /* @@ -1356,13 +1356,13 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOp opc = l->opc; TCGMemOp s_bits = opc & MO_SIZE; - uint8_t **label_ptr = &l->label_ptr[0]; + tcg_insn_unit **label_ptr = &l->label_ptr[0]; TCGReg retaddr; /* resolve label address */ - *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - 
label_ptr[0] - 4); + tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4); + tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); } if (TCG_TARGET_REG_BITS == 32) { @@ -1407,13 +1407,14 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } else { retaddr = TCG_REG_RAX; tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); - tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0); + tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, + TCG_TARGET_CALL_STACK_OFFSET); } } /* "Tail call" to the helper, with the return address back inline. */ tcg_out_push(s, retaddr); - tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[opc]); + tcg_out_jmp(s, qemu_st_helpers[opc]); } #elif defined(__x86_64__) && defined(__linux__) # include <asm/prctl.h> @@ -1534,7 +1535,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) #if defined(CONFIG_SOFTMMU) int mem_index; TCGMemOp s_bits; - uint8_t *label_ptr[2]; + tcg_insn_unit *label_ptr[2]; #endif datalo = *args++; @@ -1554,7 +1555,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); /* Record the current context of a load into ldst label */ - add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi, + add_qemu_ldst_label(s, true, opc, datalo, datahi, addrlo, addrhi, mem_index, s->code_ptr, label_ptr); #else { @@ -1665,7 +1666,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #if defined(CONFIG_SOFTMMU) int mem_index; TCGMemOp s_bits; - uint8_t *label_ptr[2]; + tcg_insn_unit *label_ptr[2]; #endif datalo = *args++; @@ -1685,7 +1686,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); /* Record the current context of a store into ldst label */ - add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi, + add_qemu_ldst_label(s, false, opc, datalo, datahi, addrlo, addrhi, mem_index, s->code_ptr, label_ptr); #else { @@ -1731,35 +1732,24 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch(opc) { case INDEX_op_exit_tb: tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]); - tcg_out_jmp(s, (uintptr_t)tb_ret_addr); + tcg_out_jmp(s, tb_ret_addr); break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { /* direct jump method */ tcg_out8(s, OPC_JMP_long); /* jmp im */ - s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); tcg_out32(s, 0); } else { /* indirect jump method */ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1, (intptr_t)(s->tb_next + args[0])); } - s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; - break; - case INDEX_op_call: - if (const_args[0]) { - tcg_out_calli(s, args[0]); - } else { - /* call *reg */ - tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]); - } + s->tb_next_offset[args[0]] = tcg_current_code_size(s); break; case INDEX_op_br: tcg_out_jxx(s, JCC_JMP, args[0], 0); break; - case INDEX_op_movi_i32: - tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); - break; OP_32_64(ld8u): /* Note that we can ignore REXW for the zero-extend to 64-bit. 
*/ tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]); @@ -2009,9 +1999,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_setcond2(s, args, const_args); break; #else /* TCG_TARGET_REG_BITS == 64 */ - case INDEX_op_movi_i64: - tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); - break; case INDEX_op_ld32s_i64: tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]); break; @@ -2068,6 +2055,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ default: tcg_abort(); } @@ -2078,10 +2070,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, - { INDEX_op_call, { "ri" } }, { INDEX_op_br, { } }, - { INDEX_op_mov_i32, { "r", "r" } }, - { INDEX_op_movi_i32, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, { INDEX_op_ld16u_i32, { "r", "r" } }, @@ -2135,8 +2124,6 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, #else - { INDEX_op_mov_i64, { "r", "r" } }, - { INDEX_op_movi_i64, { "r" } }, { INDEX_op_ld8u_i64, { "r", "r" } }, { INDEX_op_ld8s_i64, { "r", "r" } }, { INDEX_op_ld16u_i64, { "r", "r" } }, @@ -2355,8 +2342,7 @@ static void tcg_target_init(TCGContext *s) } typedef struct { - DebugFrameCIE cie; - DebugFrameFDEHeader fde; + DebugFrameHeader h; uint8_t fde_def_cfa[4]; uint8_t fde_reg_ofs[14]; } DebugFrame; @@ -2368,16 +2354,16 @@ QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); /* Host machine without ELF. */ #elif TCG_TARGET_REG_BITS == 64 #define ELF_HOST_MACHINE EM_X86_64 -static DebugFrame debug_frame = { - .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .cie.id = -1, - .cie.version = 1, - .cie.code_align = 1, - .cie.data_align = 0x78, /* sleb128 -8 */ - .cie.return_column = 16, +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 0x78, /* sleb128 -8 */ + .h.cie.return_column = 16, /* Total FDE size does not include the "len" member. */ - .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), .fde_def_cfa = { 12, 7, /* DW_CFA_def_cfa %rsp, ... */ @@ -2397,16 +2383,16 @@ static DebugFrame debug_frame = { }; #else #define ELF_HOST_MACHINE EM_386 -static DebugFrame debug_frame = { - .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .cie.id = -1, - .cie.version = 1, - .cie.code_align = 1, - .cie.data_align = 0x7c, /* sleb128 -4 */ - .cie.return_column = 8, +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 0x7c, /* sleb128 -4 */ + .h.cie.return_column = 8, /* Total FDE size does not include the "len" member. */ - .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), .fde_def_cfa = { 12, 4, /* DW_CFA_def_cfa %esp, ... 
*/ @@ -2427,9 +2413,6 @@ static DebugFrame debug_frame = { #if defined(ELF_HOST_MACHINE) void tcg_register_jit(void *buf, size_t buf_size) { - debug_frame.fde.func_start = (uintptr_t)buf; - debug_frame.fde.func_len = buf_size; - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } #endif diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index bdf222245..7a9980e70 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -24,7 +24,7 @@ #ifndef TCG_TARGET_I386 #define TCG_TARGET_I386 1 -#undef TCG_TARGET_WORDS_BIGENDIAN +#define TCG_TARGET_INSN_UNIT_SIZE 1 #ifdef __x86_64__ # define TCG_TARGET_REG_BITS 64 @@ -101,6 +101,7 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_trunc_shr_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_ext8s_i64 1 @@ -129,8 +130,6 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_mulsh_i64 0 #endif -#define TCG_TARGET_HAS_new_ldst 1 - #define TCG_TARGET_deposit_i32_valid(ofs, len) \ (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \ ((ofs) == 0 && (len) == 16)) diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 2d8e00cd9..6bc992464 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -23,8 +23,6 @@ * THE SOFTWARE. */ -#include "tcg-be-null.h" - /* * Register definitions */ @@ -221,10 +219,12 @@ enum { OPC_ALLOC_M34 = 0x02c00000000ull, OPC_BR_DPTK_FEW_B1 = 0x08400000000ull, OPC_BR_SPTK_MANY_B1 = 0x08000001000ull, + OPC_BR_CALL_SPNT_FEW_B3 = 0x0a200000000ull, OPC_BR_SPTK_MANY_B4 = 0x00100001000ull, OPC_BR_CALL_SPTK_MANY_B5 = 0x02100001000ull, OPC_BR_RET_SPTK_MANY_B4 = 0x00108001100ull, OPC_BRL_SPTK_MANY_X3 = 0x18000001000ull, + OPC_BRL_CALL_SPNT_MANY_X4 = 0x1a200001000ull, OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull, OPC_CMP_LT_A6 = 0x18000000000ull, OPC_CMP_LTU_A6 = 0x1a000000000ull, @@ -356,6 +356,15 @@ static inline uint64_t tcg_opc_b1(int qp, uint64_t opc, uint64_t imm) | (qp & 0x3f); } +static inline uint64_t tcg_opc_b3(int qp, uint64_t opc, int b1, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* s */ + | ((imm & 0x0fffff) << 13) /* imm20b */ + | ((b1 & 0x7) << 6) + | (qp & 0x3f); +} + static inline uint64_t tcg_opc_b4(int qp, uint64_t opc, int b2) { return opc @@ -683,112 +692,32 @@ static inline uint64_t tcg_opc_x4(int qp, uint64_t opc, int b1, uint64_t imm) /* - * Relocations + * Relocations - Note that we never encode branches elsewhere than slot 2. 
*/ -static inline void reloc_pcrel21b(void *pc, intptr_t target) +static void reloc_pcrel21b_slot2(tcg_insn_unit *pc, tcg_insn_unit *target) { - uint64_t imm; - int64_t disp; - int slot; - - slot = (intptr_t)pc & 3; - pc = (void *)((intptr_t)pc & ~3); - - disp = target - (intptr_t)pc; - imm = (uint64_t) disp >> 4; + uint64_t imm = target - pc; - switch(slot) { - case 0: - *(uint64_t *)(pc + 0) = (*(uint64_t *)(pc + 8) & 0xfffffdc00003ffffull) - | ((imm & 0x100000) << 21) /* s */ - | ((imm & 0x0fffff) << 18); /* imm20b */ - break; - case 1: - *(uint64_t *)(pc + 8) = (*(uint64_t *)(pc + 8) & 0xfffffffffffb8000ull) - | ((imm & 0x100000) >> 2) /* s */ - | ((imm & 0x0fffe0) >> 5); /* imm20b */ - *(uint64_t *)(pc + 0) = (*(uint64_t *)(pc + 0) & 0x07ffffffffffffffull) - | ((imm & 0x00001f) << 59); /* imm20b */ - break; - case 2: - *(uint64_t *)(pc + 8) = (*(uint64_t *)(pc + 8) & 0xf700000fffffffffull) - | ((imm & 0x100000) << 39) /* s */ - | ((imm & 0x0fffff) << 36); /* imm20b */ - break; - } + pc->hi = (pc->hi & 0xf700000fffffffffull) + | ((imm & 0x100000) << 39) /* s */ + | ((imm & 0x0fffff) << 36); /* imm20b */ } -static inline uint64_t get_reloc_pcrel21b (void *pc) +static uint64_t get_reloc_pcrel21b_slot2(tcg_insn_unit *pc) { - int64_t low, high; - int slot; - - slot = (tcg_target_long) pc & 3; - pc = (void *)((tcg_target_long) pc & ~3); - - low = (*(uint64_t *)(pc + 0)); - high = (*(uint64_t *)(pc + 8)); + int64_t high = pc->hi; - switch(slot) { - case 0: - return ((low >> 21) & 0x100000) + /* s */ - ((low >> 18) & 0x0fffff); /* imm20b */ - case 1: - return ((high << 2) & 0x100000) + /* s */ - ((high << 5) & 0x0fffe0) + /* imm20b */ - ((low >> 59) & 0x00001f); /* imm20b */ - case 2: - return ((high >> 39) & 0x100000) + /* s */ - ((high >> 36) & 0x0fffff); /* imm20b */ - default: - tcg_abort(); - } + return ((high >> 39) & 0x100000) + /* s */ + ((high >> 36) & 0x0fffff); /* imm20b */ } -static inline void reloc_pcrel60b(void *pc, intptr_t target) -{ - int64_t disp; - uint64_t imm; - - disp = target - (intptr_t)pc; - imm = (uint64_t) disp >> 4; - - *(uint64_t *)(pc + 8) = (*(uint64_t *)(pc + 8) & 0xf700000fff800000ull) - | (imm & 0x0800000000000000ull) /* s */ - | ((imm & 0x07fffff000000000ull) >> 36) /* imm39 */ - | ((imm & 0x00000000000fffffull) << 36); /* imm20b */ - *(uint64_t *)(pc + 0) = (*(uint64_t *)(pc + 0) & 0x00003fffffffffffull) - | ((imm & 0x0000000ffff00000ull) << 28); /* imm39 */ -} - -static inline uint64_t get_reloc_pcrel60b (void *pc) -{ - int64_t low, high; - - low = (*(uint64_t *)(pc + 0)); - high = (*(uint64_t *)(pc + 8)); - - return ((high) & 0x0800000000000000ull) + /* s */ - ((high >> 36) & 0x00000000000fffffull) + /* imm20b */ - ((high << 36) & 0x07fffff000000000ull) + /* imm39 */ - ((low >> 28) & 0x0000000ffff00000ull); /* imm39 */ -} - - -static void patch_reloc(uint8_t *code_ptr, int type, +static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { - value += addend; - switch (type) { - case R_IA64_PCREL21B: - reloc_pcrel21b(code_ptr, value); - break; - case R_IA64_PCREL60B: - reloc_pcrel60b(code_ptr, value); - default: - tcg_abort(); - } + assert(addend == 0); + assert(type == R_IA64_PCREL21B); + reloc_pcrel21b_slot2(code_ptr, (tcg_insn_unit *)value); } /* @@ -815,6 +744,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) #if defined(CONFIG_SOFTMMU) tcg_regset_reset_reg(ct->u.regs, TCG_REG_R56); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R57); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R58); #endif 
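The reloc_pcrel21b_slot2/get_reloc_pcrel21b_slot2 pair above relies on the ia64 backend now only ever encoding the relocated branch in slot 2 of a bundle, so only the high 64-bit word of the 16-byte bundle carries the immediate. A minimal standalone sketch of the same field packing, using an illustrative bundle_t stand-in and hypothetical pack/unpack names rather than the real tcg_insn_unit helpers:

    /* Sketch only: pack and unpack the 21-bit PC-relative branch immediate
     * (sign bit "s" plus 20-bit "imm20b") held in slot 2 of an IA-64 bundle,
     * mirroring reloc_pcrel21b_slot2/get_reloc_pcrel21b_slot2 above. */
    #include <stdint.h>

    typedef struct { uint64_t lo, hi; } bundle_t;  /* stand-in for tcg_insn_unit */

    static void pack_pcrel21b_slot2(bundle_t *b, uint64_t imm)
    {
        b->hi = (b->hi & 0xf700000fffffffffull)
              | ((imm & 0x100000) << 39)    /* s */
              | ((imm & 0x0fffff) << 36);   /* imm20b */
    }

    static uint64_t unpack_pcrel21b_slot2(const bundle_t *b)
    {
        return ((b->hi >> 39) & 0x100000)   /* s */
             + ((b->hi >> 36) & 0x0fffff);  /* imm20b */
    }

The immediate counts whole 16-byte bundles, which is why the new code can use "target - pc" directly on tcg_insn_unit pointers where the old code shifted a byte displacement right by 4.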
break; case 'Z': @@ -832,7 +762,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) } /* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct) { int ct; @@ -851,7 +781,7 @@ static inline int tcg_target_const_match(tcg_target_long val, * Code generation */ -static uint8_t *tb_ret_addr; +static tcg_insn_unit *tb_ret_addr; static inline void tcg_out_bundle(TCGContext *s, int template, uint64_t slot0, uint64_t slot1, @@ -862,9 +792,10 @@ static inline void tcg_out_bundle(TCGContext *s, int template, slot1 &= 0x1ffffffffffull; /* 41 bits */ slot2 &= 0x1ffffffffffull; /* 41 bits */ - *(uint64_t *)(s->code_ptr + 0) = (slot1 << 46) | (slot0 << 5) | template; - *(uint64_t *)(s->code_ptr + 8) = (slot2 << 23) | (slot1 >> 18); - s->code_ptr += 16; + *s->code_ptr++ = (tcg_insn_unit){ + (slot1 << 46) | (slot0 << 5) | template, + (slot2 << 23) | (slot1 >> 18) + }; } static inline uint64_t tcg_opc_mov_a(int qp, TCGReg dst, TCGReg src) @@ -899,33 +830,34 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, static void tcg_out_br(TCGContext *s, int label_index) { TCGLabel *l = &s->labels[label_index]; + uint64_t imm; /* We pay attention here to not modify the branch target by reading the existing value and using it again. This ensure that caches and memory are kept coherent during retranslation. */ - tcg_out_bundle(s, mmB, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_b1 (TCG_REG_P0, OPC_BR_SPTK_MANY_B1, - get_reloc_pcrel21b(s->code_ptr + 2))); - if (l->has_value) { - reloc_pcrel21b((s->code_ptr - 16) + 2, l->u.value); + imm = l->u.value_ptr - s->code_ptr; } else { - tcg_out_reloc(s, (s->code_ptr - 16) + 2, - R_IA64_PCREL21B, label_index, 0); + imm = get_reloc_pcrel21b_slot2(s->code_ptr); + tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, label_index, 0); } + + tcg_out_bundle(s, mmB, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_b1(TCG_REG_P0, OPC_BR_SPTK_MANY_B1, imm)); } -static inline void tcg_out_calli(TCGContext *s, uintptr_t addr) +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *desc) { + uintptr_t func = desc->lo, gp = desc->hi, disp; + /* Look through the function descriptor. */ - uintptr_t disp, *desc = (uintptr_t *)addr; tcg_out_bundle(s, mlx, INSN_NOP_M, - tcg_opc_l2 (desc[1]), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, desc[1])); - disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4; + tcg_opc_l2 (gp), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, gp)); + disp = (tcg_insn_unit *)func - s->code_ptr; tcg_out_bundle(s, mLX, INSN_NOP_M, tcg_opc_l4 (disp), @@ -933,32 +865,22 @@ static inline void tcg_out_calli(TCGContext *s, uintptr_t addr) TCG_REG_B0, disp)); } -static inline void tcg_out_callr(TCGContext *s, TCGReg addr) -{ - tcg_out_bundle(s, MmI, - tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, addr), - tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R3, 8, addr), - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, - TCG_REG_B6, TCG_REG_R2, 0)); - tcg_out_bundle(s, mmB, - tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R3), - INSN_NOP_M, - tcg_opc_b5 (TCG_REG_P0, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); -} - static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg) { - int64_t disp; - uint64_t imm; + uint64_t imm, opc1; - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg); + /* At least arg == 0 is a common operation. 
*/ + if (arg == sextract64(arg, 0, 22)) { + opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R8, arg); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg); + opc1 = INSN_NOP_M; + } - disp = tb_ret_addr - s->code_ptr; - imm = (uint64_t)disp >> 4; + imm = tb_ret_addr - s->code_ptr; tcg_out_bundle(s, mLX, - INSN_NOP_M, + opc1, tcg_opc_l3 (imm), tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm)); } @@ -984,7 +906,7 @@ static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg) tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); } - s->tb_next_offset[arg] = s->code_ptr - s->code_buf; + s->tb_next_offset[arg] = tcg_current_code_size(s); } static inline void tcg_out_jmp(TCGContext *s, TCGArg addr) @@ -1505,19 +1427,22 @@ static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, TCGReg arg2, int label_index, int cmp4) { TCGLabel *l = &s->labels[label_index]; + uint64_t imm; - tcg_out_bundle(s, miB, - INSN_NOP_M, - tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), - tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1, - get_reloc_pcrel21b(s->code_ptr + 2))); - + /* We pay attention here to not modify the branch target by reading + the existing value and using it again. This ensure that caches and + memory are kept coherent during retranslation. */ if (l->has_value) { - reloc_pcrel21b((s->code_ptr - 16) + 2, l->u.value); + imm = l->u.value_ptr - s->code_ptr; } else { - tcg_out_reloc(s, (s->code_ptr - 16) + 2, - R_IA64_PCREL21B, label_index, 0); + imm = get_reloc_pcrel21b_slot2(s->code_ptr); + tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, label_index, 0); } + + tcg_out_bundle(s, miB, + INSN_NOP_M, + tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), + tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1, imm)); } static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret, @@ -1558,238 +1483,285 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret, } #if defined(CONFIG_SOFTMMU) +/* We're expecting to use an signed 22-bit immediate add. */ +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) + > 0x1fffff) + /* Load and compare a TLB entry, and return the result in (p6, p7). - R2 is loaded with the address of the addend TLB entry. - R57 is loaded with the address, zero extented on 32-bit targets. */ -static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg, - TCGMemOp s_bits, uint64_t offset_rw, - uint64_t offset_addend) -{ - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2, + R2 is loaded with the addend TLB entry. + R57 is loaded with the address, zero extented on 32-bit targets. + R1, R3 are clobbered, leaving R56 free for... + BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store. */ +static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, + TCGMemOp s_bits, int off_rw, int off_add, + uint64_t bswap1, uint64_t bswap2) +{ + /* + .mii + mov r2 = off_rw + extr.u r3 = addr_reg, ... # extract tlb page + zxt4 r57 = addr_reg # or mov for 64-bit guest + ;; + .mii + addl r2 = r2, areg0 + shl r3 = r3, cteb # via dep.z + dep r1 = 0, r57, ... 
# zero page ofs, keep align + ;; + .mmi + add r2 = r2, r3 + ;; + ld4 r3 = [r2], off_add-off_rw # or ld8 for 64-bit guest + nop + ;; + .mmi + nop + cmp.eq p6, p7 = r3, r58 + nop + ;; + */ + tcg_out_bundle(s, miI, + tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, off_rw), + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R3, addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2, - TCG_REG_R2, 63 - CPU_TLB_ENTRY_BITS, - 63 - CPU_TLB_ENTRY_BITS)); - tcg_out_bundle(s, mII, - tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2, - offset_rw, TCG_REG_R2), tcg_opc_ext_i(TCG_REG_P0, TARGET_LONG_BITS == 32 ? MO_UL : MO_Q, - TCG_REG_R57, addr_reg), + TCG_REG_R57, addr_reg)); + tcg_out_bundle(s, miI, tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, TCG_AREG0)); - tcg_out_bundle(s, mII, + TCG_REG_R2, TCG_AREG0), + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3, + TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS, + 63 - CPU_TLB_ENTRY_BITS), + tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0, + TCG_REG_R57, 63 - s_bits, + TARGET_PAGE_BITS - s_bits - 1)); + tcg_out_bundle(s, MmI, + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, + TCG_REG_R2, TCG_REG_R2, TCG_REG_R3), tcg_opc_m3 (TCG_REG_P0, (TARGET_LONG_BITS == 32 - ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R56, - TCG_REG_R2, offset_addend - offset_rw), - tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R3, 0, - TCG_REG_R57, 63 - s_bits, - TARGET_PAGE_BITS - s_bits - 1), + ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3, + TCG_REG_R2, off_add - off_rw), + bswap1); + tcg_out_bundle(s, mmI, + tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, TCG_REG_R2), tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6, - TCG_REG_P7, TCG_REG_R3, TCG_REG_R56)); + TCG_REG_P7, TCG_REG_R1, TCG_REG_R3), + bswap2); } -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ -static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, -}; +#define TCG_MAX_QEMU_LDST 640 + +typedef struct TCGLabelQemuLdst { + bool is_ld; + TCGMemOp size; + tcg_insn_unit *label_ptr; /* label pointers to be updated */ +} TCGLabelQemuLdst; + +typedef struct TCGBackendData { + int nb_ldst_labels; + TCGLabelQemuLdst ldst_labels[TCG_MAX_QEMU_LDST]; +} TCGBackendData; + +static inline void tcg_out_tb_init(TCGContext *s) +{ + s->be->nb_ldst_labels = 0; +} + +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, + tcg_insn_unit *label_ptr) +{ + TCGBackendData *be = s->be; + TCGLabelQemuLdst *l = &be->ldst_labels[be->nb_ldst_labels++]; -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, - TCGMemOp opc) + assert(be->nb_ldst_labels <= TCG_MAX_QEMU_LDST); + l->is_ld = is_ld; + l->size = opc & MO_SIZE; + l->label_ptr = label_ptr; +} + +static void tcg_out_tb_finalize(TCGContext *s) +{ + static const void * const helpers[8] = { + helper_ret_stb_mmu, + helper_le_stw_mmu, + helper_le_stl_mmu, + helper_le_stq_mmu, + helper_ret_ldub_mmu, + helper_le_lduw_mmu, + helper_le_ldul_mmu, + helper_le_ldq_mmu, + }; + tcg_insn_unit *thunks[8] = { }; + TCGBackendData *be = s->be; + size_t i, n = be->nb_ldst_labels; + + for (i = 0; i < n; i++) { + TCGLabelQemuLdst *l = &be->ldst_labels[i]; + long x = l->is_ld * 4 + l->size; + tcg_insn_unit *dest = thunks[x]; + + /* The out-of-line thunks are all the same; load the return address + from B0, load the GP, and branch to the code. 
Note that we are + always post-call, so the register window has rolled, so we're + using incomming parameter register numbers, not outgoing. */ + if (dest == NULL) { + uintptr_t *desc = (uintptr_t *)helpers[x]; + uintptr_t func = desc[0], gp = desc[1], disp; + + thunks[x] = dest = s->code_ptr; + + tcg_out_bundle(s, mlx, + INSN_NOP_M, + tcg_opc_l2 (gp), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, + TCG_REG_R1, gp)); + tcg_out_bundle(s, mii, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, + l->is_ld ? TCG_REG_R35 : TCG_REG_R36, + TCG_REG_B0)); + disp = (tcg_insn_unit *)func - s->code_ptr; + tcg_out_bundle(s, mLX, + INSN_NOP_M, + tcg_opc_l3 (disp), + tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, disp)); + } + + reloc_pcrel21b_slot2(l->label_ptr, dest); + } +} + +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) { static const uint64_t opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 }; int addr_reg, data_reg, mem_index; - TCGMemOp s_bits, bswap; - - data_reg = *args++; - addr_reg = *args++; - mem_index = *args; + TCGMemOp opc, s_bits; + uint64_t fin1, fin2; + tcg_insn_unit *label_ptr; + + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; + mem_index = args[3]; s_bits = opc & MO_SIZE; - bswap = opc & MO_BSWAP; /* Read the TLB entry */ tcg_out_qemu_tlb(s, addr_reg, s_bits, offsetof(CPUArchState, tlb_table[mem_index][0].addr_read), - offsetof(CPUArchState, tlb_table[mem_index][0].addend)); + offsetof(CPUArchState, tlb_table[mem_index][0].addend), + INSN_NOP_I, INSN_NOP_I); /* P6 is the fast path, and P7 the slow path */ - tcg_out_bundle(s, mLX, - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), - tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]), - tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2, - (tcg_target_long) qemu_ld_helpers[s_bits])); - tcg_out_bundle(s, MmI, - tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3, - TCG_REG_R2, 8), - tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3, - TCG_REG_R3, TCG_REG_R57), - tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, - TCG_REG_R3, 0)); - if (bswap && s_bits == MO_16) { - tcg_out_bundle(s, MmI, - tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R3), - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R8, TCG_REG_R8, 15, 15)); - } else if (bswap && s_bits == MO_32) { - tcg_out_bundle(s, MmI, - tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R3), - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R8, TCG_REG_R8, 31, 31)); - } else { - tcg_out_bundle(s, mmI, - tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R3), - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - INSN_NOP_I); - } - if (!bswap) { - tcg_out_bundle(s, miB, - tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index), - INSN_NOP_I, - tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); + + fin2 = 0; + if (opc & MO_BSWAP) { + fin1 = tcg_opc_bswap64_i(TCG_REG_P0, data_reg, TCG_REG_R8); + if (s_bits < MO_64) { + int shift = 64 - (8 << s_bits); + fin2 = (opc & MO_SIGN ? 
OPC_EXTR_I11 : OPC_EXTR_U_I11); + fin2 = tcg_opc_i11(TCG_REG_P0, fin2, + data_reg, data_reg, shift, 63 - shift); + } } else { - tcg_out_bundle(s, miB, - tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index), - tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8), - tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); + fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8); } - tcg_out_bundle(s, miI, - INSN_NOP_M, + tcg_out_bundle(s, mmI, + tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), + tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, + TCG_REG_R2, TCG_REG_R57), + tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index)); + label_ptr = s->code_ptr; + tcg_out_bundle(s, miB, + tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], + TCG_REG_R8, TCG_REG_R2), INSN_NOP_I, - tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8)); -} + tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, + get_reloc_pcrel21b_slot2(label_ptr))); -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ -static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, -}; + add_qemu_ldst_label(s, 1, opc, label_ptr); + + /* Note that we always use LE helper functions, so the bswap insns + here for the fast path also apply to the slow path. */ + tcg_out_bundle(s, (fin2 ? mII : miI), + INSN_NOP_M, + fin1, + fin2 ? fin2 : INSN_NOP_I); +} -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, - TCGMemOp opc) +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) { static const uint64_t opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 }; - int addr_reg, data_reg, mem_index; - TCGMemOp s_bits; - - data_reg = *args++; - addr_reg = *args++; - mem_index = *args; + TCGReg addr_reg, data_reg; + int mem_index; + uint64_t pre1, pre2; + TCGMemOp opc, s_bits; + tcg_insn_unit *label_ptr; + + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; + mem_index = args[3]; s_bits = opc & MO_SIZE; + /* Note that we always use LE helper functions, so the bswap insns + that are here for the fast path also apply to the slow path, + and move the data into the argument register. */ + pre2 = INSN_NOP_I; + if (opc & MO_BSWAP) { + pre1 = tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R58, data_reg); + if (s_bits < MO_64) { + int shift = 64 - (8 << s_bits); + pre2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, + TCG_REG_R58, TCG_REG_R58, shift, 63 - shift); + } + } else { + /* Just move the data into place for the slow path. 
*/ + pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg); + } + tcg_out_qemu_tlb(s, addr_reg, s_bits, offsetof(CPUArchState, tlb_table[mem_index][0].addr_write), - offsetof(CPUArchState, tlb_table[mem_index][0].addend)); + offsetof(CPUArchState, tlb_table[mem_index][0].addend), + pre1, pre2); /* P6 is the fast path, and P7 the slow path */ - tcg_out_bundle(s, mLX, + tcg_out_bundle(s, mmI, tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), - tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[s_bits]), - tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2, - (tcg_target_long) qemu_st_helpers[s_bits])); - tcg_out_bundle(s, MmI, - tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3, - TCG_REG_R2, 8), - tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3, - TCG_REG_R3, TCG_REG_R57), - tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, - TCG_REG_R3, 0)); - - switch (opc) { - case MO_8: - case MO_16: - case MO_32: - case MO_64: - tcg_out_bundle(s, mii, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), - INSN_NOP_I); - break; - - case MO_16 | MO_BSWAP: - tcg_out_bundle(s, miI, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R2, data_reg, 15, 15)); - tcg_out_bundle(s, miI, - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2)); - data_reg = TCG_REG_R2; - break; - - case MO_32 | MO_BSWAP: - tcg_out_bundle(s, miI, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R2, data_reg, 31, 31)); - tcg_out_bundle(s, miI, - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2)); - data_reg = TCG_REG_R2; - break; - - case MO_64 | MO_BSWAP: - tcg_out_bundle(s, miI, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), - tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, data_reg)); - data_reg = TCG_REG_R2; - break; - - default: - tcg_abort(); - } - + tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, + TCG_REG_R2, TCG_REG_R57), + tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index)); + label_ptr = s->code_ptr; tcg_out_bundle(s, miB, tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits], - data_reg, TCG_REG_R3), - tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index), - tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); + TCG_REG_R58, TCG_REG_R2), + INSN_NOP_I, + tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, + get_reloc_pcrel21b_slot2(label_ptr))); + + add_qemu_ldst_label(s, 0, opc, label_ptr); } #else /* !CONFIG_SOFTMMU */ +# include "tcg-be-null.h" -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, - TCGMemOp opc) +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) { static uint64_t const opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 }; int addr_reg, data_reg; - TCGMemOp s_bits, bswap; + TCGMemOp opc, s_bits, bswap; - data_reg = *args++; - addr_reg = *args++; + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; s_bits = opc & MO_SIZE; bswap = opc & MO_BSWAP; @@ -1900,8 +1872,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, #endif } -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, - TCGMemOp opc) +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) { static uint64_t 
const opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 @@ -1910,10 +1881,11 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, #if TARGET_LONG_BITS == 64 uint64_t add_guest_base; #endif - TCGMemOp s_bits, bswap; + TCGMemOp opc, s_bits, bswap; - data_reg = *args++; - addr_reg = *args++; + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; s_bits = opc & MO_SIZE; bswap = opc & MO_BSWAP; @@ -2023,24 +1995,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_br: tcg_out_br(s, args[0]); break; - case INDEX_op_call: - if (likely(const_args[0])) { - tcg_out_calli(s, args[0]); - } else { - tcg_out_callr(s, args[0]); - } - break; case INDEX_op_goto_tb: tcg_out_goto_tb(s, args[0]); break; - case INDEX_op_movi_i32: - tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); - break; - case INDEX_op_movi_i64: - tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]); @@ -2237,42 +2195,24 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, args[3], const_args[3], args[4], const_args[4], 0); break; - case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld(s, args, MO_UB); - break; - case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, args, MO_SB); - break; - case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld(s, args, MO_TEUW); + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args); break; - case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, args, MO_TESW); + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args); break; - case INDEX_op_qemu_ld32: - case INDEX_op_qemu_ld32u: - tcg_out_qemu_ld(s, args, MO_TEUL); + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args); break; - case INDEX_op_qemu_ld32s: - tcg_out_qemu_ld(s, args, MO_TESL); - break; - case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, MO_TEQ); - break; - - case INDEX_op_qemu_st8: - tcg_out_qemu_st(s, args, MO_UB); - break; - case INDEX_op_qemu_st16: - tcg_out_qemu_st(s, args, MO_TEUW); - break; - case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, args, MO_TEUL); - break; - case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, MO_TEQ); + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args); break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. 
*/ default: tcg_abort(); } @@ -2280,13 +2220,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_br, { } }, - { INDEX_op_call, { "ri" } }, { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, - { INDEX_op_mov_i32, { "r", "r" } }, - { INDEX_op_movi_i32, { "r" } }, - { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, { INDEX_op_ld16u_i32, { "r", "r" } }, @@ -2328,9 +2264,6 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } }, - { INDEX_op_mov_i64, { "r", "r" } }, - { INDEX_op_movi_i64, { "r" } }, - { INDEX_op_ld8u_i64, { "r", "r" } }, { INDEX_op_ld8s_i64, { "r", "r" } }, { INDEX_op_ld16u_i64, { "r", "r" } }, @@ -2381,19 +2314,10 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_deposit_i32, { "r", "rZ", "ri" } }, { INDEX_op_deposit_i64, { "r", "rZ", "ri" } }, - { INDEX_op_qemu_ld8u, { "r", "r" } }, - { INDEX_op_qemu_ld8s, { "r", "r" } }, - { INDEX_op_qemu_ld16u, { "r", "r" } }, - { INDEX_op_qemu_ld16s, { "r", "r" } }, - { INDEX_op_qemu_ld32, { "r", "r" } }, - { INDEX_op_qemu_ld32u, { "r", "r" } }, - { INDEX_op_qemu_ld32s, { "r", "r" } }, - { INDEX_op_qemu_ld64, { "r", "r" } }, - - { INDEX_op_qemu_st8, { "SZ", "r" } }, - { INDEX_op_qemu_st16, { "SZ", "r" } }, - { INDEX_op_qemu_st32, { "SZ", "r" } }, - { INDEX_op_qemu_st64, { "SZ", "r" } }, + { INDEX_op_qemu_ld_i32, { "r", "r" } }, + { INDEX_op_qemu_ld_i64, { "r", "r" } }, + { INDEX_op_qemu_st_i32, { "SZ", "r" } }, + { INDEX_op_qemu_st_i64, { "SZ", "r" } }, { -1 }, }; @@ -2412,8 +2336,11 @@ static void tcg_target_qemu_prologue(TCGContext *s) CPU_TEMP_BUF_NLONGS * sizeof(long)); /* First emit adhoc function descriptor */ - *(uint64_t *)(s->code_ptr) = (uint64_t)s->code_ptr + 16; /* entry point */ - s->code_ptr += 16; /* skip GP */ + *s->code_ptr = (tcg_insn_unit){ + (uint64_t)(s->code_ptr + 1), /* entry point */ + 0 /* skip gp */ + }; + s->code_ptr++; /* prologue */ tcg_out_bundle(s, miI, diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index 52a939c94..d67558988 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -25,6 +25,12 @@ #ifndef TCG_TARGET_IA64 #define TCG_TARGET_IA64 1 +#define TCG_TARGET_INSN_UNIT_SIZE 16 +typedef struct { + uint64_t lo __attribute__((aligned(16))); + uint64_t hi; +} tcg_insn_unit; + /* We only map the first 64 registers */ #define TCG_TARGET_NB_REGS 64 typedef enum { @@ -152,8 +158,7 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_mulsh_i64 0 - -#define TCG_TARGET_HAS_new_ldst 0 +#define TCG_TARGET_HAS_trunc_shr_i32 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 40551cdcb..9cce3563a 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -24,14 +24,17 @@ * THE SOFTWARE. 
*/ -#include "tcg-be-null.h" +#include "tcg-be-ldst.h" -#if defined(TCG_TARGET_WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN) -# define TCG_NEED_BSWAP 0 +#ifdef HOST_WORDS_BIGENDIAN +# define MIPS_BE 1 #else -# define TCG_NEED_BSWAP 1 +# define MIPS_BE 0 #endif +#define LO_OFF (MIPS_BE * 4) +#define HI_OFF (4 - LO_OFF) + #ifndef NDEBUG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "zero", @@ -64,13 +67,17 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "k1", "gp", "sp", - "fp", + "s8", "ra", }; #endif +#define TCG_TMP0 TCG_REG_AT +#define TCG_TMP1 TCG_REG_T9 + /* check if we really need so many registers :P */ static const TCGReg tcg_target_reg_alloc_order[] = { + /* Call saved registers. */ TCG_REG_S0, TCG_REG_S1, TCG_REG_S2, @@ -79,6 +86,10 @@ static const TCGReg tcg_target_reg_alloc_order[] = { TCG_REG_S5, TCG_REG_S6, TCG_REG_S7, + TCG_REG_S8, + + /* Call clobbered registers. */ + TCG_REG_T0, TCG_REG_T1, TCG_REG_T2, TCG_REG_T3, @@ -88,12 +99,14 @@ static const TCGReg tcg_target_reg_alloc_order[] = { TCG_REG_T7, TCG_REG_T8, TCG_REG_T9, - TCG_REG_A0, - TCG_REG_A1, - TCG_REG_A2, - TCG_REG_A3, + TCG_REG_V1, TCG_REG_V0, - TCG_REG_V1 + + /* Argument registers, opposite order of allocation. */ + TCG_REG_A3, + TCG_REG_A2, + TCG_REG_A1, + TCG_REG_A0, }; static const TCGReg tcg_target_call_iarg_regs[4] = { @@ -108,83 +121,49 @@ static const TCGReg tcg_target_call_oarg_regs[2] = { TCG_REG_V1 }; -static uint8_t *tb_ret_addr; +static tcg_insn_unit *tb_ret_addr; -static inline uint32_t reloc_lo16_val(void *pc, intptr_t target) +static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target) { - return target & 0xffff; + /* Let the compiler perform the right-shift as part of the arithmetic. 
*/ + ptrdiff_t disp = target - (pc + 1); + assert(disp == (int16_t)disp); + return disp & 0xffff; } -static inline void reloc_lo16(void *pc, intptr_t target) +static inline void reloc_pc16(tcg_insn_unit *pc, tcg_insn_unit *target) { - *(uint32_t *) pc = (*(uint32_t *) pc & ~0xffff) - | reloc_lo16_val(pc, target); + *pc = deposit32(*pc, 0, 16, reloc_pc16_val(pc, target)); } -static inline uint32_t reloc_hi16_val(void *pc, intptr_t target) +static inline uint32_t reloc_26_val(tcg_insn_unit *pc, tcg_insn_unit *target) { - return (target >> 16) & 0xffff; + assert((((uintptr_t)pc ^ (uintptr_t)target) & 0xf0000000) == 0); + return ((uintptr_t)target >> 2) & 0x3ffffff; } -static inline void reloc_hi16(void *pc, intptr_t target) +static inline void reloc_26(tcg_insn_unit *pc, tcg_insn_unit *target) { - *(uint32_t *) pc = (*(uint32_t *) pc & ~0xffff) - | reloc_hi16_val(pc, target); -} - -static inline uint32_t reloc_pc16_val(void *pc, intptr_t target) -{ - int32_t disp; - - disp = target - (intptr_t)pc - 4; - if (disp != (disp << 14) >> 14) { - tcg_abort (); - } - - return (disp >> 2) & 0xffff; + *pc = deposit32(*pc, 0, 26, reloc_26_val(pc, target)); } -static inline void reloc_pc16 (void *pc, tcg_target_long target) +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) { - *(uint32_t *) pc = (*(uint32_t *) pc & ~0xffff) - | reloc_pc16_val(pc, target); + assert(type == R_MIPS_PC16); + assert(addend == 0); + reloc_pc16(code_ptr, (tcg_insn_unit *)value); } -static inline uint32_t reloc_26_val (void *pc, tcg_target_long target) -{ - if ((((tcg_target_long)pc + 4) & 0xf0000000) != (target & 0xf0000000)) { - tcg_abort (); - } +#define TCG_CT_CONST_ZERO 0x100 +#define TCG_CT_CONST_U16 0x200 /* Unsigned 16-bit: 0 - 0xffff. */ +#define TCG_CT_CONST_S16 0x400 /* Signed 16-bit: -32768 - 32767 */ +#define TCG_CT_CONST_P2M1 0x800 /* Power of 2 minus 1. 
*/ +#define TCG_CT_CONST_N16 0x1000 /* "Negatable" 16-bit: -32767 - 32767 */ - return (target >> 2) & 0x3ffffff; -} - -static inline void reloc_pc26(void *pc, intptr_t target) +static inline bool is_p2m1(tcg_target_long val) { - *(uint32_t *) pc = (*(uint32_t *) pc & ~0x3ffffff) - | reloc_26_val(pc, target); -} - -static void patch_reloc(uint8_t *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - value += addend; - switch(type) { - case R_MIPS_LO16: - reloc_lo16(code_ptr, value); - break; - case R_MIPS_HI16: - reloc_hi16(code_ptr, value); - break; - case R_MIPS_PC16: - reloc_pc16(code_ptr, value); - break; - case R_MIPS_26: - reloc_pc26(code_ptr, value); - break; - default: - tcg_abort(); - } + return val && ((val + 1) & val) == 0; } /* parse target specific constraints */ @@ -198,11 +177,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) ct->ct |= TCG_CT_REG; tcg_regset_set(ct->u.regs, 0xffffffff); break; - case 'C': - ct->ct |= TCG_CT_REG; - tcg_regset_clear(ct->u.regs); - tcg_regset_set_reg(ct->u.regs, TCG_REG_T9); - break; case 'L': /* qemu_ld output arg constraint */ ct->ct |= TCG_CT_REG; tcg_regset_set(ct->u.regs, 0xffffffff); @@ -211,11 +185,11 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) case 'l': /* qemu_ld input arg constraint */ ct->ct |= TCG_CT_REG; tcg_regset_set(ct->u.regs, 0xffffffff); -#if defined(CONFIG_SOFTMMU) tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); -# if (TARGET_LONG_BITS == 64) - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); -# endif +#if defined(CONFIG_SOFTMMU) + if (TARGET_LONG_BITS == 64) { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); + } #endif break; case 'S': /* qemu_st constraint */ @@ -223,13 +197,12 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) tcg_regset_set(ct->u.regs, 0xffffffff); tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); #if defined(CONFIG_SOFTMMU) -# if (TARGET_LONG_BITS == 32) - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1); -# endif - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); -# if TARGET_LONG_BITS == 64 - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3); -# endif + if (TARGET_LONG_BITS == 32) { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1); + } else { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3); + } #endif break; case 'I': @@ -238,6 +211,12 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) case 'J': ct->ct |= TCG_CT_CONST_S16; break; + case 'K': + ct->ct |= TCG_CT_CONST_P2M1; + break; + case 'N': + ct->ct |= TCG_CT_CONST_N16; + break; case 'Z': /* We are cheating a bit here, using the fact that the register ZERO is also the register number 0. 
Hence there is no need @@ -253,25 +232,32 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) } /* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct) { int ct; ct = arg_ct->ct; - if (ct & TCG_CT_CONST) + if (ct & TCG_CT_CONST) { + return 1; + } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { return 1; - else if ((ct & TCG_CT_CONST_ZERO) && val == 0) + } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { return 1; - else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) + } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { return 1; - else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) + } else if ((ct & TCG_CT_CONST_N16) && val >= -32767 && val <= 32767) { return 1; - else - return 0; + } else if ((ct & TCG_CT_CONST_P2M1) + && use_mips32r2_instructions && is_p2m1(val)) { + return 1; + } + return 0; } /* instruction opcodes */ -enum { +typedef enum { + OPC_J = 0x02 << 26, + OPC_JAL = 0x03 << 26, OPC_BEQ = 0x04 << 26, OPC_BNE = 0x05 << 26, OPC_BLEZ = 0x06 << 26, @@ -329,16 +315,17 @@ enum { OPC_MUL = OPC_SPECIAL2 | 0x002, OPC_SPECIAL3 = 0x1f << 26, + OPC_EXT = OPC_SPECIAL3 | 0x000, OPC_INS = OPC_SPECIAL3 | 0x004, OPC_WSBH = OPC_SPECIAL3 | 0x0a0, OPC_SEB = OPC_SPECIAL3 | 0x420, OPC_SEH = OPC_SPECIAL3 | 0x620, -}; +} MIPSInsn; /* * Type reg */ -static inline void tcg_out_opc_reg(TCGContext *s, int opc, +static inline void tcg_out_opc_reg(TCGContext *s, MIPSInsn opc, TCGReg rd, TCGReg rs, TCGReg rt) { int32_t inst; @@ -353,7 +340,7 @@ static inline void tcg_out_opc_reg(TCGContext *s, int opc, /* * Type immediate */ -static inline void tcg_out_opc_imm(TCGContext *s, int opc, +static inline void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc, TCGReg rt, TCGReg rs, TCGArg imm) { int32_t inst; @@ -366,15 +353,31 @@ static inline void tcg_out_opc_imm(TCGContext *s, int opc, } /* + * Type bitfield + */ +static inline void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt, + TCGReg rs, int msb, int lsb) +{ + int32_t inst; + + inst = opc; + inst |= (rs & 0x1F) << 21; + inst |= (rt & 0x1F) << 16; + inst |= (msb & 0x1F) << 11; + inst |= (lsb & 0x1F) << 6; + tcg_out32(s, inst); +} + +/* * Type branch */ -static inline void tcg_out_opc_br(TCGContext *s, int opc, +static inline void tcg_out_opc_br(TCGContext *s, MIPSInsn opc, TCGReg rt, TCGReg rs) { /* We pay attention here to not modify the branch target by reading the existing value and using it again. This ensure that caches and memory are kept coherent during retranslation. */ - uint16_t offset = (uint16_t)(*(uint32_t *) s->code_ptr); + uint16_t offset = (uint16_t)*s->code_ptr; tcg_out_opc_imm(s, opc, rt, rs, offset); } @@ -382,7 +385,7 @@ static inline void tcg_out_opc_br(TCGContext *s, int opc, /* * Type sa */ -static inline void tcg_out_opc_sa(TCGContext *s, int opc, +static inline void tcg_out_opc_sa(TCGContext *s, MIPSInsn opc, TCGReg rd, TCGReg rt, TCGArg sa) { int32_t inst; @@ -395,6 +398,29 @@ static inline void tcg_out_opc_sa(TCGContext *s, int opc, } +/* + * Type jump. + * Returns true if the branch was in range and the insn was emitted. + */ +static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, void *target) +{ + uintptr_t dest = (uintptr_t)target; + uintptr_t from = (uintptr_t)s->code_ptr + 4; + int32_t inst; + + /* The pc-region branch happens within the 256MB region of + the delay slot (thus the +4). 
*/ + if ((from ^ dest) & -(1 << 28)) { + return false; + } + assert((dest & 3) == 0); + + inst = opc; + inst |= (dest >> 2) & 0x3ffffff; + tcg_out32(s, inst); + return true; +} + static inline void tcg_out_nop(TCGContext *s) { tcg_out32(s, 0); @@ -417,8 +443,10 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, } else if (arg == (uint16_t)arg) { tcg_out_opc_imm(s, OPC_ORI, reg, TCG_REG_ZERO, arg); } else { - tcg_out_opc_imm(s, OPC_LUI, reg, 0, arg >> 16); - tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff); + tcg_out_opc_imm(s, OPC_LUI, reg, TCG_REG_ZERO, arg >> 16); + if (arg & 0xffff) { + tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff); + } } } @@ -428,14 +456,14 @@ static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg) tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); } else { /* ret and arg can't be register at */ - if (ret == TCG_REG_AT || arg == TCG_REG_AT) { + if (ret == TCG_TMP0 || arg == TCG_TMP0) { tcg_abort(); } - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); } } @@ -446,14 +474,14 @@ static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); } else { /* ret and arg can't be register at */ - if (ret == TCG_REG_AT || arg == TCG_REG_AT) { + if (ret == TCG_TMP0 || arg == TCG_TMP0) { tcg_abort(); } - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); } } @@ -464,22 +492,22 @@ static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg) tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); } else { /* ret and arg must be different and can't be register at */ - if (ret == arg || ret == TCG_REG_AT || arg == TCG_REG_AT) { + if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) { tcg_abort(); } tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 24); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 24); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, arg, 0xff00); - tcg_out_opc_sa(s, OPC_SLL, TCG_REG_AT, TCG_REG_AT, 8); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00); + tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); } } @@ -503,16 +531,18 @@ static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) } } -static inline void tcg_out_ldst(TCGContext *s, int opc, TCGArg arg, - TCGReg arg1, TCGArg arg2) +static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data, + TCGReg addr, intptr_t ofs) { - if (arg2 == (int16_t) arg2) { - tcg_out_opc_imm(s, opc, arg, arg1, arg2); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, arg2); - tcg_out_opc_reg(s, OPC_ADDU, 
TCG_REG_AT, TCG_REG_AT, arg1); - tcg_out_opc_imm(s, opc, arg, TCG_REG_AT, 0); + int16_t lo = ofs; + if (ofs != lo) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - lo); + if (addr != TCG_REG_ZERO) { + tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, TCG_TMP0, addr); + } + addr = TCG_TMP0; } + tcg_out_opc_imm(s, opc, data, addr, lo); } static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, @@ -532,1056 +562,1013 @@ static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val) if (val == (int16_t)val) { tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val); } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, val); - tcg_out_opc_reg(s, OPC_ADDU, reg, reg, TCG_REG_AT); - } -} - -/* Helper routines for marshalling helper function arguments into - * the correct registers and stack. - * arg_num is where we want to put this argument, and is updated to be ready - * for the next call. arg is the argument itself. Note that arg_num 0..3 is - * real registers, 4+ on stack. - * - * We provide routines for arguments which are: immediate, 32 bit - * value in register, 16 and 8 bit values in register (which must be zero - * extended before use) and 64 bit value in a lo:hi register pair. - */ -#define DEFINE_TCG_OUT_CALL_IARG(NAME, ARGPARAM) \ - static inline void NAME(TCGContext *s, int *arg_num, ARGPARAM) \ - { \ - if (*arg_num < 4) { \ - DEFINE_TCG_OUT_CALL_IARG_GET_ARG(tcg_target_call_iarg_regs[*arg_num]); \ - } else { \ - DEFINE_TCG_OUT_CALL_IARG_GET_ARG(TCG_REG_AT); \ - tcg_out_st(s, TCG_TYPE_I32, TCG_REG_AT, TCG_REG_SP, 4 * (*arg_num)); \ - } \ - (*arg_num)++; \ -} -#define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \ - tcg_out_opc_imm(s, OPC_ANDI, A, arg, 0xff); -DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_reg8, TCGReg arg) -#undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG -#define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \ - tcg_out_opc_imm(s, OPC_ANDI, A, arg, 0xffff); -DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_reg16, TCGReg arg) -#undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG -#define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \ - tcg_out_movi(s, TCG_TYPE_I32, A, arg); -DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, TCGArg arg) -#undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG - -/* We don't use the macro for this one to avoid an unnecessary reg-reg - move when storing to the stack. */ -static inline void tcg_out_call_iarg_reg32(TCGContext *s, int *arg_num, - TCGReg arg) -{ - if (*arg_num < 4) { - tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[*arg_num], arg); - } else { - tcg_out_st(s, TCG_TYPE_I32, arg, TCG_REG_SP, 4 * (*arg_num)); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, val); + tcg_out_opc_reg(s, OPC_ADDU, reg, reg, TCG_TMP0); } - (*arg_num)++; } -static inline void tcg_out_call_iarg_reg64(TCGContext *s, int *arg_num, - TCGReg arg_low, TCGReg arg_high) -{ - (*arg_num) = (*arg_num + 1) & ~1; - -#if defined(TCG_TARGET_WORDS_BIGENDIAN) - tcg_out_call_iarg_reg32(s, arg_num, arg_high); - tcg_out_call_iarg_reg32(s, arg_num, arg_low); -#else - tcg_out_call_iarg_reg32(s, arg_num, arg_low); - tcg_out_call_iarg_reg32(s, arg_num, arg_high); -#endif -} +/* Bit 0 set if inversion required; bit 1 set if swapping required. 
*/ +#define MIPS_CMP_INV 1 +#define MIPS_CMP_SWAP 2 + +static const uint8_t mips_cmp_map[16] = { + [TCG_COND_LT] = 0, + [TCG_COND_LTU] = 0, + [TCG_COND_GE] = MIPS_CMP_INV, + [TCG_COND_GEU] = MIPS_CMP_INV, + [TCG_COND_LE] = MIPS_CMP_INV | MIPS_CMP_SWAP, + [TCG_COND_LEU] = MIPS_CMP_INV | MIPS_CMP_SWAP, + [TCG_COND_GT] = MIPS_CMP_SWAP, + [TCG_COND_GTU] = MIPS_CMP_SWAP, +}; -static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1, - TCGArg arg2, int label_index) +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, TCGReg arg2) { - TCGLabel *l = &s->labels[label_index]; + MIPSInsn s_opc = OPC_SLTU; + int cmp_map; switch (cond) { case TCG_COND_EQ: - tcg_out_opc_br(s, OPC_BEQ, arg1, arg2); + if (arg2 != 0) { + tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); + arg1 = ret; + } + tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1); break; + case TCG_COND_NE: - tcg_out_opc_br(s, OPC_BNE, arg1, arg2); - break; - case TCG_COND_LT: - if (arg2 == 0) { - tcg_out_opc_br(s, OPC_BLTZ, 0, arg1); - } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); + if (arg2 != 0) { + tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); + arg1 = ret; } + tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1); break; - case TCG_COND_LTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); - break; + + case TCG_COND_LT: case TCG_COND_GE: - if (arg2 == 0) { - tcg_out_opc_br(s, OPC_BGEZ, 0, arg1); - } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); - } - break; - case TCG_COND_GEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); - break; case TCG_COND_LE: - if (arg2 == 0) { - tcg_out_opc_br(s, OPC_BLEZ, 0, arg1); - } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); - } - break; - case TCG_COND_LEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); - break; case TCG_COND_GT: - if (arg2 == 0) { - tcg_out_opc_br(s, OPC_BGTZ, 0, arg1); - } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); - } - break; + s_opc = OPC_SLT; + /* FALLTHRU */ + + case TCG_COND_LTU: + case TCG_COND_GEU: + case TCG_COND_LEU: case TCG_COND_GTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); - break; - default: - tcg_abort(); + cmp_map = mips_cmp_map[cond]; + if (cmp_map & MIPS_CMP_SWAP) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + } + tcg_out_opc_reg(s, s_opc, ret, arg1, arg2); + if (cmp_map & MIPS_CMP_INV) { + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + } break; - } - if (l->has_value) { - reloc_pc16(s->code_ptr - 4, l->u.value); - } else { - tcg_out_reloc(s, s->code_ptr - 4, R_MIPS_PC16, label_index, 0); - } - tcg_out_nop(s); + + default: + tcg_abort(); + break; + } } -/* XXX: we implement it at the target level to avoid having to - handle cross basic blocks temporaries */ -static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGArg arg1, - TCGArg arg2, TCGArg arg3, TCGArg arg4, - int label_index) +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, + TCGReg arg2, int label_index) { - void *label_ptr; + static const MIPSInsn b_zero[16] = { + [TCG_COND_LT] = OPC_BLTZ, + [TCG_COND_GT] = 
OPC_BGTZ, + [TCG_COND_LE] = OPC_BLEZ, + [TCG_COND_GE] = OPC_BGEZ, + }; + + TCGLabel *l; + MIPSInsn s_opc = OPC_SLTU; + MIPSInsn b_opc; + int cmp_map; - switch(cond) { - case TCG_COND_NE: - tcg_out_brcond(s, TCG_COND_NE, arg2, arg4, label_index); - tcg_out_brcond(s, TCG_COND_NE, arg1, arg3, label_index); - return; + switch (cond) { case TCG_COND_EQ: + b_opc = OPC_BEQ; break; - case TCG_COND_LT: - case TCG_COND_LE: - tcg_out_brcond(s, TCG_COND_LT, arg2, arg4, label_index); + case TCG_COND_NE: + b_opc = OPC_BNE; break; + + case TCG_COND_LT: case TCG_COND_GT: + case TCG_COND_LE: case TCG_COND_GE: - tcg_out_brcond(s, TCG_COND_GT, arg2, arg4, label_index); - break; - case TCG_COND_LTU: - case TCG_COND_LEU: - tcg_out_brcond(s, TCG_COND_LTU, arg2, arg4, label_index); - break; - case TCG_COND_GTU: - case TCG_COND_GEU: - tcg_out_brcond(s, TCG_COND_GTU, arg2, arg4, label_index); - break; - default: - tcg_abort(); - } - - label_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, arg2, arg4); - tcg_out_nop(s); + if (arg2 == 0) { + b_opc = b_zero[cond]; + arg2 = arg1; + arg1 = 0; + break; + } + s_opc = OPC_SLT; + /* FALLTHRU */ - switch(cond) { - case TCG_COND_EQ: - tcg_out_brcond(s, TCG_COND_EQ, arg1, arg3, label_index); - break; - case TCG_COND_LT: case TCG_COND_LTU: - tcg_out_brcond(s, TCG_COND_LTU, arg1, arg3, label_index); - break; - case TCG_COND_LE: - case TCG_COND_LEU: - tcg_out_brcond(s, TCG_COND_LEU, arg1, arg3, label_index); - break; - case TCG_COND_GT: case TCG_COND_GTU: - tcg_out_brcond(s, TCG_COND_GTU, arg1, arg3, label_index); - break; - case TCG_COND_GE: + case TCG_COND_LEU: case TCG_COND_GEU: - tcg_out_brcond(s, TCG_COND_GEU, arg1, arg3, label_index); + cmp_map = mips_cmp_map[cond]; + if (cmp_map & MIPS_CMP_SWAP) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + } + tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2); + b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE); + arg1 = TCG_TMP0; + arg2 = TCG_REG_ZERO; break; + default: tcg_abort(); + break; } - reloc_pc16(label_ptr, (tcg_target_long) s->code_ptr); + tcg_out_opc_br(s, b_opc, arg1, arg2); + l = &s->labels[label_index]; + if (l->has_value) { + reloc_pc16(s->code_ptr - 1, l->u.value_ptr); + } else { + tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, label_index, 0); + } + tcg_out_nop(s); } -static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGArg c1, TCGArg c2, TCGArg v) +static TCGReg tcg_out_reduce_eq2(TCGContext *s, TCGReg tmp0, TCGReg tmp1, + TCGReg al, TCGReg ah, + TCGReg bl, TCGReg bh) { - switch (cond) { - case TCG_COND_EQ: - if (c1 == 0) { - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c2); - } else if (c2 == 0) { - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c1); + /* Merge highpart comparison into AH. */ + if (bh != 0) { + if (ah != 0) { + tcg_out_opc_reg(s, OPC_XOR, tmp0, ah, bh); + ah = tmp0; } else { - tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + ah = bh; } - break; - case TCG_COND_NE: - if (c1 == 0) { - tcg_out_opc_reg(s, OPC_MOVN, ret, v, c2); - } else if (c2 == 0) { - tcg_out_opc_reg(s, OPC_MOVN, ret, v, c1); + } + /* Merge lowpart comparison into AL. */ + if (bl != 0) { + if (al != 0) { + tcg_out_opc_reg(s, OPC_XOR, tmp1, al, bl); + al = tmp1; } else { - tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + al = bl; + } + } + /* Merge high and low part comparisons into AL. 
*/ + if (ah != 0) { + if (al != 0) { + tcg_out_opc_reg(s, OPC_OR, tmp0, ah, al); + al = tmp0; + } else { + al = ah; } - break; - case TCG_COND_LT: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); - break; - case TCG_COND_LTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); - break; - case TCG_COND_GE: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); - break; - case TCG_COND_GEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); - break; - case TCG_COND_LE: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); - break; - case TCG_COND_LEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); - break; - case TCG_COND_GT: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); - break; - case TCG_COND_GTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); - break; - default: - tcg_abort(); - break; } + return al; } -static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGArg arg1, TCGArg arg2) +static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) { + TCGReg tmp0 = TCG_TMP0; + TCGReg tmp1 = ret; + + assert(ret != TCG_TMP0); + if (ret == ah || ret == bh) { + assert(ret != TCG_TMP1); + tmp1 = TCG_TMP1; + } + switch (cond) { case TCG_COND_EQ: - if (arg1 == 0) { - tcg_out_opc_imm(s, OPC_SLTIU, ret, arg2, 1); - } else if (arg2 == 0) { - tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1); - } else { - tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1); - } - break; case TCG_COND_NE: - if (arg1 == 0) { - tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg2); - } else if (arg2 == 0) { - tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1); - } else { - tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); - tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret); - } + tmp1 = tcg_out_reduce_eq2(s, tmp0, tmp1, al, ah, bl, bh); + tcg_out_setcond(s, cond, ret, tmp1, TCG_REG_ZERO); break; - case TCG_COND_LT: - tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); - break; - case TCG_COND_LTU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); - break; - case TCG_COND_GE: - tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_GEU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_LE: - tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_LEU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_GT: - tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); + + default: + tcg_out_setcond(s, TCG_COND_EQ, tmp0, ah, bh); + tcg_out_setcond(s, tcg_unsigned_cond(cond), tmp1, al, bl); + tcg_out_opc_reg(s, OPC_AND, tmp1, tmp1, tmp0); + tcg_out_setcond(s, tcg_high_cond(cond), tmp0, ah, bh); + tcg_out_opc_reg(s, OPC_OR, ret, tmp1, tmp0); break; - case TCG_COND_GTU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); + } +} + +static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGReg bl, TCGReg bh, int label_index) +{ + TCGCond b_cond = 
TCG_COND_NE; + TCGReg tmp = TCG_TMP1; + + /* With branches, we emit between 4 and 9 insns with 2 or 3 branches. + With setcond, we emit between 3 and 10 insns and only 1 branch, + which ought to get better branch prediction. */ + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_NE: + b_cond = cond; + tmp = tcg_out_reduce_eq2(s, TCG_TMP0, TCG_TMP1, al, ah, bl, bh); break; + default: - tcg_abort(); + /* Minimize code size by preferring a compare not requiring INV. */ + if (mips_cmp_map[cond] & MIPS_CMP_INV) { + cond = tcg_invert_cond(cond); + b_cond = TCG_COND_EQ; + } + tcg_out_setcond2(s, cond, tmp, al, ah, bl, bh); break; } + + tcg_out_brcond(s, b_cond, tmp, TCG_REG_ZERO, label_index); } -/* XXX: we implement it at the target level to avoid having to - handle cross basic blocks temporaries */ -static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, - TCGArg arg1, TCGArg arg2, TCGArg arg3, TCGArg arg4) +static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, TCGReg c2, TCGReg v) { + MIPSInsn m_opc = OPC_MOVN; + switch (cond) { case TCG_COND_EQ: - tcg_out_setcond(s, TCG_COND_EQ, TCG_REG_AT, arg2, arg4); - tcg_out_setcond(s, TCG_COND_EQ, TCG_REG_T0, arg1, arg3); - tcg_out_opc_reg(s, OPC_AND, ret, TCG_REG_AT, TCG_REG_T0); - return; + m_opc = OPC_MOVZ; + /* FALLTHRU */ case TCG_COND_NE: - tcg_out_setcond(s, TCG_COND_NE, TCG_REG_AT, arg2, arg4); - tcg_out_setcond(s, TCG_COND_NE, TCG_REG_T0, arg1, arg3); - tcg_out_opc_reg(s, OPC_OR, ret, TCG_REG_AT, TCG_REG_T0); - return; - case TCG_COND_LT: - case TCG_COND_LE: - tcg_out_setcond(s, TCG_COND_LT, TCG_REG_AT, arg2, arg4); - break; - case TCG_COND_GT: - case TCG_COND_GE: - tcg_out_setcond(s, TCG_COND_GT, TCG_REG_AT, arg2, arg4); - break; - case TCG_COND_LTU: - case TCG_COND_LEU: - tcg_out_setcond(s, TCG_COND_LTU, TCG_REG_AT, arg2, arg4); - break; - case TCG_COND_GTU: - case TCG_COND_GEU: - tcg_out_setcond(s, TCG_COND_GTU, TCG_REG_AT, arg2, arg4); + if (c2 != 0) { + tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2); + c1 = TCG_TMP0; + } break; + default: - tcg_abort(); + /* Minimize code size by preferring a compare not requiring INV. */ + if (mips_cmp_map[cond] & MIPS_CMP_INV) { + cond = tcg_invert_cond(cond); + m_opc = OPC_MOVZ; + } + tcg_out_setcond(s, cond, TCG_TMP0, c1, c2); + c1 = TCG_TMP0; break; } - tcg_out_setcond(s, TCG_COND_EQ, TCG_REG_T0, arg2, arg4); + tcg_out_opc_reg(s, m_opc, ret, v, c1); +} - switch(cond) { - case TCG_COND_LT: - case TCG_COND_LTU: - tcg_out_setcond(s, TCG_COND_LTU, ret, arg1, arg3); - break; - case TCG_COND_LE: - case TCG_COND_LEU: - tcg_out_setcond(s, TCG_COND_LEU, ret, arg1, arg3); - break; - case TCG_COND_GT: - case TCG_COND_GTU: - tcg_out_setcond(s, TCG_COND_GTU, ret, arg1, arg3); - break; - case TCG_COND_GE: - case TCG_COND_GEU: - tcg_out_setcond(s, TCG_COND_GEU, ret, arg1, arg3); - break; - default: - tcg_abort(); +static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) +{ + /* Note that the ABI requires the called function's address to be + loaded into T9, even if a direct branch is in range. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg); + + /* But do try a direct branch, allowing the cpu better insn prefetch. 
*/ + if (tail) { + if (!tcg_out_opc_jmp(s, OPC_J, arg)) { + tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_T9, 0); + } + } else { + if (!tcg_out_opc_jmp(s, OPC_JAL, arg)) { + tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); + } } +} - tcg_out_opc_reg(s, OPC_AND, ret, ret, TCG_REG_T0); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); +static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ + tcg_out_call_int(s, arg, false); + tcg_out_nop(s); } #if defined(CONFIG_SOFTMMU) -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ -static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, }; -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ -static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, }; -#endif -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, - int opc) +/* Helper routines for marshalling helper function arguments into + * the correct registers and stack. + * I is where we want to put this argument, and is updated and returned + * for the next call. ARG is the argument itself. + * + * We provide routines for arguments which are: immediate, 32 bit + * value in register, 16 and 8 bit values in register (which must be zero + * extended before use) and 64 bit value in a lo:hi register pair. 
+ */ + +static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg) { - TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2; -#if defined(CONFIG_SOFTMMU) - void *label1_ptr, *label2_ptr; - int arg_num; - int mem_index, s_bits; - int addr_meml; -# if TARGET_LONG_BITS == 64 - uint8_t *label3_ptr; - TCGReg addr_regh; - int addr_memh; -# endif -#endif - data_regl = *args++; - if (opc == 3) - data_regh = *args++; - else - data_regh = 0; - addr_regl = *args++; -#if defined(CONFIG_SOFTMMU) -# if TARGET_LONG_BITS == 64 - addr_regh = *args++; -# if defined(TCG_TARGET_WORDS_BIGENDIAN) - addr_memh = 0; - addr_meml = 4; -# else - addr_memh = 4; - addr_meml = 0; -# endif -# else - addr_meml = 0; -# endif - mem_index = *args; - s_bits = opc & 3; -#endif + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg); + } else { + tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i); + } + return i + 1; +} - if (opc == 3) { -#if defined(TCG_TARGET_WORDS_BIGENDIAN) - data_reg1 = data_regh; - data_reg2 = data_regl; -#else - data_reg1 = data_regl; - data_reg2 = data_regh; -#endif +static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) +{ + TCGReg tmp = TCG_TMP0; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xff); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) +{ + TCGReg tmp = TCG_TMP0; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg) +{ + TCGReg tmp = TCG_TMP0; + if (arg == 0) { + tmp = TCG_REG_ZERO; } else { - data_reg1 = data_regl; - data_reg2 = 0; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_movi(s, TCG_TYPE_REG, tmp, arg); } -#if defined(CONFIG_SOFTMMU) - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah) +{ + i = (i + 1) & ~1; + i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al)); + i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah)); + return i; +} + +/* Perform the tlb comparison operation. The complete host address is + placed in BASE. Clobbers AT, T0, A0. */ +static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, + TCGReg addrh, int mem_index, TCGMemOp s_bits, + tcg_insn_unit *label_ptr[2], bool is_load) +{ + int cmp_off + = (is_load + ? 
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + + tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addrl, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0); - tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + addr_meml); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T0, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - tcg_out_opc_reg(s, OPC_AND, TCG_REG_T0, TCG_REG_T0, addr_regl); - -# if TARGET_LONG_BITS == 64 - label3_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, TCG_REG_T0, TCG_REG_AT); - tcg_out_nop(s); - tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + addr_memh); + /* Compensate for very large offsets. */ + if (add_off >= 0x8000) { + /* Most target env are smaller than 32k; none are larger than 64k. + Simplify the logic here merely to offset by 0x7ff0, giving us a + range just shy of 64k. Check this assumption. */ + QEMU_BUILD_BUG_ON(offsetof(CPUArchState, + tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ff0 + 0x7fff); + tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, TCG_REG_A0, 0x7ff0); + cmp_off -= 0x7ff0; + add_off -= 0x7ff0; + } - label1_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, addr_regh, TCG_REG_AT); - tcg_out_nop(s); + /* Load the tlb comparator. */ + tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + LO_OFF); + if (TARGET_LONG_BITS == 64) { + tcg_out_opc_imm(s, OPC_LW, base, TCG_REG_A0, cmp_off + HI_OFF); + } - reloc_pc16(label3_ptr, (tcg_target_long) s->code_ptr); -# else - label1_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_T0, TCG_REG_AT); - tcg_out_nop(s); -# endif - - /* slow path */ - arg_num = 0; - tcg_out_call_iarg_reg32(s, &arg_num, TCG_AREG0); -# if TARGET_LONG_BITS == 64 - tcg_out_call_iarg_reg64(s, &arg_num, addr_regl, addr_regh); -# else - tcg_out_call_iarg_reg32(s, &arg_num, addr_regl); -# endif - tcg_out_call_iarg_imm32(s, &arg_num, mem_index); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_ld_helpers[s_bits]); - tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); - tcg_out_nop(s); + /* Mask the page bits, keeping the alignment bits to compare against. + In between, load the tlb addend for the fast path. 
*/ + tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, + TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); - switch(opc) { - case 0: - tcg_out_opc_imm(s, OPC_ANDI, data_reg1, TCG_REG_V0, 0xff); - break; - case 0 | 4: - tcg_out_ext8s(s, data_reg1, TCG_REG_V0); - break; - case 1: - tcg_out_opc_imm(s, OPC_ANDI, data_reg1, TCG_REG_V0, 0xffff); - break; - case 1 | 4: - tcg_out_ext16s(s, data_reg1, TCG_REG_V0); - break; - case 2: - tcg_out_mov(s, TCG_TYPE_I32, data_reg1, TCG_REG_V0); - break; - case 3: - tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_V1); - tcg_out_mov(s, TCG_TYPE_I32, data_reg1, TCG_REG_V0); - break; - default: - tcg_abort(); + label_ptr[0] = s->code_ptr; + tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); + + if (TARGET_LONG_BITS == 64) { + /* delay slot */ + tcg_out_nop(s); + + label_ptr[1] = s->code_ptr; + tcg_out_opc_br(s, OPC_BNE, addrh, base); + } + + /* delay slot */ + tcg_out_opc_reg(s, OPC_ADDU, base, TCG_REG_A0, addrl); +} + +static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addrhi, + int mem_index, void *raddr, + tcg_insn_unit *label_ptr[2]) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = datalo; + label->datahi_reg = datahi; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = label_ptr[0]; + if (TARGET_LONG_BITS == 64) { + label->label_ptr[1] = label_ptr[1]; + } +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOp opc = l->opc; + TCGReg v0; + int i; + + /* resolve label address */ + reloc_pc16(l->label_ptr[0], s->code_ptr); + if (TARGET_LONG_BITS == 64) { + reloc_pc16(l->label_ptr[1], s->code_ptr); + } + + i = 1; + if (TARGET_LONG_BITS == 64) { + i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); + } else { + i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); + } + i = tcg_out_call_iarg_imm(s, i, l->mem_index); + i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr); + tcg_out_call_int(s, qemu_ld_helpers[opc], false); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + + v0 = l->datalo_reg; + if ((opc & MO_SIZE) == MO_64) { + /* We eliminated V0 from the possible output registers, so it + cannot be clobbered here. So we must move V1 first. 
*/ + if (MIPS_BE) { + tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1); + v0 = l->datahi_reg; + } else { + tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1); + } } - label2_ptr = s->code_ptr; + reloc_pc16(s->code_ptr, l->raddr); tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); - tcg_out_nop(s); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0); +} - /* label1: fast path */ - reloc_pc16(label1_ptr, (tcg_target_long) s->code_ptr); +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOp opc = l->opc; + TCGMemOp s_bits = opc & MO_SIZE; + int i; + + /* resolve label address */ + reloc_pc16(l->label_ptr[0], s->code_ptr); + if (TARGET_LONG_BITS == 64) { + reloc_pc16(l->label_ptr[1], s->code_ptr); + } - tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addend)); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_V0, TCG_REG_A0, addr_regl); -#else - if (GUEST_BASE == (int16_t)GUEST_BASE) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_V0, addr_regl, GUEST_BASE); + i = 1; + if (TARGET_LONG_BITS == 64) { + i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, GUEST_BASE); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_V0, TCG_REG_V0, addr_regl); + i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); } + switch (s_bits) { + case MO_8: + i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg); + break; + case MO_16: + i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg); + break; + case MO_32: + i = tcg_out_call_iarg_reg(s, i, l->datalo_reg); + break; + case MO_64: + i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg); + break; + default: + tcg_abort(); + } + i = tcg_out_call_iarg_imm(s, i, l->mem_index); + + /* Tail call to the store helper. Thus force the return address + computation to take place in the return address register. 
*/ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr); + i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA); + tcg_out_call_int(s, qemu_st_helpers[opc], true); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); +} #endif - switch(opc) { - case 0: - tcg_out_opc_imm(s, OPC_LBU, data_reg1, TCG_REG_V0, 0); +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, TCGMemOp opc) +{ + switch (opc) { + case MO_UB: + tcg_out_opc_imm(s, OPC_LBU, datalo, base, 0); break; - case 0 | 4: - tcg_out_opc_imm(s, OPC_LB, data_reg1, TCG_REG_V0, 0); + case MO_SB: + tcg_out_opc_imm(s, OPC_LB, datalo, base, 0); break; - case 1: - if (TCG_NEED_BSWAP) { - tcg_out_opc_imm(s, OPC_LHU, TCG_REG_T0, TCG_REG_V0, 0); - tcg_out_bswap16(s, data_reg1, TCG_REG_T0); - } else { - tcg_out_opc_imm(s, OPC_LHU, data_reg1, TCG_REG_V0, 0); - } + case MO_UW | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); + tcg_out_bswap16(s, datalo, TCG_TMP1); break; - case 1 | 4: - if (TCG_NEED_BSWAP) { - tcg_out_opc_imm(s, OPC_LHU, TCG_REG_T0, TCG_REG_V0, 0); - tcg_out_bswap16s(s, data_reg1, TCG_REG_T0); - } else { - tcg_out_opc_imm(s, OPC_LH, data_reg1, TCG_REG_V0, 0); - } + case MO_UW: + tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0); break; - case 2: - if (TCG_NEED_BSWAP) { - tcg_out_opc_imm(s, OPC_LW, TCG_REG_T0, TCG_REG_V0, 0); - tcg_out_bswap32(s, data_reg1, TCG_REG_T0); - } else { - tcg_out_opc_imm(s, OPC_LW, data_reg1, TCG_REG_V0, 0); - } + case MO_SW | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); + tcg_out_bswap16s(s, datalo, TCG_TMP1); break; - case 3: - if (TCG_NEED_BSWAP) { - tcg_out_opc_imm(s, OPC_LW, TCG_REG_T0, TCG_REG_V0, 4); - tcg_out_bswap32(s, data_reg1, TCG_REG_T0); - tcg_out_opc_imm(s, OPC_LW, TCG_REG_T0, TCG_REG_V0, 0); - tcg_out_bswap32(s, data_reg2, TCG_REG_T0); - } else { - tcg_out_opc_imm(s, OPC_LW, data_reg1, TCG_REG_V0, 0); - tcg_out_opc_imm(s, OPC_LW, data_reg2, TCG_REG_V0, 4); - } + case MO_SW: + tcg_out_opc_imm(s, OPC_LH, datalo, base, 0); + break; + case MO_UL | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 0); + tcg_out_bswap32(s, datalo, TCG_TMP1); + break; + case MO_UL: + tcg_out_opc_imm(s, OPC_LW, datalo, base, 0); + break; + case MO_Q | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, HI_OFF); + tcg_out_bswap32(s, datalo, TCG_TMP1); + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, LO_OFF); + tcg_out_bswap32(s, datahi, TCG_TMP1); + break; + case MO_Q: + tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF); + tcg_out_opc_imm(s, OPC_LW, datahi, base, HI_OFF); break; default: tcg_abort(); } - -#if defined(CONFIG_SOFTMMU) - reloc_pc16(label2_ptr, (tcg_target_long) s->code_ptr); -#endif } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, - int opc) +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) { - TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2; + TCGReg addr_regl, addr_regh __attribute__((unused)); + TCGReg data_regl, data_regh; + TCGMemOp opc; #if defined(CONFIG_SOFTMMU) - uint8_t *label1_ptr, *label2_ptr; - int arg_num; - int mem_index, s_bits; - int addr_meml; -#endif -#if TARGET_LONG_BITS == 64 -# if defined(CONFIG_SOFTMMU) - uint8_t *label3_ptr; - TCGReg addr_regh; - int addr_memh; -# endif + tcg_insn_unit *label_ptr[2]; + int mem_index; + TCGMemOp s_bits; #endif + /* Note that we've eliminated V0 from the output registers, + so we won't overwrite the base register during loading. 
*/ + TCGReg base = TCG_REG_V0; + data_regl = *args++; - if (opc == 3) { - data_regh = *args++; - } else { - data_regh = 0; - } + data_regh = (is_64 ? *args++ : 0); addr_regl = *args++; + addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0); + opc = *args++; + #if defined(CONFIG_SOFTMMU) -# if TARGET_LONG_BITS == 64 - addr_regh = *args++; -# if defined(TCG_TARGET_WORDS_BIGENDIAN) - addr_memh = 0; - addr_meml = 4; -# else - addr_memh = 4; - addr_meml = 0; -# endif -# else - addr_meml = 0; -# endif mem_index = *args; - s_bits = opc; -#endif + s_bits = opc & MO_SIZE; - if (opc == 3) { -#if defined(TCG_TARGET_WORDS_BIGENDIAN) - data_reg1 = data_regh; - data_reg2 = data_regl; + tcg_out_tlb_load(s, base, addr_regl, addr_regh, mem_index, + s_bits, label_ptr, 1); + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); + add_qemu_ldst_label(s, 1, opc, data_regl, data_regh, addr_regl, addr_regh, + mem_index, s->code_ptr, label_ptr); #else - data_reg1 = data_regl; - data_reg2 = data_regh; -#endif + if (GUEST_BASE == 0 && data_regl != addr_regl) { + base = addr_regl; + } else if (GUEST_BASE == (int16_t)GUEST_BASE) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE); } else { - data_reg1 = data_regl; - data_reg2 = 0; + tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE); + tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); } + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); +#endif +} -#if defined(CONFIG_SOFTMMU) - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0); - tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_write) + addr_meml); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T0, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - tcg_out_opc_reg(s, OPC_AND, TCG_REG_T0, TCG_REG_T0, addr_regl); - -# if TARGET_LONG_BITS == 64 - label3_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, TCG_REG_T0, TCG_REG_AT); - tcg_out_nop(s); - - tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_write) + addr_memh); - - label1_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, addr_regh, TCG_REG_AT); - tcg_out_nop(s); - - reloc_pc16(label3_ptr, (tcg_target_long) s->code_ptr); -# else - label1_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_T0, TCG_REG_AT); - tcg_out_nop(s); -# endif +static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, TCGMemOp opc) +{ + switch (opc) { + case MO_8: + tcg_out_opc_imm(s, OPC_SB, datalo, base, 0); + break; - /* slow path */ - arg_num = 0; - tcg_out_call_iarg_reg32(s, &arg_num, TCG_AREG0); -# if TARGET_LONG_BITS == 64 - tcg_out_call_iarg_reg64(s, &arg_num, addr_regl, addr_regh); -# else - tcg_out_call_iarg_reg32(s, &arg_num, addr_regl); -# endif - switch(opc) { - case 0: - tcg_out_call_iarg_reg8(s, &arg_num, data_regl); + case MO_16 | MO_BSWAP: + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, datalo, 0xffff); + tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1); + datalo = TCG_TMP1; + /* FALLTHRU */ + case MO_16: + tcg_out_opc_imm(s, OPC_SH, datalo, base, 0); break; - case 1: - tcg_out_call_iarg_reg16(s, &arg_num, data_regl); + + case MO_32 | MO_BSWAP: + tcg_out_bswap32(s, TCG_TMP1, datalo); + datalo = TCG_TMP1; + /* FALLTHRU */ + case MO_32: + tcg_out_opc_imm(s, OPC_SW, datalo, base, 0); break; - case 2: - tcg_out_call_iarg_reg32(s, &arg_num, 
data_regl); + + case MO_64 | MO_BSWAP: + tcg_out_bswap32(s, TCG_TMP1, datalo); + tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, HI_OFF); + tcg_out_bswap32(s, TCG_TMP1, datahi); + tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, LO_OFF); break; - case 3: - tcg_out_call_iarg_reg64(s, &arg_num, data_regl, data_regh); + case MO_64: + tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF); + tcg_out_opc_imm(s, OPC_SW, datahi, base, HI_OFF); break; + default: tcg_abort(); } - tcg_out_call_iarg_imm32(s, &arg_num, mem_index); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_st_helpers[s_bits]); - tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); - tcg_out_nop(s); - - label2_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); - tcg_out_nop(s); +} - /* label1: fast path */ - reloc_pc16(label1_ptr, (tcg_target_long) s->code_ptr); +static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, + TCGReg ah, TCGArg bl, TCGArg bh, bool cbl, + bool cbh, bool is_sub) +{ + TCGReg th = TCG_TMP1; + + /* If we have a negative constant such that negating it would + make the high part zero, we can (usually) eliminate one insn. */ + if (cbl && cbh && bh == -1 && bl != 0) { + bl = -bl; + bh = 0; + is_sub = !is_sub; + } - tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addend)); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, addr_regl); -#else - if (GUEST_BASE == (int16_t)GUEST_BASE) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, addr_regl, GUEST_BASE); + /* By operating on the high part first, we get to use the final + carry operation to move back from the temporary. */ + if (!cbh) { + tcg_out_opc_reg(s, (is_sub ? OPC_SUBU : OPC_ADDU), th, ah, bh); + } else if (bh != 0 || ah == rl) { + tcg_out_opc_imm(s, OPC_ADDIU, th, ah, (is_sub ? -bh : bh)); } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, GUEST_BASE); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, addr_regl); + th = ah; } -#endif - - switch(opc) { - case 0: - tcg_out_opc_imm(s, OPC_SB, data_reg1, TCG_REG_A0, 0); - break; - case 1: - if (TCG_NEED_BSWAP) { - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_T0, data_reg1, 0xffff); - tcg_out_bswap16(s, TCG_REG_T0, TCG_REG_T0); - tcg_out_opc_imm(s, OPC_SH, TCG_REG_T0, TCG_REG_A0, 0); - } else { - tcg_out_opc_imm(s, OPC_SH, data_reg1, TCG_REG_A0, 0); - } - break; - case 2: - if (TCG_NEED_BSWAP) { - tcg_out_bswap32(s, TCG_REG_T0, data_reg1); - tcg_out_opc_imm(s, OPC_SW, TCG_REG_T0, TCG_REG_A0, 0); + /* Note that tcg optimization should eliminate the bl == 0 case. */ + if (is_sub) { + if (cbl) { + tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, al, bl); + tcg_out_opc_imm(s, OPC_ADDIU, rl, al, -bl); } else { - tcg_out_opc_imm(s, OPC_SW, data_reg1, TCG_REG_A0, 0); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, al, bl); + tcg_out_opc_reg(s, OPC_SUBU, rl, al, bl); } - break; - case 3: - if (TCG_NEED_BSWAP) { - tcg_out_bswap32(s, TCG_REG_T0, data_reg2); - tcg_out_opc_imm(s, OPC_SW, TCG_REG_T0, TCG_REG_A0, 0); - tcg_out_bswap32(s, TCG_REG_T0, data_reg1); - tcg_out_opc_imm(s, OPC_SW, TCG_REG_T0, TCG_REG_A0, 4); + tcg_out_opc_reg(s, OPC_SUBU, rh, th, TCG_TMP0); + } else { + if (cbl) { + tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl); + tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl); } else { - tcg_out_opc_imm(s, OPC_SW, data_reg1, TCG_REG_A0, 0); - tcg_out_opc_imm(s, OPC_SW, data_reg2, TCG_REG_A0, 4); + tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, rl, (rl == bl ? 
al : bl)); } - break; - default: - tcg_abort(); + tcg_out_opc_reg(s, OPC_ADDU, rh, th, TCG_TMP0); } +} +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg addr_regl, addr_regh __attribute__((unused)); + TCGReg data_regl, data_regh, base; + TCGMemOp opc; #if defined(CONFIG_SOFTMMU) - reloc_pc16(label2_ptr, (tcg_target_long) s->code_ptr); + tcg_insn_unit *label_ptr[2]; + int mem_index; + TCGMemOp s_bits; +#endif + + data_regl = *args++; + data_regh = (is_64 ? *args++ : 0); + addr_regl = *args++; + addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0); + opc = *args++; + +#if defined(CONFIG_SOFTMMU) + mem_index = *args; + s_bits = opc & 3; + + /* Note that we eliminated the helper's address argument, + so we can reuse that for the base. */ + base = (TARGET_LONG_BITS == 32 ? TCG_REG_A1 : TCG_REG_A2); + tcg_out_tlb_load(s, base, addr_regl, addr_regh, mem_index, + s_bits, label_ptr, 1); + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + add_qemu_ldst_label(s, 0, opc, data_regl, data_regh, addr_regl, addr_regh, + mem_index, s->code_ptr, label_ptr); +#else + if (GUEST_BASE == 0) { + base = addr_regl; + } else { + base = TCG_REG_A0; + if (GUEST_BASE == (int16_t)GUEST_BASE) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE); + tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); + } + } + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); #endif } static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { - switch(opc) { + MIPSInsn i1, i2; + TCGArg a0, a1, a2; + int c2; + + a0 = args[0]; + a1 = args[1]; + a2 = args[2]; + c2 = const_args[2]; + + switch (opc) { case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_V0, args[0]); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, (tcg_target_long)tb_ret_addr); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_AT, 0); - tcg_out_nop(s); + { + TCGReg b0 = TCG_REG_ZERO; + + if (a0 & ~0xffff) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff); + b0 = TCG_REG_V0; + } + if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, + (uintptr_t)tb_ret_addr); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); + } + tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); + } break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { /* direct jump method */ - tcg_abort(); + s->tb_jmp_offset[a0] = tcg_current_code_size(s); + /* Avoid clobbering the address during retranslation. 
*/ + tcg_out32(s, OPC_J | (*(uint32_t *)s->code_ptr & 0x3ffffff)); } else { /* indirect jump method */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, (tcg_target_long)(s->tb_next + args[0])); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_AT, TCG_REG_AT, 0); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_AT, 0); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, + (uintptr_t)(s->tb_next + a0)); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); } tcg_out_nop(s); - s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; - break; - case INDEX_op_call: - tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, args[0], 0); - tcg_out_nop(s); + s->tb_next_offset[a0] = tcg_current_code_size(s); break; case INDEX_op_br: - tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, args[0]); - break; - - case INDEX_op_mov_i32: - tcg_out_mov(s, TCG_TYPE_I32, args[0], args[1]); - break; - case INDEX_op_movi_i32: - tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); + tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, a0); break; case INDEX_op_ld8u_i32: - tcg_out_ldst(s, OPC_LBU, args[0], args[1], args[2]); - break; + i1 = OPC_LBU; + goto do_ldst; case INDEX_op_ld8s_i32: - tcg_out_ldst(s, OPC_LB, args[0], args[1], args[2]); - break; + i1 = OPC_LB; + goto do_ldst; case INDEX_op_ld16u_i32: - tcg_out_ldst(s, OPC_LHU, args[0], args[1], args[2]); - break; + i1 = OPC_LHU; + goto do_ldst; case INDEX_op_ld16s_i32: - tcg_out_ldst(s, OPC_LH, args[0], args[1], args[2]); - break; + i1 = OPC_LH; + goto do_ldst; case INDEX_op_ld_i32: - tcg_out_ldst(s, OPC_LW, args[0], args[1], args[2]); - break; + i1 = OPC_LW; + goto do_ldst; case INDEX_op_st8_i32: - tcg_out_ldst(s, OPC_SB, args[0], args[1], args[2]); - break; + i1 = OPC_SB; + goto do_ldst; case INDEX_op_st16_i32: - tcg_out_ldst(s, OPC_SH, args[0], args[1], args[2]); - break; + i1 = OPC_SH; + goto do_ldst; case INDEX_op_st_i32: - tcg_out_ldst(s, OPC_SW, args[0], args[1], args[2]); + i1 = OPC_SW; + do_ldst: + tcg_out_ldst(s, i1, a0, a1, a2); break; case INDEX_op_add_i32: - if (const_args[2]) { - tcg_out_opc_imm(s, OPC_ADDIU, args[0], args[1], args[2]); - } else { - tcg_out_opc_reg(s, OPC_ADDU, args[0], args[1], args[2]); - } - break; - case INDEX_op_add2_i32: - if (const_args[4]) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_AT, args[2], args[4]); - } else { - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_AT, args[2], args[4]); - } - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_T0, TCG_REG_AT, args[2]); - if (const_args[5]) { - tcg_out_opc_imm(s, OPC_ADDIU, args[1], args[3], args[5]); - } else { - tcg_out_opc_reg(s, OPC_ADDU, args[1], args[3], args[5]); + i1 = OPC_ADDU, i2 = OPC_ADDIU; + goto do_binary; + case INDEX_op_or_i32: + i1 = OPC_OR, i2 = OPC_ORI; + goto do_binary; + case INDEX_op_xor_i32: + i1 = OPC_XOR, i2 = OPC_XORI; + do_binary: + if (c2) { + tcg_out_opc_imm(s, i2, a0, a1, a2); + break; } - tcg_out_opc_reg(s, OPC_ADDU, args[1], args[1], TCG_REG_T0); - tcg_out_mov(s, TCG_TYPE_I32, args[0], TCG_REG_AT); + do_binaryv: + tcg_out_opc_reg(s, i1, a0, a1, a2); break; + case INDEX_op_sub_i32: - if (const_args[2]) { - tcg_out_opc_imm(s, OPC_ADDIU, args[0], args[1], -args[2]); - } else { - tcg_out_opc_reg(s, OPC_SUBU, args[0], args[1], args[2]); + if (c2) { + tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, -a2); + break; } - break; - case INDEX_op_sub2_i32: - if (const_args[4]) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_AT, args[2], -args[4]); - } else { - tcg_out_opc_reg(s, OPC_SUBU, TCG_REG_AT, args[2], args[4]); - } - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_T0, args[2], TCG_REG_AT); - if (const_args[5]) { - tcg_out_opc_imm(s, 
OPC_ADDIU, args[1], args[3], -args[5]); - } else { - tcg_out_opc_reg(s, OPC_SUBU, args[1], args[3], args[5]); + i1 = OPC_SUBU; + goto do_binary; + case INDEX_op_and_i32: + if (c2 && a2 != (uint16_t)a2) { + int msb = ctz32(~a2) - 1; + assert(use_mips32r2_instructions); + assert(is_p2m1(a2)); + tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0); + break; } - tcg_out_opc_reg(s, OPC_SUBU, args[1], args[1], TCG_REG_T0); - tcg_out_mov(s, TCG_TYPE_I32, args[0], TCG_REG_AT); - break; + i1 = OPC_AND, i2 = OPC_ANDI; + goto do_binary; + case INDEX_op_nor_i32: + i1 = OPC_NOR; + goto do_binaryv; + case INDEX_op_mul_i32: if (use_mips32_instructions) { - tcg_out_opc_reg(s, OPC_MUL, args[0], args[1], args[2]); - } else { - tcg_out_opc_reg(s, OPC_MULT, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); + tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); + break; } - break; - case INDEX_op_muls2_i32: - tcg_out_opc_reg(s, OPC_MULT, 0, args[2], args[3]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); - tcg_out_opc_reg(s, OPC_MFHI, args[1], 0, 0); - break; - case INDEX_op_mulu2_i32: - tcg_out_opc_reg(s, OPC_MULTU, 0, args[2], args[3]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); - tcg_out_opc_reg(s, OPC_MFHI, args[1], 0, 0); - break; + i1 = OPC_MULT, i2 = OPC_MFLO; + goto do_hilo1; case INDEX_op_mulsh_i32: - tcg_out_opc_reg(s, OPC_MULT, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); - break; + i1 = OPC_MULT, i2 = OPC_MFHI; + goto do_hilo1; case INDEX_op_muluh_i32: - tcg_out_opc_reg(s, OPC_MULTU, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); - break; + i1 = OPC_MULTU, i2 = OPC_MFHI; + goto do_hilo1; case INDEX_op_div_i32: - tcg_out_opc_reg(s, OPC_DIV, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); - break; + i1 = OPC_DIV, i2 = OPC_MFLO; + goto do_hilo1; case INDEX_op_divu_i32: - tcg_out_opc_reg(s, OPC_DIVU, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); - break; + i1 = OPC_DIVU, i2 = OPC_MFLO; + goto do_hilo1; case INDEX_op_rem_i32: - tcg_out_opc_reg(s, OPC_DIV, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); - break; + i1 = OPC_DIV, i2 = OPC_MFHI; + goto do_hilo1; case INDEX_op_remu_i32: - tcg_out_opc_reg(s, OPC_DIVU, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); + i1 = OPC_DIVU, i2 = OPC_MFHI; + do_hilo1: + tcg_out_opc_reg(s, i1, 0, a1, a2); + tcg_out_opc_reg(s, i2, a0, 0, 0); break; - case INDEX_op_and_i32: - if (const_args[2]) { - tcg_out_opc_imm(s, OPC_ANDI, args[0], args[1], args[2]); - } else { - tcg_out_opc_reg(s, OPC_AND, args[0], args[1], args[2]); - } - break; - case INDEX_op_or_i32: - if (const_args[2]) { - tcg_out_opc_imm(s, OPC_ORI, args[0], args[1], args[2]); - } else { - tcg_out_opc_reg(s, OPC_OR, args[0], args[1], args[2]); - } - break; - case INDEX_op_nor_i32: - tcg_out_opc_reg(s, OPC_NOR, args[0], args[1], args[2]); + case INDEX_op_muls2_i32: + i1 = OPC_MULT; + goto do_hilo2; + case INDEX_op_mulu2_i32: + i1 = OPC_MULTU; + do_hilo2: + tcg_out_opc_reg(s, i1, 0, a2, args[3]); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); break; + case INDEX_op_not_i32: - tcg_out_opc_reg(s, OPC_NOR, args[0], TCG_REG_ZERO, args[1]); - break; - case INDEX_op_xor_i32: - if (const_args[2]) { - tcg_out_opc_imm(s, OPC_XORI, args[0], args[1], args[2]); - } else { - tcg_out_opc_reg(s, OPC_XOR, args[0], args[1], args[2]); - } + i1 = OPC_NOR; + goto do_unary; + case INDEX_op_bswap16_i32: + i1 = OPC_WSBH; + goto do_unary; + case INDEX_op_ext8s_i32: + 
i1 = OPC_SEB; + goto do_unary; + case INDEX_op_ext16s_i32: + i1 = OPC_SEH; + do_unary: + tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1); break; case INDEX_op_sar_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_SRA, args[0], args[1], args[2]); - } else { - tcg_out_opc_reg(s, OPC_SRAV, args[0], args[2], args[1]); - } - break; + i1 = OPC_SRAV, i2 = OPC_SRA; + goto do_shift; case INDEX_op_shl_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_SLL, args[0], args[1], args[2]); - } else { - tcg_out_opc_reg(s, OPC_SLLV, args[0], args[2], args[1]); - } - break; + i1 = OPC_SLLV, i2 = OPC_SLL; + goto do_shift; case INDEX_op_shr_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_SRL, args[0], args[1], args[2]); + i1 = OPC_SRLV, i2 = OPC_SRL; + goto do_shift; + case INDEX_op_rotr_i32: + i1 = OPC_ROTRV, i2 = OPC_ROTR; + do_shift: + if (c2) { + tcg_out_opc_sa(s, i2, a0, a1, a2); } else { - tcg_out_opc_reg(s, OPC_SRLV, args[0], args[2], args[1]); + tcg_out_opc_reg(s, i1, a0, a2, a1); } break; case INDEX_op_rotl_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], 0x20 - args[2]); - } else { - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, 32); - tcg_out_opc_reg(s, OPC_SUBU, TCG_REG_AT, TCG_REG_AT, args[2]); - tcg_out_opc_reg(s, OPC_ROTRV, args[0], TCG_REG_AT, args[1]); - } - break; - case INDEX_op_rotr_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], args[2]); + if (c2) { + tcg_out_opc_sa(s, OPC_ROTR, a0, a1, 32 - a2); } else { - tcg_out_opc_reg(s, OPC_ROTRV, args[0], args[2], args[1]); + tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, TCG_REG_ZERO, a2); + tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1); } break; - case INDEX_op_bswap16_i32: - tcg_out_opc_reg(s, OPC_WSBH, args[0], 0, args[1]); - break; case INDEX_op_bswap32_i32: - tcg_out_opc_reg(s, OPC_WSBH, args[0], 0, args[1]); - tcg_out_opc_sa(s, OPC_ROTR, args[0], args[0], 16); - break; - - case INDEX_op_ext8s_i32: - tcg_out_opc_reg(s, OPC_SEB, args[0], 0, args[1]); - break; - case INDEX_op_ext16s_i32: - tcg_out_opc_reg(s, OPC_SEH, args[0], 0, args[1]); + tcg_out_opc_reg(s, OPC_WSBH, a0, 0, a1); + tcg_out_opc_sa(s, OPC_ROTR, a0, a0, 16); break; case INDEX_op_deposit_i32: - tcg_out_opc_imm(s, OPC_INS, args[0], args[2], - ((args[3] + args[4] - 1) << 11) | (args[3] << 6)); + tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]); break; case INDEX_op_brcond_i32: - tcg_out_brcond(s, args[2], args[0], args[1], args[3]); + tcg_out_brcond(s, a2, a0, a1, args[3]); break; case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args[4], args[0], args[1], args[2], args[3], args[5]); + tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], args[5]); break; case INDEX_op_movcond_i32: - tcg_out_movcond(s, args[5], args[0], args[1], args[2], args[3]); + tcg_out_movcond(s, args[5], a0, a1, a2, args[3]); break; case INDEX_op_setcond_i32: - tcg_out_setcond(s, args[3], args[0], args[1], args[2]); + tcg_out_setcond(s, args[3], a0, a1, a2); break; case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args[5], args[0], args[1], args[2], args[3], args[4]); + tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); break; - case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld(s, args, 0); - break; - case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, args, 0 | 4); - break; - case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld(s, args, 1); + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args, false); break; - case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, args, 1 | 4); + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args, true); break; - case 
INDEX_op_qemu_ld32: - tcg_out_qemu_ld(s, args, 2); + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args, false); break; - case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, 3); + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args, true); break; - case INDEX_op_qemu_st8: - tcg_out_qemu_st(s, args, 0); - break; - case INDEX_op_qemu_st16: - tcg_out_qemu_st(s, args, 1); - break; - case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, args, 2); + + case INDEX_op_add2_i32: + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], + const_args[4], const_args[5], false); break; - case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, 3); + case INDEX_op_sub2_i32: + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], + const_args[4], const_args[5], true); break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_call: /* Always emitted via tcg_out_call. */ default: tcg_abort(); } @@ -1590,11 +1577,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, - { INDEX_op_call, { "C" } }, { INDEX_op_br, { } }, - { INDEX_op_mov_i32, { "r", "r" } }, - { INDEX_op_movi_i32, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, { INDEX_op_ld16u_i32, { "r", "r" } }, @@ -1614,9 +1598,9 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_divu_i32, { "r", "rZ", "rZ" } }, { INDEX_op_rem_i32, { "r", "rZ", "rZ" } }, { INDEX_op_remu_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_sub_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_sub_i32, { "r", "rZ", "rN" } }, - { INDEX_op_and_i32, { "r", "rZ", "rI" } }, + { INDEX_op_and_i32, { "r", "rZ", "rIK" } }, { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, { INDEX_op_not_i32, { "r", "rZ" } }, { INDEX_op_or_i32, { "r", "rZ", "rIZ" } }, @@ -1641,34 +1625,20 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, - { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, + { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } }, #if TARGET_LONG_BITS == 32 - { INDEX_op_qemu_ld8u, { "L", "lZ" } }, - { INDEX_op_qemu_ld8s, { "L", "lZ" } }, - { INDEX_op_qemu_ld16u, { "L", "lZ" } }, - { INDEX_op_qemu_ld16s, { "L", "lZ" } }, - { INDEX_op_qemu_ld32, { "L", "lZ" } }, - { INDEX_op_qemu_ld64, { "L", "L", "lZ" } }, - - { INDEX_op_qemu_st8, { "SZ", "SZ" } }, - { INDEX_op_qemu_st16, { "SZ", "SZ" } }, - { INDEX_op_qemu_st32, { "SZ", "SZ" } }, - { INDEX_op_qemu_st64, { "SZ", "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i32, { "L", "lZ" } }, + { INDEX_op_qemu_st_i32, { "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "lZ" } }, + { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ" } }, #else - { INDEX_op_qemu_ld8u, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld8s, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld16u, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld16s, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld32, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld64, { "L", "L", "lZ", "lZ" } }, - - { INDEX_op_qemu_st8, { "SZ", "SZ", "SZ" } }, - { INDEX_op_qemu_st16, { "SZ", "SZ", "SZ" } }, - { INDEX_op_qemu_st32, { "SZ", "SZ", "SZ" } }, - { INDEX_op_qemu_st64, { "SZ", "SZ", "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i32, { "L", 
"lZ", "lZ" } }, + { INDEX_op_qemu_st_i32, { "SZ", "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "lZ", "lZ" } }, + { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ", "SZ" } }, #endif { -1 }, }; @@ -1682,7 +1652,7 @@ static int tcg_target_callee_save_regs[] = { TCG_REG_S5, TCG_REG_S6, TCG_REG_S7, - TCG_REG_FP, + TCG_REG_S8, TCG_REG_RA, /* should be last for ABI compliance */ }; @@ -1814,6 +1784,7 @@ static void tcg_target_init(TCGContext *s) (1 << TCG_REG_A1) | (1 << TCG_REG_A2) | (1 << TCG_REG_A3) | + (1 << TCG_REG_T0) | (1 << TCG_REG_T1) | (1 << TCG_REG_T2) | (1 << TCG_REG_T3) | @@ -1828,11 +1799,18 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); /* zero register */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_K0); /* kernel use only */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_K1); /* kernel use only */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_AT); /* internal use */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_T0); /* internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); /* internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_TMP1); /* internal use */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */ tcg_add_target_add_op_defs(mips_op_defs); } + +void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) +{ + uint32_t *ptr = (uint32_t *)jmp_addr; + *ptr = deposit32(*ptr, 0, 26, addr >> 2); + flush_icache_range(jmp_addr, jmp_addr + 4); +} diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 683c6af8b..c88a1c927 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -26,10 +26,7 @@ #ifndef TCG_TARGET_MIPS #define TCG_TARGET_MIPS 1 -#ifdef __MIPSEB__ -# define TCG_TARGET_WORDS_BIGENDIAN -#endif - +#define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_NB_REGS 32 typedef enum { @@ -63,16 +60,14 @@ typedef enum { TCG_REG_K1, TCG_REG_GP, TCG_REG_SP, - TCG_REG_FP, + TCG_REG_S8, TCG_REG_RA, -} TCGReg; -#define TCG_CT_CONST_ZERO 0x100 -#define TCG_CT_CONST_U16 0x200 -#define TCG_CT_CONST_S16 0x400 + TCG_REG_CALL_STACK = TCG_REG_SP, + TCG_AREG0 = TCG_REG_S0, +} TCGReg; /* used for function call generation */ -#define TCG_REG_CALL_STACK TCG_REG_SP #define TCG_TARGET_STACK_ALIGN 8 #define TCG_TARGET_CALL_STACK_OFFSET 16 #define TCG_TARGET_CALL_ALIGN_ARGS 1 @@ -109,6 +104,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 +#define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 @@ -122,15 +118,11 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions -#define TCG_TARGET_HAS_new_ldst 0 - /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */ #define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */ #define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */ -#define TCG_AREG0 TCG_REG_S0 - #ifdef __OpenBSD__ #include <machine/sysarch.h> #else diff --git a/tcg/optimize.c b/tcg/optimize.c index 7777743e8..34ae3c285 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -83,6 +83,20 @@ static int op_bits(TCGOpcode op) return def->flags & TCG_OPF_64BIT ? 
64 : 32; } +static TCGOpcode op_to_mov(TCGOpcode op) +{ + switch (op_bits(op)) { + case 32: + return INDEX_op_mov_i32; + case 64: + return INDEX_op_mov_i64; + default: + fprintf(stderr, "op_to_mov: unexpected return value of " + "function op_bits.\n"); + tcg_abort(); + } +} + static TCGOpcode op_to_movi(TCGOpcode op) { switch (op_bits(op)) { @@ -148,11 +162,22 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2) return false; } -static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, - TCGArg dst, TCGArg src) +static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args, + TCGOpcode old_op, TCGArg dst, TCGArg src) { + TCGOpcode new_op = op_to_mov(old_op); + tcg_target_ulong mask; + + s->gen_opc_buf[op_index] = new_op; + reset_temp(dst); - temps[dst].mask = temps[src].mask; + mask = temps[src].mask; + if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) { + /* High bits of the destination are now garbage. */ + mask |= ~0xffffffffull; + } + temps[dst].mask = mask; + assert(temps[src].state != TCG_TEMP_CONST); if (s->temps[src].type == s->temps[dst].type) { @@ -172,30 +197,28 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, gen_args[1] = src; } -static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val) +static void tcg_opt_gen_movi(TCGContext *s, int op_index, TCGArg *gen_args, + TCGOpcode old_op, TCGArg dst, TCGArg val) { + TCGOpcode new_op = op_to_movi(old_op); + tcg_target_ulong mask; + + s->gen_opc_buf[op_index] = new_op; + reset_temp(dst); temps[dst].state = TCG_TEMP_CONST; temps[dst].val = val; - temps[dst].mask = val; + mask = val; + if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) { + /* High bits of the destination are now garbage. */ + mask |= ~0xffffffffull; + } + temps[dst].mask = mask; + gen_args[0] = dst; gen_args[1] = val; } -static TCGOpcode op_to_mov(TCGOpcode op) -{ - switch (op_bits(op)) { - case 32: - return INDEX_op_mov_i32; - case 64: - return INDEX_op_mov_i64; - default: - fprintf(stderr, "op_to_mov: unexpected return value of " - "function op_bits.\n"); - tcg_abort(); - } -} - static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) { uint64_t l64, h64; @@ -220,34 +243,35 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) return x ^ y; case INDEX_op_shl_i32: - return (uint32_t)x << (uint32_t)y; + return (uint32_t)x << (y & 31); case INDEX_op_shl_i64: - return (uint64_t)x << (uint64_t)y; + return (uint64_t)x << (y & 63); case INDEX_op_shr_i32: - return (uint32_t)x >> (uint32_t)y; + return (uint32_t)x >> (y & 31); + case INDEX_op_trunc_shr_i32: case INDEX_op_shr_i64: - return (uint64_t)x >> (uint64_t)y; + return (uint64_t)x >> (y & 63); case INDEX_op_sar_i32: - return (int32_t)x >> (int32_t)y; + return (int32_t)x >> (y & 31); case INDEX_op_sar_i64: - return (int64_t)x >> (int64_t)y; + return (int64_t)x >> (y & 63); case INDEX_op_rotr_i32: - return ror32(x, y); + return ror32(x, y & 31); case INDEX_op_rotr_i64: - return ror64(x, y); + return ror64(x, y & 63); case INDEX_op_rotl_i32: - return rol32(x, y); + return rol32(x, y & 31); case INDEX_op_rotl_i64: - return rol64(x, y); + return rol64(x, y & 63); CASE_OP_32_64(not): return ~x; @@ -512,12 +536,8 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2) static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, TCGArg *args, TCGOpDef *tcg_op_defs) { - int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args; - tcg_target_ulong mask, affected; - TCGOpcode op; - const TCGOpDef *def; + int nb_ops, 
op_index, nb_temps, nb_globals; TCGArg *gen_args; - TCGArg tmp; /* Array VALS has an element for each temp. If this temp holds a constant then its value is kept in VALS' element. @@ -531,22 +551,27 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, nb_ops = tcg_opc_ptr - s->gen_opc_buf; gen_args = args; for (op_index = 0; op_index < nb_ops; op_index++) { - op = s->gen_opc_buf[op_index]; - def = &tcg_op_defs[op]; - /* Do copy propagation */ + TCGOpcode op = s->gen_opc_buf[op_index]; + const TCGOpDef *def = &tcg_op_defs[op]; + tcg_target_ulong mask, partmask, affected; + int nb_oargs, nb_iargs, nb_args, i; + TCGArg tmp; + if (op == INDEX_op_call) { - int nb_oargs = args[0] >> 16; - int nb_iargs = args[0] & 0xffff; - for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) { - if (temps[args[i]].state == TCG_TEMP_COPY) { - args[i] = find_better_copy(s, args[i]); - } - } + *gen_args++ = tmp = *args++; + nb_oargs = tmp >> 16; + nb_iargs = tmp & 0xffff; + nb_args = nb_oargs + nb_iargs + def->nb_cargs; } else { - for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) { - if (temps[args[i]].state == TCG_TEMP_COPY) { - args[i] = find_better_copy(s, args[i]); - } + nb_oargs = def->nb_oargs; + nb_iargs = def->nb_iargs; + nb_args = def->nb_args; + } + + /* Do copy propagation */ + for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + if (temps[args[i]].state == TCG_TEMP_COPY) { + args[i] = find_better_copy(s, args[i]); } } @@ -617,8 +642,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(rotr): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[1]].val == 0) { - s->gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], 0); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0); args += 3; gen_args += 2; continue; @@ -747,8 +771,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (temps_are_copies(args[0], args[1])) { s->gen_opc_buf[op_index] = INDEX_op_nop; } else { - s->gen_opc_buf[op_index] = op_to_mov(op); - tcg_opt_gen_mov(s, gen_args, args[0], args[1]); + tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]); gen_args += 2; } args += 3; @@ -806,29 +829,38 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, case INDEX_op_sar_i32: if (temps[args[2]].state == TCG_TEMP_CONST) { - mask = (int32_t)temps[args[1]].mask >> temps[args[2]].val; + tmp = temps[args[2]].val & 31; + mask = (int32_t)temps[args[1]].mask >> tmp; } break; case INDEX_op_sar_i64: if (temps[args[2]].state == TCG_TEMP_CONST) { - mask = (int64_t)temps[args[1]].mask >> temps[args[2]].val; + tmp = temps[args[2]].val & 63; + mask = (int64_t)temps[args[1]].mask >> tmp; } break; case INDEX_op_shr_i32: if (temps[args[2]].state == TCG_TEMP_CONST) { - mask = (uint32_t)temps[args[1]].mask >> temps[args[2]].val; + tmp = temps[args[2]].val & 31; + mask = (uint32_t)temps[args[1]].mask >> tmp; } break; case INDEX_op_shr_i64: if (temps[args[2]].state == TCG_TEMP_CONST) { - mask = (uint64_t)temps[args[1]].mask >> temps[args[2]].val; + tmp = temps[args[2]].val & 63; + mask = (uint64_t)temps[args[1]].mask >> tmp; } break; + case INDEX_op_trunc_shr_i32: + mask = (uint64_t)temps[args[1]].mask >> args[2]; + break; + CASE_OP_32_64(shl): if (temps[args[2]].state == TCG_TEMP_CONST) { - mask = temps[args[1]].mask << temps[args[2]].val; + tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1); + mask = temps[args[1]].mask << tmp; } break; @@ -838,9 +870,8 @@ static TCGArg 
*tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; CASE_OP_32_64(deposit): - tmp = ((1ull << args[4]) - 1); - mask = ((temps[args[1]].mask & ~(tmp << args[3])) - | ((temps[args[2]].mask & tmp) << args[3])); + mask = deposit64(temps[args[1]].mask, args[3], args[4], + temps[args[2]].mask); break; CASE_OP_32_64(or): @@ -849,6 +880,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; CASE_OP_32_64(setcond): + case INDEX_op_setcond2_i32: mask = 1; break; @@ -857,23 +889,18 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; CASE_OP_32_64(ld8u): - case INDEX_op_qemu_ld8u: mask = 0xff; break; CASE_OP_32_64(ld16u): - case INDEX_op_qemu_ld16u: mask = 0xffff; break; case INDEX_op_ld32u_i64: -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_qemu_ld32u: -#endif mask = 0xffffffffu; break; CASE_OP_32_64(qemu_ld): { - TCGMemOp mop = args[def->nb_oargs + def->nb_iargs]; + TCGMemOp mop = args[nb_oargs + nb_iargs]; if (!(mop & MO_SIGN)) { mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1; } @@ -884,34 +911,36 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; } - /* 32-bit ops (non 64-bit ops and non load/store ops) generate 32-bit - results */ - if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) { - mask &= 0xffffffffu; + /* 32-bit ops generate 32-bit results. For the result is zero test + below, we can ignore high bits, but for further optimizations we + need to record that the high bits contain garbage. */ + partmask = mask; + if (!(def->flags & TCG_OPF_64BIT)) { + mask |= ~(tcg_target_ulong)0xffffffffu; + partmask &= 0xffffffffu; + affected &= 0xffffffffu; } - if (mask == 0) { - assert(def->nb_oargs == 1); - s->gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], 0); - args += def->nb_oargs + def->nb_iargs + def->nb_cargs; + if (partmask == 0) { + assert(nb_oargs == 1); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0); + args += nb_args; gen_args += 2; continue; } if (affected == 0) { - assert(def->nb_oargs == 1); + assert(nb_oargs == 1); if (temps_are_copies(args[0], args[1])) { s->gen_opc_buf[op_index] = INDEX_op_nop; } else if (temps[args[1]].state != TCG_TEMP_CONST) { - s->gen_opc_buf[op_index] = op_to_mov(op); - tcg_opt_gen_mov(s, gen_args, args[0], args[1]); + tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]); gen_args += 2; } else { - s->gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val); + tcg_opt_gen_movi(s, op_index, gen_args, op, + args[0], temps[args[1]].val); gen_args += 2; } - args += def->nb_iargs + 1; + args += nb_args; continue; } @@ -923,8 +952,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(mulsh): if ((temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == 0)) { - s->gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], 0); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0); args += 3; gen_args += 2; continue; @@ -942,8 +970,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (temps_are_copies(args[0], args[1])) { s->gen_opc_buf[op_index] = INDEX_op_nop; } else { - s->gen_opc_buf[op_index] = op_to_mov(op); - tcg_opt_gen_mov(s, gen_args, args[0], args[1]); + tcg_opt_gen_mov(s, op_index, gen_args, op, + args[0], args[1]); gen_args += 2; } args += 3; @@ -960,8 +988,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(sub): 
CASE_OP_32_64(xor): if (temps_are_copies(args[1], args[2])) { - s->gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], 0); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0); gen_args += 2; args += 3; continue; @@ -982,19 +1009,17 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; } if (temps[args[1]].state != TCG_TEMP_CONST) { - tcg_opt_gen_mov(s, gen_args, args[0], args[1]); + tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]); gen_args += 2; args += 2; break; } /* Source argument is constant. Rewrite the operation and let movi case handle it. */ - op = op_to_movi(op); - s->gen_opc_buf[op_index] = op; args[1] = temps[args[1]].val; /* fallthrough */ CASE_OP_32_64(movi): - tcg_opt_gen_movi(gen_args, args[0], args[1]); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], args[1]); gen_args += 2; args += 2; break; @@ -1008,15 +1033,24 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, case INDEX_op_ext32s_i64: case INDEX_op_ext32u_i64: if (temps[args[1]].state == TCG_TEMP_CONST) { - s->gen_opc_buf[op_index] = op_to_movi(op); tmp = do_constant_folding(op, temps[args[1]].val, 0); - tcg_opt_gen_movi(gen_args, args[0], tmp); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); gen_args += 2; args += 2; break; } goto do_default; + case INDEX_op_trunc_shr_i32: + if (temps[args[1]].state == TCG_TEMP_CONST) { + tmp = do_constant_folding(op, temps[args[1]].val, args[2]); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); + gen_args += 2; + args += 3; + break; + } + goto do_default; + CASE_OP_32_64(add): CASE_OP_32_64(sub): CASE_OP_32_64(mul): @@ -1041,10 +1075,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(remu): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST) { - s->gen_opc_buf[op_index] = op_to_movi(op); tmp = do_constant_folding(op, temps[args[1]].val, temps[args[2]].val); - tcg_opt_gen_movi(gen_args, args[0], tmp); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); gen_args += 2; args += 3; break; @@ -1054,11 +1087,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(deposit): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST) { - s->gen_opc_buf[op_index] = op_to_movi(op); - tmp = ((1ull << args[4]) - 1); - tmp = (temps[args[1]].val & ~(tmp << args[3])) - | ((temps[args[2]].val & tmp) << args[3]); - tcg_opt_gen_movi(gen_args, args[0], tmp); + tmp = deposit64(temps[args[1]].val, args[3], args[4], + temps[args[2]].val); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); gen_args += 2; args += 5; break; @@ -1068,8 +1099,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(setcond): tmp = do_constant_folding_cond(op, args[1], args[2], args[3]); if (tmp != 2) { - s->gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], tmp); + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); gen_args += 2; args += 4; break; @@ -1098,12 +1128,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (temps_are_copies(args[0], args[4-tmp])) { s->gen_opc_buf[op_index] = INDEX_op_nop; } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { - s->gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val); + tcg_opt_gen_movi(s, op_index, gen_args, op, + args[0], temps[args[4-tmp]].val); 
gen_args += 2; } else { - s->gen_opc_buf[op_index] = op_to_mov(op); - tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]); + tcg_opt_gen_mov(s, op_index, gen_args, op, + args[0], args[4-tmp]); gen_args += 2; } args += 6; @@ -1136,10 +1166,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, rl = args[0]; rh = args[1]; - s->gen_opc_buf[op_index] = INDEX_op_movi_i32; - s->gen_opc_buf[++op_index] = INDEX_op_movi_i32; - tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a); - tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32)); + tcg_opt_gen_movi(s, op_index, &gen_args[0], + op, rl, (uint32_t)a); + tcg_opt_gen_movi(s, ++op_index, &gen_args[2], + op, rh, (uint32_t)(a >> 32)); gen_args += 4; args += 6; break; @@ -1159,10 +1189,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, rl = args[0]; rh = args[1]; - s->gen_opc_buf[op_index] = INDEX_op_movi_i32; - s->gen_opc_buf[++op_index] = INDEX_op_movi_i32; - tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r); - tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32)); + tcg_opt_gen_movi(s, op_index, &gen_args[0], + op, rl, (uint32_t)r); + tcg_opt_gen_movi(s, ++op_index, &gen_args[2], + op, rh, (uint32_t)(r >> 32)); gen_args += 4; args += 4; break; @@ -1173,11 +1203,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]); if (tmp != 2) { if (tmp) { + do_brcond_true: reset_all_temps(nb_temps); s->gen_opc_buf[op_index] = INDEX_op_br; gen_args[0] = args[5]; gen_args += 1; } else { + do_brcond_false: s->gen_opc_buf[op_index] = INDEX_op_nop; } } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) @@ -1187,6 +1219,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, && temps[args[3]].val == 0) { /* Simplify LT/GE comparisons vs zero to a single compare vs the high word of the input. */ + do_brcond_high: reset_all_temps(nb_temps); s->gen_opc_buf[op_index] = INDEX_op_brcond_i32; gen_args[0] = args[1]; @@ -1194,6 +1227,49 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, gen_args[2] = args[4]; gen_args[3] = args[5]; gen_args += 4; + } else if (args[4] == TCG_COND_EQ) { + /* Simplify EQ comparisons where one of the pairs + can be simplified. */ + tmp = do_constant_folding_cond(INDEX_op_brcond_i32, + args[0], args[2], TCG_COND_EQ); + if (tmp == 0) { + goto do_brcond_false; + } else if (tmp == 1) { + goto do_brcond_high; + } + tmp = do_constant_folding_cond(INDEX_op_brcond_i32, + args[1], args[3], TCG_COND_EQ); + if (tmp == 0) { + goto do_brcond_false; + } else if (tmp != 1) { + goto do_default; + } + do_brcond_low: + reset_all_temps(nb_temps); + s->gen_opc_buf[op_index] = INDEX_op_brcond_i32; + gen_args[0] = args[0]; + gen_args[1] = args[2]; + gen_args[2] = args[4]; + gen_args[3] = args[5]; + gen_args += 4; + } else if (args[4] == TCG_COND_NE) { + /* Simplify NE comparisons where one of the pairs + can be simplified. 
*/ + tmp = do_constant_folding_cond(INDEX_op_brcond_i32, + args[0], args[2], TCG_COND_NE); + if (tmp == 0) { + goto do_brcond_high; + } else if (tmp == 1) { + goto do_brcond_true; + } + tmp = do_constant_folding_cond(INDEX_op_brcond_i32, + args[1], args[3], TCG_COND_NE); + if (tmp == 0) { + goto do_brcond_low; + } else if (tmp == 1) { + goto do_brcond_true; + } + goto do_default; } else { goto do_default; } @@ -1203,8 +1279,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, case INDEX_op_setcond2_i32: tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]); if (tmp != 2) { - s->gen_opc_buf[op_index] = INDEX_op_movi_i32; - tcg_opt_gen_movi(gen_args, args[0], tmp); + do_setcond_const: + tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); gen_args += 2; } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) && temps[args[3]].state == TCG_TEMP_CONST @@ -1213,13 +1289,59 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, && temps[args[4]].val == 0) { /* Simplify LT/GE comparisons vs zero to a single compare vs the high word of the input. */ + do_setcond_high: s->gen_opc_buf[op_index] = INDEX_op_setcond_i32; reset_temp(args[0]); + temps[args[0]].mask = 1; gen_args[0] = args[0]; gen_args[1] = args[2]; gen_args[2] = args[4]; gen_args[3] = args[5]; gen_args += 4; + } else if (args[5] == TCG_COND_EQ) { + /* Simplify EQ comparisons where one of the pairs + can be simplified. */ + tmp = do_constant_folding_cond(INDEX_op_setcond_i32, + args[1], args[3], TCG_COND_EQ); + if (tmp == 0) { + goto do_setcond_const; + } else if (tmp == 1) { + goto do_setcond_high; + } + tmp = do_constant_folding_cond(INDEX_op_setcond_i32, + args[2], args[4], TCG_COND_EQ); + if (tmp == 0) { + goto do_setcond_high; + } else if (tmp != 1) { + goto do_default; + } + do_setcond_low: + reset_temp(args[0]); + temps[args[0]].mask = 1; + s->gen_opc_buf[op_index] = INDEX_op_setcond_i32; + gen_args[0] = args[0]; + gen_args[1] = args[1]; + gen_args[2] = args[3]; + gen_args[3] = args[5]; + gen_args += 4; + } else if (args[5] == TCG_COND_NE) { + /* Simplify NE comparisons where one of the pairs + can be simplified. 
*/ + tmp = do_constant_folding_cond(INDEX_op_setcond_i32, + args[1], args[3], TCG_COND_NE); + if (tmp == 0) { + goto do_setcond_high; + } else if (tmp == 1) { + goto do_setcond_const; + } + tmp = do_constant_folding_cond(INDEX_op_setcond_i32, + args[2], args[4], TCG_COND_NE); + if (tmp == 0) { + goto do_setcond_low; + } else if (tmp == 1) { + goto do_setcond_const; + } + goto do_default; } else { goto do_default; } @@ -1227,24 +1349,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; case INDEX_op_call: - nb_call_args = (args[0] >> 16) + (args[0] & 0xffff); - if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS | - TCG_CALL_NO_WRITE_GLOBALS))) { + if (!(args[nb_oargs + nb_iargs + 1] + & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { for (i = 0; i < nb_globals; i++) { reset_temp(i); } } - for (i = 0; i < (args[0] >> 16); i++) { - reset_temp(args[i + 1]); - } - i = nb_call_args + 3; - while (i) { - *gen_args = *args; - args++; - gen_args++; - i--; - } - break; + goto do_reset_output; default: do_default: @@ -1256,7 +1367,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (def->flags & TCG_OPF_BB_END) { reset_all_temps(nb_temps); } else { - for (i = 0; i < def->nb_oargs; i++) { + do_reset_output: + for (i = 0; i < nb_oargs; i++) { reset_temp(args[i]); /* Save the corresponding known-zero bits mask for the first output argument (only one supported so far). */ @@ -1265,11 +1377,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } } } - for (i = 0; i < def->nb_args; i++) { + for (i = 0; i < nb_args; i++) { gen_args[i] = args[i]; } - args += def->nb_args; - gen_args += def->nb_args; + args += nb_args; + gen_args += nb_args; break; } } diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index dc2c2df89..203027eb3 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -24,27 +24,71 @@ #include "tcg-be-ldst.h" -static uint8_t *tb_ret_addr; - #if defined _CALL_DARWIN || defined __APPLE__ #define TCG_TARGET_CALL_DARWIN #endif +#ifdef _CALL_SYSV +# define TCG_TARGET_CALL_ALIGN_ARGS 1 +#endif -#ifdef TCG_TARGET_CALL_DARWIN -#define LINKAGE_AREA_SIZE 24 -#define LR_OFFSET 8 -#elif defined _CALL_AIX -#define LINKAGE_AREA_SIZE 52 -#define LR_OFFSET 8 +/* For some memory operations, we need a scratch that isn't R0. For the AIX + calling convention, we can re-use the TOC register since we'll be reloading + it at every call. Otherwise R12 will do nicely as neither a call-saved + register nor a parameter register. */ +#ifdef _CALL_AIX +# define TCG_REG_TMP1 TCG_REG_R2 #else -#define LINKAGE_AREA_SIZE 8 -#define LR_OFFSET 4 +# define TCG_REG_TMP1 TCG_REG_R12 #endif +/* For the 64-bit target, we don't like the 5 insn sequence needed to build + full 64-bit addresses. Better to have a base register to which we can + apply a 32-bit displacement. + + There are generally three items of interest: + (1) helper functions in the main executable, + (2) TranslationBlock data structures, + (3) the return address in the epilogue. + + For user-only, we USE_STATIC_CODE_GEN_BUFFER, so the code_gen_buffer + will be inside the main executable, and thus near enough to make a + pointer to the epilogue be within 2GB of all helper functions. + + For softmmu, we'll let the kernel choose the address of code_gen_buffer, + and odds are it'll be somewhere close to the main malloc arena, and so + a pointer to the epilogue will be within 2GB of the TranslationBlocks. 
+ + For --enable-pie, everything will be kinda near everything else, + somewhere in high memory. + + Thus we choose to keep the return address in a call-saved register. */ +#define TCG_REG_RA TCG_REG_R31 +#define USE_REG_RA (TCG_TARGET_REG_BITS == 64) + +/* Shorthand for size of a pointer. Avoid promotion to unsigned. */ +#define SZP ((int)sizeof(void *)) + +/* Shorthand for size of a register. */ +#define SZR (TCG_TARGET_REG_BITS / 8) + +#define TCG_CT_CONST_S16 0x100 +#define TCG_CT_CONST_U16 0x200 +#define TCG_CT_CONST_S32 0x400 +#define TCG_CT_CONST_U32 0x800 +#define TCG_CT_CONST_ZERO 0x1000 +#define TCG_CT_CONST_MONE 0x2000 + +static tcg_insn_unit *tb_ret_addr; + #ifndef GUEST_BASE #define GUEST_BASE 0 #endif +#include "elf.h" +static bool have_isa_2_06; +#define HAVE_ISA_2_06 have_isa_2_06 +#define HAVE_ISEL have_isa_2_06 + #ifdef CONFIG_USE_GUEST_BASE #define TCG_GUEST_BASE_REG 30 #else @@ -89,7 +133,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #endif static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R14, + TCG_REG_R14, /* call saved registers */ TCG_REG_R15, TCG_REG_R16, TCG_REG_R17, @@ -99,32 +143,26 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_R21, TCG_REG_R22, TCG_REG_R23, + TCG_REG_R24, + TCG_REG_R25, + TCG_REG_R26, + TCG_REG_R27, TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31, -#ifdef TCG_TARGET_CALL_DARWIN - TCG_REG_R2, -#endif - TCG_REG_R3, - TCG_REG_R4, - TCG_REG_R5, - TCG_REG_R6, - TCG_REG_R7, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, -#ifndef TCG_TARGET_CALL_DARWIN + TCG_REG_R12, /* call clobbered, non-arguments */ TCG_REG_R11, -#endif - TCG_REG_R12, -#ifndef _CALL_SYSV + TCG_REG_R2, TCG_REG_R13, -#endif - TCG_REG_R24, - TCG_REG_R25, - TCG_REG_R26, - TCG_REG_R27 + TCG_REG_R10, /* call clobbered, arguments */ + TCG_REG_R9, + TCG_REG_R8, + TCG_REG_R7, + TCG_REG_R6, + TCG_REG_R5, + TCG_REG_R4, + TCG_REG_R3, }; static const int tcg_target_call_iarg_regs[] = { @@ -138,7 +176,7 @@ static const int tcg_target_call_iarg_regs[] = { TCG_REG_R10 }; -static const int tcg_target_call_oarg_regs[2] = { +static const int tcg_target_call_oarg_regs[] = { TCG_REG_R3, TCG_REG_R4 }; @@ -146,10 +184,6 @@ static const int tcg_target_call_oarg_regs[2] = { static const int tcg_target_callee_save_regs[] = { #ifdef TCG_TARGET_CALL_DARWIN TCG_REG_R11, - TCG_REG_R13, -#endif -#ifdef _CALL_AIX - TCG_REG_R13, #endif TCG_REG_R14, TCG_REG_R15, @@ -171,50 +205,59 @@ static const int tcg_target_callee_save_regs[] = { TCG_REG_R31 }; -static uint32_t reloc_pc24_val (void *pc, tcg_target_long target) +static inline bool in_range_b(tcg_target_long target) { - tcg_target_long disp; - - disp = target - (tcg_target_long) pc; - if ((disp << 6) >> 6 != disp) - tcg_abort (); + return target == sextract64(target, 0, 26); +} +static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); + assert(in_range_b(disp)); return disp & 0x3fffffc; } -static void reloc_pc24 (void *pc, tcg_target_long target) +static void reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target) { - *(uint32_t *) pc = (*(uint32_t *) pc & ~0x3fffffc) - | reloc_pc24_val (pc, target); + *pc = (*pc & ~0x3fffffc) | reloc_pc24_val(pc, target); } -static uint16_t reloc_pc14_val (void *pc, tcg_target_long target) +static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target) { - tcg_target_long disp; + ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); + assert(disp == (int16_t) disp); + return disp & 0xfffc; +} - disp = 
target - (tcg_target_long) pc; - if (disp != (int16_t) disp) - tcg_abort (); +static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target); +} - return disp & 0xfffc; +static inline void tcg_out_b_noaddr(TCGContext *s, int insn) +{ + unsigned retrans = *s->code_ptr & 0x3fffffc; + tcg_out32(s, insn | retrans); } -static void reloc_pc14 (void *pc, tcg_target_long target) +static inline void tcg_out_bc_noaddr(TCGContext *s, int insn) { - *(uint32_t *) pc = (*(uint32_t *) pc & ~0xfffc) - | reloc_pc14_val (pc, target); + unsigned retrans = *s->code_ptr & 0xfffc; + tcg_out32(s, insn | retrans); } -static void patch_reloc(uint8_t *code_ptr, int type, +static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { - value += addend; + tcg_insn_unit *target = (tcg_insn_unit *)value; + + assert(addend == 0); switch (type) { case R_PPC_REL14: - reloc_pc14 (code_ptr, value); + reloc_pc14(code_ptr, target); break; case R_PPC_REL24: - reloc_pc24 (code_ptr, value); + reloc_pc24(code_ptr, target); break; default: tcg_abort(); @@ -236,59 +279,43 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, 0xffffffff); break; -#ifdef CONFIG_SOFTMMU case 'L': /* qemu_ld constraint */ ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, 0xffffffff); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); +#ifdef CONFIG_SOFTMMU tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); -#if TARGET_LONG_BITS == 64 - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6); -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R7); -#endif #endif break; - case 'K': /* qemu_st[8..32] constraint */ + case 'S': /* qemu_st constraint */ ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, 0xffffffff); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); +#ifdef CONFIG_SOFTMMU tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6); -#if TARGET_LONG_BITS == 64 - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R7); -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R8); -#endif #endif break; - case 'M': /* qemu_st64 constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R7); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R8); -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R9); -#endif + case 'I': + ct->ct |= TCG_CT_CONST_S16; break; -#else - case 'L': - case 'K': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + case 'J': + ct->ct |= TCG_CT_CONST_U16; break; case 'M': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); + ct->ct |= TCG_CT_CONST_MONE; + break; + case 'T': + ct->ct |= TCG_CT_CONST_S32; + break; + case 'U': + ct->ct |= TCG_CT_CONST_U32; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_ZERO; break; -#endif default: return -1; } @@ -298,51 +325,89 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) } /* test if a constant matches the constraint */ -static int tcg_target_const_match(tcg_target_long val, +static int 
tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct) { - int ct; + int ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } + + /* The only 32-bit constraint we use aside from + TCG_CT_CONST is TCG_CT_CONST_S16. */ + if (type == TCG_TYPE_I32) { + val = (int32_t)val; + } - ct = arg_ct->ct; - if (ct & TCG_CT_CONST) + if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { return 1; + } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } else if ((ct & TCG_CT_CONST_MONE) && val == -1) { + return 1; + } return 0; } #define OPCD(opc) ((opc)<<26) -#define XO31(opc) (OPCD(31)|((opc)<<1)) #define XO19(opc) (OPCD(19)|((opc)<<1)) - -#define B OPCD(18) -#define BC OPCD(16) -#define LBZ OPCD(34) -#define LHZ OPCD(40) -#define LHA OPCD(42) -#define LWZ OPCD(32) -#define STB OPCD(38) -#define STH OPCD(44) -#define STW OPCD(36) - -#define ADDIC OPCD(12) -#define ADDI OPCD(14) -#define ADDIS OPCD(15) -#define ORI OPCD(24) -#define ORIS OPCD(25) -#define XORI OPCD(26) -#define XORIS OPCD(27) -#define ANDI OPCD(28) -#define ANDIS OPCD(29) -#define MULLI OPCD( 7) -#define CMPLI OPCD(10) -#define CMPI OPCD(11) +#define MD30(opc) (OPCD(30)|((opc)<<2)) +#define MDS30(opc) (OPCD(30)|((opc)<<1)) +#define XO31(opc) (OPCD(31)|((opc)<<1)) +#define XO58(opc) (OPCD(58)|(opc)) +#define XO62(opc) (OPCD(62)|(opc)) + +#define B OPCD( 18) +#define BC OPCD( 16) +#define LBZ OPCD( 34) +#define LHZ OPCD( 40) +#define LHA OPCD( 42) +#define LWZ OPCD( 32) +#define STB OPCD( 38) +#define STH OPCD( 44) +#define STW OPCD( 36) + +#define STD XO62( 0) +#define STDU XO62( 1) +#define STDX XO31(149) + +#define LD XO58( 0) +#define LDX XO31( 21) +#define LDU XO58( 1) +#define LWA XO58( 2) +#define LWAX XO31(341) + +#define ADDIC OPCD( 12) +#define ADDI OPCD( 14) +#define ADDIS OPCD( 15) +#define ORI OPCD( 24) +#define ORIS OPCD( 25) +#define XORI OPCD( 26) +#define XORIS OPCD( 27) +#define ANDI OPCD( 28) +#define ANDIS OPCD( 29) +#define MULLI OPCD( 7) +#define CMPLI OPCD( 10) +#define CMPI OPCD( 11) #define SUBFIC OPCD( 8) -#define LWZU OPCD(33) -#define STWU OPCD(37) +#define LWZU OPCD( 33) +#define STWU OPCD( 37) + +#define RLWIMI OPCD( 20) +#define RLWINM OPCD( 21) +#define RLWNM OPCD( 23) -#define RLWIMI OPCD(20) -#define RLWINM OPCD(21) -#define RLWNM OPCD(23) +#define RLDICL MD30( 0) +#define RLDICR MD30( 1) +#define RLDIMI MD30( 3) +#define RLDCL MDS30( 8) #define BCLR XO19( 16) #define BCCTR XO19(528) @@ -354,16 +419,22 @@ static int tcg_target_const_match(tcg_target_long val, #define EXTSB XO31(954) #define EXTSH XO31(922) +#define EXTSW XO31(986) #define ADD XO31(266) #define ADDE XO31(138) +#define ADDME XO31(234) +#define ADDZE XO31(202) #define ADDC XO31( 10) #define AND XO31( 28) #define SUBF XO31( 40) #define SUBFC XO31( 8) #define SUBFE XO31(136) +#define SUBFME XO31(232) +#define SUBFZE XO31(200) #define OR XO31(444) #define XOR XO31(316) #define MULLW XO31(235) +#define MULHW XO31( 75) #define MULHWU XO31( 11) #define DIVW XO31(491) #define DIVWU XO31(459) @@ -371,21 +442,31 @@ static int tcg_target_const_match(tcg_target_long val, #define CMPL XO31( 32) #define LHBRX XO31(790) #define LWBRX XO31(534) +#define LDBRX XO31(532) #define STHBRX XO31(918) #define STWBRX XO31(662) +#define STDBRX XO31(660) #define MFSPR XO31(339) #define MTSPR 
XO31(467) #define SRAWI XO31(824) #define NEG XO31(104) #define MFCR XO31( 19) -#define CNTLZW XO31( 26) +#define MFOCRF (MFCR | (1u << 20)) #define NOR XO31(124) +#define CNTLZW XO31( 26) +#define CNTLZD XO31( 58) #define ANDC XO31( 60) #define ORC XO31(412) #define EQV XO31(284) #define NAND XO31(476) #define ISEL XO31( 15) +#define MULLD XO31(233) +#define MULHD XO31( 73) +#define MULHDU XO31( 9) +#define DIVD XO31(489) +#define DIVDU XO31(457) + #define LBZX XO31( 87) #define LHZX XO31(279) #define LHAX XO31(343) @@ -394,7 +475,7 @@ static int tcg_target_const_match(tcg_target_long val, #define STHX XO31(407) #define STWX XO31(151) -#define SPR(a,b) ((((a)<<5)|(b))<<11) +#define SPR(a, b) ((((a)<<5)|(b))<<11) #define LR SPR(8, 0) #define CTR SPR(9, 0) @@ -402,8 +483,15 @@ static int tcg_target_const_match(tcg_target_long val, #define SRW XO31(536) #define SRAW XO31(792) -#define TW XO31(4) -#define TRAP (TW | TO (31)) +#define SLD XO31( 27) +#define SRD XO31(539) +#define SRAD XO31(794) +#define SRADI XO31(413<<1) + +#define TW XO31( 4) +#define TRAP (TW | TO(31)) + +#define NOP ORI /* ori 0,0,0 */ #define RT(r) ((r)<<21) #define RS(r) ((r)<<21) @@ -414,21 +502,26 @@ static int tcg_target_const_match(tcg_target_long val, #define MB(b) ((b)<<6) #define ME(e) ((e)<<1) #define BO(o) ((o)<<21) +#define MB64(b) ((b)<<5) +#define FXM(b) (1 << (19 - (b))) #define LK 1 -#define TAB(t,a,b) (RT(t) | RA(a) | RB(b)) -#define SAB(s,a,b) (RS(s) | RA(a) | RB(b)) +#define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) +#define SAB(s, a, b) (RS(s) | RA(a) | RB(b)) +#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff)) +#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff)) #define BF(n) ((n)<<23) #define BI(n, c) (((c)+((n)*4))<<16) #define BT(n, c) (((c)+((n)*4))<<21) #define BA(n, c) (((c)+((n)*4))<<16) #define BB(n, c) (((c)+((n)*4))<<11) +#define BC_(n, c) (((c)+((n)*4))<<6) -#define BO_COND_TRUE BO (12) -#define BO_COND_FALSE BO (4) -#define BO_ALWAYS BO (20) +#define BO_COND_TRUE BO(12) +#define BO_COND_FALSE BO( 4) +#define BO_ALWAYS BO(20) enum { CR_LT, @@ -438,566 +531,351 @@ enum { }; static const uint32_t tcg_to_bc[] = { - [TCG_COND_EQ] = BC | BI (7, CR_EQ) | BO_COND_TRUE, - [TCG_COND_NE] = BC | BI (7, CR_EQ) | BO_COND_FALSE, - [TCG_COND_LT] = BC | BI (7, CR_LT) | BO_COND_TRUE, - [TCG_COND_GE] = BC | BI (7, CR_LT) | BO_COND_FALSE, - [TCG_COND_LE] = BC | BI (7, CR_GT) | BO_COND_FALSE, - [TCG_COND_GT] = BC | BI (7, CR_GT) | BO_COND_TRUE, - [TCG_COND_LTU] = BC | BI (7, CR_LT) | BO_COND_TRUE, - [TCG_COND_GEU] = BC | BI (7, CR_LT) | BO_COND_FALSE, - [TCG_COND_LEU] = BC | BI (7, CR_GT) | BO_COND_FALSE, - [TCG_COND_GTU] = BC | BI (7, CR_GT) | BO_COND_TRUE, + [TCG_COND_EQ] = BC | BI(7, CR_EQ) | BO_COND_TRUE, + [TCG_COND_NE] = BC | BI(7, CR_EQ) | BO_COND_FALSE, + [TCG_COND_LT] = BC | BI(7, CR_LT) | BO_COND_TRUE, + [TCG_COND_GE] = BC | BI(7, CR_LT) | BO_COND_FALSE, + [TCG_COND_LE] = BC | BI(7, CR_GT) | BO_COND_FALSE, + [TCG_COND_GT] = BC | BI(7, CR_GT) | BO_COND_TRUE, + [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE, + [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE, + [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE, + [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE, }; +/* The low bit here is set if the RA and RB fields must be inverted. 
*/ +static const uint32_t tcg_to_isel[] = { + [TCG_COND_EQ] = ISEL | BC_(7, CR_EQ), + [TCG_COND_NE] = ISEL | BC_(7, CR_EQ) | 1, + [TCG_COND_LT] = ISEL | BC_(7, CR_LT), + [TCG_COND_GE] = ISEL | BC_(7, CR_LT) | 1, + [TCG_COND_LE] = ISEL | BC_(7, CR_GT) | 1, + [TCG_COND_GT] = ISEL | BC_(7, CR_GT), + [TCG_COND_LTU] = ISEL | BC_(7, CR_LT), + [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1, + [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1, + [TCG_COND_GTU] = ISEL | BC_(7, CR_GT), +}; + +static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, + TCGReg base, tcg_target_long offset); + static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { + tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); if (ret != arg) { tcg_out32(s, OR | SAB(arg, ret, arg)); } } -static void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long arg) +static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, + int sh, int mb) { - if (arg == (int16_t) arg) - tcg_out32 (s, ADDI | RT (ret) | RA (0) | (arg & 0xffff)); - else { - tcg_out32 (s, ADDIS | RT (ret) | RA (0) | ((arg >> 16) & 0xffff)); - if (arg & 0xffff) - tcg_out32 (s, ORI | RS (ret) | RA (ret) | (arg & 0xffff)); - } + assert(TCG_TARGET_REG_BITS == 64); + sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1); + mb = MB64((mb >> 5) | ((mb << 1) & 0x3f)); + tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb); } -static void tcg_out_ldst (TCGContext *s, int ret, int addr, - int offset, int op1, int op2) +static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, + int sh, int mb, int me) { - if (offset == (int16_t) offset) - tcg_out32 (s, op1 | RT (ret) | RA (addr) | (offset & 0xffff)); - else { - tcg_out_movi (s, TCG_TYPE_I32, 0, offset); - tcg_out32 (s, op2 | RT (ret) | RA (addr) | RB (0)); - } + tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me)); } -static void tcg_out_b (TCGContext *s, int mask, tcg_target_long target) +static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src) { - tcg_target_long disp; - - disp = target - (tcg_target_long) s->code_ptr; - if ((disp << 6) >> 6 == disp) - tcg_out32 (s, B | (disp & 0x3fffffc) | mask); - else { - tcg_out_movi (s, TCG_TYPE_I32, 0, (tcg_target_long) target); - tcg_out32 (s, MTSPR | RS (0) | CTR); - tcg_out32 (s, BCCTR | BO_ALWAYS | mask); - } + tcg_out_rld(s, RLDICL, dst, src, 0, 32); } -static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg, - int lk) +static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c) { -#ifdef _CALL_AIX - int reg; - - if (const_arg) { - reg = 2; - tcg_out_movi (s, TCG_TYPE_I32, reg, arg); - } - else reg = arg; - - tcg_out32 (s, LWZ | RT (0) | RA (reg)); - tcg_out32 (s, MTSPR | RA (0) | CTR); - tcg_out32 (s, LWZ | RT (2) | RA (reg) | 4); - tcg_out32 (s, BCCTR | BO_ALWAYS | lk); -#else - if (const_arg) { - tcg_out_b (s, lk, arg); - } - else { - tcg_out32 (s, MTSPR | RS (arg) | LR); - tcg_out32 (s, BCLR | BO_ALWAYS | lk); - } -#endif + tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c); } -#if defined(CONFIG_SOFTMMU) - -static void add_qemu_ldst_label (TCGContext *s, - int is_ld, - TCGMemOp opc, - int data_reg, - int data_reg2, - int addrlo_reg, - int addrhi_reg, - int mem_index, - uint8_t *raddr, - uint8_t *label_ptr) +static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c) { - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->opc = opc; - label->datalo_reg = data_reg; - label->datahi_reg = data_reg2; 
- label->addrlo_reg = addrlo_reg; - label->addrhi_reg = addrhi_reg; - label->mem_index = mem_index; - label->raddr = raddr; - label->label_ptr[0] = label_ptr; + tcg_out_rld(s, RLDICR, dst, src, c, 63 - c); } -/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, - * int mmu_idx, uintptr_t ra) - */ -static const void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, -}; - -/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, - * uintxx_t val, int mmu_idx, uintptr_t ra) - */ -static const void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, -}; - -static void *ld_trampolines[16]; -static void *st_trampolines[16]; - -/* Perform the TLB load and compare. Branches to the slow path, placing the - address of the branch in *LABEL_PTR. Loads the addend of the TLB into R0. - Clobbers R1 and R2. */ - -static void tcg_out_tlb_check(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2, - TCGReg addrlo, TCGReg addrhi, TCGMemOp s_bits, - int mem_index, int is_load, uint8_t **label_ptr) +static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c) { - int cmp_off = - (is_load - ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) - : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); - int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); - uint16_t retranst; - TCGReg base = TCG_AREG0; - - /* Extract the page index, shifted into place for tlb index. */ - tcg_out32(s, (RLWINM - | RA(r0) - | RS(addrlo) - | SH(32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) - | MB(32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS)) - | ME(31 - CPU_TLB_ENTRY_BITS))); - - /* Compensate for very large offsets. */ - if (add_off >= 0x8000) { - /* Most target env are smaller than 32k; none are larger than 64k. - Simplify the logic here merely to offset by 0x7ff0, giving us a - range just shy of 64k. Check this assumption. */ - QEMU_BUILD_BUG_ON(offsetof(CPUArchState, - tlb_table[NB_MMU_MODES - 1][1]) - > 0x7ff0 + 0x7fff); - tcg_out32(s, ADDI | RT(r1) | RA(base) | 0x7ff0); - base = r1; - cmp_off -= 0x7ff0; - add_off -= 0x7ff0; - } - - /* Clear the non-page, non-alignment bits from the address. */ - tcg_out32(s, (RLWINM - | RA(r2) - | RS(addrlo) - | SH(0) - | MB((32 - s_bits) & 31) - | ME(31 - TARGET_PAGE_BITS))); - - tcg_out32(s, ADD | RT(r0) | RA(r0) | RB(base)); - base = r0; - - /* Load the tlb comparator. */ - tcg_out32(s, LWZ | RT(r1) | RA(base) | (cmp_off & 0xffff)); - - tcg_out32(s, CMP | BF(7) | RA(r2) | RB(r1)); - - if (TARGET_LONG_BITS == 64) { - tcg_out32(s, LWZ | RT(r1) | RA(base) | ((cmp_off + 4) & 0xffff)); - } + tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31); +} - /* Load the tlb addend for use on the fast path. - Do this asap to minimize load delay. 
*/ - tcg_out32(s, LWZ | RT(r0) | RA(base) | (add_off & 0xffff)); +static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ + tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); +} - if (TARGET_LONG_BITS == 64) { - tcg_out32(s, CMP | BF(6) | RA(addrhi) | RB(r1)); - tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); +static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg) +{ + if (arg == (int16_t) arg) { + tcg_out32(s, ADDI | TAI(ret, 0, arg)); + } else { + tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); + if (arg & 0xffff) { + tcg_out32(s, ORI | SAI(ret, ret, arg)); + } } - - /* Use a conditional branch-and-link so that we load a pointer to - somewhere within the current opcode, for passing on to the helper. - This address cannot be used for a tail call, but it's shorter - than forming an address from scratch. */ - *label_ptr = s->code_ptr; - retranst = ((uint16_t *) s->code_ptr)[1] & ~3; - tcg_out32(s, BC | BI(7, CR_EQ) | retranst | BO_COND_FALSE | LK); } -#endif -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, + tcg_target_long arg) { - TCGReg addrlo, datalo, datahi, rbase, addrhi __attribute__((unused)); - TCGMemOp opc, bswap; -#ifdef CONFIG_SOFTMMU - int mem_index; - uint8_t *label_ptr; -#endif - - datalo = *args++; - datahi = (is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); - opc = *args++; - bswap = opc & MO_BSWAP; + tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + if (type == TCG_TYPE_I32 || arg == (int32_t)arg) { + tcg_out_movi32(s, ret, arg); + } else if (arg == (uint32_t)arg && !(arg & 0x8000)) { + tcg_out32(s, ADDI | TAI(ret, 0, arg)); + tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); + } else { + int32_t high; -#ifdef CONFIG_SOFTMMU - mem_index = *args; - tcg_out_tlb_check(s, TCG_REG_R3, TCG_REG_R4, TCG_REG_R0, addrlo, - addrhi, opc & MO_SIZE, mem_index, 0, &label_ptr); - rbase = TCG_REG_R3; -#else /* !CONFIG_SOFTMMU */ - rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; -#endif + if (USE_REG_RA) { + intptr_t diff = arg - (intptr_t)tb_ret_addr; + if (diff == (int32_t)diff) { + tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_RA, diff); + return; + } + } - switch (opc & MO_SSIZE) { - default: - case MO_UB: - tcg_out32(s, LBZX | TAB(datalo, rbase, addrlo)); - break; - case MO_SB: - tcg_out32(s, LBZX | TAB(datalo, rbase, addrlo)); - tcg_out32(s, EXTSB | RA(datalo) | RS(datalo)); - break; - case MO_UW: - tcg_out32(s, (bswap ? LHBRX : LHZX) | TAB(datalo, rbase, addrlo)); - break; - case MO_SW: - if (bswap) { - tcg_out32(s, LHBRX | TAB(datalo, rbase, addrlo)); - tcg_out32(s, EXTSH | RA(datalo) | RS(datalo)); - } else { - tcg_out32(s, LHAX | TAB(datalo, rbase, addrlo)); + high = arg >> 31 >> 1; + tcg_out_movi32(s, ret, high); + if (high) { + tcg_out_shli64(s, ret, ret, 32); } - break; - case MO_UL: - tcg_out32(s, (bswap ? 
LWBRX : LWZX) | TAB(datalo, rbase, addrlo)); - break; - case MO_Q: - if (bswap) { - tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); - tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); - tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0)); - } else if (rbase != 0) { - tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); - tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo)); - tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0)); - } else if (addrlo == datahi) { - tcg_out32(s, LWZ | RT(datalo) | RA(addrlo) | 4); - tcg_out32(s, LWZ | RT(datahi) | RA(addrlo)); - } else { - tcg_out32(s, LWZ | RT(datahi) | RA(addrlo)); - tcg_out32(s, LWZ | RT(datalo) | RA(addrlo) | 4); + if (arg & 0xffff0000) { + tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); + } + if (arg & 0xffff) { + tcg_out32(s, ORI | SAI(ret, ret, arg)); } - break; } -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, - addrhi, mem_index, s->code_ptr, label_ptr); -#endif } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) +static bool mask_operand(uint32_t c, int *mb, int *me) { - TCGReg addrlo, datalo, datahi, rbase, addrhi __attribute__((unused)); - TCGMemOp opc, bswap, s_bits; -#ifdef CONFIG_SOFTMMU - int mem_index; - uint8_t *label_ptr; -#endif + uint32_t lsb, test; + + /* Accept a bit pattern like: + 0....01....1 + 1....10....0 + 0..01..10..0 + Keep track of the transitions. */ + if (c == 0 || c == -1) { + return false; + } + test = c; + lsb = test & -test; + test += lsb; + if (test & (test - 1)) { + return false; + } - datalo = *args++; - datahi = (is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); - opc = *args++; - bswap = opc & MO_BSWAP; - s_bits = opc & MO_SIZE; + *me = clz32(lsb); + *mb = test ? clz32(test & -test) + 1 : 0; + return true; +} -#ifdef CONFIG_SOFTMMU - mem_index = *args; - tcg_out_tlb_check(s, TCG_REG_R3, TCG_REG_R4, TCG_REG_R0, addrlo, - addrhi, s_bits, mem_index, 0, &label_ptr); - rbase = TCG_REG_R3; -#else /* !CONFIG_SOFTMMU */ - rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; -#endif +static bool mask64_operand(uint64_t c, int *mb, int *me) +{ + uint64_t lsb; - switch (s_bits) { - case MO_8: - tcg_out32(s, STBX | SAB(datalo, rbase, addrlo)); - break; - case MO_16: - tcg_out32(s, (bswap ? STHBRX : STHX) | SAB(datalo, rbase, addrlo)); - break; - case MO_32: - default: - tcg_out32(s, (bswap ? STWBRX : STWX) | SAB(datalo, rbase, addrlo)); - break; - case MO_64: - if (bswap) { - tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); - tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); - tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0)); - } else if (rbase != 0) { - tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); - tcg_out32(s, STWX | SAB(datahi, rbase, addrlo)); - tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0)); - } else { - tcg_out32(s, STW | RS(datahi) | RA(addrlo)); - tcg_out32(s, STW | RS(datalo) | RA(addrlo) | 4); - } - break; + if (c == 0) { + return false; } -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi, - mem_index, s->code_ptr, label_ptr); -#endif + lsb = c & -c; + /* Accept 1..10..0. */ + if (c == -lsb) { + *mb = 0; + *me = clz64(lsb); + return true; + } + /* Accept 0..01..1. 
*/ + if (lsb == 1 && (c & (c + 1)) == 0) { + *mb = clz64(c + 1) + 1; + *me = 63; + return true; + } + return false; } -#if defined(CONFIG_SOFTMMU) -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) { - TCGReg ir, datalo, datahi; - TCGMemOp opc = l->opc; - - reloc_pc14 (l->label_ptr[0], (uintptr_t)s->code_ptr); + int mb, me; - ir = TCG_REG_R4; - if (TARGET_LONG_BITS == 32) { - tcg_out_mov(s, TCG_TYPE_I32, ir++, l->addrlo_reg); + if ((c & 0xffff) == c) { + tcg_out32(s, ANDI | SAI(src, dst, c)); + return; + } else if ((c & 0xffff0000) == c) { + tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); + return; + } else if (mask_operand(c, &mb, &me)) { + tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); } else { -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - ir |= 1; -#endif - tcg_out_mov(s, TCG_TYPE_I32, ir++, l->addrhi_reg); - tcg_out_mov(s, TCG_TYPE_I32, ir++, l->addrlo_reg); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); + tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); } - tcg_out_movi(s, TCG_TYPE_I32, ir++, l->mem_index); - tcg_out32(s, MFSPR | RT(ir++) | LR); - tcg_out_b(s, LK, (uintptr_t)ld_trampolines[opc & ~MO_SIGN]); +} - datalo = l->datalo_reg; - switch (opc & MO_SSIZE) { - case MO_SB: - tcg_out32(s, EXTSB | RA(datalo) | RS(TCG_REG_R3)); - break; - case MO_SW: - tcg_out32(s, EXTSH | RA(datalo) | RS(TCG_REG_R3)); - break; - default: - tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R3); - break; - case MO_Q: - datahi = l->datahi_reg; - if (datalo != TCG_REG_R3) { - tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4); - tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); - } else if (datahi != TCG_REG_R4) { - tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); - tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4); +static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) +{ + int mb, me; + + assert(TCG_TARGET_REG_BITS == 64); + if ((c & 0xffff) == c) { + tcg_out32(s, ANDI | SAI(src, dst, c)); + return; + } else if ((c & 0xffff0000) == c) { + tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); + return; + } else if (mask64_operand(c, &mb, &me)) { + if (mb == 0) { + tcg_out_rld(s, RLDICR, dst, src, 0, me); } else { - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, TCG_REG_R4); - tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); - tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R0); + tcg_out_rld(s, RLDICL, dst, src, 0, mb); } - break; + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); + tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); } - tcg_out_b (s, 0, (uintptr_t)l->raddr); } -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c, + int op_lo, int op_hi) { - TCGReg ir, datalo; - TCGMemOp opc = l->opc; - - reloc_pc14 (l->label_ptr[0], (tcg_target_long) s->code_ptr); - - ir = TCG_REG_R4; - if (TARGET_LONG_BITS == 32) { - tcg_out_mov (s, TCG_TYPE_I32, ir++, l->addrlo_reg); - } else { -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - ir |= 1; -#endif - tcg_out_mov (s, TCG_TYPE_I32, ir++, l->addrhi_reg); - tcg_out_mov (s, TCG_TYPE_I32, ir++, l->addrlo_reg); + if (c >> 16) { + tcg_out32(s, op_hi | SAI(src, dst, c >> 16)); + src = dst; } - - datalo = l->datalo_reg; - switch (opc & MO_SIZE) { - case MO_8: - tcg_out32(s, (RLWINM | RA (ir) | RS (datalo) - | SH (0) | MB (24) | ME (31))); - break; - case MO_16: - tcg_out32(s, (RLWINM | RA (ir) | RS (datalo) - | SH (0) | MB (16) | ME (31))); - break; - default: - tcg_out_mov(s, 
TCG_TYPE_I32, ir, datalo); - break; - case MO_64: -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - ir |= 1; -#endif - tcg_out_mov(s, TCG_TYPE_I32, ir++, l->datahi_reg); - tcg_out_mov(s, TCG_TYPE_I32, ir, datalo); - break; + if (c & 0xffff) { + tcg_out32(s, op_lo | SAI(src, dst, c)); + src = dst; } - ir++; - - tcg_out_movi(s, TCG_TYPE_I32, ir++, l->mem_index); - tcg_out32(s, MFSPR | RT(ir++) | LR); - tcg_out_b(s, LK, (uintptr_t)st_trampolines[opc]); - tcg_out_b(s, 0, (uintptr_t)l->raddr); } -#endif -#ifdef CONFIG_SOFTMMU -static void emit_ldst_trampoline (TCGContext *s, const void *ptr) +static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) { - tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0); - tcg_out_call (s, (tcg_target_long) ptr, 1, 0); + tcg_out_zori32(s, dst, src, c, ORI, ORIS); } -#endif -static void tcg_target_qemu_prologue (TCGContext *s) +static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) { - int i, frame_size; - - frame_size = 0 - + LINKAGE_AREA_SIZE - + TCG_STATIC_CALL_ARGS_SIZE - + ARRAY_SIZE (tcg_target_callee_save_regs) * 4 - + CPU_TEMP_BUF_NLONGS * sizeof(long) - ; - frame_size = (frame_size + 15) & ~15; - - tcg_set_frame(s, TCG_REG_CALL_STACK, frame_size - - CPU_TEMP_BUF_NLONGS * sizeof(long), - CPU_TEMP_BUF_NLONGS * sizeof(long)); + tcg_out_zori32(s, dst, src, c, XORI, XORIS); +} -#ifdef _CALL_AIX - { - uint32_t addr; +static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target) +{ + ptrdiff_t disp = tcg_pcrel_diff(s, target); + if (in_range_b(disp)) { + tcg_out32(s, B | (disp & 0x3fffffc) | mask); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target); + tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS | mask); + } +} - /* First emit adhoc function descriptor */ - addr = (uint32_t) s->code_ptr + 12; - tcg_out32 (s, addr); /* entry point */ - s->code_ptr += 8; /* skip TOC and environment pointer */ +static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, + TCGReg base, tcg_target_long offset) +{ + tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; + bool is_store = false; + TCGReg rs = TCG_REG_TMP1; + + switch (opi) { + case LD: case LWA: + align = 3; + /* FALLTHRU */ + default: + if (rt != TCG_REG_R0) { + rs = rt; + break; + } + break; + case STD: + align = 3; + /* FALLTHRU */ + case STB: case STH: case STW: + is_store = true; + break; } -#endif - tcg_out32 (s, MFSPR | RT (0) | LR); - tcg_out32 (s, STWU | RS (1) | RA (1) | (-frame_size & 0xffff)); - for (i = 0; i < ARRAY_SIZE (tcg_target_callee_save_regs); ++i) - tcg_out32 (s, (STW - | RS (tcg_target_callee_save_regs[i]) - | RA (1) - | (i * 4 + LINKAGE_AREA_SIZE + TCG_STATIC_CALL_ARGS_SIZE) - ) - ); - tcg_out32 (s, STW | RS (0) | RA (1) | (frame_size + LR_OFFSET)); -#ifdef CONFIG_USE_GUEST_BASE - if (GUEST_BASE) { - tcg_out_movi (s, TCG_TYPE_I32, TCG_GUEST_BASE_REG, GUEST_BASE); - tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + /* For unaligned, or very large offsets, use the indexed form. 
*/ + if (offset & align || offset != (int32_t)offset) { + if (rs == base) { + rs = TCG_REG_R0; + } + tcg_debug_assert(!is_store || rs != rt); + tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); + tcg_out32(s, opx | TAB(rt, base, rs)); + return; } -#endif - tcg_out_mov (s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tcg_out32 (s, MTSPR | RS (tcg_target_call_iarg_regs[1]) | CTR); - tcg_out32 (s, BCCTR | BO_ALWAYS); - tb_ret_addr = s->code_ptr; - - for (i = 0; i < ARRAY_SIZE (tcg_target_callee_save_regs); ++i) - tcg_out32 (s, (LWZ - | RT (tcg_target_callee_save_regs[i]) - | RA (1) - | (i * 4 + LINKAGE_AREA_SIZE + TCG_STATIC_CALL_ARGS_SIZE) - ) - ); - tcg_out32 (s, LWZ | RT (0) | RA (1) | (frame_size + LR_OFFSET)); - tcg_out32 (s, MTSPR | RS (0) | LR); - tcg_out32 (s, ADDI | RT (1) | RA (1) | frame_size); - tcg_out32 (s, BCLR | BO_ALWAYS); + l0 = (int16_t)offset; + offset = (offset - l0) >> 16; + l1 = (int16_t)offset; -#ifdef CONFIG_SOFTMMU - for (i = 0; i < 16; ++i) { - if (qemu_ld_helpers[i]) { - ld_trampolines[i] = s->code_ptr; - emit_ldst_trampoline(s, qemu_ld_helpers[i]); - } - if (qemu_st_helpers[i]) { - st_trampolines[i] = s->code_ptr; - emit_ldst_trampoline(s, qemu_st_helpers[i]); - } + if (l1 < 0 && orig >= 0) { + extra = 0x4000; + l1 = (int16_t)(offset - 0x4000); + } + if (l1) { + tcg_out32(s, ADDIS | TAI(rs, base, l1)); + base = rs; + } + if (extra) { + tcg_out32(s, ADDIS | TAI(rs, base, extra)); + base = rs; + } + if (opi != ADDI || base != rt || l0 != 0) { + tcg_out32(s, opi | TAI(rt, base, l0)); } -#endif } -static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, - intptr_t arg2) +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, + TCGReg arg1, intptr_t arg2) { - tcg_out_ldst (s, ret, arg1, arg2, LWZ, LWZX); -} + int opi, opx; -static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, - intptr_t arg2) -{ - tcg_out_ldst (s, arg, arg1, arg2, STW, STWX); + assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + if (type == TCG_TYPE_I32) { + opi = LWZ, opx = LWZX; + } else { + opi = LD, opx = LDX; + } + tcg_out_mem_long(s, opi, opx, ret, arg1, arg2); } -static void ppc_addi (TCGContext *s, int rt, int ra, tcg_target_long si) +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) { - if (!si && rt == ra) - return; + int opi, opx; - if (si == (int16_t) si) - tcg_out32 (s, ADDI | RT (rt) | RA (ra) | (si & 0xffff)); - else { - uint16_t h = ((si >> 16) & 0xffff) + ((uint16_t) si >> 15); - tcg_out32 (s, ADDIS | RT (rt) | RA (ra) | h); - tcg_out32 (s, ADDI | RT (rt) | RA (rt) | (si & 0xffff)); + assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + if (type == TCG_TYPE_I32) { + opi = STW, opx = STWX; + } else { + opi = STD, opx = STDX; } + tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); } -static void tcg_out_cmp (TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, - int const_arg2, int cr) +static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, + int const_arg2, int cr, TCGType type) { int imm; uint32_t op; + tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + + /* Simplify the comparisons below wrt CMPI. 
*/ + if (type == TCG_TYPE_I32) { + arg2 = (int32_t)arg2; + } + switch (cond) { case TCG_COND_EQ: case TCG_COND_NE: @@ -1006,8 +884,7 @@ static void tcg_out_cmp (TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, op = CMPI; imm = 1; break; - } - else if ((uint16_t) arg2 == arg2) { + } else if ((uint16_t) arg2 == arg2) { op = CMPLI; imm = 1; break; @@ -1048,149 +925,144 @@ static void tcg_out_cmp (TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, break; default: - tcg_abort (); + tcg_abort(); } - op |= BF (cr); + op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); - if (imm) - tcg_out32 (s, op | RA (arg1) | (arg2 & 0xffff)); - else { + if (imm) { + tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); + } else { if (const_arg2) { - tcg_out_movi (s, TCG_TYPE_I32, 0, arg2); - tcg_out32 (s, op | RA (arg1) | RB (0)); + tcg_out_movi(s, type, TCG_REG_R0, arg2); + arg2 = TCG_REG_R0; } - else - tcg_out32 (s, op | RA (arg1) | RB (arg2)); + tcg_out32(s, op | RA(arg1) | RB(arg2)); } - } -static void tcg_out_bc (TCGContext *s, int bc, int label_index) +static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, + TCGReg dst, TCGReg src) { - TCGLabel *l = &s->labels[label_index]; - - if (l->has_value) - tcg_out32 (s, bc | reloc_pc14_val (s->code_ptr, l->u.value)); - else { - uint16_t val = *(uint16_t *) &s->code_ptr[2]; - - /* Thanks to Andrzej Zaborowski */ - tcg_out32 (s, bc | (val & 0xfffc)); - tcg_out_reloc (s, s->code_ptr - 4, R_PPC_REL14, label_index, 0); + if (type == TCG_TYPE_I32) { + tcg_out32(s, CNTLZW | RS(src) | RA(dst)); + tcg_out_shri32(s, dst, dst, 5); + } else { + tcg_out32(s, CNTLZD | RS(src) | RA(dst)); + tcg_out_shri64(s, dst, dst, 6); } } -static void tcg_out_cr7eq_from_cond (TCGContext *s, const TCGArg *args, - const int *const_args) +static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src) { - TCGCond cond = args[4]; - int op; - struct { int bit1; int bit2; int cond2; } bits[] = { - [TCG_COND_LT ] = { CR_LT, CR_LT, TCG_COND_LT }, - [TCG_COND_LE ] = { CR_LT, CR_GT, TCG_COND_LT }, - [TCG_COND_GT ] = { CR_GT, CR_GT, TCG_COND_GT }, - [TCG_COND_GE ] = { CR_GT, CR_LT, TCG_COND_GT }, - [TCG_COND_LTU] = { CR_LT, CR_LT, TCG_COND_LTU }, - [TCG_COND_LEU] = { CR_LT, CR_GT, TCG_COND_LTU }, - [TCG_COND_GTU] = { CR_GT, CR_GT, TCG_COND_GTU }, - [TCG_COND_GEU] = { CR_GT, CR_LT, TCG_COND_GTU }, - }, *b = &bits[cond]; + /* X != 0 implies X + -1 generates a carry. Extra addition + trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. */ + if (dst != src) { + tcg_out32(s, ADDIC | TAI(dst, src, -1)); + tcg_out32(s, SUBFE | TAB(dst, dst, src)); + } else { + tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); + tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); + } +} - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_NE: - op = (cond == TCG_COND_EQ) ? CRAND : CRNAND; - tcg_out_cmp (s, cond, args[0], args[2], const_args[2], 6); - tcg_out_cmp (s, cond, args[1], args[3], const_args[3], 7); - tcg_out32 (s, op | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ)); - break; - case TCG_COND_LT: - case TCG_COND_LE: - case TCG_COND_GT: - case TCG_COND_GE: - case TCG_COND_LTU: - case TCG_COND_LEU: - case TCG_COND_GTU: - case TCG_COND_GEU: - op = (b->bit1 != b->bit2) ? 
CRANDC : CRAND; - tcg_out_cmp (s, b->cond2, args[1], args[3], const_args[3], 5); - tcg_out_cmp (s, tcg_unsigned_cond (cond), args[0], args[2], - const_args[2], 7); - tcg_out32 (s, op | BT (7, CR_EQ) | BA (5, CR_EQ) | BB (7, b->bit2)); - tcg_out32 (s, CROR | BT (7, CR_EQ) | BA (5, b->bit1) | BB (7, CR_EQ)); - break; - default: - tcg_abort(); +static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, + bool const_arg2) +{ + if (const_arg2) { + if ((uint32_t)arg2 == arg2) { + tcg_out_xori32(s, TCG_REG_R0, arg1, arg2); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2); + tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0)); + } + } else { + tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2)); } + return TCG_REG_R0; } -static void tcg_out_setcond (TCGContext *s, TCGCond cond, TCGArg arg0, - TCGArg arg1, TCGArg arg2, int const_arg2) +static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGArg arg0, TCGArg arg1, TCGArg arg2, + int const_arg2) { - int crop, sh, arg; + int crop, sh; - switch (cond) { - case TCG_COND_EQ: - if (const_arg2) { - if (!arg2) { - arg = arg1; - } - else { - arg = 0; - if ((uint16_t) arg2 == arg2) { - tcg_out32 (s, XORI | RS (arg1) | RA (0) | arg2); - } - else { - tcg_out_movi (s, TCG_TYPE_I32, 0, arg2); - tcg_out32 (s, XOR | SAB (arg1, 0, 0)); - } - } - } - else { - arg = 0; - tcg_out32 (s, XOR | SAB (arg1, 0, arg2)); - } - tcg_out32 (s, CNTLZW | RS (arg) | RA (0)); - tcg_out32 (s, (RLWINM - | RA (arg0) - | RS (0) - | SH (27) - | MB (5) - | ME (31) - ) - ); - break; + assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); - case TCG_COND_NE: - if (const_arg2) { - if (!arg2) { - arg = arg1; + /* Ignore high bits of a potential constant arg2. */ + if (type == TCG_TYPE_I32) { + arg2 = (uint32_t)arg2; + } + + /* Handle common and trivial cases before handling anything else. */ + if (arg2 == 0) { + switch (cond) { + case TCG_COND_EQ: + tcg_out_setcond_eq0(s, type, arg0, arg1); + return; + case TCG_COND_NE: + if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { + tcg_out_ext32u(s, TCG_REG_R0, arg1); + arg1 = TCG_REG_R0; } - else { - arg = 0; - if ((uint16_t) arg2 == arg2) { - tcg_out32 (s, XORI | RS (arg1) | RA (0) | arg2); - } - else { - tcg_out_movi (s, TCG_TYPE_I32, 0, arg2); - tcg_out32 (s, XOR | SAB (arg1, 0, 0)); - } + tcg_out_setcond_ne0(s, arg0, arg1); + return; + case TCG_COND_GE: + tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); + arg1 = arg0; + /* FALLTHRU */ + case TCG_COND_LT: + /* Extract the sign bit. */ + if (type == TCG_TYPE_I32) { + tcg_out_shri32(s, arg0, arg1, 31); + } else { + tcg_out_shri64(s, arg0, arg1, 63); } + return; + default: + break; } - else { - arg = 0; - tcg_out32 (s, XOR | SAB (arg1, 0, arg2)); - } + } + + /* If we have ISEL, we can implement everything with 3 or 4 insns. + All other cases below are also at least 3 insns, so speed up the + code generator by not considering them and always using ISEL. */ + if (HAVE_ISEL) { + int isel, tab; + + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); - if (arg == arg1 && arg1 == arg0) { - tcg_out32 (s, ADDIC | RT (0) | RA (arg) | 0xffff); - tcg_out32 (s, SUBFE | TAB (arg0, 0, arg)); + isel = tcg_to_isel[cond]; + + tcg_out_movi(s, type, arg0, 1); + if (isel & 1) { + /* arg0 = (bc ? 0 : 1) */ + tab = TAB(arg0, 0, arg0); + isel &= ~1; + } else { + /* arg0 = (bc ? 
1 : 0) */ + tcg_out_movi(s, type, TCG_REG_R0, 0); + tab = TAB(arg0, arg0, TCG_REG_R0); } - else { - tcg_out32 (s, ADDIC | RT (arg0) | RA (arg) | 0xffff); - tcg_out32 (s, SUBFE | TAB (arg0, arg0, arg)); + tcg_out32(s, isel | tab); + return; + } + + switch (cond) { + case TCG_COND_EQ: + arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); + tcg_out_setcond_eq0(s, type, arg0, arg1); + return; + + case TCG_COND_NE: + arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); + /* Discard the high bits only once, rather than both inputs. */ + if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { + tcg_out_ext32u(s, TCG_REG_R0, arg1); + arg1 = TCG_REG_R0; } - break; + tcg_out_setcond_ne0(s, arg0, arg1); + return; case TCG_COND_GT: case TCG_COND_GTU: @@ -1207,730 +1079,1466 @@ static void tcg_out_setcond (TCGContext *s, TCGCond cond, TCGArg arg0, case TCG_COND_GE: case TCG_COND_GEU: sh = 31; - crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_LT) | BB (7, CR_LT); + crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT); goto crtest; case TCG_COND_LE: case TCG_COND_LEU: sh = 31; - crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_GT) | BB (7, CR_GT); + crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT); crtest: - tcg_out_cmp (s, cond, arg1, arg2, const_arg2, 7); - if (crop) tcg_out32 (s, crop); - tcg_out32 (s, MFCR | RT (0)); - tcg_out32 (s, (RLWINM - | RA (arg0) - | RS (0) - | SH (sh) - | MB (31) - | ME (31) - ) - ); + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + if (crop) { + tcg_out32(s, crop); + } + tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); + tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31); break; default: - tcg_abort (); + tcg_abort(); } } -static void tcg_out_setcond2 (TCGContext *s, const TCGArg *args, - const int *const_args) +static void tcg_out_bc(TCGContext *s, int bc, int label_index) { - tcg_out_cr7eq_from_cond (s, args + 1, const_args + 1); - tcg_out32 (s, MFCR | RT (0)); - tcg_out32 (s, (RLWINM - | RA (args[0]) - | RS (0) - | SH (31) - | MB (31) - | ME (31) - ) - ); + TCGLabel *l = &s->labels[label_index]; + + if (l->has_value) { + tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr)); + } else { + tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, label_index, 0); + tcg_out_bc_noaddr(s, bc); + } } -static void tcg_out_movcond (TCGContext *s, TCGCond cond, - TCGArg dest, - TCGArg c1, TCGArg c2, - TCGArg v1, TCGArg v2, - int const_c2) +static void tcg_out_brcond(TCGContext *s, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + int label_index, TCGType type) { - tcg_out_cmp (s, cond, c1, c2, const_c2, 7); + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + tcg_out_bc(s, tcg_to_bc[cond], label_index); +} - if (1) { - /* At least here on 7747A bit twiddling hacks are outperformed - by jumpy code (the testing was not scientific) */ +static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, + TCGArg v2, bool const_c2) +{ + /* If for some reason both inputs are zero, don't produce bad code. */ + if (v1 == 0 && v2 == 0) { + tcg_out_movi(s, type, dest, 0); + return; + } + + tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); + + if (HAVE_ISEL) { + int isel = tcg_to_isel[cond]; + + /* Swap the V operands if the operation indicates inversion. */ + if (isel & 1) { + int t = v1; + v1 = v2; + v2 = t; + isel &= ~1; + } + /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. 
*/ + if (v2 == 0) { + tcg_out_movi(s, type, TCG_REG_R0, 0); + } + tcg_out32(s, isel | TAB(dest, v1, v2)); + } else { if (dest == v2) { - cond = tcg_invert_cond (cond); + cond = tcg_invert_cond(cond); v2 = v1; - } - else { - if (dest != v1) { - tcg_out_mov (s, TCG_TYPE_I32, dest, v1); + } else if (dest != v1) { + if (v1 == 0) { + tcg_out_movi(s, type, dest, 0); + } else { + tcg_out_mov(s, type, dest, v1); } } /* Branch forward over one insn */ - tcg_out32 (s, tcg_to_bc[cond] | 8); - tcg_out_mov (s, TCG_TYPE_I32, dest, v2); + tcg_out32(s, tcg_to_bc[cond] | 8); + if (v2 == 0) { + tcg_out_movi(s, type, dest, 0); + } else { + tcg_out_mov(s, type, dest, v2); + } } - else { - /* isel version, "if (1)" above should be replaced once a way - to figure out availability of isel on the underlying - hardware is found */ - int tab, bc; +} - switch (cond) { - case TCG_COND_EQ: - tab = TAB (dest, v1, v2); - bc = CR_EQ; - break; - case TCG_COND_NE: - tab = TAB (dest, v2, v1); - bc = CR_EQ; - break; - case TCG_COND_LTU: - case TCG_COND_LT: - tab = TAB (dest, v1, v2); - bc = CR_LT; - break; - case TCG_COND_GEU: - case TCG_COND_GE: - tab = TAB (dest, v2, v1); - bc = CR_LT; - break; - case TCG_COND_LEU: - case TCG_COND_LE: - tab = TAB (dest, v2, v1); - bc = CR_GT; - break; - case TCG_COND_GTU: - case TCG_COND_GT: - tab = TAB (dest, v1, v2); - bc = CR_GT; - break; - default: - tcg_abort (); - } - tcg_out32 (s, ISEL | tab | ((bc + 28) << 6)); +static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, + const int *const_args) +{ + static const struct { uint8_t bit1, bit2; } bits[] = { + [TCG_COND_LT ] = { CR_LT, CR_LT }, + [TCG_COND_LE ] = { CR_LT, CR_GT }, + [TCG_COND_GT ] = { CR_GT, CR_GT }, + [TCG_COND_GE ] = { CR_GT, CR_LT }, + [TCG_COND_LTU] = { CR_LT, CR_LT }, + [TCG_COND_LEU] = { CR_LT, CR_GT }, + [TCG_COND_GTU] = { CR_GT, CR_GT }, + [TCG_COND_GEU] = { CR_GT, CR_LT }, + }; + + TCGCond cond = args[4], cond2; + TCGArg al, ah, bl, bh; + int blconst, bhconst; + int op, bit1, bit2; + + al = args[0]; + ah = args[1]; + bl = args[2]; + bh = args[3]; + blconst = const_args[2]; + bhconst = const_args[3]; + + switch (cond) { + case TCG_COND_EQ: + op = CRAND; + goto do_equality; + case TCG_COND_NE: + op = CRNAND; + do_equality: + tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32); + tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32); + tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); + break; + + case TCG_COND_LT: + case TCG_COND_LE: + case TCG_COND_GT: + case TCG_COND_GE: + case TCG_COND_LTU: + case TCG_COND_LEU: + case TCG_COND_GTU: + case TCG_COND_GEU: + bit1 = bits[cond].bit1; + bit2 = bits[cond].bit2; + op = (bit1 != bit2 ? 
CRANDC : CRAND); + cond2 = tcg_unsigned_cond(cond); + + tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32); + tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32); + tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2)); + tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ)); + break; + + default: + tcg_abort(); } } -static void tcg_out_brcond (TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - int label_index) +static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, + const int *const_args) { - tcg_out_cmp (s, cond, arg1, arg2, const_arg2, 7); - tcg_out_bc (s, tcg_to_bc[cond], label_index); + tcg_out_cmp2(s, args + 1, const_args + 1); + tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); + tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31); } -/* XXX: we implement it at the target level to avoid having to - handle cross basic blocks temporaries */ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, const int *const_args) { - tcg_out_cr7eq_from_cond (s, args, const_args); - tcg_out_bc (s, (BC | BI (7, CR_EQ) | BO_COND_TRUE), args[5]); + tcg_out_cmp2(s, args, const_args); + tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, args[5]); } -void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr) +void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) { - uint32_t *ptr; - long disp = addr - jmp_addr; - unsigned long patch_size; - - ptr = (uint32_t *)jmp_addr; - - if ((disp << 6) >> 6 != disp) { - ptr[0] = 0x3c000000 | (addr >> 16); /* lis 0,addr@ha */ - ptr[1] = 0x60000000 | (addr & 0xffff); /* la 0,addr@l(0) */ - ptr[2] = 0x7c0903a6; /* mtctr 0 */ - ptr[3] = 0x4e800420; /* brctr */ - patch_size = 16; + TCGContext s; + + s.code_buf = s.code_ptr = (tcg_insn_unit *)jmp_addr; + tcg_out_b(&s, 0, (tcg_insn_unit *)addr); + flush_icache_range(jmp_addr, jmp_addr + tcg_current_code_size(&s)); +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) +{ +#ifdef _CALL_AIX + /* Look through the descriptor. If the branch is in range, and we + don't have to spend too much effort on building the toc. */ + void *tgt = ((void **)target)[0]; + uintptr_t toc = ((uintptr_t *)target)[1]; + intptr_t diff = tcg_pcrel_diff(s, tgt); + + if (in_range_b(diff) && toc == (uint32_t)toc) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc); + tcg_out_b(s, LK, tgt); } else { - /* patch the branch destination */ - if (disp != 16) { - *ptr = 0x48000000 | (disp & 0x03fffffc); /* b disp */ - patch_size = 4; + /* Fold the low bits of the constant into the addresses below. */ + intptr_t arg = (intptr_t)target; + int ofs = (int16_t)arg; + + if (ofs + 8 < 0x8000) { + arg -= ofs; } else { - ptr[0] = 0x60000000; /* nop */ - ptr[1] = 0x60000000; - ptr[2] = 0x60000000; - ptr[3] = 0x60000000; - patch_size = 16; + ofs = 0; } + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs); + tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP); + tcg_out32(s, BCCTR | BO_ALWAYS | LK); } - /* flush icache */ - flush_icache_range(jmp_addr, jmp_addr + patch_size); +#elif defined(_CALL_ELF) && _CALL_ELF == 2 + intptr_t diff; + + /* In the ELFv2 ABI, we have to set up r12 to contain the destination + address, which the callee uses to compute its TOC address. */ + /* FIXME: when the branch is in range, we could avoid r12 load if we + knew that the destination uses the same TOC, and what its local + entry point offset is. 
*/ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target); + + diff = tcg_pcrel_diff(s, target); + if (in_range_b(diff)) { + tcg_out_b(s, LK, target); + } else { + tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS | LK); + } +#else + tcg_out_b(s, LK, target); +#endif +} + +static const uint32_t qemu_ldx_opc[16] = { + [MO_UB] = LBZX, + [MO_UW] = LHZX, + [MO_UL] = LWZX, + [MO_Q] = LDX, + [MO_SW] = LHAX, + [MO_SL] = LWAX, + [MO_BSWAP | MO_UB] = LBZX, + [MO_BSWAP | MO_UW] = LHBRX, + [MO_BSWAP | MO_UL] = LWBRX, + [MO_BSWAP | MO_Q] = LDBRX, +}; + +static const uint32_t qemu_stx_opc[16] = { + [MO_UB] = STBX, + [MO_UW] = STHX, + [MO_UL] = STWX, + [MO_Q] = STDX, + [MO_BSWAP | MO_UB] = STBX, + [MO_BSWAP | MO_UW] = STHBRX, + [MO_BSWAP | MO_UL] = STWBRX, + [MO_BSWAP | MO_Q] = STDBRX, +}; + +static const uint32_t qemu_exts_opc[4] = { + EXTSB, EXTSH, EXTSW, 0 +}; + +#if defined (CONFIG_SOFTMMU) +/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, + * int mmu_idx, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, +}; + +/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +/* Perform the TLB load and compare. Places the result of the comparison + in CR7, loads the addend of the TLB into R3, and returns the register + containing the guest address (zero-extended into R4). Clobbers R0 and R2. */ + +static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, + TCGReg addrlo, TCGReg addrhi, + int mem_index, bool is_read) +{ + int cmp_off + = (is_read + ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + TCGReg base = TCG_AREG0; + + /* Extract the page index, shifted into place for tlb index. */ + if (TCG_TARGET_REG_BITS == 64) { + if (TARGET_LONG_BITS == 32) { + /* Zero-extend the address into a place helpful for further use. */ + tcg_out_ext32u(s, TCG_REG_R4, addrlo); + addrlo = TCG_REG_R4; + } else { + tcg_out_rld(s, RLDICL, TCG_REG_R3, addrlo, + 64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS); + } + } + + /* Compensate for very large offsets. */ + if (add_off >= 0x8000) { + /* Most target env are smaller than 32k; none are larger than 64k. + Simplify the logic here merely to offset by 0x7ff0, giving us a + range just shy of 64k. Check this assumption. */ + QEMU_BUILD_BUG_ON(offsetof(CPUArchState, + tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ff0 + 0x7fff); + tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0)); + base = TCG_REG_TMP1; + cmp_off -= 0x7ff0; + add_off -= 0x7ff0; + } + + /* Extraction and shifting, part 2. 
*/ + if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { + tcg_out_rlw(s, RLWINM, TCG_REG_R3, addrlo, + 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), + 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS), + 31 - CPU_TLB_ENTRY_BITS); + } else { + tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS); + } + + tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base)); + + /* Load the tlb comparator. */ + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off); + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4); + } else { + tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off); + } + + /* Load the TLB addend for use on the fast path. Do this asap + to minimize any load use delay. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off); + + /* Clear the non-page, non-alignment bits from the address. */ + if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { + tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, + (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); + } else if (!s_bits) { + tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, + 0, 63 - TARGET_PAGE_BITS); + } else { + tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo, + 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); + tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); + } + + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, + 0, 7, TCG_TYPE_I32); + tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32); + tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); + } else { + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, + 0, 7, TCG_TYPE_TL); + } + + return addrlo; +} + +/* Record the context of a call to the out of line helper code for the slow + path for a load or store, so that we can later generate the correct + helper code. */ +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, + TCGReg datalo_reg, TCGReg datahi_reg, + TCGReg addrlo_reg, TCGReg addrhi_reg, + int mem_index, tcg_insn_unit *raddr, + tcg_insn_unit *lptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = datalo_reg; + label->datahi_reg = datahi_reg; + label->addrlo_reg = addrlo_reg; + label->addrhi_reg = addrhi_reg; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = lptr; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGMemOp opc = lb->opc; + TCGReg hi, lo, arg = TCG_REG_R3; + + reloc_pc14(lb->label_ptr[0], s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); + + lo = lb->addrlo_reg; + hi = lb->addrhi_reg; + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { +#ifdef TCG_TARGET_CALL_ALIGN_ARGS + arg |= 1; +#endif + tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); + tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); + } else { + /* If the address needed to be zero-extended, we'll have already + placed it in R4. The only remaining case is 64-bit guest. 
*/ + tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); + } + + tcg_out_movi(s, TCG_TYPE_I32, arg++, lb->mem_index); + tcg_out32(s, MFSPR | RT(arg) | LR); + + tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]); + + lo = lb->datalo_reg; + hi = lb->datahi_reg; + if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { + tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4); + tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3); + } else if (opc & MO_SIGN) { + uint32_t insn = qemu_exts_opc[opc & MO_SIZE]; + tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3)); + } else { + tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3); + } + + tcg_out_b(s, 0, lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGMemOp opc = lb->opc; + TCGMemOp s_bits = opc & MO_SIZE; + TCGReg hi, lo, arg = TCG_REG_R3; + + reloc_pc14(lb->label_ptr[0], s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); + + lo = lb->addrlo_reg; + hi = lb->addrhi_reg; + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { +#ifdef TCG_TARGET_CALL_ALIGN_ARGS + arg |= 1; +#endif + tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); + tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); + } else { + /* If the address needed to be zero-extended, we'll have already + placed it in R4. The only remaining case is 64-bit guest. */ + tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); + } + + lo = lb->datalo_reg; + hi = lb->datahi_reg; + if (TCG_TARGET_REG_BITS == 32) { + switch (s_bits) { + case MO_64: +#ifdef TCG_TARGET_CALL_ALIGN_ARGS + arg |= 1; +#endif + tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); + /* FALLTHRU */ + case MO_32: + tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); + break; + default: + tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31); + break; + } + } else { + if (s_bits == MO_64) { + tcg_out_mov(s, TCG_TYPE_I64, arg++, lo); + } else { + tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits)); + } + } + + tcg_out_movi(s, TCG_TYPE_I32, arg++, lb->mem_index); + tcg_out32(s, MFSPR | RT(arg) | LR); + + tcg_out_call(s, qemu_st_helpers[opc]); + + tcg_out_b(s, 0, lb->raddr); +} +#endif /* SOFTMMU */ + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg datalo, datahi, addrlo, rbase; + TCGReg addrhi __attribute__((unused)); + TCGMemOp opc, s_bits; +#ifdef CONFIG_SOFTMMU + int mem_index; + tcg_insn_unit *label_ptr; +#endif + + datalo = *args++; + datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); + opc = *args++; + s_bits = opc & MO_SIZE; + +#ifdef CONFIG_SOFTMMU + mem_index = *args; + addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, true); + + /* Load a pointer into the current opcode w/conditional branch-link. */ + label_ptr = s->code_ptr; + tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + + rbase = TCG_REG_R3; +#else /* !CONFIG_SOFTMMU */ + rbase = GUEST_BASE ? 
TCG_GUEST_BASE_REG : 0; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); + addrlo = TCG_REG_TMP1; + } +#endif + + if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { + if (opc & MO_BSWAP) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0)); + } else if (rbase != 0) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo)); + tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0)); + } else if (addrlo == datahi) { + tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); + tcg_out32(s, LWZ | TAI(datahi, addrlo, 0)); + } else { + tcg_out32(s, LWZ | TAI(datahi, addrlo, 0)); + tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); + } + } else { + uint32_t insn = qemu_ldx_opc[opc]; + if (!HAVE_ISA_2_06 && insn == LDBRX) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0)); + tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0); + } else if (insn) { + tcg_out32(s, insn | TAB(datalo, rbase, addrlo)); + } else { + insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; + tcg_out32(s, insn | TAB(datalo, rbase, addrlo)); + insn = qemu_exts_opc[s_bits]; + tcg_out32(s, insn | RA(datalo) | RS(datalo)); + } + } + +#ifdef CONFIG_SOFTMMU + add_qemu_ldst_label(s, true, opc, datalo, datahi, addrlo, addrhi, + mem_index, s->code_ptr, label_ptr); +#endif +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg datalo, datahi, addrlo, rbase; + TCGReg addrhi __attribute__((unused)); + TCGMemOp opc, s_bits; +#ifdef CONFIG_SOFTMMU + int mem_index; + tcg_insn_unit *label_ptr; +#endif + + datalo = *args++; + datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); + opc = *args++; + s_bits = opc & MO_SIZE; + +#ifdef CONFIG_SOFTMMU + mem_index = *args; + addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, false); + + /* Load a pointer into the current opcode w/conditional branch-link. */ + label_ptr = s->code_ptr; + tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + + rbase = TCG_REG_R3; +#else /* !CONFIG_SOFTMMU */ + rbase = GUEST_BASE ? 
TCG_GUEST_BASE_REG : 0; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); + addrlo = TCG_REG_TMP1; + } +#endif + + if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { + if (opc & MO_BSWAP) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); + tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0)); + } else if (rbase != 0) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, STWX | SAB(datahi, rbase, addrlo)); + tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0)); + } else { + tcg_out32(s, STW | TAI(datahi, addrlo, 0)); + tcg_out32(s, STW | TAI(datalo, addrlo, 4)); + } + } else { + uint32_t insn = qemu_stx_opc[opc]; + if (!HAVE_ISA_2_06 && insn == STDBRX) { + tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); + tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4)); + tcg_out_shri64(s, TCG_REG_R0, datalo, 32); + tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1)); + } else { + tcg_out32(s, insn | SAB(datalo, rbase, addrlo)); + } + } + +#ifdef CONFIG_SOFTMMU + add_qemu_ldst_label(s, false, opc, datalo, datahi, addrlo, addrhi, + mem_index, s->code_ptr, label_ptr); +#endif +} + +/* Parameters for function call generation, used in tcg.c. */ +#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_EXTEND_ARGS 1 + +#ifdef _CALL_AIX +# define LINK_AREA_SIZE (6 * SZR) +# define LR_OFFSET (1 * SZR) +# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR) +#elif defined(TCG_TARGET_CALL_DARWIN) +# define LINK_AREA_SIZE (6 * SZR) +# define LR_OFFSET (2 * SZR) +#elif TCG_TARGET_REG_BITS == 64 +# if defined(_CALL_ELF) && _CALL_ELF == 2 +# define LINK_AREA_SIZE (4 * SZR) +# define LR_OFFSET (1 * SZR) +# endif +#else /* TCG_TARGET_REG_BITS == 32 */ +# if defined(_CALL_SYSV) +# define LINK_AREA_SIZE (2 * SZR) +# define LR_OFFSET (1 * SZR) +# endif +#endif +#ifndef LR_OFFSET +# error "Unhandled abi" +#endif +#ifndef TCG_TARGET_CALL_STACK_OFFSET +# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE +#endif + +#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) +#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) + +#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_SIZE \ + + REG_SAVE_SIZE \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & -TCG_TARGET_STACK_ALIGN) + +#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) + +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int i; + +#ifdef _CALL_AIX + void **desc = (void **)s->code_ptr; + desc[0] = desc + 2; /* entry point */ + desc[1] = 0; /* environment pointer */ + s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ +#endif + + tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, + CPU_TEMP_BUF_SIZE); + + /* Prologue */ + tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); + tcg_out32(s, (SZR == 8 ? 
STDU : STWU) + | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); + + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { + tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], + TCG_REG_R1, REG_SAVE_BOT + i * SZR); + } + tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); + +#ifdef CONFIG_USE_GUEST_BASE + if (GUEST_BASE) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } +#endif + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); + + if (USE_REG_RA) { +#ifdef _CALL_AIX + /* Make the caller load the value as the TOC into R2. */ + tb_ret_addr = s->code_ptr + 2; + desc[1] = tb_ret_addr; + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2); + tcg_out32(s, BCCTR | BO_ALWAYS); +#elif defined(_CALL_ELF) && _CALL_ELF == 2 + /* Compute from the incoming R12 value. */ + tb_ret_addr = s->code_ptr + 2; + tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12, + tcg_ptr_byte_diff(tb_ret_addr, s->code_buf))); + tcg_out32(s, BCCTR | BO_ALWAYS); +#else + /* Reserve max 5 insns for the constant load. */ + tb_ret_addr = s->code_ptr + 6; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr); + tcg_out32(s, BCCTR | BO_ALWAYS); + while (s->code_ptr < tb_ret_addr) { + tcg_out32(s, NOP); + } +#endif + } else { + tcg_out32(s, BCCTR | BO_ALWAYS); + tb_ret_addr = s->code_ptr; + } + + /* Epilogue */ + assert(tb_ret_addr == s->code_ptr); + + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { + tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], + TCG_REG_R1, REG_SAVE_BOT + i * SZR); + } + tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); + tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); + tcg_out32(s, BCLR | BO_ALWAYS); } static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { + TCGArg a0, a1, a2; + int c; + switch (opc) { case INDEX_op_exit_tb: - tcg_out_movi (s, TCG_TYPE_I32, TCG_REG_R3, args[0]); - tcg_out_b (s, 0, (tcg_target_long) tb_ret_addr); + if (USE_REG_RA) { + ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr); + + /* If we can use a direct branch, otherwise use the value in RA. + Note that the direct branch is always forward. If it's in + range now, it'll still be in range after the movi. Don't + bother about the 20 bytes where the test here fails but it + would succeed below. */ + if (!in_range_b(disp)) { + tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); + tcg_out32(s, BCCTR | BO_ALWAYS); + break; + } + } + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); + tcg_out_b(s, 0, tb_ret_addr); break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { - /* direct jump method */ - - s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; - s->code_ptr += 16; - } - else { - tcg_abort (); + /* Direct jump method. */ + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + s->code_ptr += 7; + } else { + /* Indirect jump method. 
*/ + tcg_abort(); } - s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; + s->tb_next_offset[args[0]] = tcg_current_code_size(s); break; case INDEX_op_br: { TCGLabel *l = &s->labels[args[0]]; if (l->has_value) { - tcg_out_b (s, 0, l->u.value); - } - else { - uint32_t val = *(uint32_t *) s->code_ptr; - - /* Thanks to Andrzej Zaborowski */ - tcg_out32 (s, B | (val & 0x3fffffc)); - tcg_out_reloc (s, s->code_ptr - 4, R_PPC_REL24, args[0], 0); + tcg_out_b(s, 0, l->u.value_ptr); + } else { + tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, args[0], 0); + tcg_out_b_noaddr(s, B); } } break; - case INDEX_op_call: - tcg_out_call (s, args[0], const_args[0], LK); - break; - case INDEX_op_movi_i32: - tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); - break; case INDEX_op_ld8u_i32: - tcg_out_ldst (s, args[0], args[1], args[2], LBZ, LBZX); + case INDEX_op_ld8u_i64: + tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); break; case INDEX_op_ld8s_i32: - tcg_out_ldst (s, args[0], args[1], args[2], LBZ, LBZX); - tcg_out32 (s, EXTSB | RS (args[0]) | RA (args[0])); + case INDEX_op_ld8s_i64: + tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); + tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0])); break; case INDEX_op_ld16u_i32: - tcg_out_ldst (s, args[0], args[1], args[2], LHZ, LHZX); + case INDEX_op_ld16u_i64: + tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); break; case INDEX_op_ld16s_i32: - tcg_out_ldst (s, args[0], args[1], args[2], LHA, LHAX); + case INDEX_op_ld16s_i64: + tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); break; case INDEX_op_ld_i32: - tcg_out_ldst (s, args[0], args[1], args[2], LWZ, LWZX); + case INDEX_op_ld32u_i64: + tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); + break; + case INDEX_op_ld32s_i64: + tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); + break; + case INDEX_op_ld_i64: + tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); break; case INDEX_op_st8_i32: - tcg_out_ldst (s, args[0], args[1], args[2], STB, STBX); + case INDEX_op_st8_i64: + tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); break; case INDEX_op_st16_i32: - tcg_out_ldst (s, args[0], args[1], args[2], STH, STHX); + case INDEX_op_st16_i64: + tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); break; case INDEX_op_st_i32: - tcg_out_ldst (s, args[0], args[1], args[2], STW, STWX); + case INDEX_op_st32_i64: + tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); + break; + case INDEX_op_st_i64: + tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); break; case INDEX_op_add_i32: - if (const_args[2]) - ppc_addi (s, args[0], args[1], args[2]); - else - tcg_out32 (s, ADD | TAB (args[0], args[1], args[2])); + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + do_addi_32: + tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); + } else { + tcg_out32(s, ADD | TAB(a0, a1, a2)); + } break; case INDEX_op_sub_i32: - if (const_args[2]) - ppc_addi (s, args[0], args[1], -args[2]); - else - tcg_out32 (s, SUBF | TAB (args[0], args[2], args[1])); + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); + } else { + tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); + } + } else if (const_args[2]) { + a2 = -a2; + goto do_addi_32; + } else { + tcg_out32(s, SUBF | TAB(a0, a2, a1)); + } break; case INDEX_op_and_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - uint32_t c; - - c = args[2]; - - if (!c) { - tcg_out_movi (s, TCG_TYPE_I32, args[0], 0); - break; - 
} -#ifdef __PPU__ - uint32_t t, n; - int mb, me; - - n = c ^ -(c & 1); - t = n + (n & -n); - - if ((t & (t - 1)) == 0) { - int lzc, tzc; - - if ((c & 0x80000001) == 0x80000001) { - lzc = clz32 (n); - tzc = ctz32 (n); - - mb = 32 - tzc; - me = lzc - 1; - } - else { - lzc = clz32 (c); - tzc = ctz32 (c); - - mb = lzc; - me = 31 - tzc; - } - - tcg_out32 (s, (RLWINM - | RA (args[0]) - | RS (args[1]) - | SH (0) - | MB (mb) - | ME (me) - ) - ); - } - else -#endif /* !__PPU__ */ - { - if ((c & 0xffff) == c) - tcg_out32 (s, ANDI | RS (args[1]) | RA (args[0]) | c); - else if ((c & 0xffff0000) == c) - tcg_out32 (s, ANDIS | RS (args[1]) | RA (args[0]) - | ((c >> 16) & 0xffff)); - else { - tcg_out_movi (s, TCG_TYPE_I32, 0, c); - tcg_out32 (s, AND | SAB (args[1], args[0], 0)); - } - } + tcg_out_andi32(s, a0, a1, a2); + } else { + tcg_out32(s, AND | SAB(a1, a0, a2)); } - else - tcg_out32 (s, AND | SAB (args[1], args[0], args[2])); break; + case INDEX_op_and_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_andi64(s, a0, a1, a2); + } else { + tcg_out32(s, AND | SAB(a1, a0, a2)); + } + break; + case INDEX_op_or_i64: case INDEX_op_or_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - if (args[2] & 0xffff) { - tcg_out32 (s, ORI | RS (args[1]) | RA (args[0]) - | (args[2] & 0xffff)); - if (args[2] >> 16) - tcg_out32 (s, ORIS | RS (args[0]) | RA (args[0]) - | ((args[2] >> 16) & 0xffff)); - } - else { - tcg_out32 (s, ORIS | RS (args[1]) | RA (args[0]) - | ((args[2] >> 16) & 0xffff)); - } + tcg_out_ori32(s, a0, a1, a2); + } else { + tcg_out32(s, OR | SAB(a1, a0, a2)); } - else - tcg_out32 (s, OR | SAB (args[1], args[0], args[2])); break; + case INDEX_op_xor_i64: case INDEX_op_xor_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - if ((args[2] & 0xffff) == args[2]) - tcg_out32 (s, XORI | RS (args[1]) | RA (args[0]) - | (args[2] & 0xffff)); - else if ((args[2] & 0xffff0000) == args[2]) - tcg_out32 (s, XORIS | RS (args[1]) | RA (args[0]) - | ((args[2] >> 16) & 0xffff)); - else { - tcg_out_movi (s, TCG_TYPE_I32, 0, args[2]); - tcg_out32 (s, XOR | SAB (args[1], args[0], 0)); - } + tcg_out_xori32(s, a0, a1, a2); + } else { + tcg_out32(s, XOR | SAB(a1, a0, a2)); } - else - tcg_out32 (s, XOR | SAB (args[1], args[0], args[2])); break; case INDEX_op_andc_i32: - tcg_out32 (s, ANDC | SAB (args[1], args[0], args[2])); + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_andi32(s, a0, a1, ~a2); + } else { + tcg_out32(s, ANDC | SAB(a1, a0, a2)); + } + break; + case INDEX_op_andc_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_andi64(s, a0, a1, ~a2); + } else { + tcg_out32(s, ANDC | SAB(a1, a0, a2)); + } break; case INDEX_op_orc_i32: - tcg_out32 (s, ORC | SAB (args[1], args[0], args[2])); + if (const_args[2]) { + tcg_out_ori32(s, args[0], args[1], ~args[2]); + break; + } + /* FALLTHRU */ + case INDEX_op_orc_i64: + tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); break; case INDEX_op_eqv_i32: - tcg_out32 (s, EQV | SAB (args[1], args[0], args[2])); + if (const_args[2]) { + tcg_out_xori32(s, args[0], args[1], ~args[2]); + break; + } + /* FALLTHRU */ + case INDEX_op_eqv_i64: + tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); break; case INDEX_op_nand_i32: - tcg_out32 (s, NAND | SAB (args[1], args[0], args[2])); + case INDEX_op_nand_i64: + tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); break; case INDEX_op_nor_i32: - tcg_out32 (s, NOR | SAB (args[1], args[0], args[2])); + case 
INDEX_op_nor_i64: + tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); break; case INDEX_op_mul_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - if (args[2] == (int16_t) args[2]) - tcg_out32 (s, MULLI | RT (args[0]) | RA (args[1]) - | (args[2] & 0xffff)); - else { - tcg_out_movi (s, TCG_TYPE_I32, 0, args[2]); - tcg_out32 (s, MULLW | TAB (args[0], args[1], 0)); - } + tcg_out32(s, MULLI | TAI(a0, a1, a2)); + } else { + tcg_out32(s, MULLW | TAB(a0, a1, a2)); } - else - tcg_out32 (s, MULLW | TAB (args[0], args[1], args[2])); break; case INDEX_op_div_i32: - tcg_out32 (s, DIVW | TAB (args[0], args[1], args[2])); + tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); break; case INDEX_op_divu_i32: - tcg_out32 (s, DIVWU | TAB (args[0], args[1], args[2])); - break; - - case INDEX_op_mulu2_i32: - if (args[0] == args[2] || args[0] == args[3]) { - tcg_out32 (s, MULLW | TAB (0, args[2], args[3])); - tcg_out32 (s, MULHWU | TAB (args[1], args[2], args[3])); - tcg_out_mov (s, TCG_TYPE_I32, args[0], 0); - } - else { - tcg_out32 (s, MULLW | TAB (args[0], args[2], args[3])); - tcg_out32 (s, MULHWU | TAB (args[1], args[2], args[3])); - } + tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); break; case INDEX_op_shl_i32: if (const_args[2]) { - tcg_out32 (s, (RLWINM - | RA (args[0]) - | RS (args[1]) - | SH (args[2]) - | MB (0) - | ME (31 - args[2]) - ) - ); - } - else - tcg_out32 (s, SLW | SAB (args[1], args[0], args[2])); + tcg_out_shli32(s, args[0], args[1], args[2]); + } else { + tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); + } break; case INDEX_op_shr_i32: if (const_args[2]) { - tcg_out32 (s, (RLWINM - | RA (args[0]) - | RS (args[1]) - | SH (32 - args[2]) - | MB (args[2]) - | ME (31) - ) - ); - } - else - tcg_out32 (s, SRW | SAB (args[1], args[0], args[2])); + tcg_out_shri32(s, args[0], args[1], args[2]); + } else { + tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); + } break; case INDEX_op_sar_i32: - if (const_args[2]) - tcg_out32 (s, SRAWI | RS (args[1]) | RA (args[0]) | SH (args[2])); - else - tcg_out32 (s, SRAW | SAB (args[1], args[0], args[2])); + if (const_args[2]) { + tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2])); + } else { + tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); + } break; case INDEX_op_rotl_i32: - { - int op = 0 - | RA (args[0]) - | RS (args[1]) - | MB (0) - | ME (31) - | (const_args[2] ? 
RLWINM | SH (args[2]) - : RLWNM | RB (args[2])) - ; - tcg_out32 (s, op); + if (const_args[2]) { + tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); + } else { + tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) + | MB(0) | ME(31)); } break; case INDEX_op_rotr_i32: if (const_args[2]) { - if (!args[2]) { - tcg_out_mov (s, TCG_TYPE_I32, args[0], args[1]); - } - else { - tcg_out32 (s, RLWINM - | RA (args[0]) - | RS (args[1]) - | SH (32 - args[2]) - | MB (0) - | ME (31) - ); - } - } - else { - tcg_out32 (s, SUBFIC | RT (0) | RA (args[2]) | 32); - tcg_out32 (s, RLWNM - | RA (args[0]) - | RS (args[1]) - | RB (0) - | MB (0) - | ME (31) - ); - } - break; - - case INDEX_op_add2_i32: - if (args[0] == args[3] || args[0] == args[5]) { - tcg_out32 (s, ADDC | TAB (0, args[2], args[4])); - tcg_out32 (s, ADDE | TAB (args[1], args[3], args[5])); - tcg_out_mov (s, TCG_TYPE_I32, args[0], 0); - } - else { - tcg_out32 (s, ADDC | TAB (args[0], args[2], args[4])); - tcg_out32 (s, ADDE | TAB (args[1], args[3], args[5])); - } - break; - case INDEX_op_sub2_i32: - if (args[0] == args[3] || args[0] == args[5]) { - tcg_out32 (s, SUBFC | TAB (0, args[4], args[2])); - tcg_out32 (s, SUBFE | TAB (args[1], args[5], args[3])); - tcg_out_mov (s, TCG_TYPE_I32, args[0], 0); - } - else { - tcg_out32 (s, SUBFC | TAB (args[0], args[4], args[2])); - tcg_out32 (s, SUBFE | TAB (args[1], args[5], args[3])); + tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); + } else { + tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); + tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) + | MB(0) | ME(31)); } break; case INDEX_op_brcond_i32: - /* - args[0] = r0 - args[1] = r1 - args[2] = cond - args[3] = r1 is const - args[4] = label_index - */ - tcg_out_brcond (s, args[2], args[0], args[1], const_args[1], args[3]); + tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], + args[3], TCG_TYPE_I32); + break; + case INDEX_op_brcond_i64: + tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], + args[3], TCG_TYPE_I64); break; case INDEX_op_brcond2_i32: tcg_out_brcond2(s, args, const_args); break; case INDEX_op_neg_i32: - tcg_out32 (s, NEG | RT (args[0]) | RA (args[1])); + case INDEX_op_neg_i64: + tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); break; case INDEX_op_not_i32: - tcg_out32 (s, NOR | SAB (args[1], args[0], args[1])); + case INDEX_op_not_i64: + tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); + break; + + case INDEX_op_add_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + do_addi_64: + tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); + } else { + tcg_out32(s, ADD | TAB(a0, a1, a2)); + } + break; + case INDEX_op_sub_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); + } else { + tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); + } + } else if (const_args[2]) { + a2 = -a2; + goto do_addi_64; + } else { + tcg_out32(s, SUBF | TAB(a0, a2, a1)); + } + break; + + case INDEX_op_shl_i64: + if (const_args[2]) { + tcg_out_shli64(s, args[0], args[1], args[2]); + } else { + tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); + } + break; + case INDEX_op_shr_i64: + if (const_args[2]) { + tcg_out_shri64(s, args[0], args[1], args[2]); + } else { + tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); + } + break; + case INDEX_op_sar_i64: + if (const_args[2]) { + int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1); + tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh); + } else { + tcg_out32(s, SRAD | SAB(args[1], 
args[0], args[2])); + } + break; + case INDEX_op_rotl_i64: + if (const_args[2]) { + tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); + } else { + tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); + } + break; + case INDEX_op_rotr_i64: + if (const_args[2]) { + tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); + } else { + tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); + tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); + } + break; + + case INDEX_op_mul_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out32(s, MULLI | TAI(a0, a1, a2)); + } else { + tcg_out32(s, MULLD | TAB(a0, a1, a2)); + } + break; + case INDEX_op_div_i64: + tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_divu_i64: + tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); break; case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args, 0); + tcg_out_qemu_ld(s, args, false); break; case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args, 1); + tcg_out_qemu_ld(s, args, true); break; case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args, 0); + tcg_out_qemu_st(s, args, false); break; case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args, 1); + tcg_out_qemu_st(s, args, true); break; case INDEX_op_ext8s_i32: - tcg_out32 (s, EXTSB | RS (args[1]) | RA (args[0])); - break; - case INDEX_op_ext8u_i32: - tcg_out32 (s, RLWINM - | RA (args[0]) - | RS (args[1]) - | SH (0) - | MB (24) - | ME (31) - ); - break; + case INDEX_op_ext8s_i64: + c = EXTSB; + goto gen_ext; case INDEX_op_ext16s_i32: - tcg_out32 (s, EXTSH | RS (args[1]) | RA (args[0])); - break; - case INDEX_op_ext16u_i32: - tcg_out32 (s, RLWINM - | RA (args[0]) - | RS (args[1]) - | SH (0) - | MB (16) - | ME (31) - ); + case INDEX_op_ext16s_i64: + c = EXTSH; + goto gen_ext; + case INDEX_op_ext32s_i64: + c = EXTSW; + goto gen_ext; + gen_ext: + tcg_out32(s, c | RS(args[1]) | RA(args[0])); break; case INDEX_op_setcond_i32: - tcg_out_setcond (s, args[3], args[0], args[1], args[2], const_args[2]); + tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], + const_args[2]); + break; + case INDEX_op_setcond_i64: + tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], + const_args[2]); break; case INDEX_op_setcond2_i32: - tcg_out_setcond2 (s, args, const_args); + tcg_out_setcond2(s, args, const_args); break; case INDEX_op_bswap16_i32: - /* Stolen from gcc's builtin_bswap16 */ - + case INDEX_op_bswap16_i64: + a0 = args[0], a1 = args[1]; /* a1 = abcd */ - - /* r0 = (a1 << 8) & 0xff00 # 00d0 */ - tcg_out32 (s, RLWINM - | RA (0) - | RS (args[1]) - | SH (8) - | MB (16) - | ME (23) - ); - - /* a0 = rotate_left (a1, 24) & 0xff # 000c */ - tcg_out32 (s, RLWINM - | RA (args[0]) - | RS (args[1]) - | SH (24) - | MB (24) - | ME (31) - ); - - /* a0 = a0 | r0 # 00dc */ - tcg_out32 (s, OR | SAB (0, args[0], args[0])); + if (a0 != a1) { + /* a0 = (a1 r<< 24) & 0xff # 000c */ + tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); + /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */ + tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23); + } else { + /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */ + tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23); + /* a0 = (a1 r<< 24) & 0xff # 000c */ + tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); + /* a0 = a0 | r0 # 00dc */ + tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0)); + } break; case INDEX_op_bswap32_i32: + case INDEX_op_bswap32_i64: /* Stolen from gcc's builtin_bswap32 */ - { - int a0 = args[0]; - - /* a1 = args[1] # abcd */ + a1 = args[1]; + a0 = args[0] == a1 ? 
TCG_REG_R0 : args[0]; + + /* a1 = args[1] # abcd */ + /* a0 = rotate_left (a1, 8) # bcda */ + tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); + /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); + /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); + + if (a0 == TCG_REG_R0) { + tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); + } + break; - if (a0 == args[1]) { - a0 = 0; - } + case INDEX_op_bswap64_i64: + a0 = args[0], a1 = args[1], a2 = TCG_REG_R0; + if (a0 == a1) { + a0 = TCG_REG_R0; + a2 = a1; + } - /* a0 = rotate_left (a1, 8) # bcda */ - tcg_out32 (s, RLWINM - | RA (a0) - | RS (args[1]) - | SH (8) - | MB (0) - | ME (31) - ); - - /* a0 = (a0 & ~0xff000000) | ((a1 << 24) & 0xff000000) # dcda */ - tcg_out32 (s, RLWIMI - | RA (a0) - | RS (args[1]) - | SH (24) - | MB (0) - | ME (7) - ); - - /* a0 = (a0 & ~0x0000ff00) | ((a1 << 24) & 0x0000ff00) # dcba */ - tcg_out32 (s, RLWIMI - | RA (a0) - | RS (args[1]) - | SH (24) - | MB (16) - | ME (23) - ); - - if (!a0) { - tcg_out_mov (s, TCG_TYPE_I32, args[0], a0); - } + /* a1 = # abcd efgh */ + /* a0 = rl32(a1, 8) # 0000 fghe */ + tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); + /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); + /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); + + /* a0 = rl64(a0, 32) # hgfe 0000 */ + /* a2 = rl64(a1, 32) # efgh abcd */ + tcg_out_rld(s, RLDICL, a0, a0, 32, 0); + tcg_out_rld(s, RLDICL, a2, a1, 32, 0); + + /* a0 = dep(a0, rl32(a2, 8), 0xffffffff) # hgfe bcda */ + tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31); + /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */ + tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7); + /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */ + tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23); + + if (a0 == 0) { + tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); } break; case INDEX_op_deposit_i32: - tcg_out32 (s, RLWIMI - | RA (args[0]) - | RS (args[2]) - | SH (args[3]) - | MB (32 - args[3] - args[4]) - | ME (31 - args[3]) - ); + if (const_args[2]) { + uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; + tcg_out_andi32(s, args[0], args[0], ~mask); + } else { + tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], + 32 - args[3] - args[4], 31 - args[3]); + } + break; + case INDEX_op_deposit_i64: + if (const_args[2]) { + uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; + tcg_out_andi64(s, args[0], args[0], ~mask); + } else { + tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], + 64 - args[3] - args[4]); + } break; case INDEX_op_movcond_i32: - tcg_out_movcond (s, args[5], args[0], - args[1], args[2], - args[3], args[4], - const_args[2]); + tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], + args[3], args[4], const_args[2]); + break; + case INDEX_op_movcond_i64: + tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], + args[3], args[4], const_args[2]); + break; + +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_add2_i64: +#else + case INDEX_op_add2_i32: +#endif + /* Note that the CA bit is defined based on the word size of the + environment. So in 64-bit mode it's always carry-out of bit 63. + The fallback code using deposit works just as well for 32-bit. 
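       Schematically, with al/ah and bl/bh naming the low and high input
       words purely for illustration, the pair emitted below computes
           lo = al + bl            -- ADDC (or ADDIC for a constant), sets CA
           hi = ah + bh + CA       -- ADDE (or ADDME/ADDZE for constant -1/0)
       and R0 stands in for the low result whenever writing it directly
       would clobber an input still needed by the second instruction.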
*/ + a0 = args[0], a1 = args[1]; + if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { + a0 = TCG_REG_R0; + } + if (const_args[4]) { + tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); + } else { + tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); + } + if (const_args[5]) { + tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); + } else { + tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); + } + if (a0 != args[0]) { + tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); + } + break; + +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_sub2_i64: +#else + case INDEX_op_sub2_i32: +#endif + a0 = args[0], a1 = args[1]; + if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { + a0 = TCG_REG_R0; + } + if (const_args[2]) { + tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); + } else { + tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); + } + if (const_args[3]) { + tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5])); + } else { + tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); + } + if (a0 != args[0]) { + tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); + } + break; + + case INDEX_op_muluh_i32: + tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_mulsh_i32: + tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_muluh_i64: + tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_mulsh_i64: + tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ default: - tcg_dump_ops (s); - tcg_abort (); + tcg_abort(); } } static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, - { INDEX_op_call, { "ri" } }, { INDEX_op_br, { } }, - { INDEX_op_mov_i32, { "r", "r" } }, - { INDEX_op_movi_i32, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, { INDEX_op_ld16u_i32, { "r", "r" } }, { INDEX_op_ld16s_i32, { "r", "r" } }, { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "r", "r" } }, { INDEX_op_st16_i32, { "r", "r" } }, { INDEX_op_st_i32, { "r", "r" } }, { INDEX_op_add_i32, { "r", "r", "ri" } }, - { INDEX_op_mul_i32, { "r", "r", "ri" } }, + { INDEX_op_mul_i32, { "r", "r", "rI" } }, { INDEX_op_div_i32, { "r", "r", "r" } }, { INDEX_op_divu_i32, { "r", "r", "r" } }, - { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } }, - { INDEX_op_sub_i32, { "r", "r", "ri" } }, + { INDEX_op_sub_i32, { "r", "rI", "ri" } }, { INDEX_op_and_i32, { "r", "r", "ri" } }, { INDEX_op_or_i32, { "r", "r", "ri" } }, { INDEX_op_xor_i32, { "r", "r", "ri" } }, + { INDEX_op_andc_i32, { "r", "r", "ri" } }, + { INDEX_op_orc_i32, { "r", "r", "ri" } }, + { INDEX_op_eqv_i32, { "r", "r", "ri" } }, + { INDEX_op_nand_i32, { "r", "r", "r" } }, + { INDEX_op_nor_i32, { "r", "r", "r" } }, { INDEX_op_shl_i32, { "r", "r", "ri" } }, { INDEX_op_shr_i32, { "r", "r", "ri" } }, { INDEX_op_sar_i32, { "r", "r", "ri" } }, - { INDEX_op_rotl_i32, { "r", "r", "ri" } }, { INDEX_op_rotr_i32, { "r", "r", "ri" } }, - { INDEX_op_brcond_i32, { "r", "ri" } }, - - { INDEX_op_add2_i32, { "r", "r", "r", "r", "r", "r" } }, - { INDEX_op_sub2_i32, { "r", "r", "r", "r", "r", "r" } }, - { INDEX_op_brcond2_i32, { "r", "r", "r", "r" } }, - { INDEX_op_neg_i32, { "r", "r" } }, { INDEX_op_not_i32, { "r", "r" } }, + { INDEX_op_ext8s_i32, { "r", "r" } }, + { INDEX_op_ext16s_i32, { "r", "r" } 
}, + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, - { INDEX_op_andc_i32, { "r", "r", "r" } }, - { INDEX_op_orc_i32, { "r", "r", "r" } }, - { INDEX_op_eqv_i32, { "r", "r", "r" } }, - { INDEX_op_nand_i32, { "r", "r", "r" } }, - { INDEX_op_nor_i32, { "r", "r", "r" } }, - + { INDEX_op_brcond_i32, { "r", "ri" } }, { INDEX_op_setcond_i32, { "r", "r", "ri" } }, + { INDEX_op_movcond_i32, { "r", "r", "ri", "rZ", "rZ" } }, + + { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + + { INDEX_op_muluh_i32, { "r", "r", "r" } }, + { INDEX_op_mulsh_i32, { "r", "r", "r" } }, + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + + { INDEX_op_st8_i64, { "r", "r" } }, + { INDEX_op_st16_i64, { "r", "r" } }, + { INDEX_op_st32_i64, { "r", "r" } }, + { INDEX_op_st_i64, { "r", "r" } }, + + { INDEX_op_add_i64, { "r", "r", "rT" } }, + { INDEX_op_sub_i64, { "r", "rI", "rT" } }, + { INDEX_op_and_i64, { "r", "r", "ri" } }, + { INDEX_op_or_i64, { "r", "r", "rU" } }, + { INDEX_op_xor_i64, { "r", "r", "rU" } }, + { INDEX_op_andc_i64, { "r", "r", "ri" } }, + { INDEX_op_orc_i64, { "r", "r", "r" } }, + { INDEX_op_eqv_i64, { "r", "r", "r" } }, + { INDEX_op_nand_i64, { "r", "r", "r" } }, + { INDEX_op_nor_i64, { "r", "r", "r" } }, + + { INDEX_op_shl_i64, { "r", "r", "ri" } }, + { INDEX_op_shr_i64, { "r", "r", "ri" } }, + { INDEX_op_sar_i64, { "r", "r", "ri" } }, + { INDEX_op_rotl_i64, { "r", "r", "ri" } }, + { INDEX_op_rotr_i64, { "r", "r", "ri" } }, + + { INDEX_op_mul_i64, { "r", "r", "rI" } }, + { INDEX_op_div_i64, { "r", "r", "r" } }, + { INDEX_op_divu_i64, { "r", "r", "r" } }, + + { INDEX_op_neg_i64, { "r", "r" } }, + { INDEX_op_not_i64, { "r", "r" } }, + { INDEX_op_ext8s_i64, { "r", "r" } }, + { INDEX_op_ext16s_i64, { "r", "r" } }, + { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_bswap16_i64, { "r", "r" } }, + { INDEX_op_bswap32_i64, { "r", "r" } }, + { INDEX_op_bswap64_i64, { "r", "r" } }, + + { INDEX_op_brcond_i64, { "r", "ri" } }, + { INDEX_op_setcond_i64, { "r", "r", "ri" } }, + { INDEX_op_movcond_i64, { "r", "r", "ri", "rZ", "rZ" } }, + + { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, + + { INDEX_op_mulsh_i64, { "r", "r", "r" } }, + { INDEX_op_muluh_i64, { "r", "r", "r" } }, +#endif + +#if TCG_TARGET_REG_BITS == 32 + { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, +#endif - { INDEX_op_bswap16_i32, { "r", "r" } }, - { INDEX_op_bswap32_i32, { "r", "r" } }, +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } }, + { INDEX_op_sub2_i64, { "r", "r", "rI", "rZM", "r", "r" } }, +#else + { INDEX_op_add2_i32, { "r", "r", "r", "r", "rI", "rZM" } }, + { INDEX_op_sub2_i32, { "r", "r", "rI", "rZM", "r", "r" } }, +#endif -#if TARGET_LONG_BITS == 32 +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "S", "S" } }, + { INDEX_op_qemu_ld_i64, { "r", "L" } }, + { INDEX_op_qemu_st_i64, { "S", "S" } }, +#elif TARGET_LONG_BITS == 32 { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "S", "S" } }, { INDEX_op_qemu_ld_i64, { "L", "L", "L" } }, - { INDEX_op_qemu_st_i32, { "K", "K" } }, - { INDEX_op_qemu_st_i64, { "M", "M", "M" } }, + { INDEX_op_qemu_st_i64, { "S", "S", "S" } }, #else { 
INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, + { INDEX_op_qemu_st_i32, { "S", "S", "S" } }, { INDEX_op_qemu_ld_i64, { "L", "L", "L", "L" } }, - { INDEX_op_qemu_st_i32, { "K", "K", "K" } }, - { INDEX_op_qemu_st_i64, { "M", "M", "M", "M" } }, + { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } }, #endif - { INDEX_op_ext8s_i32, { "r", "r" } }, - { INDEX_op_ext8u_i32, { "r", "r" } }, - { INDEX_op_ext16s_i32, { "r", "r" } }, - { INDEX_op_ext16u_i32, { "r", "r" } }, - - { INDEX_op_deposit_i32, { "r", "0", "r" } }, - { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "r" } }, - { -1 }, }; static void tcg_target_init(TCGContext *s) { + unsigned long hwcap = qemu_getauxval(AT_HWCAP); + if (hwcap & PPC_FEATURE_ARCH_2_06) { + have_isa_2_06 = true; + } + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); tcg_regset_set32(tcg_target_call_clobber_regs, 0, (1 << TCG_REG_R0) | -#ifdef TCG_TARGET_CALL_DARWIN (1 << TCG_REG_R2) | -#endif (1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5) | @@ -1940,18 +2548,173 @@ static void tcg_target_init(TCGContext *s) (1 << TCG_REG_R9) | (1 << TCG_REG_R10) | (1 << TCG_REG_R11) | - (1 << TCG_REG_R12) - ); + (1 << TCG_REG_R12)); tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); -#ifndef TCG_TARGET_CALL_DARWIN - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ +#if defined(_CALL_SYSV) + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */ #endif -#ifdef _CALL_SYSV - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); +#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64 + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ #endif + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */ + if (USE_REG_RA) { + tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return addr */ + } tcg_add_target_add_op_defs(ppc_op_defs); } + +#ifdef __ELF__ +typedef struct { + DebugFrameCIE cie; + DebugFrameFDEHeader fde; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3]; +} DebugFrame; + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +#if TCG_TARGET_REG_BITS == 64 +# define ELF_HOST_MACHINE EM_PPC64 +#else +# define ELF_HOST_MACHINE EM_PPC +#endif + +static DebugFrame debug_frame = { + .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .cie.id = -1, + .cie.version = 1, + .cie.code_align = 1, + .cie.data_align = (-SZR & 0x7f), /* sleb128 -SZR */ + .cie.return_column = 65, + + /* Total FDE size does not include the "len" member. */ + .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + + .fde_def_cfa = { + 12, TCG_REG_R1, /* DW_CFA_def_cfa r1, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... 
uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */ + 0x11, 65, (LR_OFFSET / -SZR) & 0x7f, + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + uint8_t *p = &debug_frame.fde_reg_ofs[3]; + int i; + + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) { + p[0] = 0x80 + tcg_target_callee_save_regs[i]; + p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR; + } + + debug_frame.fde.func_start = (uintptr_t)buf; + debug_frame.fde.func_len = buf_size; + + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} +#endif /* __ELF__ */ + +static size_t dcache_bsize = 16; +static size_t icache_bsize = 16; + +void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + uintptr_t p, start1, stop1; + size_t dsize = dcache_bsize; + size_t isize = icache_bsize; + + start1 = start & ~(dsize - 1); + stop1 = (stop + dsize - 1) & ~(dsize - 1); + for (p = start1; p < stop1; p += dsize) { + asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); + } + asm volatile ("sync" : : : "memory"); + + start &= start & ~(isize - 1); + stop1 = (stop + isize - 1) & ~(isize - 1); + for (p = start1; p < stop1; p += isize) { + asm volatile ("icbi 0,%0" : : "r"(p) : "memory"); + } + asm volatile ("sync" : : : "memory"); + asm volatile ("isync" : : : "memory"); +} + +#if defined _AIX +#include <sys/systemcfg.h> + +static void __attribute__((constructor)) tcg_cache_init(void) +{ + icache_bsize = _system_configuration.icache_line; + dcache_bsize = _system_configuration.dcache_line; +} + +#elif defined __linux__ +static void __attribute__((constructor)) tcg_cache_init(void) +{ + unsigned long dsize = qemu_getauxval(AT_DCACHEBSIZE); + unsigned long isize = qemu_getauxval(AT_ICACHEBSIZE); + + if (dsize == 0 || isize == 0) { + if (dsize == 0) { + fprintf(stderr, "getauxval AT_DCACHEBSIZE failed\n"); + } + if (isize == 0) { + fprintf(stderr, "getauxval AT_ICACHEBSIZE failed\n"); + } + exit(1); + } + dcache_bsize = dsize; + icache_bsize = isize; +} + +#elif defined __APPLE__ +#include <stdio.h> +#include <sys/types.h> +#include <sys/sysctl.h> + +static void __attribute__((constructor)) tcg_cache_init(void) +{ + size_t len; + unsigned cacheline; + int name[2] = { CTL_HW, HW_CACHELINE }; + + len = sizeof(cacheline); + if (sysctl(name, 2, &cacheline, &len, NULL, 0)) { + perror("sysctl CTL_HW HW_CACHELINE failed"); + exit(1); + } + dcache_bsize = cacheline; + icache_bsize = cacheline; +} + +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/sysctl.h> + +static void __attribute__((constructor)) tcg_cache_init(void) +{ + size_t len = 4; + unsigned cacheline; + + if (sysctlbyname ("machdep.cacheline_size", &cacheline, &len, NULL, 0)) { + fprintf(stderr, "sysctlbyname machdep.cacheline_size failed: %s\n", + strerror(errno)); + exit(1); + } + dcache_bsize = cacheline; + icache_bsize = cacheline; +} +#endif diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index e3395e301..32ac4424d 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -21,60 +21,35 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ -#ifndef TCG_TARGET_PPC -#define TCG_TARGET_PPC 1 +#ifndef TCG_TARGET_PPC64 +#define TCG_TARGET_PPC64 1 + +#ifdef _ARCH_PPC64 +# define TCG_TARGET_REG_BITS 64 +#else +# define TCG_TARGET_REG_BITS 32 +#endif -#define TCG_TARGET_WORDS_BIGENDIAN #define TCG_TARGET_NB_REGS 32 +#define TCG_TARGET_INSN_UNIT_SIZE 4 typedef enum { - TCG_REG_R0 = 0, - TCG_REG_R1, - TCG_REG_R2, - TCG_REG_R3, - TCG_REG_R4, - TCG_REG_R5, - TCG_REG_R6, - TCG_REG_R7, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, - TCG_REG_R11, - TCG_REG_R12, - TCG_REG_R13, - TCG_REG_R14, - TCG_REG_R15, - TCG_REG_R16, - TCG_REG_R17, - TCG_REG_R18, - TCG_REG_R19, - TCG_REG_R20, - TCG_REG_R21, - TCG_REG_R22, - TCG_REG_R23, - TCG_REG_R24, - TCG_REG_R25, - TCG_REG_R26, - TCG_REG_R27, - TCG_REG_R28, - TCG_REG_R29, - TCG_REG_R30, - TCG_REG_R31 + TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3, + TCG_REG_R4, TCG_REG_R5, TCG_REG_R6, TCG_REG_R7, + TCG_REG_R8, TCG_REG_R9, TCG_REG_R10, TCG_REG_R11, + TCG_REG_R12, TCG_REG_R13, TCG_REG_R14, TCG_REG_R15, + TCG_REG_R16, TCG_REG_R17, TCG_REG_R18, TCG_REG_R19, + TCG_REG_R20, TCG_REG_R21, TCG_REG_R22, TCG_REG_R23, + TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27, + TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31, + + TCG_REG_CALL_STACK = TCG_REG_R1, + TCG_AREG0 = TCG_REG_R27 } TCGReg; -/* used for function call generation */ -#define TCG_REG_CALL_STACK TCG_REG_R1 -#define TCG_TARGET_STACK_ALIGN 16 -#if defined _CALL_DARWIN || defined __APPLE__ -#define TCG_TARGET_CALL_STACK_OFFSET 24 -#elif defined _CALL_AIX -#define TCG_TARGET_CALL_STACK_OFFSET 52 -#elif defined _CALL_SYSV -#define TCG_TARGET_CALL_ALIGN_ARGS 1 -#define TCG_TARGET_CALL_STACK_OFFSET 8 -#else -#error Unsupported system -#endif +/* optional instructions automatically implemented */ +#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */ +#define TCG_TARGET_HAS_ext16u_i32 0 /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 @@ -82,8 +57,6 @@ typedef enum { #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_ext8s_i32 1 #define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 1 -#define TCG_TARGET_HAS_ext16u_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 @@ -95,16 +68,44 @@ typedef enum { #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_muluh_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_muluh_i32 1 +#define TCG_TARGET_HAS_mulsh_i32 1 -#define TCG_TARGET_HAS_new_ldst 1 - -#define TCG_AREG0 TCG_REG_R27 +#if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 +#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_div_i64 1 +#define TCG_TARGET_HAS_rem_i64 0 +#define TCG_TARGET_HAS_rot_i64 1 +#define TCG_TARGET_HAS_ext8s_i64 1 +#define TCG_TARGET_HAS_ext16s_i64 1 +#define TCG_TARGET_HAS_ext32s_i64 1 +#define TCG_TARGET_HAS_ext8u_i64 0 +#define TCG_TARGET_HAS_ext16u_i64 0 +#define TCG_TARGET_HAS_ext32u_i64 0 +#define TCG_TARGET_HAS_bswap16_i64 1 +#define TCG_TARGET_HAS_bswap32_i64 1 +#define TCG_TARGET_HAS_bswap64_i64 1 +#define TCG_TARGET_HAS_not_i64 1 +#define TCG_TARGET_HAS_neg_i64 1 +#define TCG_TARGET_HAS_andc_i64 1 +#define TCG_TARGET_HAS_orc_i64 1 +#define TCG_TARGET_HAS_eqv_i64 1 +#define TCG_TARGET_HAS_nand_i64 1 +#define TCG_TARGET_HAS_nor_i64 1 +#define TCG_TARGET_HAS_deposit_i64 1 +#define TCG_TARGET_HAS_movcond_i64 1 +#define 
TCG_TARGET_HAS_add2_i64 1 +#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 1 +#define TCG_TARGET_HAS_mulsh_i64 1 +#endif -#define tcg_qemu_tb_exec(env, tb_ptr) \ - ((uintptr_t __attribute__ ((longcall)) \ - (*)(void *, void *))tcg_ctx.code_gen_prologue)(env, tb_ptr) +void flush_icache_range(uintptr_t start, uintptr_t stop); #endif diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c deleted file mode 100644 index 06e440f9b..000000000 --- a/tcg/ppc64/tcg-target.c +++ /dev/null @@ -1,2210 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "tcg-be-ldst.h" - -#define TCG_CT_CONST_S16 0x100 -#define TCG_CT_CONST_U16 0x200 -#define TCG_CT_CONST_S32 0x400 -#define TCG_CT_CONST_U32 0x800 -#define TCG_CT_CONST_ZERO 0x1000 -#define TCG_CT_CONST_MONE 0x2000 - -static uint8_t *tb_ret_addr; - -#if TARGET_LONG_BITS == 32 -#define LD_ADDR LWZ -#define CMP_L 0 -#else -#define LD_ADDR LD -#define CMP_L (1<<21) -#endif - -#ifndef GUEST_BASE -#define GUEST_BASE 0 -#endif - -#include "elf.h" -static bool have_isa_2_06; -#define HAVE_ISA_2_06 have_isa_2_06 -#define HAVE_ISEL have_isa_2_06 - -#ifdef CONFIG_USE_GUEST_BASE -#define TCG_GUEST_BASE_REG 30 -#else -#define TCG_GUEST_BASE_REG 0 -#endif - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "r0", - "r1", - "r2", - "r3", - "r4", - "r5", - "r6", - "r7", - "r8", - "r9", - "r10", - "r11", - "r12", - "r13", - "r14", - "r15", - "r16", - "r17", - "r18", - "r19", - "r20", - "r21", - "r22", - "r23", - "r24", - "r25", - "r26", - "r27", - "r28", - "r29", - "r30", - "r31" -}; -#endif - -static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R14, /* call saved registers */ - TCG_REG_R15, - TCG_REG_R16, - TCG_REG_R17, - TCG_REG_R18, - TCG_REG_R19, - TCG_REG_R20, - TCG_REG_R21, - TCG_REG_R22, - TCG_REG_R23, - TCG_REG_R24, - TCG_REG_R25, - TCG_REG_R26, - TCG_REG_R27, - TCG_REG_R28, - TCG_REG_R29, - TCG_REG_R30, - TCG_REG_R31, - TCG_REG_R12, /* call clobbered, non-arguments */ - TCG_REG_R11, - TCG_REG_R10, /* call clobbered, arguments */ - TCG_REG_R9, - TCG_REG_R8, - TCG_REG_R7, - TCG_REG_R6, - TCG_REG_R5, - TCG_REG_R4, - TCG_REG_R3, -}; - -static const int tcg_target_call_iarg_regs[] = { - TCG_REG_R3, - TCG_REG_R4, - TCG_REG_R5, - TCG_REG_R6, - TCG_REG_R7, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10 -}; - 
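/* For reference: integer call arguments are handed out from
   tcg_target_call_iarg_regs[] in order (r3..r10, the ELF ABI argument
   registers), and call results are read back from
   tcg_target_call_oarg_regs[] below (r3 only). */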
-static const int tcg_target_call_oarg_regs[] = { - TCG_REG_R3 -}; - -static const int tcg_target_callee_save_regs[] = { -#ifdef __APPLE__ - TCG_REG_R11, -#endif - TCG_REG_R14, - TCG_REG_R15, - TCG_REG_R16, - TCG_REG_R17, - TCG_REG_R18, - TCG_REG_R19, - TCG_REG_R20, - TCG_REG_R21, - TCG_REG_R22, - TCG_REG_R23, - TCG_REG_R24, - TCG_REG_R25, - TCG_REG_R26, - TCG_REG_R27, /* currently used for the global env */ - TCG_REG_R28, - TCG_REG_R29, - TCG_REG_R30, - TCG_REG_R31 -}; - -static inline bool in_range_b(tcg_target_long target) -{ - return target == sextract64(target, 0, 26); -} - -static uint32_t reloc_pc24_val(void *pc, tcg_target_long target) -{ - tcg_target_long disp; - - disp = target - (tcg_target_long)pc; - assert(in_range_b(disp)); - - return disp & 0x3fffffc; -} - -static void reloc_pc24(void *pc, tcg_target_long target) -{ - *(uint32_t *)pc = (*(uint32_t *)pc & ~0x3fffffc) - | reloc_pc24_val(pc, target); -} - -static uint16_t reloc_pc14_val(void *pc, tcg_target_long target) -{ - tcg_target_long disp; - - disp = target - (tcg_target_long)pc; - if (disp != (int16_t) disp) { - tcg_abort(); - } - - return disp & 0xfffc; -} - -static void reloc_pc14(void *pc, tcg_target_long target) -{ - *(uint32_t *)pc = (*(uint32_t *)pc & ~0xfffc) | reloc_pc14_val(pc, target); -} - -static inline void tcg_out_b_noaddr(TCGContext *s, int insn) -{ - unsigned retrans = *(uint32_t *)s->code_ptr & 0x3fffffc; - tcg_out32(s, insn | retrans); -} - -static inline void tcg_out_bc_noaddr(TCGContext *s, int insn) -{ - unsigned retrans = *(uint32_t *)s->code_ptr & 0xfffc; - tcg_out32(s, insn | retrans); -} - -static void patch_reloc(uint8_t *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - value += addend; - switch (type) { - case R_PPC_REL14: - reloc_pc14(code_ptr, value); - break; - case R_PPC_REL24: - reloc_pc24(code_ptr, value); - break; - default: - tcg_abort(); - } -} - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str; - - ct_str = *pct_str; - switch (ct_str[0]) { - case 'A': case 'B': case 'C': case 'D': - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A'); - break; - case 'r': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - break; - case 'L': /* qemu_ld constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); -#ifdef CONFIG_SOFTMMU - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); -#endif - break; - case 'S': /* qemu_st constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); -#ifdef CONFIG_SOFTMMU - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6); -#endif - break; - case 'I': - ct->ct |= TCG_CT_CONST_S16; - break; - case 'J': - ct->ct |= TCG_CT_CONST_U16; - break; - case 'M': - ct->ct |= TCG_CT_CONST_MONE; - break; - case 'T': - ct->ct |= TCG_CT_CONST_S32; - break; - case 'U': - ct->ct |= TCG_CT_CONST_U32; - break; - case 'Z': - ct->ct |= TCG_CT_CONST_ZERO; - break; - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - return 0; -} - -/* test if a constant matches the constraint */ -static int tcg_target_const_match(tcg_target_long val, - const TCGArgConstraint *arg_ct) -{ - int ct = arg_ct->ct; - if (ct & TCG_CT_CONST) { - return 1; - } else if ((ct & 
TCG_CT_CONST_S16) && val == (int16_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { - return 1; - } else if ((ct & TCG_CT_CONST_MONE) && val == -1) { - return 1; - } - return 0; -} - -#define OPCD(opc) ((opc)<<26) -#define XO19(opc) (OPCD(19)|((opc)<<1)) -#define MD30(opc) (OPCD(30)|((opc)<<2)) -#define MDS30(opc) (OPCD(30)|((opc)<<1)) -#define XO31(opc) (OPCD(31)|((opc)<<1)) -#define XO58(opc) (OPCD(58)|(opc)) -#define XO62(opc) (OPCD(62)|(opc)) - -#define B OPCD( 18) -#define BC OPCD( 16) -#define LBZ OPCD( 34) -#define LHZ OPCD( 40) -#define LHA OPCD( 42) -#define LWZ OPCD( 32) -#define STB OPCD( 38) -#define STH OPCD( 44) -#define STW OPCD( 36) - -#define STD XO62( 0) -#define STDU XO62( 1) -#define STDX XO31(149) - -#define LD XO58( 0) -#define LDX XO31( 21) -#define LDU XO58( 1) -#define LWA XO58( 2) -#define LWAX XO31(341) - -#define ADDIC OPCD( 12) -#define ADDI OPCD( 14) -#define ADDIS OPCD( 15) -#define ORI OPCD( 24) -#define ORIS OPCD( 25) -#define XORI OPCD( 26) -#define XORIS OPCD( 27) -#define ANDI OPCD( 28) -#define ANDIS OPCD( 29) -#define MULLI OPCD( 7) -#define CMPLI OPCD( 10) -#define CMPI OPCD( 11) -#define SUBFIC OPCD( 8) - -#define LWZU OPCD( 33) -#define STWU OPCD( 37) - -#define RLWIMI OPCD( 20) -#define RLWINM OPCD( 21) -#define RLWNM OPCD( 23) - -#define RLDICL MD30( 0) -#define RLDICR MD30( 1) -#define RLDIMI MD30( 3) -#define RLDCL MDS30( 8) - -#define BCLR XO19( 16) -#define BCCTR XO19(528) -#define CRAND XO19(257) -#define CRANDC XO19(129) -#define CRNAND XO19(225) -#define CROR XO19(449) -#define CRNOR XO19( 33) - -#define EXTSB XO31(954) -#define EXTSH XO31(922) -#define EXTSW XO31(986) -#define ADD XO31(266) -#define ADDE XO31(138) -#define ADDME XO31(234) -#define ADDZE XO31(202) -#define ADDC XO31( 10) -#define AND XO31( 28) -#define SUBF XO31( 40) -#define SUBFC XO31( 8) -#define SUBFE XO31(136) -#define SUBFME XO31(232) -#define SUBFZE XO31(200) -#define OR XO31(444) -#define XOR XO31(316) -#define MULLW XO31(235) -#define MULHWU XO31( 11) -#define DIVW XO31(491) -#define DIVWU XO31(459) -#define CMP XO31( 0) -#define CMPL XO31( 32) -#define LHBRX XO31(790) -#define LWBRX XO31(534) -#define LDBRX XO31(532) -#define STHBRX XO31(918) -#define STWBRX XO31(662) -#define STDBRX XO31(660) -#define MFSPR XO31(339) -#define MTSPR XO31(467) -#define SRAWI XO31(824) -#define NEG XO31(104) -#define MFCR XO31( 19) -#define MFOCRF (MFCR | (1u << 20)) -#define NOR XO31(124) -#define CNTLZW XO31( 26) -#define CNTLZD XO31( 58) -#define ANDC XO31( 60) -#define ORC XO31(412) -#define EQV XO31(284) -#define NAND XO31(476) -#define ISEL XO31( 15) - -#define MULLD XO31(233) -#define MULHD XO31( 73) -#define MULHDU XO31( 9) -#define DIVD XO31(489) -#define DIVDU XO31(457) - -#define LBZX XO31( 87) -#define LHZX XO31(279) -#define LHAX XO31(343) -#define LWZX XO31( 23) -#define STBX XO31(215) -#define STHX XO31(407) -#define STWX XO31(151) - -#define SPR(a, b) ((((a)<<5)|(b))<<11) -#define LR SPR(8, 0) -#define CTR SPR(9, 0) - -#define SLW XO31( 24) -#define SRW XO31(536) -#define SRAW XO31(792) - -#define SLD XO31( 27) -#define SRD XO31(539) -#define SRAD XO31(794) -#define SRADI XO31(413<<1) - -#define TW XO31( 4) -#define TRAP (TW | TO(31)) - -#define RT(r) ((r)<<21) -#define RS(r) ((r)<<21) -#define RA(r) ((r)<<16) -#define 
RB(r) ((r)<<11) -#define TO(t) ((t)<<21) -#define SH(s) ((s)<<11) -#define MB(b) ((b)<<6) -#define ME(e) ((e)<<1) -#define BO(o) ((o)<<21) -#define MB64(b) ((b)<<5) -#define FXM(b) (1 << (19 - (b))) - -#define LK 1 - -#define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) -#define SAB(s, a, b) (RS(s) | RA(a) | RB(b)) -#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff)) -#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff)) - -#define BF(n) ((n)<<23) -#define BI(n, c) (((c)+((n)*4))<<16) -#define BT(n, c) (((c)+((n)*4))<<21) -#define BA(n, c) (((c)+((n)*4))<<16) -#define BB(n, c) (((c)+((n)*4))<<11) -#define BC_(n, c) (((c)+((n)*4))<<6) - -#define BO_COND_TRUE BO(12) -#define BO_COND_FALSE BO( 4) -#define BO_ALWAYS BO(20) - -enum { - CR_LT, - CR_GT, - CR_EQ, - CR_SO -}; - -static const uint32_t tcg_to_bc[] = { - [TCG_COND_EQ] = BC | BI(7, CR_EQ) | BO_COND_TRUE, - [TCG_COND_NE] = BC | BI(7, CR_EQ) | BO_COND_FALSE, - [TCG_COND_LT] = BC | BI(7, CR_LT) | BO_COND_TRUE, - [TCG_COND_GE] = BC | BI(7, CR_LT) | BO_COND_FALSE, - [TCG_COND_LE] = BC | BI(7, CR_GT) | BO_COND_FALSE, - [TCG_COND_GT] = BC | BI(7, CR_GT) | BO_COND_TRUE, - [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE, - [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE, - [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE, - [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE, -}; - -/* The low bit here is set if the RA and RB fields must be inverted. */ -static const uint32_t tcg_to_isel[] = { - [TCG_COND_EQ] = ISEL | BC_(7, CR_EQ), - [TCG_COND_NE] = ISEL | BC_(7, CR_EQ) | 1, - [TCG_COND_LT] = ISEL | BC_(7, CR_LT), - [TCG_COND_GE] = ISEL | BC_(7, CR_LT) | 1, - [TCG_COND_LE] = ISEL | BC_(7, CR_GT) | 1, - [TCG_COND_GT] = ISEL | BC_(7, CR_GT), - [TCG_COND_LTU] = ISEL | BC_(7, CR_LT), - [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1, - [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1, - [TCG_COND_GTU] = ISEL | BC_(7, CR_GT), -}; - -static inline void tcg_out_mov(TCGContext *s, TCGType type, - TCGReg ret, TCGReg arg) -{ - if (ret != arg) { - tcg_out32(s, OR | SAB(arg, ret, arg)); - } -} - -static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, - int sh, int mb) -{ - sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1); - mb = MB64((mb >> 5) | ((mb << 1) & 0x3f)); - tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb); -} - -static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, - int sh, int mb, int me) -{ - tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me)); -} - -static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src) -{ - tcg_out_rld(s, RLDICL, dst, src, 0, 32); -} - -static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c) -{ - tcg_out_rld(s, RLDICR, dst, src, c, 63 - c); -} - -static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) -{ - tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); -} - -static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg) -{ - if (arg == (int16_t) arg) { - tcg_out32(s, ADDI | TAI(ret, 0, arg)); - } else { - tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); - if (arg & 0xffff) { - tcg_out32(s, ORI | SAI(ret, ret, arg)); - } - } -} - -static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, - tcg_target_long arg) -{ - if (type == TCG_TYPE_I32 || arg == (int32_t)arg) { - tcg_out_movi32(s, ret, arg); - } else if (arg == (uint32_t)arg && !(arg & 0x8000)) { - tcg_out32(s, ADDI | TAI(ret, 0, arg)); - tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); - } else { - int32_t high = arg >> 32; - tcg_out_movi32(s, ret, high); - if (high) { 
- tcg_out_shli64(s, ret, ret, 32); - } - if (arg & 0xffff0000) { - tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); - } - if (arg & 0xffff) { - tcg_out32(s, ORI | SAI(ret, ret, arg)); - } - } -} - -static bool mask_operand(uint32_t c, int *mb, int *me) -{ - uint32_t lsb, test; - - /* Accept a bit pattern like: - 0....01....1 - 1....10....0 - 0..01..10..0 - Keep track of the transitions. */ - if (c == 0 || c == -1) { - return false; - } - test = c; - lsb = test & -test; - test += lsb; - if (test & (test - 1)) { - return false; - } - - *me = clz32(lsb); - *mb = test ? clz32(test & -test) + 1 : 0; - return true; -} - -static bool mask64_operand(uint64_t c, int *mb, int *me) -{ - uint64_t lsb; - - if (c == 0) { - return false; - } - - lsb = c & -c; - /* Accept 1..10..0. */ - if (c == -lsb) { - *mb = 0; - *me = clz64(lsb); - return true; - } - /* Accept 0..01..1. */ - if (lsb == 1 && (c & (c + 1)) == 0) { - *mb = clz64(c + 1) + 1; - *me = 63; - return true; - } - return false; -} - -static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) -{ - int mb, me; - - if ((c & 0xffff) == c) { - tcg_out32(s, ANDI | SAI(src, dst, c)); - return; - } else if ((c & 0xffff0000) == c) { - tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); - return; - } else if (mask_operand(c, &mb, &me)) { - tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); - } else { - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); - tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); - } -} - -static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) -{ - int mb, me; - - if ((c & 0xffff) == c) { - tcg_out32(s, ANDI | SAI(src, dst, c)); - return; - } else if ((c & 0xffff0000) == c) { - tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); - return; - } else if (mask64_operand(c, &mb, &me)) { - if (mb == 0) { - tcg_out_rld(s, RLDICR, dst, src, 0, me); - } else { - tcg_out_rld(s, RLDICL, dst, src, 0, mb); - } - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); - tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); - } -} - -static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c, - int op_lo, int op_hi) -{ - if (c >> 16) { - tcg_out32(s, op_hi | SAI(src, dst, c >> 16)); - src = dst; - } - if (c & 0xffff) { - tcg_out32(s, op_lo | SAI(src, dst, c)); - src = dst; - } -} - -static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) -{ - tcg_out_zori32(s, dst, src, c, ORI, ORIS); -} - -static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) -{ - tcg_out_zori32(s, dst, src, c, XORI, XORIS); -} - -static void tcg_out_b(TCGContext *s, int mask, tcg_target_long target) -{ - tcg_target_long disp; - - disp = target - (tcg_target_long)s->code_ptr; - if (in_range_b(disp)) { - tcg_out32(s, B | (disp & 0x3fffffc) | mask); - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, (tcg_target_long)target); - tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR); - tcg_out32(s, BCCTR | BO_ALWAYS | mask); - } -} - -static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg) -{ -#ifdef __APPLE__ - if (const_arg) { - tcg_out_b(s, LK, arg); - } else { - tcg_out32(s, MTSPR | RS(arg) | LR); - tcg_out32(s, BCLR | BO_ALWAYS | LK); - } -#else - TCGReg reg = arg; - int ofs = 0; - - if (const_arg) { - /* Look through the descriptor. If the branch is in range, and we - don't have to spend too much effort on building the toc. 
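       Under the ppc64 ELFv1 ABI a function pointer addresses a descriptor
       rather than code; as a rough sketch the descriptor looks like
           struct { void *entry; void *toc; void *env; };
       so tgt below is the real entry point and toc is the callee's TOC
       value, which the general case loads into CTR and r2 respectively
       before the indirect call.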
*/ - intptr_t tgt = ((intptr_t *)arg)[0]; - intptr_t toc = ((intptr_t *)arg)[1]; - intptr_t diff = tgt - (intptr_t)s->code_ptr; - - if (in_range_b(diff) && toc == (uint32_t)toc) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, toc); - tcg_out_b(s, LK, tgt); - return; - } - - /* Fold the low bits of the constant into the addresses below. */ - ofs = (int16_t)arg; - if (ofs + 8 < 0x8000) { - arg -= ofs; - } else { - ofs = 0; - } - reg = TCG_REG_R2; - tcg_out_movi(s, TCG_TYPE_I64, reg, arg); - } - - tcg_out32(s, LD | TAI(TCG_REG_R0, reg, ofs)); - tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); - tcg_out32(s, LD | TAI(TCG_REG_R2, reg, ofs + 8)); - tcg_out32(s, BCCTR | BO_ALWAYS | LK); -#endif -} - -static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, - TCGReg base, tcg_target_long offset) -{ - tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; - TCGReg rs = TCG_REG_R2; - - assert(rt != TCG_REG_R2 && base != TCG_REG_R2); - - switch (opi) { - case LD: case LWA: - align = 3; - /* FALLTHRU */ - default: - if (rt != TCG_REG_R0) { - rs = rt; - } - break; - case STD: - align = 3; - break; - case STB: case STH: case STW: - break; - } - - /* For unaligned, or very large offsets, use the indexed form. */ - if (offset & align || offset != (int32_t)offset) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, orig); - tcg_out32(s, opx | TAB(rt, base, TCG_REG_R2)); - return; - } - - l0 = (int16_t)offset; - offset = (offset - l0) >> 16; - l1 = (int16_t)offset; - - if (l1 < 0 && orig >= 0) { - extra = 0x4000; - l1 = (int16_t)(offset - 0x4000); - } - if (l1) { - tcg_out32(s, ADDIS | TAI(rs, base, l1)); - base = rs; - } - if (extra) { - tcg_out32(s, ADDIS | TAI(rs, base, extra)); - base = rs; - } - if (opi != ADDI || base != rt || l0 != 0) { - tcg_out32(s, opi | TAI(rt, base, l0)); - } -} - -static const uint32_t qemu_ldx_opc[16] = { - [MO_UB] = LBZX, - [MO_UW] = LHZX, - [MO_UL] = LWZX, - [MO_Q] = LDX, - [MO_SW] = LHAX, - [MO_SL] = LWAX, - [MO_BSWAP | MO_UB] = LBZX, - [MO_BSWAP | MO_UW] = LHBRX, - [MO_BSWAP | MO_UL] = LWBRX, - [MO_BSWAP | MO_Q] = LDBRX, -}; - -static const uint32_t qemu_stx_opc[16] = { - [MO_UB] = STBX, - [MO_UW] = STHX, - [MO_UL] = STWX, - [MO_Q] = STDX, - [MO_BSWAP | MO_UB] = STBX, - [MO_BSWAP | MO_UW] = STHBRX, - [MO_BSWAP | MO_UL] = STWBRX, - [MO_BSWAP | MO_Q] = STDBRX, -}; - -static const uint32_t qemu_exts_opc[4] = { - EXTSB, EXTSH, EXTSW, 0 -}; - -#if defined (CONFIG_SOFTMMU) -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - * int mmu_idx, uintptr_t ra) - */ -static const void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, -}; - -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - * uintxx_t val, int mmu_idx, uintptr_t ra) - */ -static const void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, -}; - -/* Perform the TLB load and compare. Places the result of the comparison - in CR7, loads the addend of the TLB into R3, and returns the register - containing the guest address (zero-extended into R4). Clobbers R0 and R2. 
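       In C terms the lookup being open-coded is roughly
           e    = &env->tlb_table[mem_index][(addr >> TARGET_PAGE_BITS)
                                             & (CPU_TLB_SIZE - 1)];
           hit  = (e->addr_read == (addr & TARGET_PAGE_MASK));
                  /* addr_write for stores; the low alignment bits are
                     kept in the compare so unaligned accesses miss */
           host = addr + e->addend;
       with the comparison result left in CR7 and the addend in R3.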
*/ - -static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, TCGReg addr_reg, - int mem_index, bool is_read) -{ - int cmp_off - = (is_read - ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) - : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); - int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); - TCGReg base = TCG_AREG0; - - /* Extract the page index, shifted into place for tlb index. */ - if (TARGET_LONG_BITS == 32) { - /* Zero-extend the address into a place helpful for further use. */ - tcg_out_ext32u(s, TCG_REG_R4, addr_reg); - addr_reg = TCG_REG_R4; - } else { - tcg_out_rld(s, RLDICL, TCG_REG_R3, addr_reg, - 64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS); - } - - /* Compensate for very large offsets. */ - if (add_off >= 0x8000) { - /* Most target env are smaller than 32k; none are larger than 64k. - Simplify the logic here merely to offset by 0x7ff0, giving us a - range just shy of 64k. Check this assumption. */ - QEMU_BUILD_BUG_ON(offsetof(CPUArchState, - tlb_table[NB_MMU_MODES - 1][1]) - > 0x7ff0 + 0x7fff); - tcg_out32(s, ADDI | TAI(TCG_REG_R2, base, 0x7ff0)); - base = TCG_REG_R2; - cmp_off -= 0x7ff0; - add_off -= 0x7ff0; - } - - /* Extraction and shifting, part 2. */ - if (TARGET_LONG_BITS == 32) { - tcg_out_rlw(s, RLWINM, TCG_REG_R3, addr_reg, - 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), - 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS), - 31 - CPU_TLB_ENTRY_BITS); - } else { - tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS); - } - - tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base)); - - /* Load the tlb comparator. */ - tcg_out32(s, LD_ADDR | TAI(TCG_REG_R2, TCG_REG_R3, cmp_off)); - - /* Load the TLB addend for use on the fast path. Do this asap - to minimize any load use delay. */ - tcg_out32(s, LD | TAI(TCG_REG_R3, TCG_REG_R3, add_off)); - - /* Clear the non-page, non-alignment bits from the address. */ - if (TARGET_LONG_BITS == 32) { - tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr_reg, 0, - (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); - } else if (!s_bits) { - tcg_out_rld(s, RLDICR, TCG_REG_R0, addr_reg, 0, 63 - TARGET_PAGE_BITS); - } else { - tcg_out_rld(s, RLDICL, TCG_REG_R0, addr_reg, - 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); - tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); - } - - tcg_out32(s, CMP | BF(7) | RA(TCG_REG_R0) | RB(TCG_REG_R2) | CMP_L); - - return addr_reg; -} - -/* Record the context of a call to the out of line helper code for the slow - path for a load or store, so that we can later generate the correct - helper code. */ -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, - int data_reg, int addr_reg, int mem_index, - uint8_t *raddr, uint8_t *label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->opc = opc; - label->datalo_reg = data_reg; - label->addrlo_reg = addr_reg; - label->mem_index = mem_index; - label->raddr = raddr; - label->label_ptr[0] = label_ptr; -} - -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGMemOp opc = lb->opc; - - reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr); - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0); - - /* If the address needed to be zero-extended, we'll have already - placed it in R4. The only remaining case is 64-bit guest. 
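       From here the slow path just marshals the helper call
       helper_ld_mmu(env, addr, mmu_idx, retaddr): r3 = env, r4 = guest
       address, r5 = mmu_idx, r6 = return address taken from LR; the value
       comes back in r3 and is sign-extended or moved into the destination
       before branching back to lb->raddr.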
*/ - tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg); - - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb->mem_index); - tcg_out32(s, MFSPR | RT(TCG_REG_R6) | LR); - - tcg_out_call(s, (tcg_target_long)qemu_ld_helpers[opc & ~MO_SIGN], 1); - - if (opc & MO_SIGN) { - uint32_t insn = qemu_exts_opc[opc & MO_SIZE]; - tcg_out32(s, insn | RA(lb->datalo_reg) | RS(TCG_REG_R3)); - } else { - tcg_out_mov(s, TCG_TYPE_I64, lb->datalo_reg, TCG_REG_R3); - } - - tcg_out_b(s, 0, (uintptr_t)lb->raddr); -} - -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGMemOp opc = lb->opc; - TCGMemOp s_bits = opc & MO_SIZE; - - reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr); - - tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, TCG_AREG0); - - /* If the address needed to be zero-extended, we'll have already - placed it in R4. The only remaining case is 64-bit guest. */ - tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg); - - tcg_out_rld(s, RLDICL, TCG_REG_R5, lb->datalo_reg, - 0, 64 - (1 << (3 + s_bits))); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R6, lb->mem_index); - tcg_out32(s, MFSPR | RT(TCG_REG_R7) | LR); - - tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1); - - tcg_out_b(s, 0, (uintptr_t)lb->raddr); -} -#endif /* SOFTMMU */ - -static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - TCGMemOp opc, int mem_index) -{ - TCGReg rbase; - uint32_t insn; - TCGMemOp s_bits = opc & MO_SIZE; -#ifdef CONFIG_SOFTMMU - void *label_ptr; -#endif - -#ifdef CONFIG_SOFTMMU - addr_reg = tcg_out_tlb_read(s, s_bits, addr_reg, mem_index, true); - - /* Load a pointer into the current opcode w/conditional branch-link. */ - label_ptr = s->code_ptr; - tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); - - rbase = TCG_REG_R3; -#else /* !CONFIG_SOFTMMU */ - rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_R2, addr_reg); - addr_reg = TCG_REG_R2; - } -#endif - - insn = qemu_ldx_opc[opc]; - if (!HAVE_ISA_2_06 && insn == LDBRX) { - tcg_out32(s, ADDI | TAI(TCG_REG_R0, addr_reg, 4)); - tcg_out32(s, LWBRX | TAB(data_reg, rbase, addr_reg)); - tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0)); - tcg_out_rld(s, RLDIMI, data_reg, TCG_REG_R0, 32, 0); - } else if (insn) { - tcg_out32(s, insn | TAB(data_reg, rbase, addr_reg)); - } else { - insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; - tcg_out32(s, insn | TAB(data_reg, rbase, addr_reg)); - insn = qemu_exts_opc[s_bits]; - tcg_out32(s, insn | RA(data_reg) | RS(data_reg)); - } - -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, true, opc, data_reg, addr_reg, mem_index, - s->code_ptr, label_ptr); -#endif -} - -static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - TCGMemOp opc, int mem_index) -{ - TCGReg rbase; - uint32_t insn; -#ifdef CONFIG_SOFTMMU - void *label_ptr; -#endif - -#ifdef CONFIG_SOFTMMU - addr_reg = tcg_out_tlb_read(s, opc & MO_SIZE, addr_reg, mem_index, false); - - /* Load a pointer into the current opcode w/conditional branch-link. */ - label_ptr = s->code_ptr; - tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); - - rbase = TCG_REG_R3; -#else /* !CONFIG_SOFTMMU */ - rbase = GUEST_BASE ? 
TCG_GUEST_BASE_REG : 0; - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_R2, addr_reg); - addr_reg = TCG_REG_R2; - } -#endif - - insn = qemu_stx_opc[opc]; - if (!HAVE_ISA_2_06 && insn == STDBRX) { - tcg_out32(s, STWBRX | SAB(data_reg, rbase, addr_reg)); - tcg_out32(s, ADDI | TAI(TCG_REG_R2, addr_reg, 4)); - tcg_out_shri64(s, TCG_REG_R0, data_reg, 32); - tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_R2)); - } else { - tcg_out32(s, insn | SAB(data_reg, rbase, addr_reg)); - } - -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, false, opc, data_reg, addr_reg, mem_index, - s->code_ptr, label_ptr); -#endif -} - -#define FRAME_SIZE ((int) \ - ((8 /* back chain */ \ - + 8 /* CR */ \ - + 8 /* LR */ \ - + 8 /* compiler doubleword */ \ - + 8 /* link editor doubleword */ \ - + 8 /* TOC save area */ \ - + TCG_STATIC_CALL_ARGS_SIZE \ - + CPU_TEMP_BUF_NLONGS * sizeof(long) \ - + ARRAY_SIZE(tcg_target_callee_save_regs) * 8 \ - + 15) & ~15)) - -#define REG_SAVE_BOT (FRAME_SIZE - ARRAY_SIZE(tcg_target_callee_save_regs) * 8) - -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int i; - - tcg_set_frame(s, TCG_REG_CALL_STACK, - REG_SAVE_BOT - CPU_TEMP_BUF_NLONGS * sizeof(long), - CPU_TEMP_BUF_NLONGS * sizeof(long)); - -#ifndef __APPLE__ - /* First emit adhoc function descriptor */ - tcg_out64(s, (uint64_t)s->code_ptr + 24); /* entry point */ - s->code_ptr += 16; /* skip TOC and environment pointer */ -#endif - - /* Prologue */ - tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); - tcg_out32(s, STDU | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); - for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { - tcg_out32(s, STD | SAI(tcg_target_callee_save_regs[i], 1, - REG_SAVE_BOT + i * 8)); - } - tcg_out32(s, STD | SAI(TCG_REG_R0, TCG_REG_R1, FRAME_SIZE + 16)); - -#ifdef CONFIG_USE_GUEST_BASE - if (GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_GUEST_BASE_REG, GUEST_BASE); - tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); - } -#endif - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); - tcg_out32(s, BCCTR | BO_ALWAYS); - - /* Epilogue */ - tb_ret_addr = s->code_ptr; - - for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { - tcg_out32(s, LD | TAI(tcg_target_callee_save_regs[i], TCG_REG_R1, - REG_SAVE_BOT + i * 8)); - } - tcg_out32(s, LD | TAI(TCG_REG_R0, TCG_REG_R1, FRAME_SIZE + 16)); - tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); - tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); - tcg_out32(s, BCLR | BO_ALWAYS); -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, - TCGReg arg1, intptr_t arg2) -{ - int opi, opx; - - if (type == TCG_TYPE_I32) { - opi = LWZ, opx = LWZX; - } else { - opi = LD, opx = LDX; - } - tcg_out_mem_long(s, opi, opx, ret, arg1, arg2); -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - int opi, opx; - - if (type == TCG_TYPE_I32) { - opi = STW, opx = STWX; - } else { - opi = STD, opx = STDX; - } - tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); -} - -static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, - int const_arg2, int cr, TCGType type) -{ - int imm; - uint32_t op; - - /* Simplify the comparisons below wrt CMPI. 
*/ - if (type == TCG_TYPE_I32) { - arg2 = (int32_t)arg2; - } - - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_NE: - if (const_arg2) { - if ((int16_t) arg2 == arg2) { - op = CMPI; - imm = 1; - break; - } else if ((uint16_t) arg2 == arg2) { - op = CMPLI; - imm = 1; - break; - } - } - op = CMPL; - imm = 0; - break; - - case TCG_COND_LT: - case TCG_COND_GE: - case TCG_COND_LE: - case TCG_COND_GT: - if (const_arg2) { - if ((int16_t) arg2 == arg2) { - op = CMPI; - imm = 1; - break; - } - } - op = CMP; - imm = 0; - break; - - case TCG_COND_LTU: - case TCG_COND_GEU: - case TCG_COND_LEU: - case TCG_COND_GTU: - if (const_arg2) { - if ((uint16_t) arg2 == arg2) { - op = CMPLI; - imm = 1; - break; - } - } - op = CMPL; - imm = 0; - break; - - default: - tcg_abort(); - } - op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); - - if (imm) { - tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); - } else { - if (const_arg2) { - tcg_out_movi(s, type, TCG_REG_R0, arg2); - arg2 = TCG_REG_R0; - } - tcg_out32(s, op | RA(arg1) | RB(arg2)); - } -} - -static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, - TCGReg dst, TCGReg src) -{ - tcg_out32(s, (type == TCG_TYPE_I64 ? CNTLZD : CNTLZW) | RS(src) | RA(dst)); - tcg_out_shri64(s, dst, dst, type == TCG_TYPE_I64 ? 6 : 5); -} - -static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src) -{ - /* X != 0 implies X + -1 generates a carry. Extra addition - trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. */ - if (dst != src) { - tcg_out32(s, ADDIC | TAI(dst, src, -1)); - tcg_out32(s, SUBFE | TAB(dst, dst, src)); - } else { - tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); - tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); - } -} - -static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, - bool const_arg2) -{ - if (const_arg2) { - if ((uint32_t)arg2 == arg2) { - tcg_out_xori32(s, TCG_REG_R0, arg1, arg2); - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2); - tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0)); - } - } else { - tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2)); - } - return TCG_REG_R0; -} - -static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, - TCGArg arg0, TCGArg arg1, TCGArg arg2, - int const_arg2) -{ - int crop, sh; - - /* Ignore high bits of a potential constant arg2. */ - if (type == TCG_TYPE_I32) { - arg2 = (uint32_t)arg2; - } - - /* Handle common and trivial cases before handling anything else. */ - if (arg2 == 0) { - switch (cond) { - case TCG_COND_EQ: - tcg_out_setcond_eq0(s, type, arg0, arg1); - return; - case TCG_COND_NE: - if (type == TCG_TYPE_I32) { - tcg_out_ext32u(s, TCG_REG_R0, arg1); - arg1 = TCG_REG_R0; - } - tcg_out_setcond_ne0(s, arg0, arg1); - return; - case TCG_COND_GE: - tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); - arg1 = arg0; - /* FALLTHRU */ - case TCG_COND_LT: - /* Extract the sign bit. */ - tcg_out_rld(s, RLDICL, arg0, arg1, - type == TCG_TYPE_I64 ? 1 : 33, 63); - return; - default: - break; - } - } - - /* If we have ISEL, we can implement everything with 3 or 4 insns. - All other cases below are also at least 3 insns, so speed up the - code generator by not considering them and always using ISEL. */ - if (HAVE_ISEL) { - int isel, tab; - - tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); - - isel = tcg_to_isel[cond]; - - tcg_out_movi(s, type, arg0, 1); - if (isel & 1) { - /* arg0 = (bc ? 0 : 1) */ - tab = TAB(arg0, 0, arg0); - isel &= ~1; - } else { - /* arg0 = (bc ? 
1 : 0) */ - tcg_out_movi(s, type, TCG_REG_R0, 0); - tab = TAB(arg0, arg0, TCG_REG_R0); - } - tcg_out32(s, isel | tab); - return; - } - - switch (cond) { - case TCG_COND_EQ: - arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); - tcg_out_setcond_eq0(s, type, arg0, arg1); - return; - - case TCG_COND_NE: - arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); - /* Discard the high bits only once, rather than both inputs. */ - if (type == TCG_TYPE_I32) { - tcg_out_ext32u(s, TCG_REG_R0, arg1); - arg1 = TCG_REG_R0; - } - tcg_out_setcond_ne0(s, arg0, arg1); - return; - - case TCG_COND_GT: - case TCG_COND_GTU: - sh = 30; - crop = 0; - goto crtest; - - case TCG_COND_LT: - case TCG_COND_LTU: - sh = 29; - crop = 0; - goto crtest; - - case TCG_COND_GE: - case TCG_COND_GEU: - sh = 31; - crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT); - goto crtest; - - case TCG_COND_LE: - case TCG_COND_LEU: - sh = 31; - crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT); - crtest: - tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); - if (crop) { - tcg_out32(s, crop); - } - tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); - tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31); - break; - - default: - tcg_abort(); - } -} - -static void tcg_out_bc(TCGContext *s, int bc, int label_index) -{ - TCGLabel *l = &s->labels[label_index]; - - if (l->has_value) { - tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value)); - } else { - tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, label_index, 0); - tcg_out_bc_noaddr(s, bc); - } -} - -static void tcg_out_brcond(TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - int label_index, TCGType type) -{ - tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); - tcg_out_bc(s, tcg_to_bc[cond], label_index); -} - -static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, - TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, - TCGArg v2, bool const_c2) -{ - /* If for some reason both inputs are zero, don't produce bad code. */ - if (v1 == 0 && v2 == 0) { - tcg_out_movi(s, type, dest, 0); - return; - } - - tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); - - if (HAVE_ISEL) { - int isel = tcg_to_isel[cond]; - - /* Swap the V operands if the operation indicates inversion. */ - if (isel & 1) { - int t = v1; - v1 = v2; - v2 = t; - isel &= ~1; - } - /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */ - if (v2 == 0) { - tcg_out_movi(s, type, TCG_REG_R0, 0); - } - tcg_out32(s, isel | TAB(dest, v1, v2)); - } else { - if (dest == v2) { - cond = tcg_invert_cond(cond); - v2 = v1; - } else if (dest != v1) { - if (v1 == 0) { - tcg_out_movi(s, type, dest, 0); - } else { - tcg_out_mov(s, type, dest, v1); - } - } - /* Branch forward over one insn */ - tcg_out32(s, tcg_to_bc[cond] | 8); - if (v2 == 0) { - tcg_out_movi(s, type, dest, 0); - } else { - tcg_out_mov(s, type, dest, v2); - } - } -} - -void ppc_tb_set_jmp_target(unsigned long jmp_addr, unsigned long addr) -{ - TCGContext s; - unsigned long patch_size; - - s.code_ptr = (uint8_t *) jmp_addr; - tcg_out_b(&s, 0, addr); - patch_size = s.code_ptr - (uint8_t *) jmp_addr; - flush_icache_range(jmp_addr, jmp_addr + patch_size); -} - -static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, - const int *const_args) -{ - TCGArg a0, a1, a2; - int c; - - switch (opc) { - case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R3, args[0]); - tcg_out_b(s, 0, (tcg_target_long)tb_ret_addr); - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* Direct jump method. 
*/ - s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; - s->code_ptr += 28; - } else { - /* Indirect jump method. */ - tcg_abort(); - } - s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; - break; - case INDEX_op_br: - { - TCGLabel *l = &s->labels[args[0]]; - - if (l->has_value) { - tcg_out_b(s, 0, l->u.value); - } else { - tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, args[0], 0); - tcg_out_b_noaddr(s, B); - } - } - break; - case INDEX_op_call: - tcg_out_call(s, args[0], const_args[0]); - break; - case INDEX_op_movi_i32: - tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); - break; - case INDEX_op_movi_i64: - tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); - tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0])); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld32s_i64: - tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i64: - tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); - break; - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); - break; - case INDEX_op_st_i64: - tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); - break; - - case INDEX_op_add_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - do_addi_32: - tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); - } else { - tcg_out32(s, ADD | TAB(a0, a1, a2)); - } - break; - case INDEX_op_sub_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); - } else { - tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); - } - } else if (const_args[2]) { - a2 = -a2; - goto do_addi_32; - } else { - tcg_out32(s, SUBF | TAB(a0, a2, a1)); - } - break; - - case INDEX_op_and_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi32(s, a0, a1, a2); - } else { - tcg_out32(s, AND | SAB(a1, a0, a2)); - } - break; - case INDEX_op_and_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi64(s, a0, a1, a2); - } else { - tcg_out32(s, AND | SAB(a1, a0, a2)); - } - break; - case INDEX_op_or_i64: - case INDEX_op_or_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_ori32(s, a0, a1, a2); - } else { - tcg_out32(s, OR | SAB(a1, a0, a2)); - } - break; - case INDEX_op_xor_i64: - case INDEX_op_xor_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_xori32(s, a0, a1, a2); - } else { - tcg_out32(s, XOR | SAB(a1, a0, a2)); - } - break; - case INDEX_op_andc_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi32(s, a0, a1, ~a2); - } else { - tcg_out32(s, ANDC | SAB(a1, a0, a2)); - } - break; - 
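/*
 * An illustrative, standalone sketch (not part of this patch) of the
 * 16-bit-immediate fit tests that the ppc64 backend code above relies on,
 * e.g. the CMPI/CMPLI/CMP selection in tcg_out_cmp and the ADDI/ADDIS
 * splitting in tcg_out_mem_long.  It assumes only standard C; the helper
 * names fits_si16/fits_ui16 are invented for the example.
 */
#include <stdint.h>
#include <stdio.h>

/* True if v round-trips through a signed 16-bit immediate field. */
static int fits_si16(int64_t v) { return v == (int16_t)v; }

/* True if v round-trips through an unsigned 16-bit immediate field. */
static int fits_ui16(int64_t v) { return v == (uint16_t)v; }

int main(void)
{
    const int64_t tests[] = { 0, 0x7fff, 0x8000, 0xffff, -1, -0x8000, 0x12345 };

    for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
        printf("%lld -> %s\n", (long long)tests[i],
               fits_si16(tests[i]) ? "signed 16-bit immediate (CMPI/ADDI style)"
               : fits_ui16(tests[i]) ? "unsigned 16-bit immediate (CMPLI style)"
               : "no immediate form; load the constant into a register first");
    }
    return 0;
}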
case INDEX_op_andc_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi64(s, a0, a1, ~a2); - } else { - tcg_out32(s, ANDC | SAB(a1, a0, a2)); - } - break; - case INDEX_op_orc_i32: - if (const_args[2]) { - tcg_out_ori32(s, args[0], args[1], ~args[2]); - break; - } - /* FALLTHRU */ - case INDEX_op_orc_i64: - tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); - break; - case INDEX_op_eqv_i32: - if (const_args[2]) { - tcg_out_xori32(s, args[0], args[1], ~args[2]); - break; - } - /* FALLTHRU */ - case INDEX_op_eqv_i64: - tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); - break; - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: - tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); - break; - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); - break; - - case INDEX_op_mul_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out32(s, MULLI | TAI(a0, a1, a2)); - } else { - tcg_out32(s, MULLW | TAB(a0, a1, a2)); - } - break; - - case INDEX_op_div_i32: - tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); - break; - - case INDEX_op_divu_i32: - tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); - break; - - case INDEX_op_shl_i32: - if (const_args[2]) { - tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31 - args[2]); - } else { - tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_shr_i32: - if (const_args[2]) { - tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], args[2], 31); - } else { - tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_sar_i32: - if (const_args[2]) { - tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2])); - } else { - tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_rotl_i32: - if (const_args[2]) { - tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); - } else { - tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) - | MB(0) | ME(31)); - } - break; - case INDEX_op_rotr_i32: - if (const_args[2]) { - tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); - } else { - tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); - tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) - | MB(0) | ME(31)); - } - break; - - case INDEX_op_brcond_i32: - tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], - args[3], TCG_TYPE_I32); - break; - - case INDEX_op_brcond_i64: - tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], - args[3], TCG_TYPE_I64); - break; - - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: - tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); - break; - - case INDEX_op_not_i32: - case INDEX_op_not_i64: - tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); - break; - - case INDEX_op_add_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - do_addi_64: - tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); - } else { - tcg_out32(s, ADD | TAB(a0, a1, a2)); - } - break; - case INDEX_op_sub_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); - } else { - tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); - } - } else if (const_args[2]) { - a2 = -a2; - goto do_addi_64; - } else { - tcg_out32(s, SUBF | TAB(a0, a2, a1)); - } - break; - - case INDEX_op_shl_i64: - if (const_args[2]) { - tcg_out_shli64(s, args[0], args[1], args[2]); - } else { - tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_shr_i64: - if 
(const_args[2]) { - tcg_out_shri64(s, args[0], args[1], args[2]); - } else { - tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_sar_i64: - if (const_args[2]) { - int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1); - tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh); - } else { - tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_rotl_i64: - if (const_args[2]) { - tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); - } else { - tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); - } - break; - case INDEX_op_rotr_i64: - if (const_args[2]) { - tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); - } else { - tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); - tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); - } - break; - - case INDEX_op_mul_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out32(s, MULLI | TAI(a0, a1, a2)); - } else { - tcg_out32(s, MULLD | TAB(a0, a1, a2)); - } - break; - case INDEX_op_div_i64: - tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_divu_i64: - tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); - break; - - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3]); - break; - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args[0], args[1], args[2], args[3]); - break; - - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - c = EXTSB; - goto gen_ext; - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - c = EXTSH; - goto gen_ext; - case INDEX_op_ext32s_i64: - c = EXTSW; - goto gen_ext; - gen_ext: - tcg_out32(s, c | RS(args[1]) | RA(args[0])); - break; - - case INDEX_op_setcond_i32: - tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], - const_args[2]); - break; - case INDEX_op_setcond_i64: - tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], - const_args[2]); - break; - - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - a0 = args[0], a1 = args[1]; - /* a1 = abcd */ - if (a0 != a1) { - /* a0 = (a1 r<< 24) & 0xff # 000c */ - tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); - /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */ - tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23); - } else { - /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */ - tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23); - /* a0 = (a1 r<< 24) & 0xff # 000c */ - tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); - /* a0 = a0 | r0 # 00dc */ - tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0)); - } - break; - - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: - /* Stolen from gcc's builtin_bswap32 */ - a1 = args[1]; - a0 = args[0] == a1 ? 
TCG_REG_R0 : args[0]; - - /* a1 = args[1] # abcd */ - /* a0 = rotate_left (a1, 8) # bcda */ - tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); - /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); - /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); - - if (a0 == TCG_REG_R0) { - tcg_out_mov(s, TCG_TYPE_I64, args[0], a0); - } - break; - - case INDEX_op_bswap64_i64: - a0 = args[0], a1 = args[1], a2 = TCG_REG_R0; - if (a0 == a1) { - a0 = TCG_REG_R0; - a2 = a1; - } - - /* a1 = # abcd efgh */ - /* a0 = rl32(a1, 8) # 0000 fghe */ - tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); - /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); - /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); - - /* a0 = rl64(a0, 32) # hgfe 0000 */ - /* a2 = rl64(a1, 32) # efgh abcd */ - tcg_out_rld(s, RLDICL, a0, a0, 32, 0); - tcg_out_rld(s, RLDICL, a2, a1, 32, 0); - - /* a0 = dep(a0, rl32(a2, 8), 0xffffffff) # hgfe bcda */ - tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31); - /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */ - tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7); - /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */ - tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23); - - if (a0 == 0) { - tcg_out_mov(s, TCG_TYPE_I64, args[0], a0); - } - break; - - case INDEX_op_deposit_i32: - if (const_args[2]) { - uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; - tcg_out_andi32(s, args[0], args[0], ~mask); - } else { - tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], - 32 - args[3] - args[4], 31 - args[3]); - } - break; - case INDEX_op_deposit_i64: - if (const_args[2]) { - uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; - tcg_out_andi64(s, args[0], args[0], ~mask); - } else { - tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], - 64 - args[3] - args[4]); - } - break; - - case INDEX_op_movcond_i32: - tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], - args[3], args[4], const_args[2]); - break; - case INDEX_op_movcond_i64: - tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], - args[3], args[4], const_args[2]); - break; - - case INDEX_op_add2_i64: - /* Note that the CA bit is defined based on the word size of the - environment. So in 64-bit mode it's always carry-out of bit 63. - The fallback code using deposit works just as well for 32-bit. */ - a0 = args[0], a1 = args[1]; - if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { - a0 = TCG_REG_R0; - } - if (const_args[4]) { - tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); - } else { - tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); - } - if (const_args[5]) { - tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); - } else { - tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); - } - if (a0 != args[0]) { - tcg_out_mov(s, TCG_TYPE_I64, args[0], a0); - } - break; - - case INDEX_op_sub2_i64: - a0 = args[0], a1 = args[1]; - if (a0 == args[5] || (!const_args[4] && a0 == args[4])) { - a0 = TCG_REG_R0; - } - if (const_args[2]) { - tcg_out32(s, SUBFIC | TAI(a0, args[3], args[2])); - } else { - tcg_out32(s, SUBFC | TAB(a0, args[3], args[2])); - } - if (const_args[4]) { - tcg_out32(s, (args[4] ? 
SUBFME : SUBFZE) | RT(a1) | RA(args[5])); - } else { - tcg_out32(s, SUBFE | TAB(a1, args[5], args[4])); - } - if (a0 != args[0]) { - tcg_out_mov(s, TCG_TYPE_I64, args[0], a0); - } - break; - - case INDEX_op_muluh_i64: - tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_mulsh_i64: - tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); - break; - - default: - tcg_dump_ops(s); - tcg_abort(); - } -} - -static const TCGTargetOpDef ppc_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_call, { "ri" } }, - { INDEX_op_br, { } }, - - { INDEX_op_mov_i32, { "r", "r" } }, - { INDEX_op_mov_i64, { "r", "r" } }, - { INDEX_op_movi_i32, { "r" } }, - { INDEX_op_movi_i64, { "r" } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_ld_i64, { "r", "r" } }, - { INDEX_op_st8_i32, { "r", "r" } }, - { INDEX_op_st8_i64, { "r", "r" } }, - { INDEX_op_st16_i32, { "r", "r" } }, - { INDEX_op_st16_i64, { "r", "r" } }, - { INDEX_op_st_i32, { "r", "r" } }, - { INDEX_op_st_i64, { "r", "r" } }, - { INDEX_op_st32_i64, { "r", "r" } }, - - { INDEX_op_ld8u_i64, { "r", "r" } }, - { INDEX_op_ld8s_i64, { "r", "r" } }, - { INDEX_op_ld16u_i64, { "r", "r" } }, - { INDEX_op_ld16s_i64, { "r", "r" } }, - { INDEX_op_ld32u_i64, { "r", "r" } }, - { INDEX_op_ld32s_i64, { "r", "r" } }, - - { INDEX_op_add_i32, { "r", "r", "ri" } }, - { INDEX_op_mul_i32, { "r", "r", "rI" } }, - { INDEX_op_div_i32, { "r", "r", "r" } }, - { INDEX_op_divu_i32, { "r", "r", "r" } }, - { INDEX_op_sub_i32, { "r", "rI", "ri" } }, - { INDEX_op_and_i32, { "r", "r", "ri" } }, - { INDEX_op_or_i32, { "r", "r", "ri" } }, - { INDEX_op_xor_i32, { "r", "r", "ri" } }, - { INDEX_op_andc_i32, { "r", "r", "ri" } }, - { INDEX_op_orc_i32, { "r", "r", "ri" } }, - { INDEX_op_eqv_i32, { "r", "r", "ri" } }, - { INDEX_op_nand_i32, { "r", "r", "r" } }, - { INDEX_op_nor_i32, { "r", "r", "r" } }, - - { INDEX_op_shl_i32, { "r", "r", "ri" } }, - { INDEX_op_shr_i32, { "r", "r", "ri" } }, - { INDEX_op_sar_i32, { "r", "r", "ri" } }, - { INDEX_op_rotl_i32, { "r", "r", "ri" } }, - { INDEX_op_rotr_i32, { "r", "r", "ri" } }, - - { INDEX_op_brcond_i32, { "r", "ri" } }, - { INDEX_op_brcond_i64, { "r", "ri" } }, - - { INDEX_op_neg_i32, { "r", "r" } }, - { INDEX_op_not_i32, { "r", "r" } }, - - { INDEX_op_add_i64, { "r", "r", "rT" } }, - { INDEX_op_sub_i64, { "r", "rI", "rT" } }, - { INDEX_op_and_i64, { "r", "r", "ri" } }, - { INDEX_op_or_i64, { "r", "r", "rU" } }, - { INDEX_op_xor_i64, { "r", "r", "rU" } }, - { INDEX_op_andc_i64, { "r", "r", "ri" } }, - { INDEX_op_orc_i64, { "r", "r", "r" } }, - { INDEX_op_eqv_i64, { "r", "r", "r" } }, - { INDEX_op_nand_i64, { "r", "r", "r" } }, - { INDEX_op_nor_i64, { "r", "r", "r" } }, - - { INDEX_op_shl_i64, { "r", "r", "ri" } }, - { INDEX_op_shr_i64, { "r", "r", "ri" } }, - { INDEX_op_sar_i64, { "r", "r", "ri" } }, - { INDEX_op_rotl_i64, { "r", "r", "ri" } }, - { INDEX_op_rotr_i64, { "r", "r", "ri" } }, - - { INDEX_op_mul_i64, { "r", "r", "rI" } }, - { INDEX_op_div_i64, { "r", "r", "r" } }, - { INDEX_op_divu_i64, { "r", "r", "r" } }, - - { INDEX_op_neg_i64, { "r", "r" } }, - { INDEX_op_not_i64, { "r", "r" } }, - - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "L" } }, - { INDEX_op_qemu_st_i32, { "S", "S" } }, - { INDEX_op_qemu_st_i64, { "S", "S" } }, - - { INDEX_op_ext8s_i32, { "r", "r" } }, - { INDEX_op_ext16s_i32, { 
"r", "r" } }, - { INDEX_op_ext8s_i64, { "r", "r" } }, - { INDEX_op_ext16s_i64, { "r", "r" } }, - { INDEX_op_ext32s_i64, { "r", "r" } }, - - { INDEX_op_setcond_i32, { "r", "r", "ri" } }, - { INDEX_op_setcond_i64, { "r", "r", "ri" } }, - { INDEX_op_movcond_i32, { "r", "r", "ri", "rZ", "rZ" } }, - { INDEX_op_movcond_i64, { "r", "r", "ri", "rZ", "rZ" } }, - - { INDEX_op_bswap16_i32, { "r", "r" } }, - { INDEX_op_bswap16_i64, { "r", "r" } }, - { INDEX_op_bswap32_i32, { "r", "r" } }, - { INDEX_op_bswap32_i64, { "r", "r" } }, - { INDEX_op_bswap64_i64, { "r", "r" } }, - - { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, - { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, - - { INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } }, - { INDEX_op_sub2_i64, { "r", "r", "rI", "r", "rZM", "r" } }, - { INDEX_op_mulsh_i64, { "r", "r", "r" } }, - { INDEX_op_muluh_i64, { "r", "r", "r" } }, - - { -1 }, -}; - -static void tcg_target_init(TCGContext *s) -{ - unsigned long hwcap = qemu_getauxval(AT_HWCAP); - if (hwcap & PPC_FEATURE_ARCH_2_06) { - have_isa_2_06 = true; - } - - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); - tcg_regset_set32(tcg_target_call_clobber_regs, 0, - (1 << TCG_REG_R0) | - (1 << TCG_REG_R2) | - (1 << TCG_REG_R3) | - (1 << TCG_REG_R4) | - (1 << TCG_REG_R5) | - (1 << TCG_REG_R6) | - (1 << TCG_REG_R7) | - (1 << TCG_REG_R8) | - (1 << TCG_REG_R9) | - (1 << TCG_REG_R10) | - (1 << TCG_REG_R11) | - (1 << TCG_REG_R12)); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* mem temp */ -#ifdef __APPLE__ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R11); /* ??? */ -#endif - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ - - tcg_add_target_add_op_defs(ppc_op_defs); -} - -typedef struct { - DebugFrameCIE cie; - DebugFrameFDEHeader fde; - uint8_t fde_def_cfa[4]; - uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3]; -} DebugFrame; - -/* We're expecting a 2 byte uleb128 encoded value. */ -QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); - -#define ELF_HOST_MACHINE EM_PPC64 - -static DebugFrame debug_frame = { - .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .cie.id = -1, - .cie.version = 1, - .cie.code_align = 1, - .cie.data_align = 0x78, /* sleb128 -8 */ - .cie.return_column = 65, - - /* Total FDE size does not include the "len" member. */ - .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), - - .fde_def_cfa = { - 12, 1, /* DW_CFA_def_cfa r1, ... */ - (FRAME_SIZE & 0x7f) | 0x80, /* ... 
uleb128 FRAME_SIZE */ - (FRAME_SIZE >> 7) - }, - .fde_reg_ofs = { - 0x11, 65, 0x7e, /* DW_CFA_offset_extended_sf, lr, 16 */ - } -}; - -void tcg_register_jit(void *buf, size_t buf_size) -{ - uint8_t *p = &debug_frame.fde_reg_ofs[3]; - int i; - - for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) { - p[0] = 0x80 + tcg_target_callee_save_regs[i]; - p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * 8)) / 8; - } - - debug_frame.fde.func_start = (tcg_target_long) buf; - debug_frame.fde.func_len = buf_size; - - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h deleted file mode 100644 index 7ee50b6c6..000000000 --- a/tcg/ppc64/tcg-target.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ -#ifndef TCG_TARGET_PPC64 -#define TCG_TARGET_PPC64 1 - -#define TCG_TARGET_WORDS_BIGENDIAN -#define TCG_TARGET_NB_REGS 32 - -typedef enum { - TCG_REG_R0 = 0, - TCG_REG_R1, - TCG_REG_R2, - TCG_REG_R3, - TCG_REG_R4, - TCG_REG_R5, - TCG_REG_R6, - TCG_REG_R7, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, - TCG_REG_R11, - TCG_REG_R12, - TCG_REG_R13, - TCG_REG_R14, - TCG_REG_R15, - TCG_REG_R16, - TCG_REG_R17, - TCG_REG_R18, - TCG_REG_R19, - TCG_REG_R20, - TCG_REG_R21, - TCG_REG_R22, - TCG_REG_R23, - TCG_REG_R24, - TCG_REG_R25, - TCG_REG_R26, - TCG_REG_R27, - TCG_REG_R28, - TCG_REG_R29, - TCG_REG_R30, - TCG_REG_R31 -} TCGReg; - -/* used for function call generation */ -#define TCG_REG_CALL_STACK TCG_REG_R1 -#define TCG_TARGET_STACK_ALIGN 16 -#define TCG_TARGET_CALL_STACK_OFFSET 48 - -/* optional instructions automatically implemented */ -#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */ -#define TCG_TARGET_HAS_ext16u_i32 0 -#define TCG_TARGET_HAS_ext8u_i64 0 -#define TCG_TARGET_HAS_ext16u_i64 0 -#define TCG_TARGET_HAS_ext32u_i64 0 - -/* optional instructions */ -#define TCG_TARGET_HAS_div_i32 1 -#define TCG_TARGET_HAS_rem_i32 0 -#define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_bswap16_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_neg_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 -#define TCG_TARGET_HAS_orc_i32 1 -#define TCG_TARGET_HAS_eqv_i32 1 -#define TCG_TARGET_HAS_nand_i32 1 -#define TCG_TARGET_HAS_nor_i32 1 -#define TCG_TARGET_HAS_deposit_i32 1 -#define TCG_TARGET_HAS_movcond_i32 1 -#define TCG_TARGET_HAS_add2_i32 0 -#define TCG_TARGET_HAS_sub2_i32 0 -#define TCG_TARGET_HAS_mulu2_i32 0 -#define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_muluh_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 - -#define TCG_TARGET_HAS_div_i64 1 -#define TCG_TARGET_HAS_rem_i64 0 -#define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_bswap16_i64 1 -#define TCG_TARGET_HAS_bswap32_i64 1 -#define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_neg_i64 1 -#define TCG_TARGET_HAS_andc_i64 1 -#define TCG_TARGET_HAS_orc_i64 1 -#define TCG_TARGET_HAS_eqv_i64 1 -#define TCG_TARGET_HAS_nand_i64 1 -#define TCG_TARGET_HAS_nor_i64 1 -#define TCG_TARGET_HAS_deposit_i64 1 -#define TCG_TARGET_HAS_movcond_i64 1 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 1 -#define TCG_TARGET_HAS_mulsh_i64 1 - -#define TCG_TARGET_HAS_new_ldst 1 - -#define TCG_AREG0 TCG_REG_R27 - -#define TCG_TARGET_EXTEND_ARGS 1 - -#endif diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index 907d9d174..63e9c82cb 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -24,7 +24,7 @@ * THE SOFTWARE. */ -#include "tcg-be-null.h" +#include "tcg-be-ldst.h" /* We only support generating code for 64-bit mode. */ #if TCG_TARGET_REG_BITS != 64 @@ -38,11 +38,11 @@ a 32-bit displacement here Just In Case. 
*/ #define USE_LONG_BRANCHES 0 -#define TCG_CT_CONST_32 0x0100 -#define TCG_CT_CONST_MULI 0x0800 -#define TCG_CT_CONST_ORI 0x2000 -#define TCG_CT_CONST_XORI 0x4000 -#define TCG_CT_CONST_CMPI 0x8000 +#define TCG_CT_CONST_MULI 0x100 +#define TCG_CT_CONST_ORI 0x200 +#define TCG_CT_CONST_XORI 0x400 +#define TCG_CT_CONST_CMPI 0x800 +#define TCG_CT_CONST_ADLI 0x1000 /* Several places within the instruction set 0 means "no register" rather than TCG_REG_R0. */ @@ -228,16 +228,6 @@ typedef enum S390Opcode { RX_STH = 0x40, } S390Opcode; -#define LD_SIGNED 0x04 -#define LD_UINT8 0x00 -#define LD_INT8 (LD_UINT8 | LD_SIGNED) -#define LD_UINT16 0x01 -#define LD_INT16 (LD_UINT16 | LD_SIGNED) -#define LD_UINT32 0x02 -#define LD_INT32 (LD_UINT32 | LD_SIGNED) -#define LD_UINT64 0x03 -#define LD_INT64 (LD_UINT64 | LD_SIGNED) - #ifndef NDEBUG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", @@ -249,6 +239,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { call-saved registers. Likewise prefer the call-clobbered registers in reverse order to maximize the chance of avoiding the arguments. */ static const int tcg_target_reg_alloc_order[] = { + /* Call saved registers. */ TCG_REG_R13, TCG_REG_R12, TCG_REG_R11, @@ -257,9 +248,11 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_R8, TCG_REG_R7, TCG_REG_R6, + /* Call clobbered registers. */ TCG_REG_R14, TCG_REG_R0, TCG_REG_R1, + /* Argument registers, in reverse order of allocation. */ TCG_REG_R5, TCG_REG_R4, TCG_REG_R3, @@ -319,26 +312,33 @@ static const uint8_t tcg_cond_to_ltr_cond[] = { }; #ifdef CONFIG_SOFTMMU -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ -static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LESL] = helper_le_ldsl_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BESL] = helper_be_ldsl_mmu, + [MO_BEQ] = helper_be_ldq_mmu, }; -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ -static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, }; #endif -static uint8_t *tb_ret_addr; +static tcg_insn_unit *tb_ret_addr; /* A list of relevant facilities used by this translator. Some of these are required for proper operation, and these are checked at startup. */ @@ -351,23 +351,20 @@ static uint8_t *tb_ret_addr; static uint64_t facilities; -static void patch_reloc(uint8_t *code_ptr, int type, +static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { - intptr_t code_ptr_tl = (intptr_t)code_ptr; - intptr_t pcrel2; - - /* ??? Not the usual definition of "addend". 
*/ - pcrel2 = (value - (code_ptr_tl + addend)) >> 1; + intptr_t pcrel2 = (tcg_insn_unit *)value - (code_ptr - 1); + assert(addend == -2); switch (type) { case R_390_PC16DBL: assert(pcrel2 == (int16_t)pcrel2); - *(int16_t *)code_ptr = pcrel2; + tcg_patch16(code_ptr, pcrel2); break; case R_390_PC32DBL: assert(pcrel2 == (int32_t)pcrel2); - *(int32_t *)code_ptr = pcrel2; + tcg_patch32(code_ptr, pcrel2); break; default: tcg_abort(); @@ -407,8 +404,8 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) tcg_regset_clear(ct->u.regs); tcg_regset_set_reg(ct->u.regs, TCG_REG_R3); break; - case 'W': /* force 32-bit ("word") immediate */ - ct->ct |= TCG_CT_CONST_32; + case 'A': + ct->ct |= TCG_CT_CONST_ADLI; break; case 'K': ct->ct |= TCG_CT_CONST_MULI; @@ -437,10 +434,10 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) can load efficiently, and the immediate load plus the reg-reg OR is smaller than the sequential OI's. */ -static int tcg_match_ori(int ct, tcg_target_long val) +static int tcg_match_ori(TCGType type, tcg_target_long val) { if (facilities & FACILITY_EXT_IMM) { - if (ct & TCG_CT_CONST_32) { + if (type == TCG_TYPE_I32) { /* All 32-bit ORs can be performed with 1 48-bit insn. */ return 1; } @@ -466,13 +463,13 @@ static int tcg_match_ori(int ct, tcg_target_long val) extended-immediate facility. That said, there are a few patterns for which it is better to load the value into a register first. */ -static int tcg_match_xori(int ct, tcg_target_long val) +static int tcg_match_xori(TCGType type, tcg_target_long val) { if ((facilities & FACILITY_EXT_IMM) == 0) { return 0; } - if (ct & TCG_CT_CONST_32) { + if (type == TCG_TYPE_I32) { /* All 32-bit XORs can be performed with 1 48-bit insn. */ return 1; } @@ -487,11 +484,11 @@ static int tcg_match_xori(int ct, tcg_target_long val) /* Imediates to be used with comparisons. */ -static int tcg_match_cmpi(int ct, tcg_target_long val) +static int tcg_match_cmpi(TCGType type, tcg_target_long val) { if (facilities & FACILITY_EXT_IMM) { /* The COMPARE IMMEDIATE instruction is available. */ - if (ct & TCG_CT_CONST_32) { + if (type == TCG_TYPE_I32) { /* We have a 32-bit immediate and can compare against anything. */ return 1; } else { @@ -514,8 +511,22 @@ static int tcg_match_cmpi(int ct, tcg_target_long val) } } +/* Immediates to be used with add2/sub2. */ + +static int tcg_match_add2i(TCGType type, tcg_target_long val) +{ + if (facilities & FACILITY_EXT_IMM) { + if (type == TCG_TYPE_I32) { + return 1; + } else if (val >= -0xffffffffll && val <= 0xffffffffll) { + return 1; + } + } + return 0; +} + /* Test if a constant matches the constraint. */ -static int tcg_target_const_match(tcg_target_long val, +static int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct) { int ct = arg_ct->ct; @@ -524,8 +535,7 @@ static int tcg_target_const_match(tcg_target_long val, return 1; } - /* Handle the modifiers. 
*/ - if (ct & TCG_CT_CONST_32) { + if (type == TCG_TYPE_I32) { val = (int32_t)val; } @@ -540,12 +550,14 @@ static int tcg_target_const_match(tcg_target_long val, } else { return val == (int16_t)val; } + } else if (ct & TCG_CT_CONST_ADLI) { + return tcg_match_add2i(type, val); } else if (ct & TCG_CT_CONST_ORI) { - return tcg_match_ori(ct, val); + return tcg_match_ori(type, val); } else if (ct & TCG_CT_CONST_XORI) { - return tcg_match_xori(ct, val); + return tcg_match_xori(type, val); } else if (ct & TCG_CT_CONST_CMPI) { - return tcg_match_cmpi(ct, val); + return tcg_match_cmpi(type, val); } return 0; @@ -677,7 +689,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, /* Try for PC-relative address load. */ if ((sval & 1) == 0) { - intptr_t off = (sval - (intptr_t)s->code_ptr) >> 1; + ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1; if (off == (int32_t)off) { tcg_out_insn(s, RIL, LARL, ret, off); return; @@ -794,10 +806,10 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data, /* load data from an absolute host address */ static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs) { - tcg_target_long addr = (tcg_target_long)abs; + intptr_t addr = (intptr_t)abs; - if (facilities & FACILITY_GEN_INST_EXT) { - tcg_target_long disp = (addr - (tcg_target_long)s->code_ptr) >> 1; + if ((facilities & FACILITY_GEN_INST_EXT) && !(addr & 1)) { + ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1; if (disp == (int32_t)disp) { if (type == TCG_TYPE_I32) { tcg_out_insn(s, RIL, LRL, dest, disp); @@ -941,6 +953,20 @@ static inline bool risbg_mask(uint64_t c) return c == -lsb; } +static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val) +{ + int msb, lsb; + if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) { + /* Achieve wraparound by swapping msb and lsb. */ + msb = 64 - ctz64(~val); + lsb = clz64(~val) - 1; + } else { + msb = clz64(val); + lsb = 63 - ctz64(val); + } + tcg_out_risbg(s, out, in, msb, lsb, 0, 1); +} + static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) { static const S390Opcode ni_insns[4] = { @@ -988,16 +1014,7 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) } } if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) { - int msb, lsb; - if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) { - /* Achieve wraparound by swapping msb and lsb. */ - msb = 63 - ctz64(~val); - lsb = clz64(~val) + 1; - } else { - msb = clz64(val); - lsb = 63 - ctz64(val); - } - tcg_out_risbg(s, dest, dest, msb, lsb, 0, 1); + tgen_andi_risbg(s, dest, dest, val); return; } @@ -1118,15 +1135,100 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, return tcg_cond_to_s390_cond[c]; } -static void tgen_setcond(TCGContext *s, TCGType type, TCGCond c, +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, TCGReg dest, TCGReg c1, TCGArg c2, int c2const) { - int cc = tgen_cmp(s, type, c, c1, c2, c2const); + int cc; + + switch (cond) { + case TCG_COND_GTU: + case TCG_COND_GT: + do_greater: + /* The result of a compare has CC=2 for GT and CC=3 unused. + ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit. */ + tgen_cmp(s, type, cond, c1, c2, c2const); + tcg_out_movi(s, type, dest, 0); + tcg_out_insn(s, RRE, ALCGR, dest, dest); + return; + + case TCG_COND_GEU: + do_geu: + /* We need "real" carry semantics, so use SUBTRACT LOGICAL + instead of COMPARE LOGICAL. This needs an extra move. 
*/ + tcg_out_mov(s, type, TCG_TMP0, c1); + if (c2const) { + tcg_out_movi(s, TCG_TYPE_I64, dest, 0); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, SLFI, TCG_TMP0, c2); + } else { + tcg_out_insn(s, RIL, SLGFI, TCG_TMP0, c2); + } + } else { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, SLR, TCG_TMP0, c2); + } else { + tcg_out_insn(s, RRE, SLGR, TCG_TMP0, c2); + } + tcg_out_movi(s, TCG_TYPE_I64, dest, 0); + } + tcg_out_insn(s, RRE, ALCGR, dest, dest); + return; + + case TCG_COND_LEU: + case TCG_COND_LTU: + case TCG_COND_LT: + /* Swap operands so that we can use GEU/GTU/GT. */ + if (c2const) { + tcg_out_movi(s, type, TCG_TMP0, c2); + c2 = c1; + c2const = 0; + c1 = TCG_TMP0; + } else { + TCGReg t = c1; + c1 = c2; + c2 = t; + } + if (cond == TCG_COND_LEU) { + goto do_geu; + } + cond = tcg_swap_cond(cond); + goto do_greater; + + case TCG_COND_NE: + /* X != 0 is X > 0. */ + if (c2const && c2 == 0) { + cond = TCG_COND_GTU; + goto do_greater; + } + break; + + case TCG_COND_EQ: + /* X == 0 is X <= 0 is 0 >= X. */ + if (c2const && c2 == 0) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0); + c2 = c1; + c2const = 0; + c1 = TCG_TMP0; + goto do_geu; + } + break; + + default: + break; + } - /* Emit: r1 = 1; if (cc) goto over; r1 = 0; over: */ - tcg_out_movi(s, type, dest, 1); - tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); - tcg_out_movi(s, type, dest, 0); + cc = tgen_cmp(s, type, cond, c1, c2, c2const); + if (facilities & FACILITY_LOAD_ON_COND) { + /* Emit: d = 0, t = 1, d = (cc ? t : d). */ + tcg_out_movi(s, TCG_TYPE_I64, dest, 0); + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1); + tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc); + } else { + /* Emit: d = 1; if (cc) goto over; d = 0; over: */ + tcg_out_movi(s, type, dest, 1); + tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); + tcg_out_movi(s, type, dest, 0); + } } static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, @@ -1159,15 +1261,15 @@ static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src, tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0); } -static void tgen_gotoi(TCGContext *s, int cc, tcg_target_long dest) +static void tgen_gotoi(TCGContext *s, int cc, tcg_insn_unit *dest) { - tcg_target_long off = (dest - (tcg_target_long)s->code_ptr) >> 1; - if (off > -0x8000 && off < 0x7fff) { + ptrdiff_t off = dest - s->code_ptr; + if (off == (int16_t)off) { tcg_out_insn(s, RI, BRC, cc, off); } else if (off == (int32_t)off) { tcg_out_insn(s, RIL, BRCL, cc, off); } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, dest); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest); tcg_out_insn(s, RR, BCR, cc, TCG_TMP0); } } @@ -1176,15 +1278,15 @@ static void tgen_branch(TCGContext *s, int cc, int labelno) { TCGLabel* l = &s->labels[labelno]; if (l->has_value) { - tgen_gotoi(s, cc, l->u.value); + tgen_gotoi(s, cc, l->u.value_ptr); } else if (USE_LONG_BRANCHES) { tcg_out16(s, RIL_BRCL | (cc << 4)); tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, labelno, -2); - s->code_ptr += 4; + s->code_ptr += 2; } else { tcg_out16(s, RI_BRC | (cc << 4)); tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, labelno, -2); - s->code_ptr += 2; + s->code_ptr += 1; } } @@ -1192,14 +1294,14 @@ static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, TCGReg r1, TCGReg r2, int labelno) { TCGLabel* l = &s->labels[labelno]; - tcg_target_long off; + intptr_t off; if (l->has_value) { - off = (l->u.value - (tcg_target_long)s->code_ptr) >> 1; + off = l->u.value_ptr - s->code_ptr; } else { /* We need to keep the offset unchanged for retranslation. 
*/ - off = ((int16_t *)s->code_ptr)[1]; - tcg_out_reloc(s, s->code_ptr + 2, R_390_PC16DBL, labelno, -2); + off = s->code_ptr[1]; + tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, labelno, -2); } tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2); @@ -1214,11 +1316,11 @@ static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc, tcg_target_long off; if (l->has_value) { - off = (l->u.value - (tcg_target_long)s->code_ptr) >> 1; + off = l->u.value_ptr - s->code_ptr; } else { /* We need to keep the offset unchanged for retranslation. */ - off = ((int16_t *)s->code_ptr)[1]; - tcg_out_reloc(s, s->code_ptr + 2, R_390_PC16DBL, labelno, -2); + off = s->code_ptr[1]; + tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, labelno, -2); } tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc); @@ -1277,243 +1379,242 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, tgen_branch(s, cc, labelno); } -static void tgen_calli(TCGContext *s, tcg_target_long dest) +static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) { - tcg_target_long off = (dest - (tcg_target_long)s->code_ptr) >> 1; + ptrdiff_t off = dest - s->code_ptr; if (off == (int32_t)off) { tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off); } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, dest); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest); tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0); } } -static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data, +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, TCGReg data, TCGReg base, TCGReg index, int disp) { -#ifdef TARGET_WORDS_BIGENDIAN - const int bswap = 0; -#else - const int bswap = 1; -#endif switch (opc) { - case LD_UINT8: + case MO_UB: tcg_out_insn(s, RXY, LLGC, data, base, index, disp); break; - case LD_INT8: + case MO_SB: tcg_out_insn(s, RXY, LGB, data, base, index, disp); break; - case LD_UINT16: - if (bswap) { - /* swapped unsigned halfword load with upper bits zeroed */ - tcg_out_insn(s, RXY, LRVH, data, base, index, disp); - tgen_ext16u(s, TCG_TYPE_I64, data, data); - } else { - tcg_out_insn(s, RXY, LLGH, data, base, index, disp); - } + + case MO_UW | MO_BSWAP: + /* swapped unsigned halfword load with upper bits zeroed */ + tcg_out_insn(s, RXY, LRVH, data, base, index, disp); + tgen_ext16u(s, TCG_TYPE_I64, data, data); break; - case LD_INT16: - if (bswap) { - /* swapped sign-extended halfword load */ - tcg_out_insn(s, RXY, LRVH, data, base, index, disp); - tgen_ext16s(s, TCG_TYPE_I64, data, data); - } else { - tcg_out_insn(s, RXY, LGH, data, base, index, disp); - } + case MO_UW: + tcg_out_insn(s, RXY, LLGH, data, base, index, disp); break; - case LD_UINT32: - if (bswap) { - /* swapped unsigned int load with upper bits zeroed */ - tcg_out_insn(s, RXY, LRV, data, base, index, disp); - tgen_ext32u(s, data, data); - } else { - tcg_out_insn(s, RXY, LLGF, data, base, index, disp); - } + + case MO_SW | MO_BSWAP: + /* swapped sign-extended halfword load */ + tcg_out_insn(s, RXY, LRVH, data, base, index, disp); + tgen_ext16s(s, TCG_TYPE_I64, data, data); break; - case LD_INT32: - if (bswap) { - /* swapped sign-extended int load */ - tcg_out_insn(s, RXY, LRV, data, base, index, disp); - tgen_ext32s(s, data, data); - } else { - tcg_out_insn(s, RXY, LGF, data, base, index, disp); - } + case MO_SW: + tcg_out_insn(s, RXY, LGH, data, base, index, disp); break; - case LD_UINT64: - if (bswap) { - tcg_out_insn(s, RXY, LRVG, data, base, index, disp); - } else { - tcg_out_insn(s, RXY, LG, data, base, index, disp); - } + + case MO_UL | MO_BSWAP: + /* 
swapped unsigned int load with upper bits zeroed */ + tcg_out_insn(s, RXY, LRV, data, base, index, disp); + tgen_ext32u(s, data, data); + break; + case MO_UL: + tcg_out_insn(s, RXY, LLGF, data, base, index, disp); break; + + case MO_SL | MO_BSWAP: + /* swapped sign-extended int load */ + tcg_out_insn(s, RXY, LRV, data, base, index, disp); + tgen_ext32s(s, data, data); + break; + case MO_SL: + tcg_out_insn(s, RXY, LGF, data, base, index, disp); + break; + + case MO_Q | MO_BSWAP: + tcg_out_insn(s, RXY, LRVG, data, base, index, disp); + break; + case MO_Q: + tcg_out_insn(s, RXY, LG, data, base, index, disp); + break; + default: tcg_abort(); } } -static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data, +static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data, TCGReg base, TCGReg index, int disp) { -#ifdef TARGET_WORDS_BIGENDIAN - const int bswap = 0; -#else - const int bswap = 1; -#endif switch (opc) { - case LD_UINT8: + case MO_UB: if (disp >= 0 && disp < 0x1000) { tcg_out_insn(s, RX, STC, data, base, index, disp); } else { tcg_out_insn(s, RXY, STCY, data, base, index, disp); } break; - case LD_UINT16: - if (bswap) { - tcg_out_insn(s, RXY, STRVH, data, base, index, disp); - } else if (disp >= 0 && disp < 0x1000) { + + case MO_UW | MO_BSWAP: + tcg_out_insn(s, RXY, STRVH, data, base, index, disp); + break; + case MO_UW: + if (disp >= 0 && disp < 0x1000) { tcg_out_insn(s, RX, STH, data, base, index, disp); } else { tcg_out_insn(s, RXY, STHY, data, base, index, disp); } break; - case LD_UINT32: - if (bswap) { - tcg_out_insn(s, RXY, STRV, data, base, index, disp); - } else if (disp >= 0 && disp < 0x1000) { + + case MO_UL | MO_BSWAP: + tcg_out_insn(s, RXY, STRV, data, base, index, disp); + break; + case MO_UL: + if (disp >= 0 && disp < 0x1000) { tcg_out_insn(s, RX, ST, data, base, index, disp); } else { tcg_out_insn(s, RXY, STY, data, base, index, disp); } break; - case LD_UINT64: - if (bswap) { - tcg_out_insn(s, RXY, STRVG, data, base, index, disp); - } else { - tcg_out_insn(s, RXY, STG, data, base, index, disp); - } + + case MO_Q | MO_BSWAP: + tcg_out_insn(s, RXY, STRVG, data, base, index, disp); + break; + case MO_Q: + tcg_out_insn(s, RXY, STG, data, base, index, disp); break; + default: tcg_abort(); } } #if defined(CONFIG_SOFTMMU) -static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg, - TCGReg addr_reg, int mem_index, int opc, - uint16_t **label2_ptr_p, int is_store) +/* We're expecting to use a 20-bit signed offset on the tlb memory ops. + Using the offset of the second entry in the last tlb table ensures + that we can index all of the elements of the first entry. */ +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ffff); + +/* Load and compare a TLB entry, leaving the flags set. Loads the TLB + addend into R2. Returns a register with the santitized guest address. 
*/ +static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc, + int mem_index, bool is_ld) { - const TCGReg arg0 = tcg_target_call_iarg_regs[0]; - const TCGReg arg1 = tcg_target_call_iarg_regs[1]; - const TCGReg arg2 = tcg_target_call_iarg_regs[2]; - const TCGReg arg3 = tcg_target_call_iarg_regs[3]; - int s_bits = opc & 3; - uint16_t *label1_ptr; - tcg_target_long ofs; + TCGMemOp s_bits = opc & MO_SIZE; + uint64_t tlb_mask = TARGET_PAGE_MASK | ((1 << s_bits) - 1); + int ofs; - if (TARGET_LONG_BITS == 32) { - tgen_ext32u(s, arg1, addr_reg); + if (facilities & FACILITY_GEN_INST_EXT) { + tcg_out_risbg(s, TCG_REG_R2, addr_reg, + 64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS, + 63 - CPU_TLB_ENTRY_BITS, + 64 + CPU_TLB_ENTRY_BITS - TARGET_PAGE_BITS, 1); + tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); } else { - tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg); + tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_R3, addr_reg); + tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); } - tcg_out_sh64(s, RSY_SRLG, arg2, addr_reg, TCG_REG_NONE, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - - tgen_andi(s, TCG_TYPE_I64, arg1, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - tgen_andi(s, TCG_TYPE_I64, arg2, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); - - if (is_store) { - ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); - } else { + if (is_ld) { ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read); + } else { + ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); } - assert(ofs < 0x80000); - if (TARGET_LONG_BITS == 32) { - tcg_out_mem(s, RX_C, RXY_CY, arg1, arg2, TCG_AREG0, ofs); + tcg_out_mem(s, RX_C, RXY_CY, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs); } else { - tcg_out_mem(s, 0, RXY_CG, arg1, arg2, TCG_AREG0, ofs); + tcg_out_mem(s, 0, RXY_CG, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs); } - if (TARGET_LONG_BITS == 32) { - tgen_ext32u(s, arg1, addr_reg); - } else { - tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg); - } - - label1_ptr = (uint16_t*)s->code_ptr; - - /* je label1 (offset will be patched in later) */ - tcg_out_insn(s, RI, BRC, S390_CC_EQ, 0); - - /* call load/store helper */ - if (is_store) { - /* Make sure to zero-extend the value to the full register - for the calling convention. 
*/ - switch (opc) { - case LD_UINT8: - tgen_ext8u(s, TCG_TYPE_I64, arg2, data_reg); - break; - case LD_UINT16: - tgen_ext16u(s, TCG_TYPE_I64, arg2, data_reg); - break; - case LD_UINT32: - tgen_ext32u(s, arg2, data_reg); - break; - case LD_UINT64: - tcg_out_mov(s, TCG_TYPE_I64, arg2, data_reg); - break; - default: - tcg_abort(); - } - tcg_out_movi(s, TCG_TYPE_I32, arg3, mem_index); - tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0); - tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]); - } else { - tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index); - tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0); - tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]); + ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + tcg_out_mem(s, 0, RXY_LG, TCG_REG_R2, TCG_REG_R2, TCG_AREG0, ofs); - /* sign extension */ - switch (opc) { - case LD_INT8: - tgen_ext8s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); - break; - case LD_INT16: - tgen_ext16s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); - break; - case LD_INT32: - tgen_ext32s(s, data_reg, TCG_REG_R2); - break; - default: - /* unsigned -> just copy */ - tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); - break; - } + if (TARGET_LONG_BITS == 32) { + tgen_ext32u(s, TCG_REG_R3, addr_reg); + return TCG_REG_R3; } + return addr_reg; +} - /* jump to label2 (end) */ - *label2_ptr_p = (uint16_t*)s->code_ptr; - - tcg_out_insn(s, RI, BRC, S390_CC_ALWAYS, 0); +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, + TCGReg data, TCGReg addr, int mem_index, + tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = data; + label->addrlo_reg = addr; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} - /* this is label1, patch branch */ - *(label1_ptr + 1) = ((unsigned long)s->code_ptr - - (unsigned long)label1_ptr) >> 1; +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg addr_reg = lb->addrlo_reg; + TCGReg data_reg = lb->datalo_reg; + TCGMemOp opc = lb->opc; - ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend); - assert(ofs < 0x80000); + patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2); - tcg_out_mem(s, 0, RXY_AG, arg1, arg2, TCG_AREG0, ofs); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); + } + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, lb->mem_index); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr); + tcg_out_call(s, qemu_ld_helpers[opc]); + tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); - return arg1; + tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); } -static void tcg_finish_qemu_ldst(TCGContext* s, uint16_t *label2_ptr) +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - /* patch branch */ - *(label2_ptr + 1) = ((unsigned long)s->code_ptr - - (unsigned long)label2_ptr) >> 1; + TCGReg addr_reg = lb->addrlo_reg; + TCGReg data_reg = lb->datalo_reg; + TCGMemOp opc = lb->opc; + + patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); + } + switch (opc & MO_SIZE) { + case MO_UB: + tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + case MO_UW: + tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + case MO_UL: + 
tgen_ext32u(s, TCG_REG_R4, data_reg); + break; + case MO_Q: + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + default: + tcg_abort(); + } + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb->mem_index); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); + tcg_out_call(s, qemu_st_helpers[opc]); + + tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); } #else static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, @@ -1533,61 +1634,51 @@ static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, } #endif /* CONFIG_SOFTMMU */ -/* load data with address translation (if applicable) - and endianness conversion */ -static void tcg_out_qemu_ld(TCGContext* s, const TCGArg* args, int opc) +static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOp opc, int mem_index) { - TCGReg addr_reg, data_reg; -#if defined(CONFIG_SOFTMMU) - int mem_index; - uint16_t *label2_ptr; -#else - TCGReg index_reg; - tcg_target_long disp; -#endif - - data_reg = *args++; - addr_reg = *args++; +#ifdef CONFIG_SOFTMMU + tcg_insn_unit *label_ptr; + TCGReg base_reg; -#if defined(CONFIG_SOFTMMU) - mem_index = *args; + base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1); - addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index, - opc, &label2_ptr, 0); + label_ptr = s->code_ptr + 1; + tcg_out_insn(s, RI, BRC, S390_CC_NE, 0); - tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0); + tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); - tcg_finish_qemu_ldst(s, label2_ptr); + add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg, mem_index, + s->code_ptr, label_ptr); #else + TCGReg index_reg; + tcg_target_long disp; + tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp); #endif } -static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, int opc) +static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOp opc, int mem_index) { - TCGReg addr_reg, data_reg; -#if defined(CONFIG_SOFTMMU) - int mem_index; - uint16_t *label2_ptr; -#else - TCGReg index_reg; - tcg_target_long disp; -#endif - - data_reg = *args++; - addr_reg = *args++; +#ifdef CONFIG_SOFTMMU + tcg_insn_unit *label_ptr; + TCGReg base_reg; -#if defined(CONFIG_SOFTMMU) - mem_index = *args; + base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0); - addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index, - opc, &label2_ptr, 1); + label_ptr = s->code_ptr + 1; + tcg_out_insn(s, RI, BRC, S390_CC_NE, 0); - tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0); + tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); - tcg_finish_qemu_ldst(s, label2_ptr); + add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg, mem_index, + s->code_ptr, label_ptr); #else + TCGReg index_reg; + tcg_target_long disp; + tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp); #endif @@ -1607,34 +1698,21 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_exit_tb: /* return value */ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, args[0]); - tgen_gotoi(s, S390_CC_ALWAYS, (unsigned long)tb_ret_addr); + tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { - tcg_abort(); + tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + s->code_ptr += 2; } else { /* load address stored at 
s->tb_next + args[0] */ tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_TMP0, s->tb_next + args[0]); /* and go there */ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_TMP0); } - s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; - break; - - case INDEX_op_call: - if (const_args[0]) { - tgen_calli(s, args[0]); - } else { - tcg_out_insn(s, RR, BASR, TCG_REG_R14, args[0]); - } - break; - - case INDEX_op_mov_i32: - tcg_out_mov(s, TCG_TYPE_I32, args[0], args[1]); - break; - case INDEX_op_movi_i32: - tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); + s->tb_next_offset[args[0]] = tcg_current_code_size(s); break; OP_32_64(ld8u): @@ -1809,13 +1887,19 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_add2_i32: - /* ??? Make use of ALFI. */ - tcg_out_insn(s, RR, ALR, args[0], args[4]); + if (const_args[4]) { + tcg_out_insn(s, RIL, ALFI, args[0], args[4]); + } else { + tcg_out_insn(s, RR, ALR, args[0], args[4]); + } tcg_out_insn(s, RRE, ALCR, args[1], args[5]); break; case INDEX_op_sub2_i32: - /* ??? Make use of SLFI. */ - tcg_out_insn(s, RR, SLR, args[0], args[4]); + if (const_args[4]) { + tcg_out_insn(s, RIL, SLFI, args[0], args[4]); + } else { + tcg_out_insn(s, RR, SLR, args[0], args[4]); + } tcg_out_insn(s, RRE, SLBR, args[1], args[5]); break; @@ -1836,44 +1920,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, args[2], const_args[2], args[3]); break; - case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld(s, args, LD_UINT8); - break; - case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, args, LD_INT8); - break; - case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld(s, args, LD_UINT16); - break; - case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, args, LD_INT16); - break; - case INDEX_op_qemu_ld32: + case INDEX_op_qemu_ld_i32: /* ??? Technically we can use a non-extending instruction. */ - tcg_out_qemu_ld(s, args, LD_UINT32); - break; - case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, LD_UINT64); - break; - - case INDEX_op_qemu_st8: - tcg_out_qemu_st(s, args, LD_UINT8); - break; - case INDEX_op_qemu_st16: - tcg_out_qemu_st(s, args, LD_UINT16); + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3]); break; - case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, args, LD_UINT32); - break; - case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, LD_UINT64); - break; - - case INDEX_op_mov_i64: - tcg_out_mov(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_movi_i64: - tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args[0], args[1], args[2], args[3]); break; case INDEX_op_ld16s_i64: @@ -2046,13 +2100,27 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_add2_i64: - /* ??? Make use of ALGFI and SLGFI. */ - tcg_out_insn(s, RRE, ALGR, args[0], args[4]); + if (const_args[4]) { + if ((int64_t)args[4] >= 0) { + tcg_out_insn(s, RIL, ALGFI, args[0], args[4]); + } else { + tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]); + } + } else { + tcg_out_insn(s, RRE, ALGR, args[0], args[4]); + } tcg_out_insn(s, RRE, ALCGR, args[1], args[5]); break; case INDEX_op_sub2_i64: - /* ??? Make use of ALGFI and SLGFI. 
*/ - tcg_out_insn(s, RRE, SLGR, args[0], args[4]); + if (const_args[4]) { + if ((int64_t)args[4] >= 0) { + tcg_out_insn(s, RIL, SLGFI, args[0], args[4]); + } else { + tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]); + } + } else { + tcg_out_insn(s, RRE, SLGR, args[0], args[4]); + } tcg_out_insn(s, RRE, SLBGR, args[1], args[5]); break; @@ -2069,19 +2137,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, args[2], const_args[2], args[3]); break; - case INDEX_op_qemu_ld32u: - tcg_out_qemu_ld(s, args, LD_UINT32); - break; - case INDEX_op_qemu_ld32s: - tcg_out_qemu_ld(s, args, LD_INT32); - break; - OP_32_64(deposit): tgen_deposit(s, args[0], args[2], args[3], args[4]); break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ default: - fprintf(stderr,"unimplemented opc 0x%x\n",opc); tcg_abort(); } } @@ -2089,12 +2154,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, - { INDEX_op_call, { "ri" } }, { INDEX_op_br, { } }, - { INDEX_op_mov_i32, { "r", "r" } }, - { INDEX_op_movi_i32, { "r" } }, - { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, { INDEX_op_ld16u_i32, { "r", "r" } }, @@ -2112,8 +2173,8 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } }, { INDEX_op_and_i32, { "r", "0", "ri" } }, - { INDEX_op_or_i32, { "r", "0", "rWO" } }, - { INDEX_op_xor_i32, { "r", "0", "rWX" } }, + { INDEX_op_or_i32, { "r", "0", "rO" } }, + { INDEX_op_xor_i32, { "r", "0", "rX" } }, { INDEX_op_neg_i32, { "r", "r" } }, @@ -2132,28 +2193,18 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_bswap16_i32, { "r", "r" } }, { INDEX_op_bswap32_i32, { "r", "r" } }, - { INDEX_op_add2_i32, { "r", "r", "0", "1", "r", "r" } }, - { INDEX_op_sub2_i32, { "r", "r", "0", "1", "r", "r" } }, + { INDEX_op_add2_i32, { "r", "r", "0", "1", "rA", "r" } }, + { INDEX_op_sub2_i32, { "r", "r", "0", "1", "rA", "r" } }, - { INDEX_op_brcond_i32, { "r", "rWC" } }, - { INDEX_op_setcond_i32, { "r", "r", "rWC" } }, - { INDEX_op_movcond_i32, { "r", "r", "rWC", "r", "0" } }, + { INDEX_op_brcond_i32, { "r", "rC" } }, + { INDEX_op_setcond_i32, { "r", "r", "rC" } }, + { INDEX_op_movcond_i32, { "r", "r", "rC", "r", "0" } }, { INDEX_op_deposit_i32, { "r", "0", "r" } }, - { INDEX_op_qemu_ld8u, { "r", "L" } }, - { INDEX_op_qemu_ld8s, { "r", "L" } }, - { INDEX_op_qemu_ld16u, { "r", "L" } }, - { INDEX_op_qemu_ld16s, { "r", "L" } }, - { INDEX_op_qemu_ld32, { "r", "L" } }, - { INDEX_op_qemu_ld64, { "r", "L" } }, - - { INDEX_op_qemu_st8, { "L", "L" } }, - { INDEX_op_qemu_st16, { "L", "L" } }, - { INDEX_op_qemu_st32, { "L", "L" } }, - { INDEX_op_qemu_st64, { "L", "L" } }, - - { INDEX_op_mov_i64, { "r", "r" } }, - { INDEX_op_movi_i64, { "r" } }, + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_ld_i64, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "L", "L" } }, + { INDEX_op_qemu_st_i64, { "L", "L" } }, { INDEX_op_ld8u_i64, { "r", "r" } }, { INDEX_op_ld8s_i64, { "r", "r" } }, @@ -2200,17 +2251,14 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_bswap32_i64, { "r", "r" } }, { INDEX_op_bswap64_i64, { "r", "r" } }, - { INDEX_op_add2_i64, { "r", "r", "0", "1", "r", "r" } }, - { INDEX_op_sub2_i64, { "r", "r", "0", "1", "r", "r" } }, + { INDEX_op_add2_i64, { 
"r", "r", "0", "1", "rA", "r" } }, + { INDEX_op_sub2_i64, { "r", "r", "0", "1", "rA", "r" } }, { INDEX_op_brcond_i64, { "r", "rC" } }, { INDEX_op_setcond_i64, { "r", "r", "rC" } }, { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } }, { INDEX_op_deposit_i64, { "r", "0", "r" } }, - { INDEX_op_qemu_ld32u, { "r", "L" } }, - { INDEX_op_qemu_ld32s, { "r", "L" } }, - { -1 }, }; @@ -2245,6 +2293,9 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3); tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4); tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5); + /* The r6 register is technically call-saved, but it's also a parameter + register, so it can get killed by setup for the qemu_st helper. */ + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6); /* The return register can be considered call-clobbered. */ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14); @@ -2257,18 +2308,17 @@ static void tcg_target_init(TCGContext *s) tcg_add_target_add_op_defs(s390_op_defs); } +#define FRAME_SIZE ((int)(TCG_TARGET_CALL_STACK_OFFSET \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long))) + static void tcg_target_qemu_prologue(TCGContext *s) { - tcg_target_long frame_size; - /* stmg %r6,%r15,48(%r15) (save registers) */ tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48); /* aghi %r15,-frame_size */ - frame_size = TCG_TARGET_CALL_STACK_OFFSET; - frame_size += TCG_STATIC_CALL_ARGS_SIZE; - frame_size += CPU_TEMP_BUF_NLONGS * sizeof(long); - tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -frame_size); + tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE); tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET, @@ -2287,8 +2337,53 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* lmg %r6,%r15,fs+48(%r15) (restore registers) */ tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, - frame_size + 48); + FRAME_SIZE + 48); /* br %r14 (return) */ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14); } + +typedef struct { + DebugFrameHeader h; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[18]; +} DebugFrame; + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +#define ELF_HOST_MACHINE EM_S390 + +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 8, /* sleb128 8 */ + .h.cie.return_column = TCG_REG_R14, + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { + 12, TCG_REG_CALL_STACK, /* DW_CFA_def_cfa %r15, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... 
uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + 0x86, 6, /* DW_CFA_offset, %r6, 48 */ + 0x87, 7, /* DW_CFA_offset, %r7, 56 */ + 0x88, 8, /* DW_CFA_offset, %r8, 64 */ + 0x89, 9, /* DW_CFA_offset, %r9, 72 */ + 0x8a, 10, /* DW_CFA_offset, %r10, 80 */ + 0x8b, 11, /* DW_CFA_offset, %r11, 88 */ + 0x8c, 12, /* DW_CFA_offset, %r12, 96 */ + 0x8d, 13, /* DW_CFA_offset, %r13, 104 */ + 0x8e, 14, /* DW_CFA_offset, %r14, 112 */ + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 10adb778c..5acc28ca6 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -24,7 +24,7 @@ #ifndef TCG_TARGET_S390 #define TCG_TARGET_S390 1 -#define TCG_TARGET_WORDS_BIGENDIAN +#define TCG_TARGET_INSN_UNIT_SIZE 2 typedef enum TCGReg { TCG_REG_R0 = 0, @@ -71,6 +71,7 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_trunc_shr_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 @@ -99,8 +100,6 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0 -#define TCG_TARGET_HAS_new_ldst 0 - extern bool tcg_target_deposit_valid(int ofs, int len); #define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid #define TCG_TARGET_deposit_i64_valid tcg_target_deposit_valid diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index 152335cfe..40f2ec102 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -61,6 +61,24 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { }; #endif +#ifdef __arch64__ +# define SPARC64 1 +#else +# define SPARC64 0 +#endif + +/* Note that sparcv8plus can only hold 64 bit quantities in %g and %o + registers. These are saved manually by the kernel in full 64-bit + slots. The %i and %l registers are saved by the register window + mechanism, which only allocates space for 32 bits. Given that this + window spill/fill can happen on any signal, we must consider the + high bits of the %i and %l registers garbage at all times. */ +#if SPARC64 +# define ALL_64 0xffffffffu +#else +# define ALL_64 0xffffu +#endif + /* Define some temporary registers. T2 is used for constant generation. 
*/ #define TCG_REG_T1 TCG_REG_G1 #define TCG_REG_T2 TCG_REG_O7 @@ -182,6 +200,7 @@ static const int tcg_target_call_oarg_regs[] = { #define ARITH_ADDX (INSN_OP(2) | INSN_OP3(0x08)) #define ARITH_SUBX (INSN_OP(2) | INSN_OP3(0x0c)) #define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a)) +#define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b)) #define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e)) #define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f)) #define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09)) @@ -201,6 +220,7 @@ static const int tcg_target_call_oarg_regs[] = { #define RDY (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0)) #define WRY (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0)) #define JMPL (INSN_OP(2) | INSN_OP3(0x38)) +#define RETURN (INSN_OP(2) | INSN_OP3(0x39)) #define SAVE (INSN_OP(2) | INSN_OP3(0x3c)) #define RESTORE (INSN_OP(2) | INSN_OP3(0x3d)) #define SETHI (INSN_OP(0) | INSN_OP2(0x4)) @@ -242,48 +262,49 @@ static const int tcg_target_call_oarg_regs[] = { #define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) #define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) -static inline int check_fit_tl(tcg_target_long val, unsigned int bits) +static inline int check_fit_i64(int64_t val, unsigned int bits) { - return (val << ((sizeof(tcg_target_long) * 8 - bits)) - >> (sizeof(tcg_target_long) * 8 - bits)) == val; + return val == sextract64(val, 0, bits); } -static inline int check_fit_i32(uint32_t val, unsigned int bits) +static inline int check_fit_i32(int32_t val, unsigned int bits) { - return ((val << (32 - bits)) >> (32 - bits)) == val; + return val == sextract32(val, 0, bits); } -static void patch_reloc(uint8_t *code_ptr, int type, +#define check_fit_tl check_fit_i64 +#if SPARC64 +# define check_fit_ptr check_fit_i64 +#else +# define check_fit_ptr check_fit_i32 +#endif + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { uint32_t insn; - value += addend; + + assert(addend == 0); + value = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr); + switch (type) { - case R_SPARC_32: - if (value != (uint32_t)value) { - tcg_abort(); - } - *(uint32_t *)code_ptr = value; - break; case R_SPARC_WDISP16: - value -= (intptr_t)code_ptr; - if (!check_fit_tl(value >> 2, 16)) { + if (!check_fit_ptr(value >> 2, 16)) { tcg_abort(); } - insn = *(uint32_t *)code_ptr; + insn = *code_ptr; insn &= ~INSN_OFF16(-1); insn |= INSN_OFF16(value); - *(uint32_t *)code_ptr = insn; + *code_ptr = insn; break; case R_SPARC_WDISP19: - value -= (intptr_t)code_ptr; - if (!check_fit_tl(value >> 2, 19)) { + if (!check_fit_ptr(value >> 2, 19)) { tcg_abort(); } - insn = *(uint32_t *)code_ptr; + insn = *code_ptr; insn &= ~INSN_OFF19(-1); insn |= INSN_OFF19(value); - *(uint32_t *)code_ptr = insn; + *code_ptr = insn; break; default: tcg_abort(); @@ -301,14 +322,27 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, 0xffffffff); break; - case 'L': /* qemu_ld/st constraint */ + case 'R': ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - // Helper args + tcg_regset_set32(ct->u.regs, 0, ALL_64); + break; + case 'A': /* qemu_ld/st address constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, + TARGET_LONG_BITS == 64 ? 
ALL_64 : 0xffffffff); + reserve_helpers: tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0); tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1); tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2); break; + case 's': /* qemu_st data 32-bit constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + goto reserve_helpers; + case 'S': /* qemu_st data 64-bit constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, ALL_64); + goto reserve_helpers; case 'I': ct->ct |= TCG_CT_CONST_S11; break; @@ -327,14 +361,20 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) } /* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct) { int ct = arg_ct->ct; if (ct & TCG_CT_CONST) { return 1; - } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + } + + if (type == TCG_TYPE_I32) { + val = (int32_t)val; + } + + if ((ct & TCG_CT_CONST_ZERO) && val == 0) { return 1; } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) { return 1; @@ -345,22 +385,20 @@ static inline int tcg_target_const_match(tcg_target_long val, } } -static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2, - int op) +static inline void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1, + TCGReg rs2, int op) { - tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | - INSN_RS2(rs2)); + tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2)); } -static inline void tcg_out_arithi(TCGContext *s, int rd, int rs1, - uint32_t offset, int op) +static inline void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t offset, int op) { - tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | - INSN_IMM13(offset)); + tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset)); } -static void tcg_out_arithc(TCGContext *s, int rd, int rs1, - int val2, int val2const, int op) +static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t val2, int val2const, int op) { tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2))); @@ -374,12 +412,12 @@ static inline void tcg_out_mov(TCGContext *s, TCGType type, } } -static inline void tcg_out_sethi(TCGContext *s, int ret, uint32_t arg) +static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg) { tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10)); } -static inline void tcg_out_movi_imm13(TCGContext *s, int ret, uint32_t arg) +static inline void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg) { tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR); } @@ -387,7 +425,12 @@ static inline void tcg_out_movi_imm13(TCGContext *s, int ret, uint32_t arg) static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg) { - tcg_target_long hi, lo; + tcg_target_long hi, lo = (int32_t)arg; + + /* Make sure we test 32-bit constants for imm13 properly. */ + if (type == TCG_TYPE_I32) { + arg = lo; + } /* A 13-bit constant sign-extended to 64-bits. */ if (check_fit_tl(arg, 13)) { @@ -396,9 +439,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } /* A 32-bit constant, or 32-bit zero-extended to 64-bits. 
*/ - if (TCG_TARGET_REG_BITS == 32 - || type == TCG_TYPE_I32 - || (arg & ~0xffffffffu) == 0) { + if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { tcg_out_sethi(s, ret, arg); if (arg & 0x3ff) { tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); @@ -407,21 +448,20 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } /* A 32-bit constant sign-extended to 64-bits. */ - if (check_fit_tl(arg, 32)) { + if (arg == lo) { tcg_out_sethi(s, ret, ~arg); tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR); return; } /* A 64-bit constant decomposed into 2 32-bit pieces. */ - lo = (int32_t)arg; - if (check_fit_tl(lo, 13)) { - hi = (arg - lo) >> 31 >> 1; + if (check_fit_i32(lo, 13)) { + hi = (arg - lo) >> 32; tcg_out_movi(s, TCG_TYPE_I32, ret, hi); tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); tcg_out_arithi(s, ret, ret, lo, ARITH_ADD); } else { - hi = arg >> 31 >> 1; + hi = arg >> 32; tcg_out_movi(s, TCG_TYPE_I32, ret, hi); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo); tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); @@ -429,16 +469,16 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } } -static inline void tcg_out_ldst_rr(TCGContext *s, int data, int a1, - int a2, int op) +static inline void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1, + TCGReg a2, int op) { tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2)); } -static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, - int offset, int op) +static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, + intptr_t offset, int op) { - if (check_fit_tl(offset, 13)) { + if (check_fit_ptr(offset, 13)) { tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) | INSN_IMM13(offset)); } else { @@ -459,40 +499,24 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX)); } -static inline void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg) +static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg) { - TCGReg base = TCG_REG_G0; - if (!check_fit_tl(arg, 10)) { - tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); - base = ret; - } - tcg_out_ld(s, TCG_TYPE_PTR, ret, base, arg & 0x3ff); + tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); + tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff); } -static inline void tcg_out_sety(TCGContext *s, int rs) +static inline void tcg_out_sety(TCGContext *s, TCGReg rs) { tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); } -static inline void tcg_out_rdy(TCGContext *s, int rd) +static inline void tcg_out_rdy(TCGContext *s, TCGReg rd) { tcg_out32(s, RDY | INSN_RD(rd)); } -static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) -{ - if (val != 0) { - if (check_fit_tl(val, 13)) - tcg_out_arithi(s, reg, reg, val, ARITH_ADD); - else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, val); - tcg_out_arith(s, reg, reg, TCG_REG_T1, ARITH_ADD); - } - } -} - -static void tcg_out_div32(TCGContext *s, int rd, int rs1, - int val2, int val2const, int uns) +static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t val2, int val2const, int uns) { /* Load Y with the sign/zero extension of RS1 to 64-bits. */ if (uns) { @@ -544,47 +568,46 @@ static void tcg_out_bpcc(TCGContext *s, int scond, int flags, int label) int off19; if (l->has_value) { - off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr); + off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr)); } else { /* Make sure to preserve destinations during retranslation. 
*/ - off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1); + off19 = *s->code_ptr & INSN_OFF19(-1); tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label, 0); } tcg_out_bpcc0(s, scond, flags, off19); } -static void tcg_out_cmp(TCGContext *s, TCGArg c1, TCGArg c2, int c2const) +static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const) { tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC); } -static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGArg arg1, - TCGArg arg2, int const_arg2, int label) +static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1, + int32_t arg2, int const_arg2, int label) { tcg_out_cmp(s, arg1, arg2, const_arg2); tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, label); tcg_out_nop(s); } -static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGArg ret, - TCGArg v1, int v1const) +static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret, + int32_t v1, int v1const) { tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret) | INSN_RS1(tcg_cond_to_bcond[cond]) | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1))); } -static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg c1, TCGArg c2, int c2const, - TCGArg v1, int v1const) +static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const, + int32_t v1, int v1const) { tcg_out_cmp(s, c1, c2, c2const); tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const); } -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGArg arg1, - TCGArg arg2, int const_arg2, int label) +static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, + int32_t arg2, int const_arg2, int label) { /* For 64-bit signed comparisons vs zero, we can avoid the compare. */ if (arg2 == 0 && !is_unsigned_cond(cond)) { @@ -592,10 +615,10 @@ static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGArg arg1, int off16; if (l->has_value) { - off16 = INSN_OFF16(l->u.value - (unsigned long)s->code_ptr); + off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr)); } else { /* Make sure to preserve destinations during retranslation. */ - off16 = *(uint32_t *)s->code_ptr & INSN_OFF16(-1); + off16 = *s->code_ptr & INSN_OFF16(-1); tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, label, 0); } tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1) @@ -607,71 +630,32 @@ static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGArg arg1, tcg_out_nop(s); } -static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGArg ret, TCGArg c1, - TCGArg v1, int v1const) +static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1, + int32_t v1, int v1const) { tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1) | (tcg_cond_to_rcond[cond] << 10) | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1))); } -static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg c1, TCGArg c2, int c2const, - TCGArg v1, int v1const) +static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const, + int32_t v1, int v1const) { /* For 64-bit signed comparisons vs zero, we can avoid the compare. Note that the immediate range is one bit smaller, so we must check for that as well. 
*/ if (c2 == 0 && !is_unsigned_cond(cond) - && (!v1const || check_fit_tl(v1, 10))) { + && (!v1const || check_fit_i32(v1, 10))) { tcg_out_movr(s, cond, ret, c1, v1, v1const); } else { tcg_out_cmp(s, c1, c2, c2const); tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const); } } -#else -static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond, - TCGArg al, TCGArg ah, - TCGArg bl, int blconst, - TCGArg bh, int bhconst, int label_dest) -{ - int scond, label_next = gen_new_label(); - - tcg_out_cmp(s, ah, bh, bhconst); - - /* Note that we fill one of the delay slots with the second compare. */ - switch (cond) { - case TCG_COND_EQ: - tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next); - tcg_out_cmp(s, al, bl, blconst); - tcg_out_bpcc(s, COND_E, BPCC_ICC | BPCC_PT, label_dest); - break; - case TCG_COND_NE: - tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest); - tcg_out_cmp(s, al, bl, blconst); - tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest); - break; - - default: - scond = tcg_cond_to_bcond[tcg_high_cond(cond)]; - tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest); - tcg_out_nop(s); - tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next); - tcg_out_cmp(s, al, bl, blconst); - scond = tcg_cond_to_bcond[tcg_unsigned_cond(cond)]; - tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest); - break; - } - tcg_out_nop(s); - - tcg_out_label(s, label_next, s->code_ptr); -} -#endif - -static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg c1, TCGArg c2, int c2const) +static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const) { /* For 32-bit comparisons, we can play games with ADDX/SUBX. */ switch (cond) { @@ -696,7 +680,7 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, swap the operands on GTU/LEU. There's no benefit to loading the constant into a temporary register. */ if (!c2const || c2 == 0) { - TCGArg t = c1; + TCGReg t = c1; c1 = c2; c2 = t; c2const = 0; @@ -720,9 +704,8 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, } } -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg c1, TCGArg c2, int c2const) +static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const) { /* For 64-bit signed comparisons vs zero, we can avoid the compare if the input does not overlap the output. */ @@ -735,54 +718,12 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGArg ret, tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1); } } -#else -static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg al, TCGArg ah, - TCGArg bl, int blconst, - TCGArg bh, int bhconst) -{ - int tmp = TCG_REG_T1; - - /* Note that the low parts are fully consumed before tmp is set. 
*/ - if (ret != ah && (bhconst || ret != bh)) { - tmp = ret; - } - - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_NE: - if (bl == 0 && bh == 0) { - if (cond == TCG_COND_EQ) { - tcg_out_arith(s, TCG_REG_G0, al, ah, ARITH_ORCC); - tcg_out_movi(s, TCG_TYPE_I32, ret, 1); - } else { - tcg_out_arith(s, ret, al, ah, ARITH_ORCC); - } - } else { - tcg_out_setcond_i32(s, cond, tmp, al, bl, blconst); - tcg_out_cmp(s, ah, bh, bhconst); - tcg_out_mov(s, TCG_TYPE_I32, ret, tmp); - } - tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, cond == TCG_COND_NE, 1); - break; - - default: - /* <= : ah < bh | (ah == bh && al <= bl) */ - tcg_out_setcond_i32(s, tcg_unsigned_cond(cond), tmp, al, bl, blconst); - tcg_out_cmp(s, ah, bh, bhconst); - tcg_out_mov(s, TCG_TYPE_I32, ret, tmp); - tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, 0, 1); - tcg_out_movcc(s, tcg_high_cond(cond), MOVCC_ICC, ret, 1, 1); - break; - } -} -#endif -static void tcg_out_addsub2(TCGContext *s, TCGArg rl, TCGArg rh, - TCGArg al, TCGArg ah, TCGArg bl, int blconst, - TCGArg bh, int bhconst, int opl, int oph) +static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, + TCGReg al, TCGReg ah, int32_t bl, int blconst, + int32_t bh, int bhconst, int opl, int oph) { - TCGArg tmp = TCG_REG_T1; + TCGReg tmp = TCG_REG_T1; /* Note that the low parts are fully consumed before tmp is set. */ if (rl != ah && (bhconst || rl != bh)) { @@ -794,94 +735,117 @@ static void tcg_out_addsub2(TCGContext *s, TCGArg rl, TCGArg rh, tcg_out_mov(s, TCG_TYPE_I32, rl, tmp); } -static inline void tcg_out_calli(TCGContext *s, uintptr_t dest) +static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest) { - intptr_t disp = dest - (uintptr_t)s->code_ptr; + ptrdiff_t disp = tcg_pcrel_diff(s, dest); if (disp == (int32_t)disp) { tcg_out32(s, CALL | (uint32_t)disp >> 2); } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, dest & ~0xfff); - tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, dest & 0xfff, JMPL); + uintptr_t desti = (uintptr_t)dest; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, desti & ~0xfff); + tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL); } } +static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) +{ + tcg_out_call_nodelay(s, dest); + tcg_out_nop(s); +} + #ifdef CONFIG_SOFTMMU -static uintptr_t qemu_ld_trampoline[16]; -static uintptr_t qemu_st_trampoline[16]; +static tcg_insn_unit *qemu_ld_trampoline[16]; +static tcg_insn_unit *qemu_st_trampoline[16]; static void build_trampolines(TCGContext *s) { - static uintptr_t const qemu_ld_helpers[16] = { - [MO_UB] = (uintptr_t)helper_ret_ldub_mmu, - [MO_SB] = (uintptr_t)helper_ret_ldsb_mmu, - [MO_LEUW] = (uintptr_t)helper_le_lduw_mmu, - [MO_LESW] = (uintptr_t)helper_le_ldsw_mmu, - [MO_LEUL] = (uintptr_t)helper_le_ldul_mmu, - [MO_LEQ] = (uintptr_t)helper_le_ldq_mmu, - [MO_BEUW] = (uintptr_t)helper_be_lduw_mmu, - [MO_BESW] = (uintptr_t)helper_be_ldsw_mmu, - [MO_BEUL] = (uintptr_t)helper_be_ldul_mmu, - [MO_BEQ] = (uintptr_t)helper_be_ldq_mmu, + static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, }; - static uintptr_t const qemu_st_helpers[16] = { - [MO_UB] = (uintptr_t)helper_ret_stb_mmu, - [MO_LEUW] = (uintptr_t)helper_le_stw_mmu, - [MO_LEUL] = (uintptr_t)helper_le_stl_mmu, - 
[MO_LEQ] = (uintptr_t)helper_le_stq_mmu, - [MO_BEUW] = (uintptr_t)helper_be_stw_mmu, - [MO_BEUL] = (uintptr_t)helper_be_stl_mmu, - [MO_BEQ] = (uintptr_t)helper_be_stq_mmu, + static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, }; int i; TCGReg ra; - uintptr_t tramp; for (i = 0; i < 16; ++i) { - if (qemu_ld_helpers[i] == 0) { + if (qemu_ld_helpers[i] == NULL) { continue; } /* May as well align the trampoline. */ - tramp = (uintptr_t)s->code_ptr; - while (tramp & 15) { + while ((uintptr_t)s->code_ptr & 15) { tcg_out_nop(s); - tramp += 4; } - qemu_ld_trampoline[i] = tramp; + qemu_ld_trampoline[i] = s->code_ptr; - /* Find the retaddr argument register. */ - ra = TCG_REG_O3 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS); + if (SPARC64 || TARGET_LONG_BITS == 32) { + ra = TCG_REG_O3; + } else { + /* Install the high part of the address. */ + tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); + ra = TCG_REG_O4; + } /* Set the retaddr operand. */ tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); /* Set the env operand. */ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); /* Tail call. */ - tcg_out_calli(s, qemu_ld_helpers[i]); + tcg_out_call_nodelay(s, qemu_ld_helpers[i]); tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); } for (i = 0; i < 16; ++i) { - if (qemu_st_helpers[i] == 0) { + if (qemu_st_helpers[i] == NULL) { continue; } /* May as well align the trampoline. */ - tramp = (uintptr_t)s->code_ptr; - while (tramp & 15) { + while ((uintptr_t)s->code_ptr & 15) { tcg_out_nop(s); - tramp += 4; } - qemu_st_trampoline[i] = tramp; - - /* Find the retaddr argument. For 32-bit, this may be past the - last argument register, and need passing on the stack. */ - ra = (TCG_REG_O4 - + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) - + (TCG_TARGET_REG_BITS == 32 && (i & MO_SIZE) == MO_64)); + qemu_st_trampoline[i] = s->code_ptr; + if (SPARC64) { + ra = TCG_REG_O4; + } else { + ra = TCG_REG_O1; + if (TARGET_LONG_BITS == 64) { + /* Install the high part of the address. */ + tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); + ra += 2; + } else { + ra += 1; + } + if ((i & MO_SIZE) == MO_64) { + /* Install the high part of the data. */ + tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); + ra += 2; + } else { + ra += 1; + } + /* Skip the mem_index argument. */ + ra += 1; + } + /* Set the retaddr operand. */ if (ra >= TCG_REG_O6) { tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK, @@ -892,7 +856,7 @@ static void build_trampolines(TCGContext *s) /* Set the env operand. */ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); /* Tail call. */ - tcg_out_calli(s, qemu_st_helpers[i]); + tcg_out_call_nodelay(s, qemu_st_helpers[i]); tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); } } @@ -950,25 +914,16 @@ static void tcg_target_qemu_prologue(TCGContext *s) The result of the TLB comparison is in %[ix]cc. The sanitized address is in the returned register, maybe %o0. The TLB addend is in %o1. 
*/ -static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - int mem_index, TCGMemOp s_bits, int which) +static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, + TCGMemOp s_bits, int which) { const TCGReg r0 = TCG_REG_O0; const TCGReg r1 = TCG_REG_O1; const TCGReg r2 = TCG_REG_O2; - TCGReg addr = addrlo; int tlb_ofs; - if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) { - /* Assemble the 64-bit address in R0. */ - tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL); - tcg_out_arithi(s, r1, addrhi, 32, SHIFT_SLLX); - tcg_out_arith(s, r0, r0, r1, ARITH_OR); - addr = r0; - } - /* Shift the page number down. */ - tcg_out_arithi(s, r1, addrlo, TARGET_PAGE_BITS, SHIFT_SRL); + tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL); /* Mask out the page offset, except for the required alignment. */ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1, @@ -988,8 +943,11 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, /* Find a base address that can load both tlb comparator and addend. */ tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]); - if (!check_fit_tl(tlb_ofs + sizeof(CPUTLBEntry), 13)) { - tcg_out_addi(s, r1, tlb_ofs & ~0x3ff); + if (!check_fit_ptr(tlb_ofs + sizeof(CPUTLBEntry), 13)) { + if (tlb_ofs & ~0x3ff) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, tlb_ofs & ~0x3ff); + tcg_out_arith(s, r1, r1, TCG_REG_T1, ARITH_ADD); + } tlb_ofs &= 0x3ff; } @@ -1001,11 +959,11 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, tcg_out_cmp(s, r0, r2, 0); /* If the guest address must be zero-extended, do so now. */ - if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL); + if (SPARC64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL); return r0; } - return addrlo; + return addr; } #endif /* CONFIG_SOFTMMU */ @@ -1038,78 +996,37 @@ static const int qemu_st_opc[16] = { [MO_LEQ] = STX_LE, }; -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) +static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, + TCGMemOp memop, int memi, bool is_64) { - TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); - TCGMemOp memop, s_bits; -#if defined(CONFIG_SOFTMMU) +#ifdef CONFIG_SOFTMMU + TCGMemOp s_bits = memop & MO_SIZE; TCGReg addrz, param; - uintptr_t func; - int memi; - uint32_t *label_ptr[2]; -#endif - - datalo = *args++; - datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0); - memop = *args++; - s_bits = memop & MO_SIZE; + tcg_insn_unit *func; + tcg_insn_unit *label_ptr; -#if defined(CONFIG_SOFTMMU) - memi = *args++; - addrz = tcg_out_tlb_load(s, addrlo, addrhi, memi, s_bits, + addrz = tcg_out_tlb_load(s, addr, memi, s_bits, offsetof(CPUTLBEntry, addr_read)); - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - int reg64; - - /* bne,pn %[xi]cc, label0 */ - label_ptr[0] = (uint32_t *)s->code_ptr; - tcg_out_bpcc0(s, COND_NE, BPCC_PN - | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); - tcg_out_nop(s); + /* The fast path is exactly one insn. Thus we can perform the + entire TLB Hit in the (annulled) delay slot of the branch + over the TLB Miss case. */ - /* TLB Hit. */ - /* Load all 64-bits into an O/G register. */ - reg64 = (datalo < 16 ? datalo : TCG_REG_O0); - tcg_out_ldst_rr(s, reg64, addrz, TCG_REG_O1, qemu_ld_opc[memop]); - - /* Move the two 32-bit pieces into the destination registers. 
*/ - tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX); - if (reg64 != datalo) { - tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64); - } - - /* b,a,pt label1 */ - label_ptr[1] = (uint32_t *)s->code_ptr; - tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0); - } else { - /* The fast path is exactly one insn. Thus we can perform the - entire TLB Hit in the (annulled) delay slot of the branch - over the TLB Miss case. */ - - /* beq,a,pt %[xi]cc, label0 */ - label_ptr[0] = NULL; - label_ptr[1] = (uint32_t *)s->code_ptr; - tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT - | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); - /* delay slot */ - tcg_out_ldst_rr(s, datalo, addrz, TCG_REG_O1, qemu_ld_opc[memop]); - } + /* beq,a,pt %[xi]cc, label0 */ + label_ptr = s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT + | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_ld_opc[memop]); /* TLB Miss. */ - if (label_ptr[0]) { - *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr[0]); - } - param = TCG_REG_O1; - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_mov(s, TCG_TYPE_REG, param++, addrhi); + if (!SPARC64 && TARGET_LONG_BITS == 64) { + /* Skip the high-part; we'll perform the extract in the trampoline. */ + param++; } - tcg_out_mov(s, TCG_TYPE_REG, param++, addrlo); + tcg_out_mov(s, TCG_TYPE_REG, param++, addr); /* We use the helpers to extend SB and SW data, leaving the case of SL needing explicit extending below. */ @@ -1118,213 +1035,172 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) } else { func = qemu_ld_trampoline[memop]; } - assert(func != 0); - tcg_out_calli(s, func); + assert(func != NULL); + tcg_out_call_nodelay(s, func); /* delay slot */ tcg_out_movi(s, TCG_TYPE_I32, param, memi); - switch (memop & ~MO_BSWAP) { - case MO_SL: - tcg_out_arithi(s, datalo, TCG_REG_O0, 0, SHIFT_SRA); - break; - case MO_Q: - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_mov(s, TCG_TYPE_REG, datahi, TCG_REG_O0); - tcg_out_mov(s, TCG_TYPE_REG, datalo, TCG_REG_O1); - break; + /* Recall that all of the helpers return 64-bit results. + Which complicates things for sparcv8plus. */ + if (SPARC64) { + /* We let the helper sign-extend SB and SW, but leave SL for here. */ + if (is_64 && (memop & ~MO_BSWAP) == MO_SL) { + tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); + } else { + tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); + } + } else { + if (s_bits == MO_64) { + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX); + tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL); + tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR); + } else if (is_64) { + /* Re-extend from 32-bit rather than reassembling when we + know the high register must be an extension. */ + tcg_out_arithi(s, data, TCG_REG_O1, 0, + memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL); + } else { + tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1); } - /* FALLTHRU */ - default: - /* mov */ - tcg_out_mov(s, TCG_TYPE_REG, datalo, TCG_REG_O0); - break; } - *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr[1]); + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #else - if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, TCG_REG_T1, addrlo, 0, SHIFT_SRL); - addrlo = TCG_REG_T1; - } - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - int reg64 = (datalo < 16 ? datalo : TCG_REG_O0); - - tcg_out_ldst_rr(s, reg64, addrlo, - (GUEST_BASE ? 
TCG_GUEST_BASE_REG : TCG_REG_G0), - qemu_ld_opc[memop]); - - tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX); - if (reg64 != datalo) { - tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64); - } - } else { - tcg_out_ldst_rr(s, datalo, addrlo, - (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), - qemu_ld_opc[memop]); + if (SPARC64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); + addr = TCG_REG_T1; } + tcg_out_ldst_rr(s, data, addr, + (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), + qemu_ld_opc[memop]); #endif /* CONFIG_SOFTMMU */ } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) +static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, + TCGMemOp memop, int memi) { - TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); - TCGMemOp memop, s_bits; -#if defined(CONFIG_SOFTMMU) - TCGReg addrz, datafull, param; - uintptr_t func; - int memi; - uint32_t *label_ptr; -#endif - - datalo = *args++; - datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0); - memop = *args++; - s_bits = memop & MO_SIZE; +#ifdef CONFIG_SOFTMMU + TCGMemOp s_bits = memop & MO_SIZE; + TCGReg addrz, param; + tcg_insn_unit *func; + tcg_insn_unit *label_ptr; -#if defined(CONFIG_SOFTMMU) - memi = *args++; - addrz = tcg_out_tlb_load(s, addrlo, addrhi, memi, s_bits, + addrz = tcg_out_tlb_load(s, addr, memi, s_bits, offsetof(CPUTLBEntry, addr_write)); - datafull = datalo; - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - /* Reconstruct the full 64-bit value. */ - tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL); - tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); - tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR); - datafull = TCG_REG_O2; - } - /* The fast path is exactly one insn. Thus we can perform the entire TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */ /* beq,a,pt %[xi]cc, label0 */ - label_ptr = (uint32_t *)s->code_ptr; + label_ptr = s->code_ptr; tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); /* delay slot */ - tcg_out_ldst_rr(s, datafull, addrz, TCG_REG_O1, qemu_st_opc[memop]); + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_st_opc[memop]); /* TLB Miss. */ param = TCG_REG_O1; - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_mov(s, TCG_TYPE_REG, param++, addrhi); + if (!SPARC64 && TARGET_LONG_BITS == 64) { + /* Skip the high-part; we'll perform the extract in the trampoline. */ + param++; } - tcg_out_mov(s, TCG_TYPE_REG, param++, addrlo); - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - tcg_out_mov(s, TCG_TYPE_REG, param++, datahi); + tcg_out_mov(s, TCG_TYPE_REG, param++, addr); + if (!SPARC64 && s_bits == MO_64) { + /* Skip the high-part; we'll perform the extract in the trampoline. 
*/ + param++; } - tcg_out_mov(s, TCG_TYPE_REG, param++, datalo); + tcg_out_mov(s, TCG_TYPE_REG, param++, data); func = qemu_st_trampoline[memop]; - assert(func != 0); - tcg_out_calli(s, func); + assert(func != NULL); + tcg_out_call_nodelay(s, func); /* delay slot */ tcg_out_movi(s, TCG_TYPE_REG, param, memi); - *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr); + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #else - if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, TCG_REG_T1, addrlo, 0, SHIFT_SRL); - addrlo = TCG_REG_T1; - } - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL); - tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); - tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR); - datalo = TCG_REG_O2; + if (SPARC64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); + addr = TCG_REG_T1; } - tcg_out_ldst_rr(s, datalo, addrlo, + tcg_out_ldst_rr(s, data, addr, (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), qemu_st_opc[memop]); #endif /* CONFIG_SOFTMMU */ } -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, - const int *const_args) +static void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg args[TCG_MAX_OP_ARGS], + const int const_args[TCG_MAX_OP_ARGS]) { - int c; + TCGArg a0, a1, a2; + int c, c2; + + /* Hoist the loads of the most common arguments. */ + a0 = args[0]; + a1 = args[1]; + a2 = args[2]; + c2 = const_args[2]; switch (opc) { case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, args[0]); - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, JMPL); - tcg_out32(s, RESTORE | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_G0) | - INSN_RS2(TCG_REG_G0)); + if (check_fit_ptr(a0, 13)) { + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + tcg_out_movi_imm13(s, TCG_REG_O0, a0); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff); + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR); + } break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { /* direct jump method */ - uint32_t old_insn = *(uint32_t *)s->code_ptr; - s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; + s->tb_jmp_offset[a0] = tcg_current_code_size(s); /* Make sure to preserve links during retranslation. 
*/ - tcg_out32(s, CALL | (old_insn & ~INSN_OP(-1))); + tcg_out32(s, CALL | (*s->code_ptr & ~INSN_OP(-1))); } else { /* indirect jump method */ - tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + args[0])); + tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + a0)); tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, 0, JMPL); } tcg_out_nop(s); - s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; - break; - case INDEX_op_call: - if (const_args[0]) { - tcg_out_calli(s, args[0]); - } else { - tcg_out_arithi(s, TCG_REG_O7, args[0], 0, JMPL); - } - /* delay slot */ - tcg_out_nop(s); + s->tb_next_offset[a0] = tcg_current_code_size(s); break; case INDEX_op_br: - tcg_out_bpcc(s, COND_A, BPCC_PT, args[0]); + tcg_out_bpcc(s, COND_A, BPCC_PT, a0); tcg_out_nop(s); break; - case INDEX_op_movi_i32: - tcg_out_movi(s, TCG_TYPE_I32, args[0], (uint32_t)args[1]); - break; -#if TCG_TARGET_REG_BITS == 64 #define OP_32_64(x) \ glue(glue(case INDEX_op_, x), _i32): \ glue(glue(case INDEX_op_, x), _i64) -#else -#define OP_32_64(x) \ - glue(glue(case INDEX_op_, x), _i32) -#endif + OP_32_64(ld8u): - tcg_out_ldst(s, args[0], args[1], args[2], LDUB); + tcg_out_ldst(s, a0, a1, a2, LDUB); break; OP_32_64(ld8s): - tcg_out_ldst(s, args[0], args[1], args[2], LDSB); + tcg_out_ldst(s, a0, a1, a2, LDSB); break; OP_32_64(ld16u): - tcg_out_ldst(s, args[0], args[1], args[2], LDUH); + tcg_out_ldst(s, a0, a1, a2, LDUH); break; OP_32_64(ld16s): - tcg_out_ldst(s, args[0], args[1], args[2], LDSH); + tcg_out_ldst(s, a0, a1, a2, LDSH); break; case INDEX_op_ld_i32: -#if TCG_TARGET_REG_BITS == 64 case INDEX_op_ld32u_i64: -#endif - tcg_out_ldst(s, args[0], args[1], args[2], LDUW); + tcg_out_ldst(s, a0, a1, a2, LDUW); break; OP_32_64(st8): - tcg_out_ldst(s, args[0], args[1], args[2], STB); + tcg_out_ldst(s, a0, a1, a2, STB); break; OP_32_64(st16): - tcg_out_ldst(s, args[0], args[1], args[2], STH); + tcg_out_ldst(s, a0, a1, a2, STH); break; case INDEX_op_st_i32: -#if TCG_TARGET_REG_BITS == 64 case INDEX_op_st32_i64: -#endif - tcg_out_ldst(s, args[0], args[1], args[2], STW); + tcg_out_ldst(s, a0, a1, a2, STW); break; OP_32_64(add): c = ARITH_ADD; @@ -1351,7 +1227,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, c = SHIFT_SLL; do_shift32: /* Limit immediate shift count lest we create an illegal insn. 
*/ - tcg_out_arithc(s, args[0], args[1], args[2] & 31, const_args[2], c); + tcg_out_arithc(s, a0, a1, a2 & 31, c2, c); break; case INDEX_op_shr_i32: c = SHIFT_SRL; @@ -1371,85 +1247,71 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, goto gen_arith1; case INDEX_op_div_i32: - tcg_out_div32(s, args[0], args[1], args[2], const_args[2], 0); + tcg_out_div32(s, a0, a1, a2, c2, 0); break; case INDEX_op_divu_i32: - tcg_out_div32(s, args[0], args[1], args[2], const_args[2], 1); + tcg_out_div32(s, a0, a1, a2, c2, 1); break; case INDEX_op_brcond_i32: - tcg_out_brcond_i32(s, args[2], args[0], args[1], const_args[1], - args[3]); + tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], args[3]); break; case INDEX_op_setcond_i32: - tcg_out_setcond_i32(s, args[3], args[0], args[1], - args[2], const_args[2]); + tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2); break; case INDEX_op_movcond_i32: - tcg_out_movcond_i32(s, args[5], args[0], args[1], - args[2], const_args[2], args[3], const_args[3]); + tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); break; -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_brcond2_i32: - tcg_out_brcond2_i32(s, args[4], args[0], args[1], - args[2], const_args[2], - args[3], const_args[3], args[5]); - break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2_i32(s, args[5], args[0], args[1], args[2], - args[3], const_args[3], - args[4], const_args[4]); - break; -#endif - case INDEX_op_add2_i32: - tcg_out_addsub2(s, args[0], args[1], args[2], args[3], - args[4], const_args[4], args[5], const_args[5], - ARITH_ADDCC, ARITH_ADDX); + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], const_args[4], + args[5], const_args[5], ARITH_ADDCC, ARITH_ADDX); break; case INDEX_op_sub2_i32: - tcg_out_addsub2(s, args[0], args[1], args[2], args[3], - args[4], const_args[4], args[5], const_args[5], - ARITH_SUBCC, ARITH_SUBX); + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], const_args[4], + args[5], const_args[5], ARITH_SUBCC, ARITH_SUBX); break; case INDEX_op_mulu2_i32: - tcg_out_arithc(s, args[0], args[2], args[3], const_args[3], - ARITH_UMUL); - tcg_out_rdy(s, args[1]); + c = ARITH_UMUL; + goto do_mul2; + case INDEX_op_muls2_i32: + c = ARITH_SMUL; + do_mul2: + /* The 32-bit multiply insns produce a full 64-bit result. If the + destination register can hold it, we can avoid the slower RDY. */ + tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); + if (SPARC64 || a0 <= TCG_REG_O7) { + tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); + } else { + tcg_out_rdy(s, a1); + } break; case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args, 0); + tcg_out_qemu_ld(s, a0, a1, a2, args[3], false); break; case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args, 1); + tcg_out_qemu_ld(s, a0, a1, a2, args[3], true); break; case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args, 0); - break; case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args, 1); + tcg_out_qemu_st(s, a0, a1, a2, args[3]); break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_movi_i64: - tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); - break; case INDEX_op_ld32s_i64: - tcg_out_ldst(s, args[0], args[1], args[2], LDSW); + tcg_out_ldst(s, a0, a1, a2, LDSW); break; case INDEX_op_ld_i64: - tcg_out_ldst(s, args[0], args[1], args[2], LDX); + tcg_out_ldst(s, a0, a1, a2, LDX); break; case INDEX_op_st_i64: - tcg_out_ldst(s, args[0], args[1], args[2], STX); + tcg_out_ldst(s, a0, a1, a2, STX); break; case INDEX_op_shl_i64: c = SHIFT_SLLX; do_shift64: /* Limit immediate shift count lest we create an illegal insn. 
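The do_mul2 path earlier in this hunk relies on UMUL/SMUL writing the full 64-bit product into the destination register, so the high half can be taken with a single SRLX instead of the slower RDY. A plain-C sketch of the values either sequence must leave in the low/high output pair (function names are hypothetical, not part of the patch):

    #include <stdint.h>

    static void mulu2_i32_ref(uint32_t a, uint32_t b, uint32_t *rl, uint32_t *rh)
    {
        uint64_t p = (uint64_t)a * b;           /* UMUL: full 64-bit product */
        *rl = (uint32_t)p;                      /* low half */
        *rh = (uint32_t)(p >> 32);              /* high half: SRLX by 32, or RDY */
    }

    static void muls2_i32_ref(int32_t a, int32_t b, int32_t *rl, int32_t *rh)
    {
        uint64_t p = (uint64_t)((int64_t)a * b); /* SMUL: signed 64-bit product */
        *rl = (int32_t)(uint32_t)p;
        *rh = (int32_t)(uint32_t)(p >> 32);
    }
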
*/ - tcg_out_arithc(s, args[0], args[1], args[2] & 63, const_args[2], c); + tcg_out_arithc(s, a0, a1, a2 & 63, c2, c); break; case INDEX_op_shr_i64: c = SHIFT_SRLX; @@ -1467,35 +1329,43 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, c = ARITH_UDIVX; goto gen_arith; case INDEX_op_ext32s_i64: - tcg_out_arithi(s, args[0], args[1], 0, SHIFT_SRA); + tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA); break; case INDEX_op_ext32u_i64: - tcg_out_arithi(s, args[0], args[1], 0, SHIFT_SRL); + tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); + break; + case INDEX_op_trunc_shr_i32: + if (a2 == 0) { + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + } else { + tcg_out_arithi(s, a0, a1, a2, SHIFT_SRLX); + } break; case INDEX_op_brcond_i64: - tcg_out_brcond_i64(s, args[2], args[0], args[1], const_args[1], - args[3]); + tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], args[3]); break; case INDEX_op_setcond_i64: - tcg_out_setcond_i64(s, args[3], args[0], args[1], - args[2], const_args[2]); + tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2); break; case INDEX_op_movcond_i64: - tcg_out_movcond_i64(s, args[5], args[0], args[1], - args[2], const_args[2], args[3], const_args[3]); + tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); break; -#endif + gen_arith: - tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c); + tcg_out_arithc(s, a0, a1, a2, c2, c); break; gen_arith1: - tcg_out_arithc(s, args[0], TCG_REG_G0, args[1], const_args[1], c); + tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c); break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. 
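The trunc_shr_i32 case added above is the backend side of the new opcode: shift the 64-bit input right by the constant position and keep only the low 32 bits, with position 0 degenerating into a plain move. A reference version in plain C, purely illustrative:

    #include <stdint.h>

    static uint32_t trunc_shr_i32_ref(uint64_t t1, unsigned pos)
    {
        /* pos == 0 is just a register move; otherwise one SRLX is enough,
           because the consumer only looks at the low 32 bits of the result. */
        return (uint32_t)(t1 >> pos);
    }
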
*/ default: - fprintf(stderr, "unknown opcode 0x%x\n", opc); tcg_abort(); } } @@ -1503,11 +1373,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, - { INDEX_op_call, { "ri" } }, { INDEX_op_br, { } }, - { INDEX_op_mov_i32, { "r", "r" } }, - { INDEX_op_movi_i32, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, { INDEX_op_ld16u_i32, { "r", "r" } }, @@ -1539,72 +1406,53 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } }, { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } }, -#if TCG_TARGET_REG_BITS == 32 - { INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } }, - { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rJ", "rJ" } }, -#endif - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } }, - -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_mov_i64, { "r", "r" } }, - { INDEX_op_movi_i64, { "r" } }, - { INDEX_op_ld8u_i64, { "r", "r" } }, - { INDEX_op_ld8s_i64, { "r", "r" } }, - { INDEX_op_ld16u_i64, { "r", "r" } }, - { INDEX_op_ld16s_i64, { "r", "r" } }, - { INDEX_op_ld32u_i64, { "r", "r" } }, - { INDEX_op_ld32s_i64, { "r", "r" } }, - { INDEX_op_ld_i64, { "r", "r" } }, - { INDEX_op_st8_i64, { "rZ", "r" } }, - { INDEX_op_st16_i64, { "rZ", "r" } }, - { INDEX_op_st32_i64, { "rZ", "r" } }, - { INDEX_op_st_i64, { "rZ", "r" } }, - - { INDEX_op_add_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_mul_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_div_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_divu_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_sub_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_and_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_andc_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_or_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_orc_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_xor_i64, { "r", "rZ", "rJ" } }, - - { INDEX_op_shl_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_shr_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_sar_i64, { "r", "rZ", "rJ" } }, - - { INDEX_op_neg_i64, { "r", "rJ" } }, - { INDEX_op_not_i64, { "r", "rJ" } }, - - { INDEX_op_ext32s_i64, { "r", "r" } }, - { INDEX_op_ext32u_i64, { "r", "r" } }, - - { INDEX_op_brcond_i64, { "rZ", "rJ" } }, - { INDEX_op_setcond_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_movcond_i64, { "r", "rZ", "rJ", "rI", "0" } }, -#endif - -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L" } }, -#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "r", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L", "L" } }, -#else - { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, - { INDEX_op_qemu_ld_i64, { "L", "L", "L", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, -#endif + { INDEX_op_muls2_i32, { "r", "r", "rZ", "rJ" } }, + + { INDEX_op_ld8u_i64, { "R", "r" } }, + { INDEX_op_ld8s_i64, { "R", "r" } }, + { INDEX_op_ld16u_i64, { "R", "r" } }, + { INDEX_op_ld16s_i64, { "R", "r" } }, + { INDEX_op_ld32u_i64, { "R", "r" } }, + { INDEX_op_ld32s_i64, { "R", "r" } }, + { INDEX_op_ld_i64, { "R", "r" } }, + { INDEX_op_st8_i64, { "RZ", "r" } }, + { INDEX_op_st16_i64, { "RZ", "r" } }, + { INDEX_op_st32_i64, { 
"RZ", "r" } }, + { INDEX_op_st_i64, { "RZ", "r" } }, + + { INDEX_op_add_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_mul_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_div_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_divu_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_sub_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_and_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_andc_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_or_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_orc_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_xor_i64, { "R", "RZ", "RJ" } }, + + { INDEX_op_shl_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_shr_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_sar_i64, { "R", "RZ", "RJ" } }, + + { INDEX_op_neg_i64, { "R", "RJ" } }, + { INDEX_op_not_i64, { "R", "RJ" } }, + + { INDEX_op_ext32s_i64, { "R", "r" } }, + { INDEX_op_ext32u_i64, { "R", "r" } }, + { INDEX_op_trunc_shr_i32, { "r", "R" } }, + + { INDEX_op_brcond_i64, { "RZ", "RJ" } }, + { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_movcond_i64, { "R", "RZ", "RJ", "RI", "0" } }, + + { INDEX_op_qemu_ld_i32, { "r", "A" } }, + { INDEX_op_qemu_ld_i64, { "R", "A" } }, + { INDEX_op_qemu_st_i32, { "sZ", "A" } }, + { INDEX_op_qemu_st_i64, { "SZ", "A" } }, { -1 }, }; @@ -1612,9 +1460,8 @@ static const TCGTargetOpDef sparc_op_defs[] = { static void tcg_target_init(TCGContext *s) { tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); -#if TCG_TARGET_REG_BITS == 64 - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); -#endif + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, ALL_64); + tcg_regset_set32(tcg_target_call_clobber_regs, 0, (1 << TCG_REG_G1) | (1 << TCG_REG_G2) | @@ -1644,7 +1491,7 @@ static void tcg_target_init(TCGContext *s) tcg_add_target_add_op_defs(sparc_op_defs); } -#if TCG_TARGET_REG_BITS == 64 +#if SPARC64 # define ELF_HOST_MACHINE EM_SPARCV9 #else # define ELF_HOST_MACHINE EM_SPARC32PLUS @@ -1652,26 +1499,25 @@ static void tcg_target_init(TCGContext *s) #endif typedef struct { - DebugFrameCIE cie; - DebugFrameFDEHeader fde; - uint8_t fde_def_cfa[TCG_TARGET_REG_BITS == 64 ? 4 : 2]; + DebugFrameHeader h; + uint8_t fde_def_cfa[SPARC64 ? 4 : 2]; uint8_t fde_win_save; uint8_t fde_ret_save[3]; } DebugFrame; -static DebugFrame debug_frame = { - .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .cie.id = -1, - .cie.version = 1, - .cie.code_align = 1, - .cie.data_align = -sizeof(void *) & 0x7f, - .cie.return_column = 15, /* o7 */ +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = -sizeof(void *) & 0x7f, + .h.cie.return_column = 15, /* o7 */ /* Total FDE size does not include the "len" member. 
*/ - .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), .fde_def_cfa = { -#if TCG_TARGET_REG_BITS == 64 +#if SPARC64 12, 30, /* DW_CFA_def_cfa i6, 2047 */ (2047 & 0x7f) | 0x80, (2047 >> 7) #else @@ -1684,9 +1530,6 @@ static DebugFrame debug_frame = { void tcg_register_jit(void *buf, size_t buf_size) { - debug_frame.fde.func_start = (uintptr_t)buf; - debug_frame.fde.func_len = buf_size; - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index 3abf1b41b..089f9761c 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -24,16 +24,9 @@ #ifndef TCG_TARGET_SPARC #define TCG_TARGET_SPARC 1 -#if UINTPTR_MAX == UINT32_MAX -# define TCG_TARGET_REG_BITS 32 -#elif UINTPTR_MAX == UINT64_MAX -# define TCG_TARGET_REG_BITS 64 -#else -# error Unknown pointer size for tcg target -#endif - -#define TCG_TARGET_WORDS_BIGENDIAN +#define TCG_TARGET_REG_BITS 64 +#define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_NB_REGS 32 typedef enum { @@ -78,7 +71,7 @@ typedef enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_O6 -#if TCG_TARGET_REG_BITS == 64 +#ifdef __arch64__ #define TCG_TARGET_STACK_BIAS 2047 #define TCG_TARGET_STACK_ALIGN 16 #define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) @@ -88,7 +81,7 @@ typedef enum { #define TCG_TARGET_CALL_STACK_OFFSET (64 + 4 + 6*4) #endif -#if TCG_TARGET_REG_BITS == 64 +#ifdef __arch64__ #define TCG_TARGET_EXTEND_ARGS 1 #endif @@ -114,11 +107,11 @@ typedef enum { #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 -#define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_trunc_shr_i32 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 0 @@ -146,9 +139,6 @@ typedef enum { #define TCG_TARGET_HAS_muls2_i64 0 #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0 -#endif - -#define TCG_TARGET_HAS_new_ldst 1 #define TCG_AREG0 TCG_REG_I0 diff --git a/tcg/tcg-be-ldst.h b/tcg/tcg-be-ldst.h index 284db0c70..49b3de61e 100644 --- a/tcg/tcg-be-ldst.h +++ b/tcg/tcg-be-ldst.h @@ -24,15 +24,15 @@ #define TCG_MAX_QEMU_LDST 640 typedef struct TCGLabelQemuLdst { - int is_ld:1; /* qemu_ld: 1, qemu_st: 0 */ + bool is_ld:1; /* qemu_ld: true, qemu_st: false */ TCGMemOp opc:4; TCGReg addrlo_reg; /* reg index for low word of guest virtual addr */ TCGReg addrhi_reg; /* reg index for high word of guest virtual addr */ TCGReg datalo_reg; /* reg index for low word to be loaded or stored */ TCGReg datahi_reg; /* reg index for high word to be loaded or stored */ int mem_index; /* soft MMU memory index */ - uint8_t *raddr; /* gen code addr of the next IR of qemu_ld/st IR */ - uint8_t *label_ptr[2]; /* label pointers to be updated */ + tcg_insn_unit *raddr; /* gen code addr of the next IR of qemu_ld/st IR */ + tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */ } TCGLabelQemuLdst; typedef struct TCGBackendData { diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 7eabf22f0..019dd9b6f 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -22,6 +22,8 @@ * THE SOFTWARE. 
*/ #include "tcg.h" +#include "exec/helper-proto.h" +#include "exec/helper-gen.h" int gen_new_label(void); @@ -379,57 +381,6 @@ static inline void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg) tcg_gen_op2i_i32(INDEX_op_movi_i32, ret, arg); } -/* A version of dh_sizemask from def-helper.h that doesn't rely on - preprocessor magic. */ -static inline int tcg_gen_sizemask(int n, int is_64bit, int is_signed) -{ - return (is_64bit << n*2) | (is_signed << (n*2 + 1)); -} - -/* helper calls */ -static inline void tcg_gen_helperN(void *func, int flags, int sizemask, - TCGArg ret, int nargs, TCGArg *args) -{ - TCGv_ptr fn; - fn = tcg_const_ptr(func); - tcg_gen_callN(&tcg_ctx, fn, flags, sizemask, ret, - nargs, args); - tcg_temp_free_ptr(fn); -} - -/* Note: Both tcg_gen_helper32() and tcg_gen_helper64() are currently - reserved for helpers in tcg-runtime.c. These helpers all do not read - globals and do not have side effects, hence the call to tcg_gen_callN() - with TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS. This may need - to be adjusted if these functions start to be used with other helpers. */ -static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret, - TCGv_i32 a, TCGv_i32 b) -{ - TCGv_ptr fn; - TCGArg args[2]; - fn = tcg_const_ptr(func); - args[0] = GET_TCGV_I32(a); - args[1] = GET_TCGV_I32(b); - tcg_gen_callN(&tcg_ctx, fn, - TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS, - sizemask, GET_TCGV_I32(ret), 2, args); - tcg_temp_free_ptr(fn); -} - -static inline void tcg_gen_helper64(void *func, int sizemask, TCGv_i64 ret, - TCGv_i64 a, TCGv_i64 b) -{ - TCGv_ptr fn; - TCGArg args[2]; - fn = tcg_const_ptr(func); - args[0] = GET_TCGV_I64(a); - args[1] = GET_TCGV_I64(b); - tcg_gen_callN(&tcg_ctx, fn, - TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS, - sizemask, GET_TCGV_I64(ret), 2, args); - tcg_temp_free_ptr(fn); -} - /* 32 bit ops */ static inline void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) @@ -717,12 +668,7 @@ static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2); tcg_temp_free_i32(t0); } else { - int sizemask = 0; - /* Return value and both arguments are 32-bit and signed. */ - sizemask |= tcg_gen_sizemask(0, 0, 1); - sizemask |= tcg_gen_sizemask(1, 0, 1); - sizemask |= tcg_gen_sizemask(2, 0, 1); - tcg_gen_helper32(tcg_helper_div_i32, sizemask, ret, arg1, arg2); + gen_helper_div_i32(ret, arg1, arg2); } } @@ -742,12 +688,7 @@ static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2); tcg_temp_free_i32(t0); } else { - int sizemask = 0; - /* Return value and both arguments are 32-bit and signed. */ - sizemask |= tcg_gen_sizemask(0, 0, 1); - sizemask |= tcg_gen_sizemask(1, 0, 1); - sizemask |= tcg_gen_sizemask(2, 0, 1); - tcg_gen_helper32(tcg_helper_rem_i32, sizemask, ret, arg1, arg2); + gen_helper_rem_i32(ret, arg1, arg2); } } @@ -761,12 +702,7 @@ static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2); tcg_temp_free_i32(t0); } else { - int sizemask = 0; - /* Return value and both arguments are 32-bit and unsigned. 
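The div_i32/rem_i32 expansions above hand div2_i32 a double-width dividend: the low word is the operand itself and the high word is its sign replication (zero for the unsigned divu2/remu2 forms). A small C sketch of the signed case, only to make the high-word construction explicit (the function name is invented):

    #include <stdint.h>

    static int32_t div2_i32_ref(int32_t arg1, int32_t arg2)
    {
        int32_t hi = arg1 >> 31;                          /* sign word, as sari(arg1, 31) */
        int64_t dividend = ((int64_t)hi << 32) | (uint32_t)arg1;
        return (int32_t)(dividend / arg2);                /* the quotient div2 produces */
    }
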
*/ - sizemask |= tcg_gen_sizemask(0, 0, 0); - sizemask |= tcg_gen_sizemask(1, 0, 0); - sizemask |= tcg_gen_sizemask(2, 0, 0); - tcg_gen_helper32(tcg_helper_divu_i32, sizemask, ret, arg1, arg2); + gen_helper_divu_i32(ret, arg1, arg2); } } @@ -786,12 +722,7 @@ static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2); tcg_temp_free_i32(t0); } else { - int sizemask = 0; - /* Return value and both arguments are 32-bit and unsigned. */ - sizemask |= tcg_gen_sizemask(0, 0, 0); - sizemask |= tcg_gen_sizemask(1, 0, 0); - sizemask |= tcg_gen_sizemask(2, 0, 0); - tcg_gen_helper32(tcg_helper_remu_i32, sizemask, ret, arg1, arg2); + gen_helper_remu_i32(ret, arg1, arg2); } } @@ -858,7 +789,7 @@ static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, { /* since arg2 and ret have different types, they cannot be the same temporary */ -#ifdef TCG_TARGET_WORDS_BIGENDIAN +#ifdef HOST_WORDS_BIGENDIAN tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset); tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4); #else @@ -888,7 +819,7 @@ static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { -#ifdef TCG_TARGET_WORDS_BIGENDIAN +#ifdef HOST_WORDS_BIGENDIAN tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset); tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4); #else @@ -955,13 +886,7 @@ static inline void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) specific code (x86) */ static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - int sizemask = 0; - /* Return value and both arguments are 64-bit and signed. */ - sizemask |= tcg_gen_sizemask(0, 1, 1); - sizemask |= tcg_gen_sizemask(1, 1, 1); - sizemask |= tcg_gen_sizemask(2, 1, 1); - - tcg_gen_helper64(tcg_helper_shl_i64, sizemask, ret, arg1, arg2); + gen_helper_shl_i64(ret, arg1, arg2); } static inline void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) @@ -971,13 +896,7 @@ static inline void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - int sizemask = 0; - /* Return value and both arguments are 64-bit and signed. */ - sizemask |= tcg_gen_sizemask(0, 1, 1); - sizemask |= tcg_gen_sizemask(1, 1, 1); - sizemask |= tcg_gen_sizemask(2, 1, 1); - - tcg_gen_helper64(tcg_helper_shr_i64, sizemask, ret, arg1, arg2); + gen_helper_shr_i64(ret, arg1, arg2); } static inline void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) @@ -987,13 +906,7 @@ static inline void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - int sizemask = 0; - /* Return value and both arguments are 64-bit and signed. */ - sizemask |= tcg_gen_sizemask(0, 1, 1); - sizemask |= tcg_gen_sizemask(1, 1, 1); - sizemask |= tcg_gen_sizemask(2, 1, 1); - - tcg_gen_helper64(tcg_helper_sar_i64, sizemask, ret, arg1, arg2); + gen_helper_sar_i64(ret, arg1, arg2); } static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) @@ -1061,46 +974,22 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - int sizemask = 0; - /* Return value and both arguments are 64-bit and signed. 
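The 32-bit-host tcg_gen_ld_i64/tcg_gen_st_i64 above now pick the half ordering from HOST_WORDS_BIGENDIAN instead of the old target-side macro, since what matters is where the host keeps the two 32-bit halves in memory. A sketch of the layout they implement, assuming QEMU's usual HOST_WORDS_BIGENDIAN define:

    #include <stdint.h>
    #include <string.h>

    static void store_i64_as_halves(void *base, long offset, uint64_t v)
    {
        uint32_t lo = (uint32_t)v, hi = (uint32_t)(v >> 32);
    #ifdef HOST_WORDS_BIGENDIAN
        memcpy((char *)base + offset,     &hi, 4);   /* high half at offset */
        memcpy((char *)base + offset + 4, &lo, 4);
    #else
        memcpy((char *)base + offset,     &lo, 4);   /* low half at offset */
        memcpy((char *)base + offset + 4, &hi, 4);
    #endif
    }
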
*/ - sizemask |= tcg_gen_sizemask(0, 1, 1); - sizemask |= tcg_gen_sizemask(1, 1, 1); - sizemask |= tcg_gen_sizemask(2, 1, 1); - - tcg_gen_helper64(tcg_helper_div_i64, sizemask, ret, arg1, arg2); + gen_helper_div_i64(ret, arg1, arg2); } static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - int sizemask = 0; - /* Return value and both arguments are 64-bit and signed. */ - sizemask |= tcg_gen_sizemask(0, 1, 1); - sizemask |= tcg_gen_sizemask(1, 1, 1); - sizemask |= tcg_gen_sizemask(2, 1, 1); - - tcg_gen_helper64(tcg_helper_rem_i64, sizemask, ret, arg1, arg2); + gen_helper_rem_i64(ret, arg1, arg2); } static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - int sizemask = 0; - /* Return value and both arguments are 64-bit and unsigned. */ - sizemask |= tcg_gen_sizemask(0, 1, 0); - sizemask |= tcg_gen_sizemask(1, 1, 0); - sizemask |= tcg_gen_sizemask(2, 1, 0); - - tcg_gen_helper64(tcg_helper_divu_i64, sizemask, ret, arg1, arg2); + gen_helper_divu_i64(ret, arg1, arg2); } static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - int sizemask = 0; - /* Return value and both arguments are 64-bit and unsigned. */ - sizemask |= tcg_gen_sizemask(0, 1, 0); - sizemask |= tcg_gen_sizemask(1, 1, 0); - sizemask |= tcg_gen_sizemask(2, 1, 0); - - tcg_gen_helper64(tcg_helper_remu_i64, sizemask, ret, arg1, arg2); + gen_helper_remu_i64(ret, arg1, arg2); } #else @@ -1367,12 +1256,7 @@ static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2); tcg_temp_free_i64(t0); } else { - int sizemask = 0; - /* Return value and both arguments are 64-bit and signed. */ - sizemask |= tcg_gen_sizemask(0, 1, 1); - sizemask |= tcg_gen_sizemask(1, 1, 1); - sizemask |= tcg_gen_sizemask(2, 1, 1); - tcg_gen_helper64(tcg_helper_div_i64, sizemask, ret, arg1, arg2); + gen_helper_div_i64(ret, arg1, arg2); } } @@ -1392,12 +1276,7 @@ static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2); tcg_temp_free_i64(t0); } else { - int sizemask = 0; - /* Return value and both arguments are 64-bit and signed. */ - sizemask |= tcg_gen_sizemask(0, 1, 1); - sizemask |= tcg_gen_sizemask(1, 1, 1); - sizemask |= tcg_gen_sizemask(2, 1, 1); - tcg_gen_helper64(tcg_helper_rem_i64, sizemask, ret, arg1, arg2); + gen_helper_rem_i64(ret, arg1, arg2); } } @@ -1411,12 +1290,7 @@ static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2); tcg_temp_free_i64(t0); } else { - int sizemask = 0; - /* Return value and both arguments are 64-bit and unsigned. */ - sizemask |= tcg_gen_sizemask(0, 1, 0); - sizemask |= tcg_gen_sizemask(1, 1, 0); - sizemask |= tcg_gen_sizemask(2, 1, 0); - tcg_gen_helper64(tcg_helper_divu_i64, sizemask, ret, arg1, arg2); + gen_helper_divu_i64(ret, arg1, arg2); } } @@ -1436,12 +1310,7 @@ static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2); tcg_temp_free_i64(t0); } else { - int sizemask = 0; - /* Return value and both arguments are 64-bit and unsigned. 
*/ - sizemask |= tcg_gen_sizemask(0, 1, 0); - sizemask |= tcg_gen_sizemask(1, 1, 0); - sizemask |= tcg_gen_sizemask(2, 1, 0); - tcg_gen_helper64(tcg_helper_remu_i64, sizemask, ret, arg1, arg2); + gen_helper_remu_i64(ret, arg1, arg2); } } #endif /* TCG_TARGET_REG_BITS == 32 */ @@ -1624,9 +1493,20 @@ static inline void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } -static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +static inline void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, + unsigned int count) { - tcg_gen_mov_i32(ret, TCGV_LOW(arg)); + tcg_debug_assert(count < 64); + if (count >= 32) { + tcg_gen_shri_i32(ret, TCGV_HIGH(arg), count - 32); + } else if (count == 0) { + tcg_gen_mov_i32(ret, TCGV_LOW(arg)); + } else { + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_shri_i64(t, arg, count); + tcg_gen_mov_i32(ret, TCGV_LOW(t)); + tcg_temp_free_i64(t); + } } static inline void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg) @@ -1727,11 +1607,21 @@ static inline void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) } } -/* Note: we assume the target supports move between 32 and 64 bit - registers. This will probably break MIPS64 targets. */ -static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +static inline void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, + unsigned int count) { - tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg))); + tcg_debug_assert(count < 64); + if (TCG_TARGET_HAS_trunc_shr_i32) { + tcg_gen_op3i_i32(INDEX_op_trunc_shr_i32, ret, + MAKE_TCGV_I32(GET_TCGV_I64(arg)), count); + } else if (count == 0) { + tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg))); + } else { + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_shri_i64(t, arg, count); + tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(t))); + tcg_temp_free_i64(t); + } } /* Note: we assume the target supports move between 32 and 64 bit @@ -2275,18 +2165,15 @@ static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low, tcg_gen_deposit_i64(dest, low, high, 32, 32); } +static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +{ + tcg_gen_trunc_shr_i64_i32(ret, arg, 0); +} + static inline void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg) { -#if TCG_TARGET_REG_BITS == 32 - tcg_gen_mov_i32(lo, TCGV_LOW(arg)); - tcg_gen_mov_i32(hi, TCGV_HIGH(arg)); -#else - TCGv_i64 t0 = tcg_temp_new_i64(); - tcg_gen_trunc_i64_i32(lo, arg); - tcg_gen_shri_i64(t0, arg, 32); - tcg_gen_trunc_i64_i32(hi, t0); - tcg_temp_free_i64(t0); -#endif + tcg_gen_trunc_shr_i64_i32(lo, arg, 0); + tcg_gen_trunc_shr_i64_i32(hi, arg, 32); } static inline void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg) @@ -2437,14 +2324,12 @@ static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2); tcg_gen_mov_i32(rl, t); tcg_temp_free_i32(t); - } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_mulu2_i32) { + } else if (TCG_TARGET_REG_BITS == 32) { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); TCGv_i32 t2 = tcg_temp_new_i32(); TCGv_i32 t3 = tcg_temp_new_i32(); - tcg_gen_op4_i32(INDEX_op_mulu2_i32, t0, t1, arg1, arg2); - /* Allow the optimizer room to replace mulu2 with two moves. */ - tcg_gen_op0(INDEX_op_nop); + tcg_gen_mulu2_i32(t0, t1, arg1, arg2); /* Adjust for negative inputs. 
*/ tcg_gen_sari_i32(t2, arg1, 31); tcg_gen_sari_i32(t3, arg2, 31); @@ -2522,35 +2407,10 @@ static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2); tcg_gen_mov_i64(rl, t); tcg_temp_free_i64(t); - } else if (TCG_TARGET_HAS_mulu2_i64) { - TCGv_i64 t0 = tcg_temp_new_i64(); - TCGv_i64 t1 = tcg_temp_new_i64(); - TCGv_i64 t2 = tcg_temp_new_i64(); - TCGv_i64 t3 = tcg_temp_new_i64(); - tcg_gen_op4_i64(INDEX_op_mulu2_i64, t0, t1, arg1, arg2); - /* Allow the optimizer room to replace mulu2 with two moves. */ - tcg_gen_op0(INDEX_op_nop); - /* Adjust for negative inputs. */ - tcg_gen_sari_i64(t2, arg1, 63); - tcg_gen_sari_i64(t3, arg2, 63); - tcg_gen_and_i64(t2, t2, arg2); - tcg_gen_and_i64(t3, t3, arg1); - tcg_gen_sub_i64(rh, t1, t2); - tcg_gen_sub_i64(rh, rh, t3); - tcg_gen_mov_i64(rl, t0); - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); - tcg_temp_free_i64(t2); - tcg_temp_free_i64(t3); } else { TCGv_i64 t0 = tcg_temp_new_i64(); - int sizemask = 0; - /* Return value and both arguments are 64-bit and unsigned. */ - sizemask |= tcg_gen_sizemask(0, 1, 0); - sizemask |= tcg_gen_sizemask(1, 1, 0); - sizemask |= tcg_gen_sizemask(2, 1, 0); tcg_gen_mul_i64(t0, arg1, arg2); - tcg_gen_helper64(tcg_helper_muluh_i64, sizemask, rh, arg1, arg2); + gen_helper_muluh_i64(rh, arg1, arg2); tcg_gen_mov_i64(rl, t0); tcg_temp_free_i64(t0); } @@ -2569,15 +2429,28 @@ static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2); tcg_gen_mov_i64(rl, t); tcg_temp_free_i64(t); + } else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + TCGv_i64 t3 = tcg_temp_new_i64(); + tcg_gen_mulu2_i64(t0, t1, arg1, arg2); + /* Adjust for negative inputs. */ + tcg_gen_sari_i64(t2, arg1, 63); + tcg_gen_sari_i64(t3, arg2, 63); + tcg_gen_and_i64(t2, t2, arg2); + tcg_gen_and_i64(t3, t3, arg1); + tcg_gen_sub_i64(rh, t1, t2); + tcg_gen_sub_i64(rh, rh, t3); + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + tcg_temp_free_i64(t3); } else { TCGv_i64 t0 = tcg_temp_new_i64(); - int sizemask = 0; - /* Return value and both arguments are 64-bit and signed. 
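Both the i32 and i64 expansions above derive the signed high word from an unsigned widening multiply and then subtract a correction for each negative input. The identity being used, written out and checked in plain C for the 32-bit case (illustrative only):

    #include <stdint.h>

    static int32_t muls2_high_from_mulu2(int32_t a, int32_t b)
    {
        uint32_t t1 = (uint32_t)(((uint64_t)(uint32_t)a * (uint32_t)b) >> 32);
        uint32_t t2 = a < 0 ? (uint32_t)b : 0;    /* sari(a, 31) & b */
        uint32_t t3 = b < 0 ? (uint32_t)a : 0;    /* sari(b, 31) & a */
        /* equals the high 32 bits of (int64_t)a * b */
        return (int32_t)(t1 - t2 - t3);
    }
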
*/ - sizemask |= tcg_gen_sizemask(0, 1, 1); - sizemask |= tcg_gen_sizemask(1, 1, 1); - sizemask |= tcg_gen_sizemask(2, 1, 1); tcg_gen_mul_i64(t0, arg1, arg2); - tcg_gen_helper64(tcg_helper_mulsh_i64, sizemask, rh, arg1, arg2); + gen_helper_mulsh_i64(rh, arg1, arg2); tcg_gen_mov_i64(rl, t0); tcg_temp_free_i64(t0); } @@ -2845,7 +2718,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i32 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64 -#define tcg_gen_extr_tl_i64 tcg_gen_extr_i32_i64 +#define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32 #define tcg_gen_andc_tl tcg_gen_andc_i32 #define tcg_gen_eqv_tl tcg_gen_eqv_i32 #define tcg_gen_nand_tl tcg_gen_nand_i32 @@ -2865,7 +2738,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_muls2_tl tcg_gen_muls2_i32 #endif -#if TCG_TARGET_REG_BITS == 32 +#if UINTPTR_MAX == UINT32_MAX # define tcg_gen_ld_ptr(R, A, O) \ tcg_gen_ld_i32(TCGV_PTR_TO_NAT(R), (A), (O)) # define tcg_gen_discard_ptr(A) \ @@ -2887,4 +2760,4 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) tcg_gen_addi_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), (B)) # define tcg_gen_ext_i32_ptr(R, A) \ tcg_gen_ext_i32_i64(TCGV_PTR_TO_NAT(R), (A)) -#endif /* TCG_TARGET_REG_BITS == 32 */ +#endif /* UINTPTR_MAX == UINT32_MAX */ diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index d71707d9b..042d442c7 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -40,7 +40,7 @@ DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT) DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) /* variable number of parameters */ -DEF(call, 0, 1, 2, TCG_OPF_CALL_CLOBBER) +DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT) DEF(br, 0, 0, 1, TCG_OPF_BB_END) @@ -51,8 +51,8 @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END) # define IMPL64 TCG_OPF_64BIT #endif -DEF(mov_i32, 1, 1, 0, 0) -DEF(movi_i32, 1, 0, 1, 0) +DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT) +DEF(movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(setcond_i32, 1, 2, 1, 0) DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32)) /* load/store */ @@ -110,8 +110,8 @@ DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32)) DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32)) DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32)) -DEF(mov_i64, 1, 1, 0, IMPL64) -DEF(movi_i64, 1, 0, 1, IMPL64) +DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) +DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) DEF(setcond_i64, 1, 2, 1, IMPL64) DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64)) /* load/store */ @@ -147,6 +147,10 @@ DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) +DEF(trunc_shr_i32, 1, 1, 1, + IMPL(TCG_TARGET_HAS_trunc_shr_i32) + | (TCG_TARGET_REG_BITS == 32 ? 
TCG_OPF_NOT_PRESENT : 0)) + DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64) DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64)) DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64)) @@ -181,106 +185,20 @@ DEF(debug_insn_start, 0, 0, 1, TCG_OPF_NOT_PRESENT) DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END) DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END) -#define IMPL_NEW_LDST \ - (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS \ - | IMPL(TCG_TARGET_HAS_new_ldst)) - -#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS -DEF(qemu_ld_i32, 1, 1, 2, IMPL_NEW_LDST) -DEF(qemu_st_i32, 0, 2, 2, IMPL_NEW_LDST) -# if TCG_TARGET_REG_BITS == 64 -DEF(qemu_ld_i64, 1, 1, 2, IMPL_NEW_LDST | TCG_OPF_64BIT) -DEF(qemu_st_i64, 0, 2, 2, IMPL_NEW_LDST | TCG_OPF_64BIT) -# else -DEF(qemu_ld_i64, 2, 1, 2, IMPL_NEW_LDST | TCG_OPF_64BIT) -DEF(qemu_st_i64, 0, 3, 2, IMPL_NEW_LDST | TCG_OPF_64BIT) -# endif -#else -DEF(qemu_ld_i32, 1, 2, 2, IMPL_NEW_LDST) -DEF(qemu_st_i32, 0, 3, 2, IMPL_NEW_LDST) -DEF(qemu_ld_i64, 2, 2, 2, IMPL_NEW_LDST | TCG_OPF_64BIT) -DEF(qemu_st_i64, 0, 4, 2, IMPL_NEW_LDST | TCG_OPF_64BIT) -#endif - -#undef IMPL_NEW_LDST - -#define IMPL_OLD_LDST \ - (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS \ - | IMPL(!TCG_TARGET_HAS_new_ldst)) - -#if TCG_TARGET_REG_BITS == 32 -#if TARGET_LONG_BITS == 32 -DEF(qemu_ld8u, 1, 1, 1, IMPL_OLD_LDST) -#else -DEF(qemu_ld8u, 1, 2, 1, IMPL_OLD_LDST) -#endif -#if TARGET_LONG_BITS == 32 -DEF(qemu_ld8s, 1, 1, 1, IMPL_OLD_LDST) -#else -DEF(qemu_ld8s, 1, 2, 1, IMPL_OLD_LDST) -#endif -#if TARGET_LONG_BITS == 32 -DEF(qemu_ld16u, 1, 1, 1, IMPL_OLD_LDST) -#else -DEF(qemu_ld16u, 1, 2, 1, IMPL_OLD_LDST) -#endif -#if TARGET_LONG_BITS == 32 -DEF(qemu_ld16s, 1, 1, 1, IMPL_OLD_LDST) -#else -DEF(qemu_ld16s, 1, 2, 1, IMPL_OLD_LDST) -#endif -#if TARGET_LONG_BITS == 32 -DEF(qemu_ld32, 1, 1, 1, IMPL_OLD_LDST) -#else -DEF(qemu_ld32, 1, 2, 1, IMPL_OLD_LDST) -#endif -#if TARGET_LONG_BITS == 32 -DEF(qemu_ld64, 2, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) -#else -DEF(qemu_ld64, 2, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) -#endif - -#if TARGET_LONG_BITS == 32 -DEF(qemu_st8, 0, 2, 1, IMPL_OLD_LDST) -#else -DEF(qemu_st8, 0, 3, 1, IMPL_OLD_LDST) -#endif -#if TARGET_LONG_BITS == 32 -DEF(qemu_st16, 0, 2, 1, IMPL_OLD_LDST) -#else -DEF(qemu_st16, 0, 3, 1, IMPL_OLD_LDST) -#endif -#if TARGET_LONG_BITS == 32 -DEF(qemu_st32, 0, 2, 1, IMPL_OLD_LDST) -#else -DEF(qemu_st32, 0, 3, 1, IMPL_OLD_LDST) -#endif -#if TARGET_LONG_BITS == 32 -DEF(qemu_st64, 0, 3, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) -#else -DEF(qemu_st64, 0, 4, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) -#endif - -#else /* TCG_TARGET_REG_BITS == 32 */ - -DEF(qemu_ld8u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) -DEF(qemu_ld8s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) -DEF(qemu_ld16u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) -DEF(qemu_ld16s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) -DEF(qemu_ld32, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) -DEF(qemu_ld32u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) -DEF(qemu_ld32s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) -DEF(qemu_ld64, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) - -DEF(qemu_st8, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) -DEF(qemu_st16, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) -DEF(qemu_st32, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) -DEF(qemu_st64, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT) - -#endif /* TCG_TARGET_REG_BITS != 32 */ +#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2) +#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 
1 : 2) -#undef IMPL_OLD_LDST +DEF(qemu_ld_i32, 1, TLADDR_ARGS, 2, + TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 2, + TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 2, + TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) +DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 2, + TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) +#undef TLADDR_ARGS +#undef DATA64_ARGS #undef IMPL #undef IMPL64 #undef DEF diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h index a1ebef9f9..23a0c3771 100644 --- a/tcg/tcg-runtime.h +++ b/tcg/tcg-runtime.h @@ -1,20 +1,16 @@ -#ifndef TCG_RUNTIME_H -#define TCG_RUNTIME_H +DEF_HELPER_FLAGS_2(div_i32, TCG_CALL_NO_RWG_SE, s32, s32, s32) +DEF_HELPER_FLAGS_2(rem_i32, TCG_CALL_NO_RWG_SE, s32, s32, s32) +DEF_HELPER_FLAGS_2(divu_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32) +DEF_HELPER_FLAGS_2(remu_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32) -/* tcg-runtime.c */ -int32_t tcg_helper_div_i32(int32_t arg1, int32_t arg2); -int32_t tcg_helper_rem_i32(int32_t arg1, int32_t arg2); -uint32_t tcg_helper_divu_i32(uint32_t arg1, uint32_t arg2); -uint32_t tcg_helper_remu_i32(uint32_t arg1, uint32_t arg2); +DEF_HELPER_FLAGS_2(div_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) +DEF_HELPER_FLAGS_2(rem_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) +DEF_HELPER_FLAGS_2(divu_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(remu_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) -int64_t tcg_helper_shl_i64(int64_t arg1, int64_t arg2); -int64_t tcg_helper_shr_i64(int64_t arg1, int64_t arg2); -int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2); -int64_t tcg_helper_div_i64(int64_t arg1, int64_t arg2); -int64_t tcg_helper_rem_i64(int64_t arg1, int64_t arg2); -int64_t tcg_helper_mulsh_i64(int64_t arg1, int64_t arg2); -uint64_t tcg_helper_divu_i64(uint64_t arg1, uint64_t arg2); -uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2); -uint64_t tcg_helper_muluh_i64(uint64_t arg1, uint64_t arg2); +DEF_HELPER_FLAGS_2(shl_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(shr_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(sar_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) -#endif +DEF_HELPER_FLAGS_2(mulsh_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) +DEF_HELPER_FLAGS_2(muluh_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) @@ -37,7 +37,6 @@ #endif #include "qemu-common.h" -#include "qemu/cache-utils.h" #include "qemu/host-utils.h" #include "qemu/timer.h" @@ -65,7 +64,7 @@ /* Forward declarations for functions declared in tcg-target.c and used here. */ static void tcg_target_init(TCGContext *s); static void tcg_target_qemu_prologue(TCGContext *s); -static void patch_reloc(uint8_t *code_ptr, int type, +static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend); /* The CIE and FDE header definitions will be common to all hosts. */ @@ -86,8 +85,14 @@ typedef struct QEMU_PACKED { uintptr_t func_len; } DebugFrameFDEHeader; +typedef struct QEMU_PACKED { + DebugFrameCIE cie; + DebugFrameFDEHeader fde; +} DebugFrameHeader; + static void tcg_register_jit_int(void *buf, size_t size, - void *debug_frame, size_t debug_frame_size) + const void *debug_frame, + size_t debug_frame_size) __attribute__((unused)); /* Forward declarations for functions declared and used in tcg-target.c. 
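The tcg-runtime.h hunk above replaces the hand-written prototypes with DEF_HELPER_FLAGS_2 records, so the generic helper machinery now produces both the prototypes and the gen_helper_* wrappers used throughout tcg-op.h. A sketch of what one of these runtime helpers presumably looks like on the C side; only the shape follows from the declaration, the body is assumed:

    #include <stdint.h>

    /* Shape implied by DEF_HELPER_FLAGS_2(div_i32, TCG_CALL_NO_RWG_SE, s32, s32, s32) */
    int32_t helper_div_i32(int32_t arg1, int32_t arg2);

    /* Assumed tcg-runtime.c implementation */
    int32_t helper_div_i32(int32_t arg1, int32_t arg2)
    {
        return arg1 / arg2;
    }
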
*/ @@ -101,7 +106,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args); static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, intptr_t arg2); -static int tcg_target_const_match(tcg_target_long val, +static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); +static int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct); static void tcg_out_tb_init(TCGContext *s); static void tcg_out_tb_finalize(TCGContext *s); @@ -117,35 +123,91 @@ const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs); static TCGRegSet tcg_target_available_regs[2]; static TCGRegSet tcg_target_call_clobber_regs; -static inline void tcg_out8(TCGContext *s, uint8_t v) +#if TCG_TARGET_INSN_UNIT_SIZE == 1 +static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) { *s->code_ptr++ = v; } -static inline void tcg_out16(TCGContext *s, uint16_t v) +static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, + uint8_t v) +{ + *p = v; +} +#endif + +#if TCG_TARGET_INSN_UNIT_SIZE <= 2 +static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) +{ + if (TCG_TARGET_INSN_UNIT_SIZE == 2) { + *s->code_ptr++ = v; + } else { + tcg_insn_unit *p = s->code_ptr; + memcpy(p, &v, sizeof(v)); + s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); + } +} + +static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, + uint16_t v) +{ + if (TCG_TARGET_INSN_UNIT_SIZE == 2) { + *p = v; + } else { + memcpy(p, &v, sizeof(v)); + } +} +#endif + +#if TCG_TARGET_INSN_UNIT_SIZE <= 4 +static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) +{ + if (TCG_TARGET_INSN_UNIT_SIZE == 4) { + *s->code_ptr++ = v; + } else { + tcg_insn_unit *p = s->code_ptr; + memcpy(p, &v, sizeof(v)); + s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); + } +} + +static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, + uint32_t v) { - uint8_t *p = s->code_ptr; - *(uint16_t *)p = v; - s->code_ptr = p + 2; + if (TCG_TARGET_INSN_UNIT_SIZE == 4) { + *p = v; + } else { + memcpy(p, &v, sizeof(v)); + } } +#endif -static inline void tcg_out32(TCGContext *s, uint32_t v) +#if TCG_TARGET_INSN_UNIT_SIZE <= 8 +static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) { - uint8_t *p = s->code_ptr; - *(uint32_t *)p = v; - s->code_ptr = p + 4; + if (TCG_TARGET_INSN_UNIT_SIZE == 8) { + *s->code_ptr++ = v; + } else { + tcg_insn_unit *p = s->code_ptr; + memcpy(p, &v, sizeof(v)); + s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); + } } -static inline void tcg_out64(TCGContext *s, uint64_t v) +static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, + uint64_t v) { - uint8_t *p = s->code_ptr; - *(uint64_t *)p = v; - s->code_ptr = p + 8; + if (TCG_TARGET_INSN_UNIT_SIZE == 8) { + *p = v; + } else { + memcpy(p, &v, sizeof(v)); + } } +#endif /* label relocation processing */ -static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type, +static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, int label_index, intptr_t addend) { TCGLabel *l; @@ -168,23 +230,20 @@ static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type, } } -static void tcg_out_label(TCGContext *s, int label_index, void *ptr) +static void tcg_out_label(TCGContext *s, int label_index, tcg_insn_unit *ptr) { - TCGLabel *l; - TCGRelocation *r; + TCGLabel *l = &s->labels[label_index]; intptr_t value = (intptr_t)ptr; + TCGRelocation *r; - l = 
&s->labels[label_index]; - if (l->has_value) { - tcg_abort(); - } - r = l->u.first_reloc; - while (r != NULL) { + assert(!l->has_value); + + for (r = l->u.first_reloc; r != NULL; r = r->next) { patch_reloc(r->ptr, r->type, value, r->addend); - r = r->next; } + l->has_value = 1; - l->u.value = value; + l->u.value_ptr = ptr; } int gen_new_label(void) @@ -257,32 +316,17 @@ void tcg_pool_reset(TCGContext *s) s->pool_current = NULL; } -#include "helper.h" - typedef struct TCGHelperInfo { void *func; const char *name; + unsigned flags; + unsigned sizemask; } TCGHelperInfo; +#include "exec/helper-proto.h" + static const TCGHelperInfo all_helpers[] = { -#define GEN_HELPER 2 -#include "helper.h" - - /* Include tcg-runtime.c functions. */ - { tcg_helper_div_i32, "div_i32" }, - { tcg_helper_rem_i32, "rem_i32" }, - { tcg_helper_divu_i32, "divu_i32" }, - { tcg_helper_remu_i32, "remu_i32" }, - - { tcg_helper_shl_i64, "shl_i64" }, - { tcg_helper_shr_i64, "shr_i64" }, - { tcg_helper_sar_i64, "sar_i64" }, - { tcg_helper_div_i64, "div_i64" }, - { tcg_helper_rem_i64, "rem_i64" }, - { tcg_helper_divu_i64, "divu_i64" }, - { tcg_helper_remu_i64, "remu_i64" }, - { tcg_helper_mulsh_i64, "mulsh_i64" }, - { tcg_helper_muluh_i64, "muluh_i64" }, +#include "exec/helper-tcg.h" }; void tcg_context_init(TCGContext *s) @@ -323,7 +367,7 @@ void tcg_context_init(TCGContext *s) for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, - (gpointer)all_helpers[i].name); + (gpointer)&all_helpers[i]); } tcg_target_init(s); @@ -339,7 +383,7 @@ void tcg_prologue_init(TCGContext *s) #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { - size_t size = s->code_ptr - s->code_buf; + size_t size = tcg_current_code_size(s); qemu_log("PROLOGUE: [size=%zu]\n", size); log_disas(s->code_buf, size); qemu_log("\n"); @@ -444,7 +488,7 @@ static inline int tcg_global_mem_new_internal(TCGType type, int reg, ts->fixed_reg = 0; ts->mem_allocated = 1; ts->mem_reg = reg; -#ifdef TCG_TARGET_WORDS_BIGENDIAN +#ifdef HOST_WORDS_BIGENDIAN ts->mem_offset = offset + 4; #else ts->mem_offset = offset; @@ -459,7 +503,7 @@ static inline int tcg_global_mem_new_internal(TCGType type, int reg, ts->fixed_reg = 0; ts->mem_allocated = 1; ts->mem_reg = reg; -#ifdef TCG_TARGET_WORDS_BIGENDIAN +#ifdef HOST_WORDS_BIGENDIAN ts->mem_offset = offset; #else ts->mem_offset = offset + 4; @@ -656,15 +700,48 @@ int tcg_check_temp_count(void) /* Note: we convert the 64 bit args to 32 bit and do some alignment and endian swap. Maybe it would be better to do the alignment and endian swap in tcg_reg_alloc_call(). */ -void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, - int sizemask, TCGArg ret, int nargs, TCGArg *args) +void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, + int nargs, TCGArg *args) { - int i; - int real_args; - int nb_rets; + int i, real_args, nb_rets; + unsigned sizemask, flags; TCGArg *nparam; - -#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 + TCGHelperInfo *info; + + info = g_hash_table_lookup(s->helpers, (gpointer)func); + flags = info->flags; + sizemask = info->sizemask; + +#if defined(__sparc__) && !defined(__arch64__) \ + && !defined(CONFIG_TCG_INTERPRETER) + /* We have 64-bit values in one register, but need to pass as two + separate parameters. Split them. 
*/ + int orig_sizemask = sizemask; + int orig_nargs = nargs; + TCGv_i64 retl, reth; + + TCGV_UNUSED_I64(retl); + TCGV_UNUSED_I64(reth); + if (sizemask != 0) { + TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2); + for (i = real_args = 0; i < nargs; ++i) { + int is_64bit = sizemask & (1 << (i+1)*2); + if (is_64bit) { + TCGv_i64 orig = MAKE_TCGV_I64(args[i]); + TCGv_i32 h = tcg_temp_new_i32(); + TCGv_i32 l = tcg_temp_new_i32(); + tcg_gen_extr_i64_i32(l, h, orig); + split_args[real_args++] = GET_TCGV_I32(h); + split_args[real_args++] = GET_TCGV_I32(l); + } else { + split_args[real_args++] = args[i]; + } + } + nargs = real_args; + args = split_args; + sizemask = 0; + } +#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 for (i = 0; i < nargs; ++i) { int is_64bit = sizemask & (1 << (i+1)*2); int is_signed = sizemask & (2 << (i+1)*2); @@ -684,9 +761,24 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, *s->gen_opc_ptr++ = INDEX_op_call; nparam = s->gen_opparam_ptr++; if (ret != TCG_CALL_DUMMY_ARG) { -#if TCG_TARGET_REG_BITS < 64 - if (sizemask & 1) { -#ifdef TCG_TARGET_WORDS_BIGENDIAN +#if defined(__sparc__) && !defined(__arch64__) \ + && !defined(CONFIG_TCG_INTERPRETER) + if (orig_sizemask & 1) { + /* The 32-bit ABI is going to return the 64-bit value in + the %o0/%o1 register pair. Prepare for this by using + two return temporaries, and reassemble below. */ + retl = tcg_temp_new_i64(); + reth = tcg_temp_new_i64(); + *s->gen_opparam_ptr++ = GET_TCGV_I64(reth); + *s->gen_opparam_ptr++ = GET_TCGV_I64(retl); + nb_rets = 2; + } else { + *s->gen_opparam_ptr++ = ret; + nb_rets = 1; + } +#else + if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { +#ifdef HOST_WORDS_BIGENDIAN *s->gen_opparam_ptr++ = ret + 1; *s->gen_opparam_ptr++ = ret; #else @@ -694,20 +786,18 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, *s->gen_opparam_ptr++ = ret + 1; #endif nb_rets = 2; - } else -#endif - { + } else { *s->gen_opparam_ptr++ = ret; nb_rets = 1; } +#endif } else { nb_rets = 0; } real_args = 0; for (i = 0; i < nargs; i++) { -#if TCG_TARGET_REG_BITS < 64 int is_64bit = sizemask & (1 << (i+1)*2); - if (is_64bit) { + if (TCG_TARGET_REG_BITS < 64 && is_64bit) { #ifdef TCG_TARGET_CALL_ALIGN_ARGS /* some targets want aligned 64 bit args */ if (real_args & 1) { @@ -725,7 +815,7 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, order. If another such target is added, this logic may have to get more complicated to differentiate between stack arguments and register arguments. */ -#if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) +#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) *s->gen_opparam_ptr++ = args[i] + 1; *s->gen_opparam_ptr++ = args[i]; #else @@ -735,21 +825,41 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, real_args += 2; continue; } -#endif /* TCG_TARGET_REG_BITS < 64 */ *s->gen_opparam_ptr++ = args[i]; real_args++; } - *s->gen_opparam_ptr++ = GET_TCGV_PTR(func); - + *s->gen_opparam_ptr++ = (uintptr_t)func; *s->gen_opparam_ptr++ = flags; - *nparam = (nb_rets << 16) | (real_args + 1); + *nparam = (nb_rets << 16) | real_args; /* total parameters, needed to go backward in the instruction stream */ *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3; -#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 +#if defined(__sparc__) && !defined(__arch64__) \ + && !defined(CONFIG_TCG_INTERPRETER) + /* Free all of the parts we allocated above. 
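For 32-bit SPARC the code above splits each 64-bit argument into two 32-bit temporaries before emitting the call, and the cleanup below reassembles the %o0/%o1 return pair with tcg_gen_concat32_i64. The plain-C equivalent of the two directions, as a sketch (function names are invented):

    #include <stdint.h>

    static void split_i64(uint64_t v, uint32_t *hi, uint32_t *lo)
    {
        *hi = (uint32_t)(v >> 32);   /* the 'h' half from tcg_gen_extr_i64_i32 */
        *lo = (uint32_t)v;           /* the 'l' half */
    }

    static uint64_t concat_i64(uint32_t lo, uint32_t hi)
    {
        return ((uint64_t)hi << 32) | lo;   /* what tcg_gen_concat32_i64 rebuilds */
    }
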
*/ + for (i = real_args = 0; i < orig_nargs; ++i) { + int is_64bit = orig_sizemask & (1 << (i+1)*2); + if (is_64bit) { + TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]); + TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]); + tcg_temp_free_i32(h); + tcg_temp_free_i32(l); + } else { + real_args++; + } + } + if (orig_sizemask & 1) { + /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. + Note that describing these as TCGv_i64 eliminates an unnecessary + zero-extension that tcg_gen_concat_i32_i64 would create. */ + tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth); + tcg_temp_free_i64(retl); + tcg_temp_free_i64(reth); + } +#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 for (i = 0; i < nargs; ++i) { int is_64bit = sizemask & (1 << (i+1)*2); if (!is_64bit) { @@ -834,97 +944,26 @@ static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st) return op; } -static const TCGOpcode old_ld_opc[8] = { - [MO_UB] = INDEX_op_qemu_ld8u, - [MO_SB] = INDEX_op_qemu_ld8s, - [MO_UW] = INDEX_op_qemu_ld16u, - [MO_SW] = INDEX_op_qemu_ld16s, -#if TCG_TARGET_REG_BITS == 32 - [MO_UL] = INDEX_op_qemu_ld32, - [MO_SL] = INDEX_op_qemu_ld32, -#else - [MO_UL] = INDEX_op_qemu_ld32u, - [MO_SL] = INDEX_op_qemu_ld32s, -#endif - [MO_Q] = INDEX_op_qemu_ld64, -}; - -static const TCGOpcode old_st_opc[4] = { - [MO_UB] = INDEX_op_qemu_st8, - [MO_UW] = INDEX_op_qemu_st16, - [MO_UL] = INDEX_op_qemu_st32, - [MO_Q] = INDEX_op_qemu_st64, -}; - void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop) { memop = tcg_canonicalize_memop(memop, 0, 0); - if (TCG_TARGET_HAS_new_ldst) { - *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32; - tcg_add_param_i32(val); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = memop; - *tcg_ctx.gen_opparam_ptr++ = idx; - return; - } - - /* The old opcodes only support target-endian memory operations. */ - assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8); - assert(old_ld_opc[memop & MO_SSIZE] != 0); - - if (TCG_TARGET_REG_BITS == 32) { - *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE]; - tcg_add_param_i32(val); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = idx; - } else { - TCGv_i64 val64 = tcg_temp_new_i64(); - - *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE]; - tcg_add_param_i64(val64); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = idx; - - tcg_gen_trunc_i64_i32(val, val64); - tcg_temp_free_i64(val64); - } + *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32; + tcg_add_param_i32(val); + tcg_add_param_tl(addr); + *tcg_ctx.gen_opparam_ptr++ = memop; + *tcg_ctx.gen_opparam_ptr++ = idx; } void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop) { memop = tcg_canonicalize_memop(memop, 0, 1); - if (TCG_TARGET_HAS_new_ldst) { - *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32; - tcg_add_param_i32(val); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = memop; - *tcg_ctx.gen_opparam_ptr++ = idx; - return; - } - - /* The old opcodes only support target-endian memory operations. 
*/ - assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8); - assert(old_st_opc[memop & MO_SIZE] != 0); - - if (TCG_TARGET_REG_BITS == 32) { - *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE]; - tcg_add_param_i32(val); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = idx; - } else { - TCGv_i64 val64 = tcg_temp_new_i64(); - - tcg_gen_extu_i32_i64(val64, val); - - *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE]; - tcg_add_param_i64(val64); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = idx; - - tcg_temp_free_i64(val64); - } + *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32; + tcg_add_param_i32(val); + tcg_add_param_tl(addr); + *tcg_ctx.gen_opparam_ptr++ = memop; + *tcg_ctx.gen_opparam_ptr++ = idx; } void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) @@ -943,22 +982,10 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) } #endif - if (TCG_TARGET_HAS_new_ldst) { - *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64; - tcg_add_param_i64(val); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = memop; - *tcg_ctx.gen_opparam_ptr++ = idx; - return; - } - - /* The old opcodes only support target-endian memory operations. */ - assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8); - assert(old_ld_opc[memop & MO_SSIZE] != 0); - - *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE]; + *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64; tcg_add_param_i64(val); tcg_add_param_tl(addr); + *tcg_ctx.gen_opparam_ptr++ = memop; *tcg_ctx.gen_opparam_ptr++ = idx; } @@ -973,22 +1000,10 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) } #endif - if (TCG_TARGET_HAS_new_ldst) { - *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64; - tcg_add_param_i64(val); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = memop; - *tcg_ctx.gen_opparam_ptr++ = idx; - return; - } - - /* The old opcodes only support target-endian memory operations. 
*/ - assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8); - assert(old_st_opc[memop & MO_SIZE] != 0); - - *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE]; + *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64; tcg_add_param_i64(val); tcg_add_param_tl(addr); + *tcg_ctx.gen_opparam_ptr++ = memop; *tcg_ctx.gen_opparam_ptr++ = idx; } @@ -1052,7 +1067,10 @@ static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) { const char *ret = NULL; if (s->helpers) { - ret = g_hash_table_lookup(s->helpers, (gpointer)val); + TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val); + if (info) { + ret = info->name; + } } return ret; } @@ -1129,49 +1147,21 @@ void tcg_dump_ops(TCGContext *s) nb_iargs = arg & 0xffff; nb_cargs = def->nb_cargs; - qemu_log(" %s ", def->name); - - /* function name */ - qemu_log("%s", - tcg_get_arg_str_idx(s, buf, sizeof(buf), - args[nb_oargs + nb_iargs - 1])); - /* flags */ - qemu_log(",$0x%" TCG_PRIlx, args[nb_oargs + nb_iargs]); - /* nb out args */ - qemu_log(",$%d", nb_oargs); - for(i = 0; i < nb_oargs; i++) { - qemu_log(","); - qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), + /* function name, flags, out args */ + qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, + tcg_find_helper(s, args[nb_oargs + nb_iargs]), + args[nb_oargs + nb_iargs + 1], nb_oargs); + for (i = 0; i < nb_oargs; i++) { + qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), args[i])); } - for(i = 0; i < (nb_iargs - 1); i++) { - qemu_log(","); - if (args[nb_oargs + i] == TCG_CALL_DUMMY_ARG) { - qemu_log("<dummy>"); - } else { - qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), - args[nb_oargs + i])); - } - } - } else if (c == INDEX_op_movi_i32 || c == INDEX_op_movi_i64) { - tcg_target_ulong val; - const char *name; - - nb_oargs = def->nb_oargs; - nb_iargs = def->nb_iargs; - nb_cargs = def->nb_cargs; - qemu_log(" %s %s,$", def->name, - tcg_get_arg_str_idx(s, buf, sizeof(buf), args[0])); - val = args[1]; - name = tcg_find_helper(s, val); - if (name) { - qemu_log("%s", name); - } else { - if (c == INDEX_op_movi_i32) { - qemu_log("0x%x", (uint32_t)val); - } else { - qemu_log("0x%" PRIx64 , (uint64_t)val); + for (i = 0; i < nb_iargs; i++) { + TCGArg arg = args[nb_oargs + i]; + const char *t = "<dummy>"; + if (arg != TCG_CALL_DUMMY_ARG) { + t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg); } + qemu_log(",%s", t); } } else { qemu_log(" %s ", def->name); @@ -1434,9 +1424,9 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, temporaries are removed. 
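The call-dumping code above and the liveness pass that follows both walk the reworked call parameter block, where the helper address and the flags now trail the input temporaries as constant arguments. A hypothetical layout for a call with one 32-bit result and two register inputs, following the emission order in tcg_gen_callN (example values only):

    /* opparam words emitted for one INDEX_op_call */
    opparam[0] = (1 << 16) | 2;     /* nb_rets << 16 | real_args                */
    opparam[1] = ret;               /* output temporary                         */
    opparam[2] = arg0;              /* input temporaries                        */
    opparam[3] = arg1;
    opparam[4] = (uintptr_t)func;   /* helper address, looked up when dumping   */
    opparam[5] = flags;             /* TCG_CALL_* flags, read by liveness       */
    opparam[6] = 7;                 /* 1 + nb_rets + real_args + 3: total words */
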
*/ static void tcg_liveness_analysis(TCGContext *s) { - int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops; + int i, op_index, nb_args, nb_iargs, nb_oargs, nb_ops; TCGOpcode op, op_new, op_new2; - TCGArg *args; + TCGArg *args, arg; const TCGOpDef *def; uint8_t *dead_temps, *mem_temps; uint16_t dead_args; @@ -1466,15 +1456,15 @@ static void tcg_liveness_analysis(TCGContext *s) nb_args = args[-1]; args -= nb_args; - nb_iargs = args[0] & 0xffff; - nb_oargs = args[0] >> 16; - args++; - call_flags = args[nb_oargs + nb_iargs]; + arg = *args++; + nb_iargs = arg & 0xffff; + nb_oargs = arg >> 16; + call_flags = args[nb_oargs + nb_iargs + 1]; /* pure functions can be removed if their result is not used */ if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { - for(i = 0; i < nb_oargs; i++) { + for (i = 0; i < nb_oargs; i++) { arg = args[i]; if (!dead_temps[arg] || mem_temps[arg]) { goto do_not_remove_call; @@ -1488,7 +1478,7 @@ static void tcg_liveness_analysis(TCGContext *s) /* output args are dead */ dead_args = 0; sync_args = 0; - for(i = 0; i < nb_oargs; i++) { + for (i = 0; i < nb_oargs; i++) { arg = args[i]; if (dead_temps[arg]) { dead_args |= (1 << i); @@ -1511,7 +1501,7 @@ static void tcg_liveness_analysis(TCGContext *s) } /* input args are live */ - for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) { + for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { arg = args[i]; if (arg != TCG_CALL_DUMMY_ARG) { if (dead_temps[arg]) { @@ -2010,13 +2000,15 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, { TCGRegSet allocated_regs; TCGTemp *ts, *ots; - const TCGArgConstraint *arg_ct, *oarg_ct; + TCGType otype, itype; tcg_regset_set(allocated_regs, s->reserved_regs); ots = &s->temps[args[0]]; ts = &s->temps[args[1]]; - oarg_ct = &def->args_ct[0]; - arg_ct = &def->args_ct[1]; + + /* Note that otype != itype for no-op truncation. */ + otype = ots->type; + itype = ts->type; /* If the source value is not in a register, and we're going to be forced to have it in a register in order to perform the copy, @@ -2024,12 +2016,13 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, we don't have to reload SOURCE the next time it is used. */ if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG) || ts->val_type == TEMP_VAL_MEM) { - ts->reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[itype], + allocated_regs); if (ts->val_type == TEMP_VAL_MEM) { - tcg_out_ld(s, ts->type, ts->reg, ts->mem_reg, ts->mem_offset); + tcg_out_ld(s, itype, ts->reg, ts->mem_reg, ts->mem_offset); ts->mem_coherent = 1; } else if (ts->val_type == TEMP_VAL_CONST) { - tcg_out_movi(s, ts->type, ts->reg, ts->val); + tcg_out_movi(s, itype, ts->reg, ts->val); } s->reg_to_temp[ts->reg] = args[1]; ts->val_type = TEMP_VAL_REG; @@ -2044,7 +2037,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, if (!ots->mem_allocated) { temp_allocate_frame(s, args[0]); } - tcg_out_st(s, ots->type, ts->reg, ots->mem_reg, ots->mem_offset); + tcg_out_st(s, otype, ts->reg, ots->mem_reg, ots->mem_offset); if (IS_DEAD_ARG(1)) { temp_dead(s, args[1]); } @@ -2072,9 +2065,10 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, /* When allocating a new register, make sure to not spill the input one. 
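In the call handling above, the first argument word packs both counts: the output-argument count in the high 16 bits and the input-argument count in the low 16 bits, exactly as the `arg >> 16` / `arg & 0xffff` unpacking shows. A tiny standalone check of that packing (demo_ names only, not the real op layout beyond those two fields):

    #include <stdint.h>
    #include <assert.h>

    typedef uintptr_t DemoArg;

    /* Pack the counts the way the call op stores them in its first word. */
    static DemoArg demo_pack_counts(unsigned nb_oargs, unsigned nb_iargs)
    {
        return ((DemoArg)nb_oargs << 16) | nb_iargs;
    }

    int main(void)
    {
        DemoArg arg = demo_pack_counts(1, 3);

        assert((arg >> 16) == 1);       /* nb_oargs, as unpacked above */
        assert((arg & 0xffff) == 3);    /* nb_iargs */
        return 0;
    }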
*/ tcg_regset_set_reg(allocated_regs, ts->reg); - ots->reg = tcg_reg_alloc(s, oarg_ct->u.regs, allocated_regs); + ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], + allocated_regs); } - tcg_out_mov(s, ots->type, ots->reg, ts->reg); + tcg_out_mov(s, otype, ots->reg, ts->reg); } ots->val_type = TEMP_VAL_REG; ots->mem_coherent = 0; @@ -2121,7 +2115,7 @@ static void tcg_reg_alloc_op(TCGContext *s, ts->mem_coherent = 1; s->reg_to_temp[reg] = arg; } else if (ts->val_type == TEMP_VAL_CONST) { - if (tcg_target_const_match(ts->val, arg_ct)) { + if (tcg_target_const_match(ts->val, ts->type, arg_ct)) { /* constant is OK for instruction */ const_args[i] = 1; new_args[i] = ts->val; @@ -2258,26 +2252,27 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, uint16_t dead_args, uint8_t sync_args) { int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params; - TCGArg arg, func_arg; + TCGArg arg; TCGTemp *ts; intptr_t stack_offset; size_t call_stack_size; - uintptr_t func_addr; - int const_func_arg, allocate_args; + tcg_insn_unit *func_addr; + int allocate_args; TCGRegSet allocated_regs; - const TCGArgConstraint *arg_ct; arg = *args++; nb_oargs = arg >> 16; nb_iargs = arg & 0xffff; - nb_params = nb_iargs - 1; + nb_params = nb_iargs; - flags = args[nb_oargs + nb_iargs]; + func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs]; + flags = args[nb_oargs + nb_iargs + 1]; nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); - if (nb_regs > nb_params) + if (nb_regs > nb_params) { nb_regs = nb_params; + } /* assign stack slots first */ call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long); @@ -2345,40 +2340,6 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, } } - /* assign function address */ - func_arg = args[nb_oargs + nb_iargs - 1]; - arg_ct = &def->args_ct[0]; - ts = &s->temps[func_arg]; - func_addr = ts->val; - const_func_arg = 0; - if (ts->val_type == TEMP_VAL_MEM) { - reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); - tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); - func_arg = reg; - tcg_regset_set_reg(allocated_regs, reg); - } else if (ts->val_type == TEMP_VAL_REG) { - reg = ts->reg; - if (!tcg_regset_test_reg(arg_ct->u.regs, reg)) { - reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); - tcg_out_mov(s, ts->type, reg, ts->reg); - } - func_arg = reg; - tcg_regset_set_reg(allocated_regs, reg); - } else if (ts->val_type == TEMP_VAL_CONST) { - if (tcg_target_const_match(func_addr, arg_ct)) { - const_func_arg = 1; - func_arg = func_addr; - } else { - reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); - tcg_out_movi(s, ts->type, reg, func_addr); - func_arg = reg; - tcg_regset_set_reg(allocated_regs, reg); - } - } else { - tcg_abort(); - } - - /* mark dead temporaries and free the associated registers */ for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) { if (IS_DEAD_ARG(i)) { @@ -2403,7 +2364,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, save_globals(s, allocated_regs); } - tcg_out_op(s, opc, &func_arg, &const_func_arg); + tcg_out_call(s, func_addr); /* assign output registers and emit moves if needed */ for(i = 0; i < nb_oargs; i++) { @@ -2411,6 +2372,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, ts = &s->temps[arg]; reg = tcg_target_call_oarg_regs[i]; assert(s->reg_to_temp[reg] == -1); + if (ts->fixed_reg) { if (ts->reg != reg) { tcg_out_mov(s, ts->type, ts->reg, reg); @@ -2452,7 +2414,8 @@ static void dump_op_count(void) #endif -static inline int 
tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, +static inline int tcg_gen_code_common(TCGContext *s, + tcg_insn_unit *gen_code_buf, long search_pc) { TCGOpcode opc; @@ -2567,7 +2530,7 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, } args += def->nb_args; next: - if (search_pc >= 0 && search_pc < s->code_ptr - gen_code_buf) { + if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) { return op_index; } op_index++; @@ -2581,7 +2544,7 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, return -1; } -int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf) +int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf) { #ifdef CONFIG_PROFILER { @@ -2600,16 +2563,17 @@ int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf) tcg_gen_code_common(s, gen_code_buf, -1); /* flush instruction cache */ - flush_icache_range((uintptr_t)gen_code_buf, (uintptr_t)s->code_ptr); + flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); - return s->code_ptr - gen_code_buf; + return tcg_current_code_size(s); } /* Return the index of the micro operation such as the pc after is < offset bytes from the start of the TB. The contents of gen_code_buf must not be changed, though writing the same values is ok. Return -1 if not found. */ -int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, long offset) +int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf, + long offset) { return tcg_gen_code_common(s, gen_code_buf, offset); } @@ -2727,7 +2691,8 @@ static int find_string(const char *strtab, const char *str) } static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, - void *debug_frame, size_t debug_frame_size) + const void *debug_frame, + size_t debug_frame_size) { struct __attribute__((packed)) DebugInfo { uint32_t len; @@ -2865,10 +2830,10 @@ static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, uintptr_t buf = (uintptr_t)buf_ptr; size_t img_size = sizeof(struct ElfImage) + debug_frame_size; + DebugFrameHeader *dfh; img = g_malloc(img_size); *img = img_template; - memcpy(img + 1, debug_frame, debug_frame_size); img->phdr.p_vaddr = buf; img->phdr.p_paddr = buf; @@ -2896,6 +2861,11 @@ static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, img->di.fn_low_pc = buf; img->di.fn_high_pc = buf + buf_size; + dfh = (DebugFrameHeader *)(img + 1); + memcpy(dfh, debug_frame, debug_frame_size); + dfh->fde.func_start = buf; + dfh->fde.func_len = buf_size; + #ifdef DEBUG_JIT /* Enable this block to be able to debug the ELF image file creation. One can use readelf, objdump, or other inspection utilities. */ @@ -2923,7 +2893,8 @@ static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, and implement the internal function we declared earlier. */ static void tcg_register_jit_int(void *buf, size_t size, - void *debug_frame, size_t debug_frame_size) + const void *debug_frame, + size_t debug_frame_size) { } @@ -54,8 +54,6 @@ typedef uint64_t tcg_target_ulong; #error unsupported #endif -#include "tcg-runtime.h" - #if TCG_TARGET_NB_REGS <= 32 typedef uint32_t TCGRegSet; #elif TCG_TARGET_NB_REGS <= 64 @@ -66,6 +64,7 @@ typedef uint64_t TCGRegSet; #if TCG_TARGET_REG_BITS == 32 /* Turn some undef macros into false macros. */ +#define TCG_TARGET_HAS_trunc_shr_i32 0 #define TCG_TARGET_HAS_div_i64 0 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_div2_i64 0 @@ -97,7 +96,6 @@ typedef uint64_t TCGRegSet; /* Turn some undef macros into true macros. 
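tcg_register_jit_int, in the hunks above, now copies the target-supplied debug frame behind the ELF image and then fills in the FDE's code range for this particular buffer. A hedged sketch of that copy-then-patch step, using an illustrative struct rather than the real DebugFrameHeader layout:

    #include <stdint.h>
    #include <string.h>

    typedef struct {
        uintptr_t func_start;
        uintptr_t func_len;
    } DemoFDE;

    typedef struct {
        uint32_t cie_len;   /* stand-in for the CIE part of the template */
        DemoFDE fde;
    } DemoDebugFrame;

    /* Copy the target-provided template, then fill in the code range. */
    static void demo_patch_frame(DemoDebugFrame *dfh, const void *template_frame,
                                 size_t size, uintptr_t buf, size_t buf_size)
    {
        memcpy(dfh, template_frame, size);
        dfh->fde.func_start = buf;
        dfh->fde.func_len = buf_size;
    }

    int main(void)
    {
        DemoDebugFrame template_frame = { 16, { 0, 0 } };
        DemoDebugFrame out;

        demo_patch_frame(&out, &template_frame, sizeof(template_frame),
                         0x400000, 128);
        return out.fde.func_len == 128 ? 0 : 1;
    }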
*/ #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 1 #endif #ifndef TCG_TARGET_deposit_i32_valid @@ -121,6 +119,13 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_rem_i64 0 #endif +/* For 32-bit targets, some sort of unsigned widening multiply is required. */ +#if TCG_TARGET_REG_BITS == 32 \ + && !(defined(TCG_TARGET_HAS_mulu2_i32) \ + || defined(TCG_TARGET_HAS_muluh_i32)) +# error "Missing unsigned widening multiply" +#endif + typedef enum TCGOpcode { #define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name, #include "tcg-opc.h" @@ -139,10 +144,25 @@ typedef enum TCGOpcode { #define tcg_regset_andnot(d, a, b) (d) = (a) & ~(b) #define tcg_regset_not(d, a) (d) = ~(a) +#ifndef TCG_TARGET_INSN_UNIT_SIZE +# error "Missing TCG_TARGET_INSN_UNIT_SIZE" +#elif TCG_TARGET_INSN_UNIT_SIZE == 1 +typedef uint8_t tcg_insn_unit; +#elif TCG_TARGET_INSN_UNIT_SIZE == 2 +typedef uint16_t tcg_insn_unit; +#elif TCG_TARGET_INSN_UNIT_SIZE == 4 +typedef uint32_t tcg_insn_unit; +#elif TCG_TARGET_INSN_UNIT_SIZE == 8 +typedef uint64_t tcg_insn_unit; +#else +/* The port better have done this. */ +#endif + + typedef struct TCGRelocation { struct TCGRelocation *next; int type; - uint8_t *ptr; + tcg_insn_unit *ptr; intptr_t addend; } TCGRelocation; @@ -150,6 +170,7 @@ typedef struct TCGLabel { int has_value; union { uintptr_t value; + tcg_insn_unit *value_ptr; TCGRelocation *first_reloc; } u; } TCGLabel; @@ -457,7 +478,7 @@ struct TCGContext { int nb_temps; /* goto_tb support */ - uint8_t *code_buf; + tcg_insn_unit *code_buf; uintptr_t *tb_next; uint16_t *tb_next_offset; uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */ @@ -478,7 +499,7 @@ struct TCGContext { intptr_t frame_end; int frame_reg; - uint8_t *code_ptr; + tcg_insn_unit *code_ptr; TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ TCGTempSet free_temps[TCG_TYPE_COUNT * 2]; @@ -517,14 +538,17 @@ struct TCGContext { uint16_t gen_opc_icount[OPC_BUF_SIZE]; uint8_t gen_opc_instr_start[OPC_BUF_SIZE]; - /* Code generation */ + /* Code generation. Note that we specifically do not use tcg_insn_unit + here, because there's too much arithmetic throughout that relies + on addition and subtraction working on bytes. Rely on the GCC + extension that allows arithmetic on void*. 
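The tcg_insn_unit typedef selected by TCG_TARGET_INSN_UNIT_SIZE makes backend code_ptr arithmetic count whole instruction units, while (as the comment above explains) the generic code_gen buffers stay void* so byte arithmetic keeps working. A standalone sketch, assuming a 4-byte unit as on fixed-width hosts (DEMO_* names are illustrative):

    #include <stdint.h>
    #include <assert.h>

    #define DEMO_INSN_UNIT_SIZE 4

    #if DEMO_INSN_UNIT_SIZE == 4
    typedef uint32_t demo_insn_unit;
    #endif

    int main(void)
    {
        demo_insn_unit buf[4];
        demo_insn_unit *p = buf;

        *p++ = 0xd503201f;   /* writing a unit advances one instruction */
        *p++ = 0xd503201f;

        assert(p - buf == 2);                   /* difference in units */
        assert((char *)p - (char *)buf == 8);   /* difference in bytes */
        return 0;
    }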
*/ int code_gen_max_blocks; - uint8_t *code_gen_prologue; - uint8_t *code_gen_buffer; + void *code_gen_prologue; + void *code_gen_buffer; size_t code_gen_buffer_size; /* threshold to flush the translated code buffer */ size_t code_gen_buffer_max_size; - uint8_t *code_gen_ptr; + void *code_gen_ptr; TBContext tb_ctx; @@ -559,8 +583,9 @@ void tcg_context_init(TCGContext *s); void tcg_prologue_init(TCGContext *s); void tcg_func_start(TCGContext *s); -int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf); -int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, long offset); +int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf); +int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf, + long offset); void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size); @@ -698,8 +723,8 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs); #define tcg_temp_free_ptr(T) tcg_temp_free_i64(TCGV_PTR_TO_NAT(T)) #endif -void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, - int sizemask, TCGArg ret, int nargs, TCGArg *args); +void tcg_gen_callN(TCGContext *s, void *func, + TCGArg ret, int nargs, TCGArg *args); void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, int c, int right, int arith); @@ -717,6 +742,51 @@ TCGv_i32 tcg_const_local_i32(int32_t val); TCGv_i64 tcg_const_local_i64(int64_t val); /** + * tcg_ptr_byte_diff + * @a, @b: addresses to be differenced + * + * There are many places within the TCG backends where we need a byte + * difference between two pointers. While this can be accomplished + * with local casting, it's easy to get wrong -- especially if one is + * concerned with the signedness of the result. + * + * This version relies on GCC's void pointer arithmetic to get the + * correct result. + */ + +static inline ptrdiff_t tcg_ptr_byte_diff(void *a, void *b) +{ + return a - b; +} + +/** + * tcg_pcrel_diff + * @s: the tcg context + * @target: address of the target + * + * Produce a pc-relative difference, from the current code_ptr + * to the destination address. + */ + +static inline ptrdiff_t tcg_pcrel_diff(TCGContext *s, void *target) +{ + return tcg_ptr_byte_diff(target, s->code_ptr); +} + +/** + * tcg_current_code_size + * @s: the tcg context + * + * Compute the current code size within the translation block. + * This is used to fill in qemu's data structures for goto_tb. 
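tcg_ptr_byte_diff, added above, leans on the GCC extension that allows arithmetic on void* and keeps the result signed, which is exactly what its doc comment warns is easy to get wrong with local casts. A small self-check mirroring that helper (the demo_ name is illustrative):

    #include <stddef.h>
    #include <assert.h>

    /* Mirrors tcg_ptr_byte_diff: relies on GCC treating sizeof(void) as 1
       for pointer arithmetic, and keeps the sign of the difference. */
    static inline ptrdiff_t demo_ptr_byte_diff(void *a, void *b)
    {
        return a - b;
    }

    int main(void)
    {
        char buf[16];

        assert(demo_ptr_byte_diff(buf + 12, buf) == 12);
        assert(demo_ptr_byte_diff(buf, buf + 12) == -12);  /* signed */
        return 0;
    }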
+ */ + +static inline size_t tcg_current_code_size(TCGContext *s) +{ + return tcg_ptr_byte_diff(s->code_ptr, s->code_buf); +} + +/** * tcg_qemu_tb_exec: * @env: CPUArchState * for the CPU * @tb_ptr: address of generated code for the TB to execute @@ -839,19 +909,6 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, # define helper_ret_stq_mmu helper_le_stq_mmu #endif -uint8_t helper_ldb_mmu(CPUArchState *env, target_ulong addr, int mmu_idx); -uint16_t helper_ldw_mmu(CPUArchState *env, target_ulong addr, int mmu_idx); -uint32_t helper_ldl_mmu(CPUArchState *env, target_ulong addr, int mmu_idx); -uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr, int mmu_idx); - -void helper_stb_mmu(CPUArchState *env, target_ulong addr, - uint8_t val, int mmu_idx); -void helper_stw_mmu(CPUArchState *env, target_ulong addr, - uint16_t val, int mmu_idx); -void helper_stl_mmu(CPUArchState *env, target_ulong addr, - uint32_t val, int mmu_idx); -void helper_stq_mmu(CPUArchState *env, target_ulong addr, - uint64_t val, int mmu_idx); #endif /* CONFIG_SOFTMMU */ #endif /* TCG_H */ diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c index fc80704de..03a7b4695 100644 --- a/tcg/tci/tcg-target.c +++ b/tcg/tci/tcg-target.c @@ -59,12 +59,8 @@ static const TCGTargetOpDef tcg_target_op_defs[] = { { INDEX_op_exit_tb, { NULL } }, { INDEX_op_goto_tb, { NULL } }, - { INDEX_op_call, { RI } }, { INDEX_op_br, { NULL } }, - { INDEX_op_mov_i32, { R, R } }, - { INDEX_op_movi_i32, { R } }, - { INDEX_op_ld8u_i32, { R, R } }, { INDEX_op_ld8s_i32, { R, R } }, { INDEX_op_ld16u_i32, { R, R } }, @@ -141,9 +137,6 @@ static const TCGTargetOpDef tcg_target_op_defs[] = { #endif #if TCG_TARGET_REG_BITS == 64 - { INDEX_op_mov_i64, { R, R } }, - { INDEX_op_movi_i64, { R } }, - { INDEX_op_ld8u_i64, { R, R } }, { INDEX_op_ld8s_i64, { R, R } }, { INDEX_op_ld16u_i64, { R, R } }, @@ -234,21 +227,11 @@ static const TCGTargetOpDef tcg_target_op_defs[] = { #endif #endif /* TCG_TARGET_REG_BITS == 64 */ - { INDEX_op_qemu_ld8u, { R, L } }, - { INDEX_op_qemu_ld8s, { R, L } }, - { INDEX_op_qemu_ld16u, { R, L } }, - { INDEX_op_qemu_ld16s, { R, L } }, - { INDEX_op_qemu_ld32, { R, L } }, -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_qemu_ld32u, { R, L } }, - { INDEX_op_qemu_ld32s, { R, L } }, -#endif - { INDEX_op_qemu_ld64, { R64, L } }, + { INDEX_op_qemu_ld_i32, { R, L } }, + { INDEX_op_qemu_ld_i64, { R64, L } }, - { INDEX_op_qemu_st8, { R, S } }, - { INDEX_op_qemu_st16, { R, S } }, - { INDEX_op_qemu_st32, { R, S } }, - { INDEX_op_qemu_st64, { R64, S } }, + { INDEX_op_qemu_st_i32, { R, S } }, + { INDEX_op_qemu_st_i64, { R64, S } }, #if TCG_TARGET_HAS_ext8s_i32 { INDEX_op_ext8s_i32, { R, R } }, @@ -371,14 +354,18 @@ static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { }; #endif -static void patch_reloc(uint8_t *code_ptr, int type, +static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { /* tcg_out_reloc always uses the same type, addend. */ assert(type == sizeof(tcg_target_long)); assert(addend == 0); assert(value != 0); - *(tcg_target_long *)code_ptr = value; + if (TCG_TARGET_REG_BITS == 32) { + tcg_patch32(code_ptr, value); + } else { + tcg_patch64(code_ptr, value); + } } /* Parse target specific constraints. */ @@ -413,8 +400,11 @@ void tci_disas(uint8_t opc) /* Write value (native size). 
*/ static void tcg_out_i(TCGContext *s, tcg_target_ulong v) { - *(tcg_target_ulong *)s->code_ptr = v; - s->code_ptr += sizeof(tcg_target_ulong); + if (TCG_TARGET_REG_BITS == 32) { + tcg_out32(s, v); + } else { + tcg_out64(s, v); + } } /* Write opcode. */ @@ -542,6 +532,14 @@ static void tcg_out_movi(TCGContext *s, TCGType type, old_code_ptr[1] = s->code_ptr - old_code_ptr; } +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ + uint8_t *old_code_ptr = s->code_ptr; + tcg_out_op_t(s, INDEX_op_call); + tcg_out_ri(s, 1, (uintptr_t)arg); + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { @@ -557,21 +555,18 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, if (s->tb_jmp_offset) { /* Direct jump method. */ assert(args[0] < ARRAY_SIZE(s->tb_jmp_offset)); - s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); tcg_out32(s, 0); } else { /* Indirect jump method. */ TODO(); } assert(args[0] < ARRAY_SIZE(s->tb_next_offset)); - s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; + s->tb_next_offset[args[0]] = tcg_current_code_size(s); break; case INDEX_op_br: tci_out_label(s, args[0]); break; - case INDEX_op_call: - tcg_out_ri(s, const_args[0], args[0]); - break; case INDEX_op_setcond_i32: tcg_out_r(s, args[0]); tcg_out_r(s, args[1]); @@ -596,9 +591,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out8(s, args[3]); /* condition */ break; #endif - case INDEX_op_movi_i32: - TODO(); /* Handled by tcg_out_movi? */ - break; case INDEX_op_ld8u_i32: case INDEX_op_ld8s_i32: case INDEX_op_ld16u_i32: @@ -654,10 +646,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, break; #if TCG_TARGET_REG_BITS == 64 - case INDEX_op_mov_i64: - case INDEX_op_movi_i64: - TODO(); - break; case INDEX_op_add_i64: case INDEX_op_sub_i64: case INDEX_op_mul_i64: @@ -769,67 +757,62 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out8(s, args[2]); /* condition */ tci_out_label(s, args[3]); break; - case INDEX_op_qemu_ld8u: - case INDEX_op_qemu_ld8s: - case INDEX_op_qemu_ld16u: - case INDEX_op_qemu_ld16s: - case INDEX_op_qemu_ld32: -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_qemu_ld32s: - case INDEX_op_qemu_ld32u: -#endif - tcg_out_r(s, *args++); + case INDEX_op_qemu_ld_i32: tcg_out_r(s, *args++); -#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS tcg_out_r(s, *args++); -#endif + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); #ifdef CONFIG_SOFTMMU tcg_out_i(s, *args); #endif break; - case INDEX_op_qemu_ld64: + case INDEX_op_qemu_ld_i64: tcg_out_r(s, *args++); -#if TCG_TARGET_REG_BITS == 32 - tcg_out_r(s, *args++); -#endif - tcg_out_r(s, *args++); -#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_r(s, *args++); + } tcg_out_r(s, *args++); -#endif + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); #ifdef CONFIG_SOFTMMU tcg_out_i(s, *args); #endif break; - case INDEX_op_qemu_st8: - case INDEX_op_qemu_st16: - case INDEX_op_qemu_st32: - tcg_out_r(s, *args++); + case INDEX_op_qemu_st_i32: tcg_out_r(s, *args++); -#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS tcg_out_r(s, *args++); -#endif + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); #ifdef CONFIG_SOFTMMU 
tcg_out_i(s, *args); #endif break; - case INDEX_op_qemu_st64: + case INDEX_op_qemu_st_i64: tcg_out_r(s, *args++); -#if TCG_TARGET_REG_BITS == 32 - tcg_out_r(s, *args++); -#endif - tcg_out_r(s, *args++); -#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_r(s, *args++); + } tcg_out_r(s, *args++); -#endif + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); #ifdef CONFIG_SOFTMMU tcg_out_i(s, *args); #endif break; - case INDEX_op_end: - TODO(); - break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ default: - fprintf(stderr, "Missing: %s\n", tcg_op_defs[opc].name); tcg_abort(); } old_code_ptr[1] = s->code_ptr - old_code_ptr; @@ -859,7 +842,7 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, } /* Test if a constant matches the constraint. */ -static int tcg_target_const_match(tcg_target_long val, +static int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct) { /* No need to return 0 or 1, 0 or != 0 is good enough. */ diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 6e1da8c00..bd1e97468 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -43,6 +43,7 @@ #include "config-host.h" #define TCG_TARGET_INTERPRETER 1 +#define TCG_TARGET_INSN_UNIT_SIZE 1 #if UINTPTR_MAX == UINT32_MAX # define TCG_TARGET_REG_BITS 32 @@ -57,12 +58,6 @@ #define CONFIG_DEBUG_TCG_INTERPRETER #endif -#if 0 /* TCI tries to emulate a little endian host. */ -#if defined(HOST_WORDS_BIGENDIAN) -# define TCG_TARGET_WORDS_BIGENDIAN -#endif -#endif - /* Optional instructions. */ #define TCG_TARGET_HAS_bswap16_i32 1 @@ -88,6 +83,7 @@ #define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_trunc_shr_i32 0 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 @@ -118,10 +114,10 @@ #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0 +#else +#define TCG_TARGET_HAS_mulu2_i32 1 #endif /* TCG_TARGET_REG_BITS == 64 */ -#define TCG_TARGET_HAS_new_ldst 0 - /* Number of registers available. For 32 bit hosts, we need more than 8 registers (call arguments). */ /* #define TCG_TARGET_NB_REGS 8 */ |
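The TCI backend's tcg_out_call and tcg_out_op above follow the interpreter's usual bytecode shape: emit the opcode, reserve a length byte, emit the operands, then backpatch the total size into old_code_ptr[1]. A minimal standalone sketch of that length-byte pattern (all demo_ names are illustrative, not TCI's real encoder):

    #include <stdint.h>
    #include <assert.h>

    static uint8_t buf[32], *code_ptr = buf;

    static void emit8(uint8_t v)
    {
        *code_ptr++ = v;
    }

    static void emit32(uint32_t v)
    {
        int i;
        for (i = 0; i < 4; i++) {
            emit8(v >> (i * 8));
        }
    }

    /* Opcode, length placeholder, operands, then backpatch the size. */
    static void emit_demo_op(uint8_t opc, uint32_t operand)
    {
        uint8_t *old_code_ptr = code_ptr;

        emit8(opc);
        emit8(0);                             /* length, patched below */
        emit32(operand);
        old_code_ptr[1] = code_ptr - old_code_ptr;
    }

    int main(void)
    {
        emit_demo_op(1, 0xdeadbeef);
        assert(buf[1] == 6);    /* opcode + length byte + 4 operand bytes */
        return 0;
    }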