Diffstat (limited to 'tcg')
-rw-r--r-- | tcg/README | 44
-rw-r--r-- | tcg/aarch64/tcg-target.c | 1554
-rw-r--r-- | tcg/aarch64/tcg-target.h | 101
-rw-r--r-- | tcg/arm/tcg-target.c | 1589
-rw-r--r-- | tcg/arm/tcg-target.h | 19
-rw-r--r-- | tcg/hppa/tcg-target.c | 39
-rw-r--r-- | tcg/hppa/tcg-target.h | 5
-rw-r--r-- | tcg/i386/tcg-target.c | 131
-rw-r--r-- | tcg/i386/tcg-target.h | 16
-rw-r--r-- | tcg/ia64/tcg-target.c | 2
-rw-r--r-- | tcg/ia64/tcg-target.h | 13
-rw-r--r-- | tcg/mips/tcg-target.c | 18
-rw-r--r-- | tcg/mips/tcg-target.h | 5
-rw-r--r-- | tcg/optimize.c | 217
-rw-r--r-- | tcg/ppc/tcg-target.c | 16
-rw-r--r-- | tcg/ppc/tcg-target.h | 7
-rw-r--r-- | tcg/ppc64/tcg-target.c | 1379
-rw-r--r-- | tcg/ppc64/tcg-target.h | 67
-rw-r--r-- | tcg/s390/tcg-target.c | 552
-rw-r--r-- | tcg/s390/tcg-target.h | 27
-rw-r--r-- | tcg/sparc/tcg-target.c | 45
-rw-r--r-- | tcg/sparc/tcg-target.h | 13
-rw-r--r-- | tcg/tcg-op.h | 264
-rw-r--r-- | tcg/tcg-opc.h | 48
-rw-r--r-- | tcg/tcg-runtime.h | 2
-rw-r--r-- | tcg/tcg.c | 73
-rw-r--r-- | tcg/tcg.h | 89
-rw-r--r-- | tcg/tci/README | 2
-rw-r--r-- | tcg/tci/tcg-target.c | 27
-rw-r--r-- | tcg/tci/tcg-target.h | 23 |
30 files changed, 4722 insertions(+), 1665 deletions(-)
diff --git a/tcg/README b/tcg/README index ec1ac7937..063aeb95e 100644 --- a/tcg/README +++ b/tcg/README @@ -14,6 +14,10 @@ the emulated architecture. As TCG started as a generic C backend used for cross compiling, it is assumed that the TCG target is different from the host, although it is never the case for QEMU. +In this document, we use "guest" to specify what architecture we are +emulating; "target" always means the TCG target, the machine on which +we are running QEMU. + A TCG "function" corresponds to a QEMU Translated Block (TB). A TCG "temporary" is a variable only live in a basic @@ -361,7 +365,25 @@ Write 8, 16, 32 or 64 bits to host memory. All this opcodes assume that the pointed host memory doesn't correspond to a global. In the latter case the behaviour is unpredictable. -********* 64-bit target on 32-bit host support +********* Multiword arithmetic support + +* add2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high +* sub2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high + +Similar to add/sub, except that the double-word inputs T1 and T2 are +formed from two single-word arguments, and the double-word output T0 +is returned in two single-word outputs. + +* mulu2_i32/i64 t0_low, t0_high, t1, t2 + +Similar to mul, except two unsigned inputs T1 and T2 yielding the full +double-word product T0. The later is returned in two single-word outputs. + +* muls2_i32/i64 t0_low, t0_high, t1, t2 + +Similar to mulu2, except the two inputs T1 and T2 are signed. + +********* 64-bit guest on 32-bit host support The following opcodes are internal to TCG. Thus they are to be implemented by 32-bit host code generators, but are not to be emitted by guest translators. @@ -372,18 +394,6 @@ They are emitted as needed by inline functions within "tcg-op.h". Similar to brcond, except that the 64-bit values T0 and T1 are formed from two 32-bit arguments. -* add2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high -* sub2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high - -Similar to add/sub, except that the 64-bit inputs T1 and T2 are -formed from two 32-bit arguments, and the 64-bit output T0 -is returned in two 32-bit outputs. - -* mulu2_i32 t0_low, t0_high, t1, t2 - -Similar to mul, except two 32-bit (unsigned) inputs T1 and T2 yielding -the full 64-bit product T0. The later is returned in two 32-bit outputs. - * setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond Similar to setcond, except that the 64-bit values T1 and T2 are @@ -515,9 +525,9 @@ register. a better generated code, but it reduces the memory usage of TCG and the speed of the translation. -- Don't hesitate to use helpers for complicated or seldom used target +- Don't hesitate to use helpers for complicated or seldom used guest instructions. There is little performance advantage in using TCG to - implement target instructions taking more than about twenty TCG + implement guest instructions taking more than about twenty TCG instructions. Note that this rule of thumb is more applicable to helpers doing complex logic or arithmetic, where the C compiler has scope to do a good job of optimisation; it is less relevant where @@ -525,9 +535,9 @@ register. inline TCG may still be faster for longer sequences. - The hard limit on the number of TCG instructions you can generate - per target instruction is set by MAX_OP_PER_INSTR in exec-all.h -- + per guest instruction is set by MAX_OP_PER_INSTR in exec-all.h -- you cannot exceed this without risking a buffer overrun. 
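Illustration (not part of the patch): the README hunk above documents the new multiword opcodes at the TCG-opcode level; a guest front end would normally reach them through the corresponding tcg-op.h helpers. The function names below are made up for the example, and the tcg_gen_mulu2_i32 / tcg_gen_add2_i32 signatures are assumed to take the (low, high, operands...) form described above:

#include "tcg-op.h"

/* full 32x32 -> 64 unsigned product: low half in rl, high half in rh */
static void gen_umull_example(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mulu2_i32(rl, rh, a, b);
}

/* 64-bit add built from 32-bit halves; add2 takes care of the carry */
static void gen_add64_example(TCGv_i32 dl, TCGv_i32 dh,
                              TCGv_i32 al, TCGv_i32 ah,
                              TCGv_i32 bl, TCGv_i32 bh)
{
    tcg_gen_add2_i32(dl, dh, al, ah, bl, bh);
}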
- Use the 'discard' instruction if you know that TCG won't be able to prove that a given global is "dead" at a given program point. The - x86 target uses it to improve the condition codes optimisation. + x86 guest uses it to improve the condition codes optimisation. diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c new file mode 100644 index 000000000..41a17f8a6 --- /dev/null +++ b/tcg/aarch64/tcg-target.c @@ -0,0 +1,1554 @@ +/* + * Initial TCG Implementation for aarch64 + * + * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH + * Written by Claudio Fontana + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. + */ + +#include "qemu/bitops.h" + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7", + "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15", + "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23", + "%x24", "%x25", "%x26", "%x27", "%x28", + "%fp", /* frame pointer */ + "%lr", /* link register */ + "%sp", /* stack pointer */ +}; +#endif /* NDEBUG */ + +#ifdef TARGET_WORDS_BIGENDIAN + #define TCG_LDST_BSWAP 1 +#else + #define TCG_LDST_BSWAP 0 +#endif + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, + TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, + TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */ + + TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, TCG_REG_X12, + TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, + TCG_REG_X16, TCG_REG_X17, + + TCG_REG_X18, TCG_REG_X19, /* will not use these, see tcg_target_init */ + + TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, + TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, + + TCG_REG_X8, /* will not use, see tcg_target_init */ +}; + +static const int tcg_target_call_iarg_regs[8] = { + TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, + TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7 +}; +static const int tcg_target_call_oarg_regs[1] = { + TCG_REG_X0 +}; + +#define TCG_REG_TMP TCG_REG_X8 + +#ifndef CONFIG_SOFTMMU +# if defined(CONFIG_USE_GUEST_BASE) +# define TCG_REG_GUEST_BASE TCG_REG_X28 +# else +# define TCG_REG_GUEST_BASE TCG_REG_XZR +# endif +#endif + +static inline void reloc_pc26(void *code_ptr, tcg_target_long target) +{ + tcg_target_long offset; uint32_t insn; + offset = (target - (tcg_target_long)code_ptr) / 4; + /* read instruction, mask away previous PC_REL26 parameter contents, + set the proper offset, then write back the instruction. */ + insn = *(uint32_t *)code_ptr; + insn = deposit32(insn, 0, 26, offset); + *(uint32_t *)code_ptr = insn; +} + +static inline void reloc_pc19(void *code_ptr, tcg_target_long target) +{ + tcg_target_long offset; uint32_t insn; + offset = (target - (tcg_target_long)code_ptr) / 4; + /* read instruction, mask away previous PC_REL19 parameter contents, + set the proper offset, then write back the instruction. 
*/ + insn = *(uint32_t *)code_ptr; + insn = deposit32(insn, 5, 19, offset); + *(uint32_t *)code_ptr = insn; +} + +static inline void patch_reloc(uint8_t *code_ptr, int type, + tcg_target_long value, tcg_target_long addend) +{ + value += addend; + + switch (type) { + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + reloc_pc26(code_ptr, value); + break; + case R_AARCH64_CONDBR19: + reloc_pc19(code_ptr, value); + break; + + default: + tcg_abort(); + } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, + const char **pct_str) +{ + const char *ct_str = *pct_str; + + switch (ct_str[0]) { + case 'r': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1); + break; + case 'l': /* qemu_ld / qemu_st address, data_reg */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1); +#ifdef CONFIG_SOFTMMU + /* x0 and x1 will be overwritten when reading the tlb entry, + and x2, and x3 for helper args, better to avoid using them. */ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3); +#endif + break; + default: + return -1; + } + + ct_str++; + *pct_str = ct_str; + return 0; +} + +static inline int tcg_target_const_match(tcg_target_long val, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + + if (ct & TCG_CT_CONST) { + return 1; + } + + return 0; +} + +enum aarch64_cond_code { + COND_EQ = 0x0, + COND_NE = 0x1, + COND_CS = 0x2, /* Unsigned greater or equal */ + COND_HS = COND_CS, /* ALIAS greater or equal */ + COND_CC = 0x3, /* Unsigned less than */ + COND_LO = COND_CC, /* ALIAS Lower */ + COND_MI = 0x4, /* Negative */ + COND_PL = 0x5, /* Zero or greater */ + COND_VS = 0x6, /* Overflow */ + COND_VC = 0x7, /* No overflow */ + COND_HI = 0x8, /* Unsigned greater than */ + COND_LS = 0x9, /* Unsigned less or equal */ + COND_GE = 0xa, + COND_LT = 0xb, + COND_GT = 0xc, + COND_LE = 0xd, + COND_AL = 0xe, + COND_NV = 0xf, /* behaves like COND_AL here */ +}; + +static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { + [TCG_COND_EQ] = COND_EQ, + [TCG_COND_NE] = COND_NE, + [TCG_COND_LT] = COND_LT, + [TCG_COND_GE] = COND_GE, + [TCG_COND_LE] = COND_LE, + [TCG_COND_GT] = COND_GT, + /* unsigned */ + [TCG_COND_LTU] = COND_LO, + [TCG_COND_GTU] = COND_HI, + [TCG_COND_GEU] = COND_HS, + [TCG_COND_LEU] = COND_LS, +}; + +/* opcodes for LDR / STR instructions with base + simm9 addressing */ +enum aarch64_ldst_op_data { /* size of the data moved */ + LDST_8 = 0x38, + LDST_16 = 0x78, + LDST_32 = 0xb8, + LDST_64 = 0xf8, +}; +enum aarch64_ldst_op_type { /* type of operation */ + LDST_ST = 0x0, /* store */ + LDST_LD = 0x4, /* load */ + LDST_LD_S_X = 0x8, /* load and sign-extend into Xt */ + LDST_LD_S_W = 0xc, /* load and sign-extend into Wt */ +}; + +enum aarch64_arith_opc { + ARITH_AND = 0x0a, + ARITH_ADD = 0x0b, + ARITH_OR = 0x2a, + ARITH_ADDS = 0x2b, + ARITH_XOR = 0x4a, + ARITH_SUB = 0x4b, + ARITH_ANDS = 0x6a, + ARITH_SUBS = 0x6b, +}; + +enum aarch64_srr_opc { + SRR_SHL = 0x0, + SRR_SHR = 0x4, + SRR_SAR = 0x8, + SRR_ROR = 0xc +}; + +static inline enum aarch64_ldst_op_data +aarch64_ldst_get_data(TCGOpcode tcg_op) +{ + switch (tcg_op) { + case INDEX_op_ld8u_i32: + case INDEX_op_ld8s_i32: + case INDEX_op_ld8u_i64: + case INDEX_op_ld8s_i64: + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + return LDST_8; + + case INDEX_op_ld16u_i32: + case INDEX_op_ld16s_i32: 
+ case INDEX_op_ld16u_i64: + case INDEX_op_ld16s_i64: + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + return LDST_16; + + case INDEX_op_ld_i32: + case INDEX_op_st_i32: + case INDEX_op_ld32u_i64: + case INDEX_op_ld32s_i64: + case INDEX_op_st32_i64: + return LDST_32; + + case INDEX_op_ld_i64: + case INDEX_op_st_i64: + return LDST_64; + + default: + tcg_abort(); + } +} + +static inline enum aarch64_ldst_op_type +aarch64_ldst_get_type(TCGOpcode tcg_op) +{ + switch (tcg_op) { + case INDEX_op_st8_i32: + case INDEX_op_st16_i32: + case INDEX_op_st8_i64: + case INDEX_op_st16_i64: + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + case INDEX_op_st_i64: + return LDST_ST; + + case INDEX_op_ld8u_i32: + case INDEX_op_ld16u_i32: + case INDEX_op_ld8u_i64: + case INDEX_op_ld16u_i64: + case INDEX_op_ld_i32: + case INDEX_op_ld32u_i64: + case INDEX_op_ld_i64: + return LDST_LD; + + case INDEX_op_ld8s_i32: + case INDEX_op_ld16s_i32: + return LDST_LD_S_W; + + case INDEX_op_ld8s_i64: + case INDEX_op_ld16s_i64: + case INDEX_op_ld32s_i64: + return LDST_LD_S_X; + + default: + tcg_abort(); + } +} + +static inline uint32_t tcg_in32(TCGContext *s) +{ + uint32_t v = *(uint32_t *)s->code_ptr; + return v; +} + +static inline void tcg_out_ldst_9(TCGContext *s, + enum aarch64_ldst_op_data op_data, + enum aarch64_ldst_op_type op_type, + TCGReg rd, TCGReg rn, tcg_target_long offset) +{ + /* use LDUR with BASE register with 9bit signed unscaled offset */ + unsigned int mod, off; + + if (offset < 0) { + off = (256 + offset); + mod = 0x1; + } else { + off = offset; + mod = 0x0; + } + + mod |= op_type; + tcg_out32(s, op_data << 24 | mod << 20 | off << 12 | rn << 5 | rd); +} + +/* tcg_out_ldst_12 expects a scaled unsigned immediate offset */ +static inline void tcg_out_ldst_12(TCGContext *s, + enum aarch64_ldst_op_data op_data, + enum aarch64_ldst_op_type op_type, + TCGReg rd, TCGReg rn, + tcg_target_ulong scaled_uimm) +{ + tcg_out32(s, (op_data | 1) << 24 + | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd); +} + +static inline void tcg_out_movr(TCGContext *s, int ext, TCGReg rd, TCGReg src) +{ + /* register to register move using MOV (shifted register with no shift) */ + /* using MOV 0x2a0003e0 | (shift).. */ + unsigned int base = ext ? 0xaa0003e0 : 0x2a0003e0; + tcg_out32(s, base | src << 16 | rd); +} + +static inline void tcg_out_movi_aux(TCGContext *s, + TCGReg rd, uint64_t value) +{ + uint32_t half, base, shift, movk = 0; + /* construct halfwords of the immediate with MOVZ/MOVK with LSL */ + /* using MOVZ 0x52800000 | extended reg.. */ + base = (value > 0xffffffff) ? 0xd2800000 : 0x52800000; + /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the + first MOVZ with the half-word immediate skipping the zeros, with a shift + (LSL) equal to this number. Then morph all next instructions into MOVKs. + Zero the processed half-word in the value, continue until empty. + We build the final result 16bits at a time with up to 4 instructions, + but do not emit instructions for 16bit zero holes. 
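Worked example (editorial, not part of the patch) of the MOVZ/MOVK scheme described in the comment above, for an arbitrarily chosen constant:

    value  = 0x000000ff0000abcd
    pass 1: ctz64 = 0,  shift = 0   ->  movz xd, #0xabcd
    pass 2: ctz64 = 32, shift = 32  ->  movk xd, #0xff, lsl #32

The two all-zero halfwords produce no instructions, so this constant is materialised in two instructions rather than four.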
*/ + do { + shift = ctz64(value) & (63 & -16); + half = (value >> shift) & 0xffff; + tcg_out32(s, base | movk | shift << 17 | half << 5 | rd); + movk = 0x20000000; /* morph next MOVZs into MOVKs */ + value &= ~(0xffffUL << shift); + } while (value); +} + +static inline void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg rd, tcg_target_long value) +{ + if (type == TCG_TYPE_I64) { + tcg_out_movi_aux(s, rd, value); + } else { + tcg_out_movi_aux(s, rd, value & 0xffffffff); + } +} + +static inline void tcg_out_ldst_r(TCGContext *s, + enum aarch64_ldst_op_data op_data, + enum aarch64_ldst_op_type op_type, + TCGReg rd, TCGReg base, TCGReg regoff) +{ + /* load from memory to register using base + 64bit register offset */ + /* using f.e. STR Wt, [Xn, Xm] 0xb8600800|(regoff << 16)|(base << 5)|rd */ + /* the 0x6000 is for the "no extend field" */ + tcg_out32(s, 0x00206800 + | op_data << 24 | op_type << 20 | regoff << 16 | base << 5 | rd); +} + +/* solve the whole ldst problem */ +static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data, + enum aarch64_ldst_op_type type, + TCGReg rd, TCGReg rn, tcg_target_long offset) +{ + if (offset >= -256 && offset < 256) { + tcg_out_ldst_9(s, data, type, rd, rn, offset); + return; + } + + if (offset >= 256) { + /* if the offset is naturally aligned and in range, + then we can use the scaled uimm12 encoding */ + unsigned int s_bits = data >> 6; + if (!(offset & ((1 << s_bits) - 1))) { + tcg_target_ulong scaled_uimm = offset >> s_bits; + if (scaled_uimm <= 0xfff) { + tcg_out_ldst_12(s, data, type, rd, rn, scaled_uimm); + return; + } + } + } + + /* worst-case scenario, move offset to temp register, use reg offset */ + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); + tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP); +} + +/* mov alias implemented with add immediate, useful to move to/from SP */ +static inline void tcg_out_movr_sp(TCGContext *s, int ext, TCGReg rd, TCGReg rn) +{ + /* using ADD 0x11000000 | (ext) | rn << 5 | rd */ + unsigned int base = ext ? 0x91000000 : 0x11000000; + tcg_out32(s, base | rn << 5 | rd); +} + +static inline void tcg_out_mov(TCGContext *s, + TCGType type, TCGReg ret, TCGReg arg) +{ + if (ret != arg) { + tcg_out_movr(s, type == TCG_TYPE_I64, ret, arg); + } +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_LD, + arg, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_ST, + arg, arg1, arg2); +} + +static inline void tcg_out_arith(TCGContext *s, enum aarch64_arith_opc opc, + int ext, TCGReg rd, TCGReg rn, TCGReg rm, + int shift_imm) +{ + /* Using shifted register arithmetic operations */ + /* if extended register operation (64bit) just OR with 0x80 << 24 */ + unsigned int shift, base = ext ? (0x80 | opc) << 24 : opc << 24; + if (shift_imm == 0) { + shift = 0; + } else if (shift_imm > 0) { + shift = shift_imm << 10 | 1 << 22; + } else /* (shift_imm < 0) */ { + shift = (-shift_imm) << 10; + } + tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd); +} + +static inline void tcg_out_mul(TCGContext *s, int ext, + TCGReg rd, TCGReg rn, TCGReg rm) +{ + /* Using MADD 0x1b000000 with Ra = wzr alias MUL 0x1b007c00 */ + unsigned int base = ext ? 
0x9b007c00 : 0x1b007c00; + tcg_out32(s, base | rm << 16 | rn << 5 | rd); +} + +static inline void tcg_out_shiftrot_reg(TCGContext *s, + enum aarch64_srr_opc opc, int ext, + TCGReg rd, TCGReg rn, TCGReg rm) +{ + /* using 2-source data processing instructions 0x1ac02000 */ + unsigned int base = ext ? 0x9ac02000 : 0x1ac02000; + tcg_out32(s, base | rm << 16 | opc << 8 | rn << 5 | rd); +} + +static inline void tcg_out_ubfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn, + unsigned int a, unsigned int b) +{ + /* Using UBFM 0x53000000 Wd, Wn, a, b */ + unsigned int base = ext ? 0xd3400000 : 0x53000000; + tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd); +} + +static inline void tcg_out_sbfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn, + unsigned int a, unsigned int b) +{ + /* Using SBFM 0x13000000 Wd, Wn, a, b */ + unsigned int base = ext ? 0x93400000 : 0x13000000; + tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd); +} + +static inline void tcg_out_extr(TCGContext *s, int ext, TCGReg rd, + TCGReg rn, TCGReg rm, unsigned int a) +{ + /* Using EXTR 0x13800000 Wd, Wn, Wm, a */ + unsigned int base = ext ? 0x93c00000 : 0x13800000; + tcg_out32(s, base | rm << 16 | a << 10 | rn << 5 | rd); +} + +static inline void tcg_out_shl(TCGContext *s, int ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int bits, max; + bits = ext ? 64 : 32; + max = bits - 1; + tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max)); +} + +static inline void tcg_out_shr(TCGContext *s, int ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int max = ext ? 63 : 31; + tcg_out_ubfm(s, ext, rd, rn, m & max, max); +} + +static inline void tcg_out_sar(TCGContext *s, int ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int max = ext ? 63 : 31; + tcg_out_sbfm(s, ext, rd, rn, m & max, max); +} + +static inline void tcg_out_rotr(TCGContext *s, int ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int max = ext ? 63 : 31; + tcg_out_extr(s, ext, rd, rn, rn, m & max); +} + +static inline void tcg_out_rotl(TCGContext *s, int ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int bits, max; + bits = ext ? 64 : 32; + max = bits - 1; + tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max)); +} + +static inline void tcg_out_cmp(TCGContext *s, int ext, TCGReg rn, TCGReg rm, + int shift_imm) +{ + /* Using CMP alias SUBS wzr, Wn, Wm */ + tcg_out_arith(s, ARITH_SUBS, ext, TCG_REG_XZR, rn, rm, shift_imm); +} + +static inline void tcg_out_cset(TCGContext *s, int ext, TCGReg rd, TCGCond c) +{ + /* Using CSET alias of CSINC 0x1a800400 Xd, XZR, XZR, invert(cond) */ + unsigned int base = ext ? 0x9a9f07e0 : 0x1a9f07e0; + tcg_out32(s, base | tcg_cond_to_aarch64[tcg_invert_cond(c)] << 12 | rd); +} + +static inline void tcg_out_goto(TCGContext *s, tcg_target_long target) +{ + tcg_target_long offset; + offset = (target - (tcg_target_long)s->code_ptr) / 4; + + if (offset < -0x02000000 || offset >= 0x02000000) { + /* out of 26bit range */ + tcg_abort(); + } + + tcg_out32(s, 0x14000000 | (offset & 0x03ffffff)); +} + +static inline void tcg_out_goto_noaddr(TCGContext *s) +{ + /* We pay attention here to not modify the branch target by + reading from the buffer. This ensure that caches and memory are + kept coherent during retranslation. + Mask away possible garbage in the high bits for the first translation, + while keeping the offset bits for retranslation. 
*/ + uint32_t insn; + insn = (tcg_in32(s) & 0x03ffffff) | 0x14000000; + tcg_out32(s, insn); +} + +static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c) +{ + /* see comments in tcg_out_goto_noaddr */ + uint32_t insn; + insn = tcg_in32(s) & (0x07ffff << 5); + insn |= 0x54000000 | tcg_cond_to_aarch64[c]; + tcg_out32(s, insn); +} + +static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c, + tcg_target_long target) +{ + tcg_target_long offset; + offset = (target - (tcg_target_long)s->code_ptr) / 4; + + if (offset < -0x40000 || offset >= 0x40000) { + /* out of 19bit range */ + tcg_abort(); + } + + offset &= 0x7ffff; + tcg_out32(s, 0x54000000 | tcg_cond_to_aarch64[c] | offset << 5); +} + +static inline void tcg_out_callr(TCGContext *s, TCGReg reg) +{ + tcg_out32(s, 0xd63f0000 | reg << 5); +} + +static inline void tcg_out_gotor(TCGContext *s, TCGReg reg) +{ + tcg_out32(s, 0xd61f0000 | reg << 5); +} + +static inline void tcg_out_call(TCGContext *s, tcg_target_long target) +{ + tcg_target_long offset; + + offset = (target - (tcg_target_long)s->code_ptr) / 4; + + if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit rng */ + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target); + tcg_out_callr(s, TCG_REG_TMP); + } else { + tcg_out32(s, 0x94000000 | (offset & 0x03ffffff)); + } +} + +/* encode a logical immediate, mapping user parameter + M=set bits pattern length to S=M-1 */ +static inline unsigned int +aarch64_limm(unsigned int m, unsigned int r) +{ + assert(m > 0); + return r << 16 | (m - 1) << 10; +} + +/* test a register against an immediate bit pattern made of + M set bits rotated right by R. + Examples: + to test a 32/64 reg against 0x00000007, pass M = 3, R = 0. + to test a 32/64 reg against 0x000000ff, pass M = 8, R = 0. + to test a 32bit reg against 0xff000000, pass M = 8, R = 8. + to test a 32bit reg against 0xff0000ff, pass M = 16, R = 8. + */ +static inline void tcg_out_tst(TCGContext *s, int ext, TCGReg rn, + unsigned int m, unsigned int r) +{ + /* using TST alias of ANDS XZR, Xn,#bimm64 0x7200001f */ + unsigned int base = ext ? 0xf240001f : 0x7200001f; + tcg_out32(s, base | aarch64_limm(m, r) | rn << 5); +} + +/* and a register with a bit pattern, similarly to TST, no flags change */ +static inline void tcg_out_andi(TCGContext *s, int ext, TCGReg rd, TCGReg rn, + unsigned int m, unsigned int r) +{ + /* using AND 0x12000000 */ + unsigned int base = ext ? 
0x92400000 : 0x12000000; + tcg_out32(s, base | aarch64_limm(m, r) | rn << 5 | rd); +} + +static inline void tcg_out_ret(TCGContext *s) +{ + /* emit RET { LR } */ + tcg_out32(s, 0xd65f03c0); +} + +void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) +{ + tcg_target_long target, offset; + target = (tcg_target_long)addr; + offset = (target - (tcg_target_long)jmp_addr) / 4; + + if (offset < -0x02000000 || offset >= 0x02000000) { + /* out of 26bit range */ + tcg_abort(); + } + + patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0); + flush_icache_range(jmp_addr, jmp_addr + 4); +} + +static inline void tcg_out_goto_label(TCGContext *s, int label_index) +{ + TCGLabel *l = &s->labels[label_index]; + + if (!l->has_value) { + tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0); + tcg_out_goto_noaddr(s); + } else { + tcg_out_goto(s, l->u.value); + } +} + +static inline void tcg_out_goto_label_cond(TCGContext *s, + TCGCond c, int label_index) +{ + TCGLabel *l = &s->labels[label_index]; + + if (!l->has_value) { + tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label_index, 0); + tcg_out_goto_cond_noaddr(s, c); + } else { + tcg_out_goto_cond(s, c, l->u.value); + } +} + +static inline void tcg_out_rev(TCGContext *s, int ext, TCGReg rd, TCGReg rm) +{ + /* using REV 0x5ac00800 */ + unsigned int base = ext ? 0xdac00c00 : 0x5ac00800; + tcg_out32(s, base | rm << 5 | rd); +} + +static inline void tcg_out_rev16(TCGContext *s, int ext, TCGReg rd, TCGReg rm) +{ + /* using REV16 0x5ac00400 */ + unsigned int base = ext ? 0xdac00400 : 0x5ac00400; + tcg_out32(s, base | rm << 5 | rd); +} + +static inline void tcg_out_sxt(TCGContext *s, int ext, int s_bits, + TCGReg rd, TCGReg rn) +{ + /* using ALIASes SXTB 0x13001c00, SXTH 0x13003c00, SXTW 0x93407c00 + of SBFM Xd, Xn, #0, #7|15|31 */ + int bits = 8 * (1 << s_bits) - 1; + tcg_out_sbfm(s, ext, rd, rn, 0, bits); +} + +static inline void tcg_out_uxt(TCGContext *s, int s_bits, + TCGReg rd, TCGReg rn) +{ + /* using ALIASes UXTB 0x53001c00, UXTH 0x53003c00 + of UBFM Wd, Wn, #0, #7|15 */ + int bits = 8 * (1 << s_bits) - 1; + tcg_out_ubfm(s, 0, rd, rn, 0, bits); +} + +static inline void tcg_out_addi(TCGContext *s, int ext, + TCGReg rd, TCGReg rn, unsigned int aimm) +{ + /* add immediate aimm unsigned 12bit value (with LSL 0 or 12) */ + /* using ADD 0x11000000 | (ext) | (aimm << 10) | (rn << 5) | rd */ + unsigned int base = ext ? 0x91000000 : 0x11000000; + + if (aimm <= 0xfff) { + aimm <<= 10; + } else { + /* we can only shift left by 12, on assert we cannot represent */ + assert(!(aimm & 0xfff)); + assert(aimm <= 0xfff000); + base |= 1 << 22; /* apply LSL 12 */ + aimm >>= 2; + } + + tcg_out32(s, base | aimm | (rn << 5) | rd); +} + +static inline void tcg_out_subi(TCGContext *s, int ext, + TCGReg rd, TCGReg rn, unsigned int aimm) +{ + /* sub immediate aimm unsigned 12bit value (with LSL 0 or 12) */ + /* using SUB 0x51000000 | (ext) | (aimm << 10) | (rn << 5) | rd */ + unsigned int base = ext ? 
0xd1000000 : 0x51000000; + + if (aimm <= 0xfff) { + aimm <<= 10; + } else { + /* we can only shift left by 12, on assert we cannot represent */ + assert(!(aimm & 0xfff)); + assert(aimm <= 0xfff000); + base |= 1 << 22; /* apply LSL 12 */ + aimm >>= 2; + } + + tcg_out32(s, base | aimm | (rn << 5) | rd); +} + +static inline void tcg_out_nop(TCGContext *s) +{ + tcg_out32(s, 0xd503201f); +} + +#ifdef CONFIG_SOFTMMU +#include "exec/softmmu_defs.h" + +/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, + int mmu_idx) */ +static const void * const qemu_ld_helpers[4] = { + helper_ldb_mmu, + helper_ldw_mmu, + helper_ldl_mmu, + helper_ldq_mmu, +}; + +/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, + uintxx_t val, int mmu_idx) */ +static const void * const qemu_st_helpers[4] = { + helper_stb_mmu, + helper_stw_mmu, + helper_stl_mmu, + helper_stq_mmu, +}; + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + reloc_pc19(lb->label_ptr[0], (tcg_target_long)s->code_ptr); + tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0); + tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, + (tcg_target_long)qemu_ld_helpers[lb->opc & 3]); + tcg_out_callr(s, TCG_REG_TMP); + if (lb->opc & 0x04) { + tcg_out_sxt(s, 1, lb->opc & 3, lb->datalo_reg, TCG_REG_X0); + } else { + tcg_out_movr(s, 1, lb->datalo_reg, TCG_REG_X0); + } + + tcg_out_goto(s, (tcg_target_long)lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + reloc_pc19(lb->label_ptr[0], (tcg_target_long)s->code_ptr); + + tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0); + tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg); + tcg_out_movr(s, 1, TCG_REG_X2, lb->datalo_reg); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, + (tcg_target_long)qemu_st_helpers[lb->opc & 3]); + tcg_out_callr(s, TCG_REG_TMP); + + tcg_out_nop(s); + tcg_out_goto(s, (tcg_target_long)lb->raddr); +} + +void tcg_out_tb_finalize(TCGContext *s) +{ + int i; + for (i = 0; i < s->nb_qemu_ldst_labels; i++) { + TCGLabelQemuLdst *label = &s->qemu_ldst_labels[i]; + if (label->is_ld) { + tcg_out_qemu_ld_slow_path(s, label); + } else { + tcg_out_qemu_st_slow_path(s, label); + } + } +} + +static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc, + TCGReg data_reg, TCGReg addr_reg, + int mem_index, + uint8_t *raddr, uint8_t *label_ptr) +{ + int idx; + TCGLabelQemuLdst *label; + + if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) { + tcg_abort(); + } + + idx = s->nb_qemu_ldst_labels++; + label = &s->qemu_ldst_labels[idx]; + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = data_reg; + label->addrlo_reg = addr_reg; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} + +/* Load and compare a TLB entry, emitting the conditional jump to the + slow path for the failure case, which will be patched later when finalizing + the slow path. Generated code returns the host addend in X1, + clobbers X0,X2,X3,TMP. */ +static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, + int s_bits, uint8_t **label_ptr, int mem_index, int is_read) +{ + TCGReg base = TCG_AREG0; + int tlb_offset = is_read ? + offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); + /* Extract the TLB index from the address into X0. 
+ X0<CPU_TLB_BITS:0> = + addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */ + tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg, + TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS); + /* Store the page mask part of the address and the low s_bits into X3. + Later this allows checking for equality and alignment at the same time. + X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */ + tcg_out_andi(s, (TARGET_LONG_BITS == 64), TCG_REG_X3, addr_reg, + (TARGET_LONG_BITS - TARGET_PAGE_BITS) + s_bits, + (TARGET_LONG_BITS - TARGET_PAGE_BITS)); + /* Add any "high bits" from the tlb offset to the env address into X2, + to take advantage of the LSL12 form of the addi instruction. + X2 = env + (tlb_offset & 0xfff000) */ + tcg_out_addi(s, 1, TCG_REG_X2, base, tlb_offset & 0xfff000); + /* Merge the tlb index contribution into X2. + X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */ + tcg_out_arith(s, ARITH_ADD, 1, TCG_REG_X2, TCG_REG_X2, + TCG_REG_X0, -CPU_TLB_ENTRY_BITS); + /* Merge "low bits" from tlb offset, load the tlb comparator into X0. + X0 = load [X2 + (tlb_offset & 0x000fff)] */ + tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32, + LDST_LD, TCG_REG_X0, TCG_REG_X2, + (tlb_offset & 0xfff)); + /* Load the tlb addend. Do that early to avoid stalling. + X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */ + tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2, + (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) - + (is_read ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write))); + /* Perform the address comparison. */ + tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0); + *label_ptr = s->code_ptr; + /* If not equal, we jump to the slow path. */ + tcg_out_goto_cond_noaddr(s, TCG_COND_NE); +} + +#endif /* CONFIG_SOFTMMU */ + +static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r, + TCGReg addr_r, TCGReg off_r) +{ + switch (opc) { + case 0: + tcg_out_ldst_r(s, LDST_8, LDST_LD, data_r, addr_r, off_r); + break; + case 0 | 4: + tcg_out_ldst_r(s, LDST_8, LDST_LD_S_X, data_r, addr_r, off_r); + break; + case 1: + tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r); + if (TCG_LDST_BSWAP) { + tcg_out_rev16(s, 0, data_r, data_r); + } + break; + case 1 | 4: + if (TCG_LDST_BSWAP) { + tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r); + tcg_out_rev16(s, 0, data_r, data_r); + tcg_out_sxt(s, 1, 1, data_r, data_r); + } else { + tcg_out_ldst_r(s, LDST_16, LDST_LD_S_X, data_r, addr_r, off_r); + } + break; + case 2: + tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r); + if (TCG_LDST_BSWAP) { + tcg_out_rev(s, 0, data_r, data_r); + } + break; + case 2 | 4: + if (TCG_LDST_BSWAP) { + tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r); + tcg_out_rev(s, 0, data_r, data_r); + tcg_out_sxt(s, 1, 2, data_r, data_r); + } else { + tcg_out_ldst_r(s, LDST_32, LDST_LD_S_X, data_r, addr_r, off_r); + } + break; + case 3: + tcg_out_ldst_r(s, LDST_64, LDST_LD, data_r, addr_r, off_r); + if (TCG_LDST_BSWAP) { + tcg_out_rev(s, 1, data_r, data_r); + } + break; + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data_r, + TCGReg addr_r, TCGReg off_r) +{ + switch (opc) { + case 0: + tcg_out_ldst_r(s, LDST_8, LDST_ST, data_r, addr_r, off_r); + break; + case 1: + if (TCG_LDST_BSWAP) { + tcg_out_rev16(s, 0, TCG_REG_TMP, data_r); + tcg_out_ldst_r(s, LDST_16, LDST_ST, TCG_REG_TMP, addr_r, off_r); + } else { + tcg_out_ldst_r(s, LDST_16, LDST_ST, data_r, addr_r, off_r); + } + 
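What the TLB lookup above computes, written as plain C (editorial sketch, assuming the usual QEMU softmmu layout with tlb_table[mem_index] holding CPU_TLB_SIZE entries; this is not code from the patch):

    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    entry = &env->tlb_table[mem_index][index];
    cmp   = addr & (TARGET_PAGE_MASK | ((1 << s_bits) - 1));
    if (cmp != entry->addr_read) {          /* addr_write for stores */
        goto slow_path;                     /* patched conditional branch */
    }
    host_address = addr + entry->addend;    /* addend kept in X1 */

X0 holds the loaded comparator, X3 the masked address, and the actual access is then emitted with the (addr_reg, X1) register-offset form of tcg_out_ldst_r.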
break; + case 2: + if (TCG_LDST_BSWAP) { + tcg_out_rev(s, 0, TCG_REG_TMP, data_r); + tcg_out_ldst_r(s, LDST_32, LDST_ST, TCG_REG_TMP, addr_r, off_r); + } else { + tcg_out_ldst_r(s, LDST_32, LDST_ST, data_r, addr_r, off_r); + } + break; + case 3: + if (TCG_LDST_BSWAP) { + tcg_out_rev(s, 1, TCG_REG_TMP, data_r); + tcg_out_ldst_r(s, LDST_64, LDST_ST, TCG_REG_TMP, addr_r, off_r); + } else { + tcg_out_ldst_r(s, LDST_64, LDST_ST, data_r, addr_r, off_r); + } + break; + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) +{ + TCGReg addr_reg, data_reg; +#ifdef CONFIG_SOFTMMU + int mem_index, s_bits; + uint8_t *label_ptr; +#endif + data_reg = args[0]; + addr_reg = args[1]; + +#ifdef CONFIG_SOFTMMU + mem_index = args[2]; + s_bits = opc & 3; + tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1); + tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_X1); + add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg, + mem_index, s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ + tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, + GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR); +#endif /* CONFIG_SOFTMMU */ +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) +{ + TCGReg addr_reg, data_reg; +#ifdef CONFIG_SOFTMMU + int mem_index, s_bits; + uint8_t *label_ptr; +#endif + data_reg = args[0]; + addr_reg = args[1]; + +#ifdef CONFIG_SOFTMMU + mem_index = args[2]; + s_bits = opc & 3; + + tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0); + tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_X1); + add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg, + mem_index, s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ + tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, + GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR); +#endif /* CONFIG_SOFTMMU */ +} + +static uint8_t *tb_ret_addr; + +/* callee stack use example: + stp x29, x30, [sp,#-32]! + mov x29, sp + stp x1, x2, [sp,#16] + ... + ldp x1, x2, [sp,#16] + ldp x29, x30, [sp],#32 + ret +*/ + +/* push r1 and r2, and alloc stack space for a total of + alloc_n elements (1 element=16 bytes, must be between 1 and 31. */ +static inline void tcg_out_push_pair(TCGContext *s, TCGReg addr, + TCGReg r1, TCGReg r2, int alloc_n) +{ + /* using indexed scaled simm7 STP 0x28800000 | (ext) | 0x01000000 (pre-idx) + | alloc_n * (-1) << 16 | r2 << 10 | addr << 5 | r1 */ + assert(alloc_n > 0 && alloc_n < 0x20); + alloc_n = (-alloc_n) & 0x3f; + tcg_out32(s, 0xa9800000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1); +} + +/* dealloc stack space for a total of alloc_n elements and pop r1, r2. 
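Spot check (editorial, not part of the patch) of the STP encoding built by tcg_out_push_pair above, using the register numbering from tcg-target.h (FP = 29, LR = 30, SP = 31) and alloc_n = 1:

    tcg_out_push_pair(s, TCG_REG_SP, TCG_REG_FP, TCG_REG_LR, 1)
        -> 0xa9800000 | (0x3f << 16) | (30 << 10) | (31 << 5) | 29
        == 0xa9bf7bfd   /* stp x29, x30, [sp, #-16]! */

which is the conventional AArch64 push of the frame pointer and link register.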
*/ +static inline void tcg_out_pop_pair(TCGContext *s, TCGReg addr, + TCGReg r1, TCGReg r2, int alloc_n) +{ + /* using indexed scaled simm7 LDP 0x28c00000 | (ext) | nothing (post-idx) + | alloc_n << 16 | r2 << 10 | addr << 5 | r1 */ + assert(alloc_n > 0 && alloc_n < 0x20); + tcg_out32(s, 0xa8c00000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1); +} + +static inline void tcg_out_store_pair(TCGContext *s, TCGReg addr, + TCGReg r1, TCGReg r2, int idx) +{ + /* using register pair offset simm7 STP 0x29000000 | (ext) + | idx << 16 | r2 << 10 | addr << 5 | r1 */ + assert(idx > 0 && idx < 0x20); + tcg_out32(s, 0xa9000000 | idx << 16 | r2 << 10 | addr << 5 | r1); +} + +static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr, + TCGReg r1, TCGReg r2, int idx) +{ + /* using register pair offset simm7 LDP 0x29400000 | (ext) + | idx << 16 | r2 << 10 | addr << 5 | r1 */ + assert(idx > 0 && idx < 0x20); + tcg_out32(s, 0xa9400000 | idx << 16 | r2 << 10 | addr << 5 | r1); +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + /* ext will be set in the switch below, which will fall through to the + common code. It triggers the use of extended regs where appropriate. */ + int ext = 0; + + switch (opc) { + case INDEX_op_exit_tb: + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, args[0]); + tcg_out_goto(s, (tcg_target_long)tb_ret_addr); + break; + + case INDEX_op_goto_tb: +#ifndef USE_DIRECT_JUMP +#error "USE_DIRECT_JUMP required for aarch64" +#endif + assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */ + s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; + /* actual branch destination will be patched by + aarch64_tb_set_jmp_target later, beware retranslation. */ + tcg_out_goto_noaddr(s); + s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; + break; + + case INDEX_op_call: + if (const_args[0]) { + tcg_out_call(s, args[0]); + } else { + tcg_out_callr(s, args[0]); + } + break; + + case INDEX_op_br: + tcg_out_goto_label(s, args[0]); + break; + + case INDEX_op_ld_i32: + case INDEX_op_ld_i64: + case INDEX_op_st_i32: + case INDEX_op_st_i64: + case INDEX_op_ld8u_i32: + case INDEX_op_ld8s_i32: + case INDEX_op_ld16u_i32: + case INDEX_op_ld16s_i32: + case INDEX_op_ld8u_i64: + case INDEX_op_ld8s_i64: + case INDEX_op_ld16u_i64: + case INDEX_op_ld16s_i64: + case INDEX_op_ld32u_i64: + case INDEX_op_ld32s_i64: + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + case INDEX_op_st32_i64: + tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc), + args[0], args[1], args[2]); + break; + + case INDEX_op_mov_i64: + ext = 1; /* fall through */ + case INDEX_op_mov_i32: + tcg_out_movr(s, ext, args[0], args[1]); + break; + + case INDEX_op_movi_i64: + tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_movi_i32: + tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); + break; + + case INDEX_op_add_i64: + ext = 1; /* fall through */ + case INDEX_op_add_i32: + tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0); + break; + + case INDEX_op_sub_i64: + ext = 1; /* fall through */ + case INDEX_op_sub_i32: + tcg_out_arith(s, ARITH_SUB, ext, args[0], args[1], args[2], 0); + break; + + case INDEX_op_and_i64: + ext = 1; /* fall through */ + case INDEX_op_and_i32: + tcg_out_arith(s, ARITH_AND, ext, args[0], args[1], args[2], 0); + break; + + case INDEX_op_or_i64: + ext = 1; /* fall through */ + case INDEX_op_or_i32: + tcg_out_arith(s, ARITH_OR, ext, args[0], args[1], 
args[2], 0); + break; + + case INDEX_op_xor_i64: + ext = 1; /* fall through */ + case INDEX_op_xor_i32: + tcg_out_arith(s, ARITH_XOR, ext, args[0], args[1], args[2], 0); + break; + + case INDEX_op_mul_i64: + ext = 1; /* fall through */ + case INDEX_op_mul_i32: + tcg_out_mul(s, ext, args[0], args[1], args[2]); + break; + + case INDEX_op_shl_i64: + ext = 1; /* fall through */ + case INDEX_op_shl_i32: + if (const_args[2]) { /* LSL / UBFM Wd, Wn, (32 - m) */ + tcg_out_shl(s, ext, args[0], args[1], args[2]); + } else { /* LSL / LSLV */ + tcg_out_shiftrot_reg(s, SRR_SHL, ext, args[0], args[1], args[2]); + } + break; + + case INDEX_op_shr_i64: + ext = 1; /* fall through */ + case INDEX_op_shr_i32: + if (const_args[2]) { /* LSR / UBFM Wd, Wn, m, 31 */ + tcg_out_shr(s, ext, args[0], args[1], args[2]); + } else { /* LSR / LSRV */ + tcg_out_shiftrot_reg(s, SRR_SHR, ext, args[0], args[1], args[2]); + } + break; + + case INDEX_op_sar_i64: + ext = 1; /* fall through */ + case INDEX_op_sar_i32: + if (const_args[2]) { /* ASR / SBFM Wd, Wn, m, 31 */ + tcg_out_sar(s, ext, args[0], args[1], args[2]); + } else { /* ASR / ASRV */ + tcg_out_shiftrot_reg(s, SRR_SAR, ext, args[0], args[1], args[2]); + } + break; + + case INDEX_op_rotr_i64: + ext = 1; /* fall through */ + case INDEX_op_rotr_i32: + if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, m */ + tcg_out_rotr(s, ext, args[0], args[1], args[2]); + } else { /* ROR / RORV */ + tcg_out_shiftrot_reg(s, SRR_ROR, ext, args[0], args[1], args[2]); + } + break; + + case INDEX_op_rotl_i64: + ext = 1; /* fall through */ + case INDEX_op_rotl_i32: /* same as rotate right by (32 - m) */ + if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */ + tcg_out_rotl(s, ext, args[0], args[1], args[2]); + } else { + tcg_out_arith(s, ARITH_SUB, 0, + TCG_REG_TMP, TCG_REG_XZR, args[2], 0); + tcg_out_shiftrot_reg(s, SRR_ROR, ext, + args[0], args[1], TCG_REG_TMP); + } + break; + + case INDEX_op_brcond_i64: + ext = 1; /* fall through */ + case INDEX_op_brcond_i32: /* CMP 0, 1, cond(2), label 3 */ + tcg_out_cmp(s, ext, args[0], args[1], 0); + tcg_out_goto_label_cond(s, args[2], args[3]); + break; + + case INDEX_op_setcond_i64: + ext = 1; /* fall through */ + case INDEX_op_setcond_i32: + tcg_out_cmp(s, ext, args[1], args[2], 0); + tcg_out_cset(s, 0, args[0], args[3]); + break; + + case INDEX_op_qemu_ld8u: + tcg_out_qemu_ld(s, args, 0 | 0); + break; + case INDEX_op_qemu_ld8s: + tcg_out_qemu_ld(s, args, 4 | 0); + break; + case INDEX_op_qemu_ld16u: + tcg_out_qemu_ld(s, args, 0 | 1); + break; + case INDEX_op_qemu_ld16s: + tcg_out_qemu_ld(s, args, 4 | 1); + break; + case INDEX_op_qemu_ld32u: + tcg_out_qemu_ld(s, args, 0 | 2); + break; + case INDEX_op_qemu_ld32s: + tcg_out_qemu_ld(s, args, 4 | 2); + break; + case INDEX_op_qemu_ld32: + tcg_out_qemu_ld(s, args, 0 | 2); + break; + case INDEX_op_qemu_ld64: + tcg_out_qemu_ld(s, args, 0 | 3); + break; + case INDEX_op_qemu_st8: + tcg_out_qemu_st(s, args, 0); + break; + case INDEX_op_qemu_st16: + tcg_out_qemu_st(s, args, 1); + break; + case INDEX_op_qemu_st32: + tcg_out_qemu_st(s, args, 2); + break; + case INDEX_op_qemu_st64: + tcg_out_qemu_st(s, args, 3); + break; + + case INDEX_op_bswap64_i64: + ext = 1; /* fall through */ + case INDEX_op_bswap32_i64: + case INDEX_op_bswap32_i32: + tcg_out_rev(s, ext, args[0], args[1]); + break; + case INDEX_op_bswap16_i64: + case INDEX_op_bswap16_i32: + tcg_out_rev16(s, 0, args[0], args[1]); + break; + + case INDEX_op_ext8s_i64: + ext = 1; /* fall through */ + case INDEX_op_ext8s_i32: + tcg_out_sxt(s, ext, 0, args[0], 
args[1]); + break; + case INDEX_op_ext16s_i64: + ext = 1; /* fall through */ + case INDEX_op_ext16s_i32: + tcg_out_sxt(s, ext, 1, args[0], args[1]); + break; + case INDEX_op_ext32s_i64: + tcg_out_sxt(s, 1, 2, args[0], args[1]); + break; + case INDEX_op_ext8u_i64: + case INDEX_op_ext8u_i32: + tcg_out_uxt(s, 0, args[0], args[1]); + break; + case INDEX_op_ext16u_i64: + case INDEX_op_ext16u_i32: + tcg_out_uxt(s, 1, args[0], args[1]); + break; + case INDEX_op_ext32u_i64: + tcg_out_movr(s, 0, args[0], args[1]); + break; + + default: + tcg_abort(); /* opcode not implemented */ + } +} + +static const TCGTargetOpDef aarch64_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_call, { "ri" } }, + { INDEX_op_br, { } }, + + { INDEX_op_mov_i32, { "r", "r" } }, + { INDEX_op_mov_i64, { "r", "r" } }, + + { INDEX_op_movi_i32, { "r" } }, + { INDEX_op_movi_i64, { "r" } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + + { INDEX_op_st8_i32, { "r", "r" } }, + { INDEX_op_st16_i32, { "r", "r" } }, + { INDEX_op_st_i32, { "r", "r" } }, + { INDEX_op_st8_i64, { "r", "r" } }, + { INDEX_op_st16_i64, { "r", "r" } }, + { INDEX_op_st32_i64, { "r", "r" } }, + { INDEX_op_st_i64, { "r", "r" } }, + + { INDEX_op_add_i32, { "r", "r", "r" } }, + { INDEX_op_add_i64, { "r", "r", "r" } }, + { INDEX_op_sub_i32, { "r", "r", "r" } }, + { INDEX_op_sub_i64, { "r", "r", "r" } }, + { INDEX_op_mul_i32, { "r", "r", "r" } }, + { INDEX_op_mul_i64, { "r", "r", "r" } }, + { INDEX_op_and_i32, { "r", "r", "r" } }, + { INDEX_op_and_i64, { "r", "r", "r" } }, + { INDEX_op_or_i32, { "r", "r", "r" } }, + { INDEX_op_or_i64, { "r", "r", "r" } }, + { INDEX_op_xor_i32, { "r", "r", "r" } }, + { INDEX_op_xor_i64, { "r", "r", "r" } }, + + { INDEX_op_shl_i32, { "r", "r", "ri" } }, + { INDEX_op_shr_i32, { "r", "r", "ri" } }, + { INDEX_op_sar_i32, { "r", "r", "ri" } }, + { INDEX_op_rotl_i32, { "r", "r", "ri" } }, + { INDEX_op_rotr_i32, { "r", "r", "ri" } }, + { INDEX_op_shl_i64, { "r", "r", "ri" } }, + { INDEX_op_shr_i64, { "r", "r", "ri" } }, + { INDEX_op_sar_i64, { "r", "r", "ri" } }, + { INDEX_op_rotl_i64, { "r", "r", "ri" } }, + { INDEX_op_rotr_i64, { "r", "r", "ri" } }, + + { INDEX_op_brcond_i32, { "r", "r" } }, + { INDEX_op_setcond_i32, { "r", "r", "r" } }, + { INDEX_op_brcond_i64, { "r", "r" } }, + { INDEX_op_setcond_i64, { "r", "r", "r" } }, + + { INDEX_op_qemu_ld8u, { "r", "l" } }, + { INDEX_op_qemu_ld8s, { "r", "l" } }, + { INDEX_op_qemu_ld16u, { "r", "l" } }, + { INDEX_op_qemu_ld16s, { "r", "l" } }, + { INDEX_op_qemu_ld32u, { "r", "l" } }, + { INDEX_op_qemu_ld32s, { "r", "l" } }, + + { INDEX_op_qemu_ld32, { "r", "l" } }, + { INDEX_op_qemu_ld64, { "r", "l" } }, + + { INDEX_op_qemu_st8, { "l", "l" } }, + { INDEX_op_qemu_st16, { "l", "l" } }, + { INDEX_op_qemu_st32, { "l", "l" } }, + { INDEX_op_qemu_st64, { "l", "l" } }, + + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + { INDEX_op_bswap16_i64, { "r", "r" } }, + { INDEX_op_bswap32_i64, { "r", "r" } }, + { INDEX_op_bswap64_i64, { "r", "r" } }, + + { INDEX_op_ext8s_i32, { "r", "r" } }, + { INDEX_op_ext16s_i32, { "r", 
"r" } }, + { INDEX_op_ext8u_i32, { "r", "r" } }, + { INDEX_op_ext16u_i32, { "r", "r" } }, + + { INDEX_op_ext8s_i64, { "r", "r" } }, + { INDEX_op_ext16s_i64, { "r", "r" } }, + { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_ext8u_i64, { "r", "r" } }, + { INDEX_op_ext16u_i64, { "r", "r" } }, + { INDEX_op_ext32u_i64, { "r", "r" } }, + + { -1 }, +}; + +static void tcg_target_init(TCGContext *s) +{ +#if !defined(CONFIG_USER_ONLY) + /* fail safe */ + if ((1ULL << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) { + tcg_abort(); + } +#endif + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); + + tcg_regset_set32(tcg_target_call_clobber_regs, 0, + (1 << TCG_REG_X0) | (1 << TCG_REG_X1) | + (1 << TCG_REG_X2) | (1 << TCG_REG_X3) | + (1 << TCG_REG_X4) | (1 << TCG_REG_X5) | + (1 << TCG_REG_X6) | (1 << TCG_REG_X7) | + (1 << TCG_REG_X8) | (1 << TCG_REG_X9) | + (1 << TCG_REG_X10) | (1 << TCG_REG_X11) | + (1 << TCG_REG_X12) | (1 << TCG_REG_X13) | + (1 << TCG_REG_X14) | (1 << TCG_REG_X15) | + (1 << TCG_REG_X16) | (1 << TCG_REG_X17) | + (1 << TCG_REG_X18)); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ + + tcg_add_target_add_op_defs(aarch64_op_defs); +} + +static void tcg_target_qemu_prologue(TCGContext *s) +{ + /* NB: frame sizes are in 16 byte stack units! */ + int frame_size_callee_saved, frame_size_tcg_locals; + TCGReg r; + + /* save pairs (FP, LR) and (X19, X20) .. (X27, X28) */ + frame_size_callee_saved = (1) + (TCG_REG_X28 - TCG_REG_X19) / 2 + 1; + + /* frame size requirement for TCG local variables */ + frame_size_tcg_locals = TCG_STATIC_CALL_ARGS_SIZE + + CPU_TEMP_BUF_NLONGS * sizeof(long) + + (TCG_TARGET_STACK_ALIGN - 1); + frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1); + frame_size_tcg_locals /= TCG_TARGET_STACK_ALIGN; + + /* push (FP, LR) and update sp */ + tcg_out_push_pair(s, TCG_REG_SP, + TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved); + + /* FP -> callee_saved */ + tcg_out_movr_sp(s, 1, TCG_REG_FP, TCG_REG_SP); + + /* store callee-preserved regs x19..x28 using FP -> callee_saved */ + for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { + int idx = (r - TCG_REG_X19) / 2 + 1; + tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx); + } + + /* make stack space for TCG locals */ + tcg_out_subi(s, 1, TCG_REG_SP, TCG_REG_SP, + frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN); + /* inform TCG about how to find TCG locals with register, offset, size */ + tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + +#if defined(CONFIG_USE_GUEST_BASE) + if (GUEST_BASE) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); + } +#endif + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + tcg_out_gotor(s, tcg_target_call_iarg_regs[1]); + + tb_ret_addr = s->code_ptr; + + /* remove TCG locals stack space */ + tcg_out_addi(s, 1, TCG_REG_SP, TCG_REG_SP, + frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN); + + /* restore registers x19..x28. 
+ FP must be preserved, so it still points to callee_saved area */ + for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { + int idx = (r - TCG_REG_X19) / 2 + 1; + tcg_out_load_pair(s, TCG_REG_FP, r, r + 1, idx); + } + + /* pop (FP, LR), restore SP to previous frame, return */ + tcg_out_pop_pair(s, TCG_REG_SP, + TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved); + tcg_out_ret(s); +} diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h new file mode 100644 index 000000000..51e50920b --- /dev/null +++ b/tcg/aarch64/tcg-target.h @@ -0,0 +1,101 @@ +/* + * Initial TCG Implementation for aarch64 + * + * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH + * Written by Claudio Fontana + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. + */ + +#ifndef TCG_TARGET_AARCH64 +#define TCG_TARGET_AARCH64 1 + +#undef TCG_TARGET_WORDS_BIGENDIAN +#undef TCG_TARGET_STACK_GROWSUP + +typedef enum { + TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, TCG_REG_X4, + TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, TCG_REG_X8, TCG_REG_X9, + TCG_REG_X10, TCG_REG_X11, TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, + TCG_REG_X15, TCG_REG_X16, TCG_REG_X17, TCG_REG_X18, TCG_REG_X19, + TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, TCG_REG_X24, + TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, TCG_REG_X28, + TCG_REG_FP, /* frame pointer */ + TCG_REG_LR, /* link register */ + TCG_REG_SP, /* stack pointer or zero register */ + TCG_REG_XZR = TCG_REG_SP /* same register number */ + /* program counter is not directly accessible! */ +} TCGReg; + +#define TCG_TARGET_NB_REGS 32 + +/* used for function call generation */ +#define TCG_REG_CALL_STACK TCG_REG_SP +#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_CALL_ALIGN_ARGS 1 +#define TCG_TARGET_CALL_STACK_OFFSET 0 + +/* optional instructions */ +#define TCG_TARGET_HAS_div_i32 0 +#define TCG_TARGET_HAS_rem_i32 0 +#define TCG_TARGET_HAS_ext8s_i32 1 +#define TCG_TARGET_HAS_ext16s_i32 1 +#define TCG_TARGET_HAS_ext8u_i32 1 +#define TCG_TARGET_HAS_ext16u_i32 1 +#define TCG_TARGET_HAS_bswap16_i32 1 +#define TCG_TARGET_HAS_bswap32_i32 1 +#define TCG_TARGET_HAS_not_i32 0 +#define TCG_TARGET_HAS_neg_i32 0 +#define TCG_TARGET_HAS_rot_i32 1 +#define TCG_TARGET_HAS_andc_i32 0 +#define TCG_TARGET_HAS_orc_i32 0 +#define TCG_TARGET_HAS_eqv_i32 0 +#define TCG_TARGET_HAS_nand_i32 0 +#define TCG_TARGET_HAS_nor_i32 0 +#define TCG_TARGET_HAS_deposit_i32 0 +#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_muls2_i32 0 + +#define TCG_TARGET_HAS_div_i64 0 +#define TCG_TARGET_HAS_rem_i64 0 +#define TCG_TARGET_HAS_ext8s_i64 1 +#define TCG_TARGET_HAS_ext16s_i64 1 +#define TCG_TARGET_HAS_ext32s_i64 1 +#define TCG_TARGET_HAS_ext8u_i64 1 +#define TCG_TARGET_HAS_ext16u_i64 1 +#define TCG_TARGET_HAS_ext32u_i64 1 +#define TCG_TARGET_HAS_bswap16_i64 1 +#define TCG_TARGET_HAS_bswap32_i64 1 +#define TCG_TARGET_HAS_bswap64_i64 1 +#define TCG_TARGET_HAS_not_i64 0 +#define TCG_TARGET_HAS_neg_i64 0 +#define TCG_TARGET_HAS_rot_i64 1 +#define TCG_TARGET_HAS_andc_i64 0 +#define TCG_TARGET_HAS_orc_i64 0 +#define TCG_TARGET_HAS_eqv_i64 0 +#define TCG_TARGET_HAS_nand_i64 0 +#define TCG_TARGET_HAS_nor_i64 0 +#define TCG_TARGET_HAS_deposit_i64 0 +#define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 0 
+#define TCG_TARGET_HAS_muls2_i64 0 + +enum { + TCG_AREG0 = TCG_REG_X19, +}; + +static inline void flush_icache_range(tcg_target_ulong start, + tcg_target_ulong stop) +{ + __builtin___clear_cache((char *)start, (char *)stop); +} + +#endif /* TCG_TARGET_AARCH64 */ diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index 47612fe26..6c4854dbb 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -22,50 +22,43 @@ * THE SOFTWARE. */ -#if defined(__ARM_ARCH_7__) || \ - defined(__ARM_ARCH_7A__) || \ - defined(__ARM_ARCH_7EM__) || \ - defined(__ARM_ARCH_7M__) || \ - defined(__ARM_ARCH_7R__) -#define USE_ARMV7_INSTRUCTIONS -#endif - -#if defined(USE_ARMV7_INSTRUCTIONS) || \ - defined(__ARM_ARCH_6J__) || \ - defined(__ARM_ARCH_6K__) || \ - defined(__ARM_ARCH_6T2__) || \ - defined(__ARM_ARCH_6Z__) || \ - defined(__ARM_ARCH_6ZK__) -#define USE_ARMV6_INSTRUCTIONS +/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */ +#ifndef __ARM_ARCH +# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__) +# define __ARM_ARCH 7 +# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) +# define __ARM_ARCH 6 +# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \ + || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +# define __ARM_ARCH 5 +# else +# define __ARM_ARCH 4 +# endif #endif -#if defined(USE_ARMV6_INSTRUCTIONS) || \ - defined(__ARM_ARCH_5T__) || \ - defined(__ARM_ARCH_5TE__) || \ - defined(__ARM_ARCH_5TEJ__) -#define USE_ARMV5_INSTRUCTIONS -#endif +static int arm_arch = __ARM_ARCH; -#ifdef USE_ARMV5_INSTRUCTIONS -static const int use_armv5_instructions = 1; +#if defined(__ARM_ARCH_5T__) \ + || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) +# define use_armv5t_instructions 1 #else -static const int use_armv5_instructions = 0; +# define use_armv5t_instructions use_armv6_instructions #endif -#undef USE_ARMV5_INSTRUCTIONS -#ifdef USE_ARMV6_INSTRUCTIONS -static const int use_armv6_instructions = 1; -#else -static const int use_armv6_instructions = 0; -#endif -#undef USE_ARMV6_INSTRUCTIONS +#define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6) +#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) -#ifdef USE_ARMV7_INSTRUCTIONS -static const int use_armv7_instructions = 1; -#else -static const int use_armv7_instructions = 0; +#ifndef use_idiv_instructions +bool use_idiv_instructions; +#endif +#ifdef CONFIG_GETAUXVAL +# include <sys/auxv.h> #endif -#undef USE_ARMV7_INSTRUCTIONS #ifndef NDEBUG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { @@ -113,6 +106,8 @@ static const int tcg_target_call_oarg_regs[2] = { TCG_REG_R0, TCG_REG_R1 }; +#define TCG_REG_TMP TCG_REG_R12 + static inline void reloc_abs32(void *code_ptr, tcg_target_long target) { *(uint32_t *) code_ptr = target; @@ -145,6 +140,11 @@ static void patch_reloc(uint8_t *code_ptr, int type, } } +#define TCG_CT_CONST_ARM 0x100 +#define TCG_CT_CONST_INV 0x200 +#define TCG_CT_CONST_NEG 0x400 +#define TCG_CT_CONST_ZERO 0x800 + /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { @@ -153,8 +153,17 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) ct_str = *pct_str; switch (ct_str[0]) { case 'I': - ct->ct |= 
TCG_CT_CONST_ARM; - break; + ct->ct |= TCG_CT_CONST_ARM; + break; + case 'K': + ct->ct |= TCG_CT_CONST_INV; + break; + case 'N': /* The gcc constraint letter is L, already used here. */ + ct->ct |= TCG_CT_CONST_NEG; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_ZERO; + break; case 'r': ct->ct |= TCG_CT_REG; @@ -166,18 +175,12 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); #ifdef CONFIG_SOFTMMU - /* r0 and r1 will be overwritten when reading the tlb entry, + /* r0-r2 will be overwritten when reading the tlb entry, so don't use these. */ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); -#if TARGET_LONG_BITS == 64 - /* If we're passing env to the helper as r0 and need a regpair - * for the address then r2 will be overwritten as we're setting - * up the args to the helper. - */ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); #endif -#endif break; case 'L': ct->ct |= TCG_CT_REG; @@ -193,29 +196,13 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) case 's': ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); - /* r0 and r1 will be overwritten when reading the tlb entry - (softmmu only) and doing the byte swapping, so don't - use these. */ + /* r0-r2 will be overwritten when reading the tlb entry (softmmu only) + and r0-r1 doing the byte swapping, so don't use these. */ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); -#if defined(CONFIG_SOFTMMU) && (TARGET_LONG_BITS == 64) +#if defined(CONFIG_SOFTMMU) /* Avoid clashes with registers being used for helper args */ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); -#endif - break; - /* qemu_st64 data_reg2 */ - case 'S': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); - /* r0 and r1 will be overwritten when reading the tlb entry - (softmmu only) and doing the byte swapping, so don't - use these. */ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); -#ifdef CONFIG_SOFTMMU - /* r2 is still needed to load data_reg, so don't use it. 
*/ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); #if TARGET_LONG_BITS == 64 /* Avoid clashes with registers being used for helper args */ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); @@ -275,38 +262,65 @@ static inline int check_fit_imm(uint32_t imm) * add, sub, eor...: ditto */ static inline int tcg_target_const_match(tcg_target_long val, - const TCGArgConstraint *arg_ct) + const TCGArgConstraint *arg_ct) { int ct; ct = arg_ct->ct; - if (ct & TCG_CT_CONST) + if (ct & TCG_CT_CONST) { + return 1; + } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) { + return 1; + } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) { + return 1; + } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) { return 1; - else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) + } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { return 1; - else + } else { return 0; + } } -enum arm_data_opc_e { - ARITH_AND = 0x0, - ARITH_EOR = 0x1, - ARITH_SUB = 0x2, - ARITH_RSB = 0x3, - ARITH_ADD = 0x4, - ARITH_ADC = 0x5, - ARITH_SBC = 0x6, - ARITH_RSC = 0x7, - ARITH_TST = 0x8, - ARITH_CMP = 0xa, - ARITH_CMN = 0xb, - ARITH_ORR = 0xc, - ARITH_MOV = 0xd, - ARITH_BIC = 0xe, - ARITH_MVN = 0xf, -}; - -#define TO_CPSR(opc) \ - ((opc == ARITH_CMP || opc == ARITH_CMN || opc == ARITH_TST) << 20) +#define TO_CPSR (1 << 20) + +typedef enum { + ARITH_AND = 0x0 << 21, + ARITH_EOR = 0x1 << 21, + ARITH_SUB = 0x2 << 21, + ARITH_RSB = 0x3 << 21, + ARITH_ADD = 0x4 << 21, + ARITH_ADC = 0x5 << 21, + ARITH_SBC = 0x6 << 21, + ARITH_RSC = 0x7 << 21, + ARITH_TST = 0x8 << 21 | TO_CPSR, + ARITH_CMP = 0xa << 21 | TO_CPSR, + ARITH_CMN = 0xb << 21 | TO_CPSR, + ARITH_ORR = 0xc << 21, + ARITH_MOV = 0xd << 21, + ARITH_BIC = 0xe << 21, + ARITH_MVN = 0xf << 21, + + INSN_LDR_IMM = 0x04100000, + INSN_LDR_REG = 0x06100000, + INSN_STR_IMM = 0x04000000, + INSN_STR_REG = 0x06000000, + + INSN_LDRH_IMM = 0x005000b0, + INSN_LDRH_REG = 0x001000b0, + INSN_LDRSH_IMM = 0x005000f0, + INSN_LDRSH_REG = 0x001000f0, + INSN_STRH_IMM = 0x004000b0, + INSN_STRH_REG = 0x000000b0, + + INSN_LDRB_IMM = 0x04500000, + INSN_LDRB_REG = 0x06500000, + INSN_LDRSB_IMM = 0x005000d0, + INSN_LDRSB_REG = 0x001000d0, + INSN_STRB_IMM = 0x04400000, + INSN_STRB_REG = 0x06400000, + + INSN_LDRD_IMM = 0x004000d0, +} ARMInsn; #define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00) #define SHIFT_IMM_LSR(im) (((im) << 7) | 0x20) @@ -394,53 +408,64 @@ static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset) static inline void tcg_out_dat_reg(TCGContext *s, int cond, int opc, int rd, int rn, int rm, int shift) { - tcg_out32(s, (cond << 28) | (0 << 25) | (opc << 21) | TO_CPSR(opc) | + tcg_out32(s, (cond << 28) | (0 << 25) | opc | (rn << 16) | (rd << 12) | shift | rm); } -static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm) +static inline void tcg_out_nop(TCGContext *s) { - /* Simple reg-reg move, optimising out the 'do nothing' case */ - if (rd != rm) { - tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0)); + if (use_armv7_instructions) { + /* Architected nop introduced in v6k. */ + /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this + also Just So Happened to do nothing on pre-v6k so that we + don't need to conditionalize it? */ + tcg_out32(s, 0xe320f000); + } else { + /* Prior to that the assembler uses mov r0, r0. 
*/ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0)); } } -static inline void tcg_out_dat_reg2(TCGContext *s, - int cond, int opc0, int opc1, int rd0, int rd1, - int rn0, int rn1, int rm0, int rm1, int shift) +static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm) { - if (rd0 == rn1 || rd0 == rm1) { - tcg_out32(s, (cond << 28) | (0 << 25) | (opc0 << 21) | (1 << 20) | - (rn0 << 16) | (8 << 12) | shift | rm0); - tcg_out32(s, (cond << 28) | (0 << 25) | (opc1 << 21) | - (rn1 << 16) | (rd1 << 12) | shift | rm1); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd0, 0, TCG_REG_R8, SHIFT_IMM_LSL(0)); - } else { - tcg_out32(s, (cond << 28) | (0 << 25) | (opc0 << 21) | (1 << 20) | - (rn0 << 16) | (rd0 << 12) | shift | rm0); - tcg_out32(s, (cond << 28) | (0 << 25) | (opc1 << 21) | - (rn1 << 16) | (rd1 << 12) | shift | rm1); + /* Simple reg-reg move, optimising out the 'do nothing' case */ + if (rd != rm) { + tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0)); } } static inline void tcg_out_dat_imm(TCGContext *s, int cond, int opc, int rd, int rn, int im) { - tcg_out32(s, (cond << 28) | (1 << 25) | (opc << 21) | TO_CPSR(opc) | + tcg_out32(s, (cond << 28) | (1 << 25) | opc | (rn << 16) | (rd << 12) | im); } -static inline void tcg_out_movi32(TCGContext *s, - int cond, int rd, uint32_t arg) +static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg) { - /* TODO: This is very suboptimal, we can easily have a constant - * pool somewhere after all the instructions. */ - if ((int)arg < 0 && (int)arg >= -0x100) { - tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg) & 0xff); - } else if (use_armv7_instructions) { - /* use movw/movt */ + int rot, opc, rn; + + /* For armv7, make sure not to use movw+movt when mov/mvn would do. + Speed things up by only checking when movt would be required. + Prior to armv7, have one go at fully rotated immediates before + doing the decomposition thing below. */ + if (!use_armv7_instructions || (arg & 0xffff0000)) { + rot = encode_imm(arg); + if (rot >= 0) { + tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, + rotl(arg, rot) | (rot << 7)); + return; + } + rot = encode_imm(~arg); + if (rot >= 0) { + tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, + rotl(~arg, rot) | (rot << 7)); + return; + } + } + + /* Use movw + movt. */ + if (use_armv7_instructions) { /* movw */ tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12) | ((arg << 4) & 0x000f0000) | (arg & 0xfff)); @@ -449,22 +474,27 @@ static inline void tcg_out_movi32(TCGContext *s, tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12) | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff)); } - } else { - int opc = ARITH_MOV; - int rn = 0; - - do { - int i, rot; - - i = ctz32(arg) & ~1; - rot = ((32 - i) << 7) & 0xf00; - tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot); - arg &= ~(0xff << i); + return; + } - opc = ARITH_ORR; - rn = rd; - } while (arg); + /* TODO: This is very suboptimal, we can easily have a constant + pool somewhere after all the instructions. */ + opc = ARITH_MOV; + rn = 0; + /* If we have lots of leading 1's, we can shorten the sequence by + beginning with mvn and then clearing higher bits with eor. 
*/ + if (clz32(~arg) > clz32(arg)) { + opc = ARITH_MVN, arg = ~arg; } + do { + int i = ctz32(arg) & ~1; + rot = ((32 - i) << 7) & 0xf00; + tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot); + arg &= ~(0xff << i); + + opc = ARITH_EOR; + rn = rd; + } while (arg); } static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst, @@ -482,55 +512,112 @@ static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst, } } -static inline void tcg_out_mul32(TCGContext *s, - int cond, int rd, int rs, int rm) +static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv, + TCGReg dst, TCGReg lhs, TCGArg rhs, + bool rhs_is_const) { - if (rd != rm) - tcg_out32(s, (cond << 28) | (rd << 16) | (0 << 12) | - (rs << 8) | 0x90 | rm); - else if (rd != rs) - tcg_out32(s, (cond << 28) | (rd << 16) | (0 << 12) | - (rm << 8) | 0x90 | rs); - else { - tcg_out32(s, (cond << 28) | ( 8 << 16) | (0 << 12) | - (rs << 8) | 0x90 | rm); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, TCG_REG_R8, SHIFT_IMM_LSL(0)); + /* Emit either the reg,imm or reg,reg form of a data-processing insn. + * rhs must satisfy the "rIK" constraint. + */ + if (rhs_is_const) { + int rot = encode_imm(rhs); + if (rot < 0) { + rhs = ~rhs; + rot = encode_imm(rhs); + assert(rot >= 0); + opc = opinv; + } + tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); + } else { + tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); } } -static inline void tcg_out_umull32(TCGContext *s, - int cond, int rd0, int rd1, int rs, int rm) +static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg, + TCGArg dst, TCGArg lhs, TCGArg rhs, + bool rhs_is_const) { - if (rd0 != rm && rd1 != rm) - tcg_out32(s, (cond << 28) | 0x800090 | - (rd1 << 16) | (rd0 << 12) | (rs << 8) | rm); - else if (rd0 != rs && rd1 != rs) - tcg_out32(s, (cond << 28) | 0x800090 | - (rd1 << 16) | (rd0 << 12) | (rm << 8) | rs); - else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_R8, 0, rm, SHIFT_IMM_LSL(0)); - tcg_out32(s, (cond << 28) | 0x800098 | - (rd1 << 16) | (rd0 << 12) | (rs << 8)); + /* Emit either the reg,imm or reg,reg form of a data-processing insn. + * rhs must satisfy the "rIN" constraint. + */ + if (rhs_is_const) { + int rot = encode_imm(rhs); + if (rot < 0) { + rhs = -rhs; + rot = encode_imm(rhs); + assert(rot >= 0); + opc = opneg; + } + tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); + } else { + tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); } } -static inline void tcg_out_smull32(TCGContext *s, - int cond, int rd0, int rd1, int rs, int rm) +static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd, + TCGReg rn, TCGReg rm) { - if (rd0 != rm && rd1 != rm) - tcg_out32(s, (cond << 28) | 0xc00090 | - (rd1 << 16) | (rd0 << 12) | (rs << 8) | rm); - else if (rd0 != rs && rd1 != rs) - tcg_out32(s, (cond << 28) | 0xc00090 | - (rd1 << 16) | (rd0 << 12) | (rm << 8) | rs); - else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_R8, 0, rm, SHIFT_IMM_LSL(0)); - tcg_out32(s, (cond << 28) | 0xc00098 | - (rd1 << 16) | (rd0 << 12) | (rs << 8)); + /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */ + if (!use_armv6_instructions && rd == rn) { + if (rd == rm) { + /* rd == rn == rm; copy an input to tmp first. 
*/ + tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); + rm = rn = TCG_REG_TMP; + } else { + rn = rm; + rm = rd; + } + } + /* mul */ + tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn); +} + +static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0, + TCGReg rd1, TCGReg rn, TCGReg rm) +{ + /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ + if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { + if (rd0 == rm || rd1 == rm) { + tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); + rn = TCG_REG_TMP; + } else { + TCGReg t = rn; + rn = rm; + rm = t; + } + } + /* umull */ + tcg_out32(s, (cond << 28) | 0x00800090 | + (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); +} + +static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0, + TCGReg rd1, TCGReg rn, TCGReg rm) +{ + /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ + if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { + if (rd0 == rm || rd1 == rm) { + tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); + rn = TCG_REG_TMP; + } else { + TCGReg t = rn; + rn = rm; + rm = t; + } } + /* smull */ + tcg_out32(s, (cond << 28) | 0x00c00090 | + (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); +} + +static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm) +{ + tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); +} + +static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm) +{ + tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); } static inline void tcg_out_ext8s(TCGContext *s, int cond, @@ -588,11 +675,11 @@ static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn) tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); } else { tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_R8, 0, rn, SHIFT_IMM_LSL(24)); + TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_R8, 0, TCG_REG_R8, SHIFT_IMM_ASR(16)); + TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16)); tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_R8, rn, SHIFT_IMM_LSR(8)); + rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); } } @@ -603,11 +690,11 @@ static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn) tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); } else { tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_R8, 0, rn, SHIFT_IMM_LSL(24)); + TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_R8, 0, TCG_REG_R8, SHIFT_IMM_LSR(16)); + TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16)); tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_R8, rn, SHIFT_IMM_LSR(8)); + rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); } } @@ -620,10 +707,10 @@ static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn) tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); } else { tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_R8, 0, rn, SHIFT_IMM_LSR(8)); - tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_R8, TCG_REG_R8, 0xff); + TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8)); + tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff); tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_R8, rn, SHIFT_IMM_LSL(8)); + rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8)); } } @@ -634,205 +721,186 @@ static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn) tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); } else { tcg_out_dat_reg(s, cond, ARITH_EOR, - TCG_REG_R8, rn, rn, SHIFT_IMM_ROR(16)); + TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16)); tcg_out_dat_imm(s, cond, 
ARITH_BIC, - TCG_REG_R8, TCG_REG_R8, 0xff | 0x800); + TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800); tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rn, SHIFT_IMM_ROR(8)); tcg_out_dat_reg(s, cond, ARITH_EOR, - rd, rd, TCG_REG_R8, SHIFT_IMM_LSR(8)); + rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8)); } } -static inline void tcg_out_ld32_12(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +bool tcg_target_deposit_valid(int ofs, int len) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x05900000 | - (rn << 16) | (rd << 12) | (im & 0xfff)); - else - tcg_out32(s, (cond << 28) | 0x05100000 | - (rn << 16) | (rd << 12) | ((-im) & 0xfff)); + /* ??? Without bfi, we could improve over generic code by combining + the right-shift from a non-zero ofs with the orr. We do run into + problems when rd == rs, and the mask generated from ofs+len doesn't + fit into an immediate. We would have to be careful not to pessimize + wrt the optimizations performed on the expanded code. */ + return use_armv7_instructions; } -/* Offset pre-increment with base writeback. */ -static inline void tcg_out_ld32_12wb(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd, + TCGArg a1, int ofs, int len, bool const_a1) { - /* ldr with writeback and both register equals is UNPREDICTABLE */ - assert(rd != rn); + if (const_a1) { + /* bfi becomes bfc with rn == 15. */ + a1 = 15; + } + /* bfi/bfc */ + tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1 + | (ofs << 7) | ((ofs + len - 1) << 16)); +} - if (im >= 0) { - tcg_out32(s, (cond << 28) | 0x05b00000 | - (rn << 16) | (rd << 12) | (im & 0xfff)); - } else { - tcg_out32(s, (cond << 28) | 0x05300000 | - (rn << 16) | (rd << 12) | ((-im) & 0xfff)); +/* Note that this routine is used for both LDR and LDRH formats, so we do + not wish to include an immediate shift at this point. 
*/ +static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, + TCGReg rn, TCGReg rm, bool u, bool p, bool w) +{ + tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) + | (w << 21) | (rn << 16) | (rt << 12) | rm); +} + +static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, + TCGReg rn, int imm8, bool p, bool w) +{ + bool u = 1; + if (imm8 < 0) { + imm8 = -imm8; + u = 0; } + tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) | + (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf)); } -static inline void tcg_out_st32_12(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, + TCGReg rn, int imm12, bool p, bool w) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x05800000 | - (rn << 16) | (rd << 12) | (im & 0xfff)); - else - tcg_out32(s, (cond << 28) | 0x05000000 | - (rn << 16) | (rd << 12) | ((-im) & 0xfff)); + bool u = 1; + if (imm12 < 0) { + imm12 = -imm12; + u = 0; + } + tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) | + (rn << 16) | (rt << 12) | imm12); } -static inline void tcg_out_ld32_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) { - tcg_out32(s, (cond << 28) | 0x07900000 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0); } -static inline void tcg_out_st32_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) { - tcg_out32(s, (cond << 28) | 0x07800000 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0); +} + +static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0); } /* Register pre-increment with base writeback. 
*/ -static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x07b00000 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1); } -static inline void tcg_out_st32_rwb(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x07a00000 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1); } -static inline void tcg_out_ld16u_8(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x01d000b0 | - (rn << 16) | (rd << 12) | - ((im & 0xf0) << 4) | (im & 0xf)); - else - tcg_out32(s, (cond << 28) | 0x015000b0 | - (rn << 16) | (rd << 12) | - (((-im) & 0xf0) << 4) | ((-im) & 0xf)); + tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0); } -static inline void tcg_out_st16_8(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x01c000b0 | - (rn << 16) | (rd << 12) | - ((im & 0xf0) << 4) | (im & 0xf)); - else - tcg_out32(s, (cond << 28) | 0x014000b0 | - (rn << 16) | (rd << 12) | - (((-im) & 0xf0) << 4) | ((-im) & 0xf)); + tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0); } -static inline void tcg_out_ld16u_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x019000b0 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0); } -static inline void tcg_out_st16_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x018000b0 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0); } -static inline void tcg_out_ld16s_8(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x01d000f0 | - (rn << 16) | (rd << 12) | - ((im & 0xf0) << 4) | (im & 0xf)); - else - tcg_out32(s, (cond << 28) | 0x015000f0 | - (rn << 16) | (rd << 12) | - (((-im) & 0xf0) << 4) | ((-im) & 0xf)); + tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0); } -static inline void tcg_out_ld16s_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x019000f0 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0); } -static inline void tcg_out_ld8_12(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x05d00000 | - (rn << 16) | (rd << 12) | (im & 0xfff)); - else - tcg_out32(s, (cond << 28) | 0x05500000 | - (rn << 16) | (rd << 12) | ((-im) & 0xfff)); + 
tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0); } -static inline void tcg_out_st8_12(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x05c00000 | - (rn << 16) | (rd << 12) | (im & 0xfff)); - else - tcg_out32(s, (cond << 28) | 0x05400000 | - (rn << 16) | (rd << 12) | ((-im) & 0xfff)); + tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0); } -static inline void tcg_out_ld8_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x07d00000 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0); } -static inline void tcg_out_st8_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x07c00000 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0); } -static inline void tcg_out_ld8s_8(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x01d000d0 | - (rn << 16) | (rd << 12) | - ((im & 0xf0) << 4) | (im & 0xf)); - else - tcg_out32(s, (cond << 28) | 0x015000d0 | - (rn << 16) | (rd << 12) | - (((-im) & 0xf0) << 4) | ((-im) & 0xf)); + tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0); } -static inline void tcg_out_ld8s_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x019000d0 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0); } static inline void tcg_out_ld32u(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_ld32_12(s, cond, rd, rn, offset); } @@ -841,8 +909,8 @@ static inline void tcg_out_st32(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_st32_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_st32_12(s, cond, rd, rn, offset); } @@ -851,8 +919,8 @@ static inline void tcg_out_ld16u(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_ld16u_8(s, cond, rd, rn, offset); } @@ -861,8 +929,8 @@ static inline void tcg_out_ld16s(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_ld16s_8(s, cond, rd, rn, offset); } 
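[Editor's note — illustration, not part of the patch] The hunks above replace the open-coded ARM load/store words (0x05900000, 0x01d000b0, ...) with the single helpers tcg_out_memop_12/_8/_r, parameterised by an INSN_* opcode plus the P (pre-index), U (add offset) and W (writeback) bits. Below is a minimal self-contained sketch of the imm12 path, assuming the same bit layout as the patch; the name sketch_encode_ldr_imm12 is invented here purely for illustration.

#include <stdint.h>

#define SKETCH_INSN_LDR_IMM 0x04100000u   /* mirrors INSN_LDR_IMM above */

static uint32_t sketch_encode_ldr_imm12(uint32_t cond, uint32_t rt, uint32_t rn,
                                        int imm12, unsigned p, unsigned w)
{
    uint32_t u = 1;
    if (imm12 < 0) {        /* a negative offset clears the U (add) bit */
        imm12 = -imm12;
        u = 0;
    }
    return (cond << 28) | SKETCH_INSN_LDR_IMM | (p << 24) | (u << 23)
           | (w << 21) | (rn << 16) | (rt << 12) | (uint32_t)imm12;
}

/* Example: "ldr r0, [r2, #8]" with cond = 0xe (COND_AL), p = 1, w = 0 gives
   0xe5920008 -- the same word the old expression
   (cond << 28) | 0x05900000 | (rn << 16) | (rd << 12) | (im & 0xfff)
   produced; the pre-indexed, writeback and negative-offset variants now all
   share this one encoding path instead of separate hand-written constants. */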
@@ -871,8 +939,8 @@ static inline void tcg_out_st16(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_st16_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_st16_8(s, cond, rd, rn, offset); } @@ -881,8 +949,8 @@ static inline void tcg_out_ld8u(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_ld8_12(s, cond, rd, rn, offset); } @@ -891,8 +959,8 @@ static inline void tcg_out_ld8s(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_ld8s_8(s, cond, rd, rn, offset); } @@ -901,8 +969,8 @@ static inline void tcg_out_st8(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_st8_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_st8_12(s, cond, rd, rn, offset); } @@ -930,10 +998,10 @@ static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr) tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); tcg_out32(s, addr); } else { - tcg_out_movi32(s, cond, TCG_REG_R8, val - 8); + tcg_out_movi32(s, cond, TCG_REG_TMP, val - 8); tcg_out_dat_reg(s, cond, ARITH_ADD, TCG_REG_PC, TCG_REG_PC, - TCG_REG_R8, SHIFT_IMM_LSL(0)); + TCG_REG_TMP, SHIFT_IMM_LSL(0)); } } } @@ -948,13 +1016,16 @@ static inline void tcg_out_call(TCGContext *s, uint32_t addr) if (val - 8 < 0x02000000 && val - 8 >= -0x02000000) { if (addr & 1) { /* Use BLX if the target is in Thumb mode */ - if (!use_armv5_instructions) { + if (!use_armv5t_instructions) { tcg_abort(); } tcg_out_blx_imm(s, val); } else { tcg_out_bl(s, COND_AL, val); } + } else if (use_armv7_instructions) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addr); + tcg_out_blx(s, COND_AL, TCG_REG_TMP); } else { tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); @@ -964,7 +1035,7 @@ static inline void tcg_out_call(TCGContext *s, uint32_t addr) static inline void tcg_out_callr(TCGContext *s, int cond, int arg) { - if (use_armv5_instructions) { + if (use_armv5t_instructions) { tcg_out_blx(s, cond, arg); } else { tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0, @@ -977,14 +1048,9 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index) { TCGLabel *l = &s->labels[label_index]; - if (l->has_value) + if (l->has_value) { tcg_out_goto(s, cond, l->u.value); - else if (cond == COND_AL) { - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); - tcg_out_reloc(s, s->code_ptr, R_ARM_ABS32, label_index, 31337); - s->code_ptr += 4; } else { - /* Probably this should be preferred even for COND_AL... 
*/ tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, label_index, 31337); tcg_out_b_noaddr(s, cond); } @@ -992,7 +1058,7 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index) #ifdef CONFIG_SOFTMMU -#include "../../softmmu_defs.h" +#include "exec/softmmu_defs.h" /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ @@ -1017,64 +1083,35 @@ static const void * const qemu_st_helpers[4] = { * argreg is where we want to put this argument, arg is the argument itself. * Return value is the updated argreg ready for the next call. * Note that argreg 0..3 is real registers, 4+ on stack. - * When we reach the first stacked argument, we allocate space for it - * and the following stacked arguments using "str r8, [sp, #-0x10]!". - * Following arguments are filled in with "str r8, [sp, #0xNN]". - * For more than 4 stacked arguments we'd need to know how much - * space to allocate when we pushed the first stacked argument. - * We don't need this, so don't implement it (and will assert if you try it.) * * We provide routines for arguments which are: immediate, 32 bit * value in register, 16 and 8 bit values in register (which must be zero * extended before use) and 64 bit value in a lo:hi register pair. */ -#define DEFINE_TCG_OUT_ARG(NAME, ARGPARAM) \ - static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGPARAM) \ - { \ - if (argreg < 4) { \ - TCG_OUT_ARG_GET_ARG(argreg); \ - } else if (argreg == 4) { \ - TCG_OUT_ARG_GET_ARG(TCG_REG_R8); \ - tcg_out32(s, (COND_AL << 28) | 0x052d8010); \ - } else { \ - assert(argreg < 8); \ - TCG_OUT_ARG_GET_ARG(TCG_REG_R8); \ - tcg_out32(s, (COND_AL << 28) | 0x058d8000 | (argreg - 4) * 4); \ - } \ - return argreg + 1; \ - } - -#define TCG_OUT_ARG_GET_ARG(A) tcg_out_dat_imm(s, COND_AL, ARITH_MOV, A, 0, arg) -DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t arg) -#undef TCG_OUT_ARG_GET_ARG -#define TCG_OUT_ARG_GET_ARG(A) tcg_out_ext8u(s, COND_AL, A, arg) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg arg) -#undef TCG_OUT_ARG_GET_ARG -#define TCG_OUT_ARG_GET_ARG(A) tcg_out_ext16u(s, COND_AL, A, arg) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg arg) -#undef TCG_OUT_ARG_GET_ARG - -/* We don't use the macro for this one to avoid an unnecessary reg-reg - * move when storing to the stack. - */ -static TCGReg tcg_out_arg_reg32(TCGContext *s, TCGReg argreg, TCGReg arg) -{ - if (argreg < 4) { - tcg_out_mov_reg(s, COND_AL, argreg, arg); - } else if (argreg == 4) { - /* str arg, [sp, #-0x10]! 
*/ - tcg_out32(s, (COND_AL << 28) | 0x052d0010 | (arg << 12)); - } else { - assert(argreg < 8); - /* str arg, [sp, #0xNN] */ - tcg_out32(s, (COND_AL << 28) | 0x058d0000 | - (arg << 12) | (argreg - 4) * 4); - } - return argreg + 1; -} - -static inline TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, - TCGReg arglo, TCGReg arghi) +#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \ +static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \ +{ \ + if (argreg < 4) { \ + MOV_ARG(s, COND_AL, argreg, arg); \ + } else { \ + int ofs = (argreg - 4) * 4; \ + EXT_ARG; \ + assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \ + tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \ + } \ + return argreg + 1; \ +} + +DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32, + (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u, + (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u, + (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, ) + +static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, + TCGReg arglo, TCGReg arghi) { /* 64 bit arguments must go in even/odd register pairs * and in 8-aligned stack slots. @@ -1087,180 +1124,302 @@ static inline TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, return argreg; } -static inline void tcg_out_arg_stacktidy(TCGContext *s, TCGReg argreg) +#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) + +/* Load and compare a TLB entry, leaving the flags set. Leaves R2 pointing + to the tlb entry. Clobbers R1 and TMP. */ + +static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, + int s_bits, int tlb_offset) +{ + TCGReg base = TCG_AREG0; + + /* Should generate something like the following: + * pre-v7: + * shr tmp, addr_reg, #TARGET_PAGE_BITS (1) + * add r2, env, #off & 0xff00 + * and r0, tmp, #(CPU_TLB_SIZE - 1) (2) + * add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3) + * ldr r0, [r2, #off & 0xff]! (4) + * tst addr_reg, #s_mask + * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS (5) + * + * v7 (not implemented yet): + * ubfx r2, addr_reg, #TARGET_PAGE_BITS, #CPU_TLB_BITS (1) + * movw tmp, #~TARGET_PAGE_MASK & ~s_mask + * movw r0, #off + * add r2, env, r2, lsl #CPU_TLB_ENTRY_BITS (2) + * bic tmp, addr_reg, tmp + * ldr r0, [r2, r0]! (3) + * cmp r0, tmp (4) + */ +# if CPU_TLB_BITS > 8 +# error +# endif + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, + 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); + + /* We assume that the offset is contained within 16 bits. */ + assert((tlb_offset & ~0xffff) == 0); + if (tlb_offset > 0xff) { + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base, + (24 << 7) | (tlb_offset >> 8)); + tlb_offset &= 0xff; + base = TCG_REG_R2; + } + + tcg_out_dat_imm(s, COND_AL, ARITH_AND, + TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1); + tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base, + TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); + + /* Load the tlb comparator. Use ldrd if needed and available, + but due to how the pointer needs setting up, ldm isn't useful. + Base arm5 doesn't have ldrd, but armv5te does. 
*/ + if (use_armv6_instructions && TARGET_LONG_BITS == 64) { + tcg_out_memop_8(s, COND_AL, INSN_LDRD_IMM, TCG_REG_R0, + TCG_REG_R2, tlb_offset, 1, 1); + } else { + tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R0, + TCG_REG_R2, tlb_offset, 1, 1); + if (TARGET_LONG_BITS == 64) { + tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R1, + TCG_REG_R2, 4, 1, 0); + } + } + + /* Check alignment. */ + if (s_bits) { + tcg_out_dat_imm(s, COND_AL, ARITH_TST, + 0, addrlo, (1 << s_bits) - 1); + } + + tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0, + TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); + + if (TARGET_LONG_BITS == 64) { + tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, + TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0)); + } +} + +/* Record the context of a call to the out of line helper code for the slow + path for a load or store, so that we can later generate the correct + helper code. */ +static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc, + int data_reg, int data_reg2, int addrlo_reg, + int addrhi_reg, int mem_index, + uint8_t *raddr, uint8_t *label_ptr) { - /* Output any necessary post-call cleanup of the stack */ - if (argreg > 4) { - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10); + int idx; + TCGLabelQemuLdst *label; + + if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) { + tcg_abort(); } + + idx = s->nb_qemu_ldst_labels++; + label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx]; + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = data_reg; + label->datahi_reg = data_reg2; + label->addrlo_reg = addrlo_reg; + label->addrhi_reg = addrhi_reg; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; } -#endif +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg argreg, data_reg, data_reg2; + uint8_t *start; -#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) + reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr); + + argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); + } else { + argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); + } + argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index); + tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[lb->opc & 3]); + + data_reg = lb->datalo_reg; + data_reg2 = lb->datahi_reg; + + start = s->code_ptr; + switch (lb->opc) { + case 0 | 4: + tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0); + break; + case 1 | 4: + tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0); + break; + case 0: + case 1: + case 2: + default: + tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0); + break; + case 3: + tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0); + tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1); + break; + } + + /* For GETPC_LDST in exec-all.h, we architect exactly 2 insns between + the call and the branch back to straight-line code. Note that the + moves above could be elided by register allocation, nor do we know + which code alternative we chose for extension. 
*/ + switch (s->code_ptr - start) { + case 0: + tcg_out_nop(s); + /* FALLTHRU */ + case 4: + tcg_out_nop(s); + /* FALLTHRU */ + case 8: + break; + default: + abort(); + } + + tcg_out_goto(s, COND_AL, (tcg_target_long)lb->raddr); +} -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - int addr_reg, data_reg, data_reg2, bswap; + TCGReg argreg, data_reg, data_reg2; + + reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr); + + argreg = TCG_REG_R0; + argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); + } else { + argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); + } + + data_reg = lb->datalo_reg; + data_reg2 = lb->datahi_reg; + switch (lb->opc) { + case 0: + argreg = tcg_out_arg_reg8(s, argreg, data_reg); + break; + case 1: + argreg = tcg_out_arg_reg16(s, argreg, data_reg); + break; + case 2: + argreg = tcg_out_arg_reg32(s, argreg, data_reg); + break; + case 3: + argreg = tcg_out_arg_reg64(s, argreg, data_reg, data_reg2); + break; + } + + argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index); + tcg_out_call(s, (tcg_target_long) qemu_st_helpers[lb->opc & 3]); + + /* For GETPC_LDST in exec-all.h, we architect exactly 2 insns between + the call and the branch back to straight-line code. */ + tcg_out_nop(s); + tcg_out_nop(s); + tcg_out_goto(s, COND_AL, (tcg_target_long)lb->raddr); +} +#endif /* SOFTMMU */ + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) +{ + TCGReg addr_reg, data_reg, data_reg2; + bool bswap; #ifdef CONFIG_SOFTMMU - int mem_index, s_bits, tlb_offset; - TCGReg argreg; -# if TARGET_LONG_BITS == 64 - int addr_reg2; -# endif - uint32_t *label_ptr; + int mem_index, s_bits; + TCGReg addr_reg2; + uint8_t *label_ptr; #endif - #ifdef TARGET_WORDS_BIGENDIAN bswap = 1; #else bswap = 0; #endif + data_reg = *args++; - if (opc == 3) - data_reg2 = *args++; - else - data_reg2 = 0; /* suppress warning */ + data_reg2 = (opc == 3 ? *args++ : 0); addr_reg = *args++; #ifdef CONFIG_SOFTMMU -# if TARGET_LONG_BITS == 64 - addr_reg2 = *args++; -# endif + addr_reg2 = (TARGET_LONG_BITS == 64 ? *args++ : 0); mem_index = *args; s_bits = opc & 3; - /* Should generate something like the following: - * shr r8, addr_reg, #TARGET_PAGE_BITS - * and r0, r8, #(CPU_TLB_SIZE - 1) @ Assumption: CPU_TLB_BITS <= 8 - * add r0, env, r0 lsl #CPU_TLB_ENTRY_BITS - */ -# if CPU_TLB_BITS > 8 -# error -# endif - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_R8, - 0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); - tcg_out_dat_imm(s, COND_AL, ARITH_AND, - TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1); - tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0, - TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); - /* We assume that the offset is contained within 20 bits. */ - tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read); - assert(tlb_offset & ~0xfffff == 0); - if (tlb_offset > 0xfff) { - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0, - 0xa00 | (tlb_offset >> 12)); - tlb_offset &= 0xfff; - } - tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset); - tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1, - TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); - /* Check alignment. 
*/ - if (s_bits) - tcg_out_dat_imm(s, COND_EQ, ARITH_TST, - 0, addr_reg, (1 << s_bits) - 1); -# if TARGET_LONG_BITS == 64 - /* XXX: possibly we could use a block data load in the first access. */ - tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4); - tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, - TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0)); -# endif - tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, + tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits, + offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)); + + label_ptr = s->code_ptr; + tcg_out_b_noaddr(s, COND_NE); + + tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_read)); switch (opc) { case 0: - tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_ld8_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); break; case 0 | 4: - tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_ld8s_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); break; case 1: - tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_ld16u_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); if (bswap) { - tcg_out_bswap16(s, COND_EQ, data_reg, data_reg); + tcg_out_bswap16(s, COND_AL, data_reg, data_reg); } break; case 1 | 4: if (bswap) { - tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); - tcg_out_bswap16s(s, COND_EQ, data_reg, data_reg); + tcg_out_ld16u_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); + tcg_out_bswap16s(s, COND_AL, data_reg, data_reg); } else { - tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_ld16s_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); } break; case 2: default: - tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_ld32_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); if (bswap) { - tcg_out_bswap32(s, COND_EQ, data_reg, data_reg); + tcg_out_bswap32(s, COND_AL, data_reg, data_reg); } break; case 3: if (bswap) { - tcg_out_ld32_rwb(s, COND_EQ, data_reg2, TCG_REG_R1, addr_reg); - tcg_out_ld32_12(s, COND_EQ, data_reg, TCG_REG_R1, 4); - tcg_out_bswap32(s, COND_EQ, data_reg2, data_reg2); - tcg_out_bswap32(s, COND_EQ, data_reg, data_reg); + tcg_out_ld32_rwb(s, COND_AL, data_reg2, TCG_REG_R1, addr_reg); + tcg_out_ld32_12(s, COND_AL, data_reg, TCG_REG_R1, 4); + tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2); + tcg_out_bswap32(s, COND_AL, data_reg, data_reg); } else { - tcg_out_ld32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg); - tcg_out_ld32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4); + tcg_out_ld32_rwb(s, COND_AL, data_reg, TCG_REG_R1, addr_reg); + tcg_out_ld32_12(s, COND_AL, data_reg2, TCG_REG_R1, 4); } break; } - label_ptr = (void *) s->code_ptr; - tcg_out_b_noaddr(s, COND_EQ); - - /* TODO: move this code to where the constants pool will be */ - /* Note that this code relies on the constraints we set in arm_op_defs[] - * to ensure that later arguments are not passed to us in registers we - * trash by moving the earlier arguments into them. 
- */ - argreg = TCG_REG_R0; - argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); -#if TARGET_LONG_BITS == 64 - argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2); -#else - argreg = tcg_out_arg_reg32(s, argreg, addr_reg); -#endif - argreg = tcg_out_arg_imm32(s, argreg, mem_index); - tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]); - tcg_out_arg_stacktidy(s, argreg); - - switch (opc) { - case 0 | 4: - tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0); - break; - case 1 | 4: - tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0); - break; - case 0: - case 1: - case 2: - default: - tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0); - break; - case 3: - tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0); - tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1); - break; - } - - reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr); + add_qemu_ldst_label(s, 1, opc, data_reg, data_reg2, addr_reg, addr_reg2, + mem_index, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ if (GUEST_BASE) { uint32_t offset = GUEST_BASE; - int i; - int rot; + int i, rot; while (offset) { i = ctz32(offset) & ~1; rot = ((32 - i) << 7) & 0xf00; - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R8, addr_reg, + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_TMP, addr_reg, ((offset >> i) & 0xff) | rot); - addr_reg = TCG_REG_R8; + addr_reg = TCG_REG_TMP; offset &= ~(0xff << i); } } @@ -1311,142 +1470,76 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) #endif } -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) { - int addr_reg, data_reg, data_reg2, bswap; + TCGReg addr_reg, data_reg, data_reg2; + bool bswap; #ifdef CONFIG_SOFTMMU - int mem_index, s_bits, tlb_offset; - TCGReg argreg; -# if TARGET_LONG_BITS == 64 - int addr_reg2; -# endif - uint32_t *label_ptr; + int mem_index, s_bits; + TCGReg addr_reg2; + uint8_t *label_ptr; #endif - #ifdef TARGET_WORDS_BIGENDIAN bswap = 1; #else bswap = 0; #endif + data_reg = *args++; - if (opc == 3) - data_reg2 = *args++; - else - data_reg2 = 0; /* suppress warning */ + data_reg2 = (opc == 3 ? *args++ : 0); addr_reg = *args++; #ifdef CONFIG_SOFTMMU -# if TARGET_LONG_BITS == 64 - addr_reg2 = *args++; -# endif + addr_reg2 = (TARGET_LONG_BITS == 64 ? *args++ : 0); mem_index = *args; s_bits = opc & 3; - /* Should generate something like the following: - * shr r8, addr_reg, #TARGET_PAGE_BITS - * and r0, r8, #(CPU_TLB_SIZE - 1) @ Assumption: CPU_TLB_BITS <= 8 - * add r0, env, r0 lsl #CPU_TLB_ENTRY_BITS - */ - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, - TCG_REG_R8, 0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); - tcg_out_dat_imm(s, COND_AL, ARITH_AND, - TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1); - tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, - TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); - /* We assume that the offset is contained within 20 bits. */ - tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); - assert(tlb_offset & ~0xfffff == 0); - if (tlb_offset > 0xfff) { - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0, - 0xa00 | (tlb_offset >> 12)); - tlb_offset &= 0xfff; - } - tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset); - tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1, - TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); - /* Check alignment. 
*/ - if (s_bits) - tcg_out_dat_imm(s, COND_EQ, ARITH_TST, - 0, addr_reg, (1 << s_bits) - 1); -# if TARGET_LONG_BITS == 64 - /* XXX: possibly we could use a block data load in the first access. */ - tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4); - tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, - TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0)); -# endif - tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, + tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits, + offsetof(CPUArchState, + tlb_table[mem_index][0].addr_write)); + + label_ptr = s->code_ptr; + tcg_out_b_noaddr(s, COND_NE); + + tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_write)); switch (opc) { case 0: - tcg_out_st8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_st8_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); break; case 1: if (bswap) { - tcg_out_bswap16st(s, COND_EQ, TCG_REG_R0, data_reg); - tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1); + tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, data_reg); + tcg_out_st16_r(s, COND_AL, TCG_REG_R0, addr_reg, TCG_REG_R1); } else { - tcg_out_st16_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_st16_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); } break; case 2: default: if (bswap) { - tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg); - tcg_out_st32_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1); + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg); + tcg_out_st32_r(s, COND_AL, TCG_REG_R0, addr_reg, TCG_REG_R1); } else { - tcg_out_st32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_st32_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); } break; case 3: if (bswap) { - tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg2); - tcg_out_st32_rwb(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, addr_reg); - tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg); - tcg_out_st32_12(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, 4); + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg2); + tcg_out_st32_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R1, addr_reg); + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg); + tcg_out_st32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R1, 4); } else { - tcg_out_st32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg); - tcg_out_st32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4); + tcg_out_st32_rwb(s, COND_AL, data_reg, TCG_REG_R1, addr_reg); + tcg_out_st32_12(s, COND_AL, data_reg2, TCG_REG_R1, 4); } break; } - label_ptr = (void *) s->code_ptr; - tcg_out_b_noaddr(s, COND_EQ); - - /* TODO: move this code to where the constants pool will be */ - /* Note that this code relies on the constraints we set in arm_op_defs[] - * to ensure that later arguments are not passed to us in registers we - * trash by moving the earlier arguments into them. 
- */ - argreg = TCG_REG_R0; - argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); -#if TARGET_LONG_BITS == 64 - argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2); -#else - argreg = tcg_out_arg_reg32(s, argreg, addr_reg); -#endif - - switch (opc) { - case 0: - argreg = tcg_out_arg_reg8(s, argreg, data_reg); - break; - case 1: - argreg = tcg_out_arg_reg16(s, argreg, data_reg); - break; - case 2: - argreg = tcg_out_arg_reg32(s, argreg, data_reg); - break; - case 3: - argreg = tcg_out_arg_reg64(s, argreg, data_reg, data_reg2); - break; - } - - argreg = tcg_out_arg_imm32(s, argreg, mem_index); - tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]); - tcg_out_arg_stacktidy(s, argreg); - - reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr); + add_qemu_ldst_label(s, 0, opc, data_reg, data_reg2, addr_reg, addr_reg2, + mem_index, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ if (GUEST_BASE) { uint32_t offset = GUEST_BASE; @@ -1506,21 +1599,20 @@ static uint8_t *tb_ret_addr; static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { + TCGArg a0, a1, a2, a3, a4, a5; int c; switch (opc) { case INDEX_op_exit_tb: - { + if (use_armv7_instructions || check_fit_imm(args[0])) { + tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); + tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr); + } else { uint8_t *ld_ptr = s->code_ptr; - if (args[0] >> 8) - tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0); - else - tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R0, 0, args[0]); + tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0); tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr); - if (args[0] >> 8) { - *ld_ptr = (uint8_t) (s->code_ptr - ld_ptr) - 8; - tcg_out32(s, args[0]); - } + *ld_ptr = (uint8_t) (s->code_ptr - ld_ptr) - 8; + tcg_out32(s, args[0]); } break; case INDEX_op_goto_tb: @@ -1598,23 +1690,36 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, /* Constraints mean that v2 is always in the same register as dest, * so we only need to do "if condition passed, move v1 to dest". 
*/ - tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, - args[1], args[2], const_args[2]); - tcg_out_dat_rI(s, tcg_cond_to_arm_cond[args[5]], - ARITH_MOV, args[0], 0, args[3], const_args[3]); + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[1], args[2], const_args[2]); + tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV, + ARITH_MVN, args[0], 0, args[3], const_args[3]); break; case INDEX_op_add_i32: - c = ARITH_ADD; - goto gen_arith; + tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB, + args[0], args[1], args[2], const_args[2]); + break; case INDEX_op_sub_i32: - c = ARITH_SUB; - goto gen_arith; + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]); + } else { + tcg_out_dat_rI(s, COND_AL, ARITH_RSB, + args[0], args[2], args[1], 1); + } + } else { + tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD, + args[0], args[1], args[2], const_args[2]); + } + break; case INDEX_op_and_i32: - c = ARITH_AND; - goto gen_arith; + tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC, + args[0], args[1], args[2], const_args[2]); + break; case INDEX_op_andc_i32: - c = ARITH_BIC; - goto gen_arith; + tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND, + args[0], args[1], args[2], const_args[2]); + break; case INDEX_op_or_i32: c = ARITH_ORR; goto gen_arith; @@ -1625,14 +1730,44 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]); break; case INDEX_op_add2_i32: - tcg_out_dat_reg2(s, COND_AL, ARITH_ADD, ARITH_ADC, - args[0], args[1], args[2], args[3], - args[4], args[5], SHIFT_IMM_LSL(0)); + a0 = args[0], a1 = args[1], a2 = args[2]; + a3 = args[3], a4 = args[4], a5 = args[5]; + if (a0 == a3 || (a0 == a5 && !const_args[5])) { + a0 = TCG_REG_TMP; + } + tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR, + a0, a2, a4, const_args[4]); + tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC, + a1, a3, a5, const_args[5]); + tcg_out_mov_reg(s, COND_AL, args[0], a0); break; case INDEX_op_sub2_i32: - tcg_out_dat_reg2(s, COND_AL, ARITH_SUB, ARITH_SBC, - args[0], args[1], args[2], args[3], - args[4], args[5], SHIFT_IMM_LSL(0)); + a0 = args[0], a1 = args[1], a2 = args[2]; + a3 = args[3], a4 = args[4], a5 = args[5]; + if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) { + a0 = TCG_REG_TMP; + } + if (const_args[2]) { + if (const_args[4]) { + tcg_out_movi32(s, COND_AL, a0, a4); + a4 = a0; + } + tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1); + } else { + tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR, + ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]); + } + if (const_args[3]) { + if (const_args[5]) { + tcg_out_movi32(s, COND_AL, a1, a5); + a5 = a1; + } + tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1); + } else { + tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC, + a1, a3, a5, const_args[5]); + } + tcg_out_mov_reg(s, COND_AL, args[0], a0); break; case INDEX_op_neg_i32: tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0); @@ -1647,6 +1782,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mulu2_i32: tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]); break; + case INDEX_op_muls2_i32: + tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]); + break; /* XXX: Perhaps args[2] & 0x1f is wrong */ case INDEX_op_shl_i32: c = const_args[2] ? 
@@ -1675,14 +1813,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) : SHIFT_IMM_LSL(0)); } else { - tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_R8, args[1], 0x20); + tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[1], 0x20); tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], - SHIFT_REG_ROR(TCG_REG_R8)); + SHIFT_REG_ROR(TCG_REG_TMP)); } break; case INDEX_op_brcond_i32: - tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, args[0], args[1], const_args[1]); tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]); break; @@ -1695,15 +1833,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3), * TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3, */ - tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, - args[1], args[3], SHIFT_IMM_LSL(0)); - tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, - args[0], args[2], SHIFT_IMM_LSL(0)); + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[1], args[3], const_args[3]); + tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0, + args[0], args[2], const_args[2]); tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]); break; case INDEX_op_setcond_i32: - tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, - args[1], args[2], const_args[2]); + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[1], args[2], const_args[2]); tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]], ARITH_MOV, args[0], 0, 1); tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])], @@ -1711,10 +1849,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_setcond2_i32: /* See brcond2_i32 comment */ - tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, - args[2], args[4], SHIFT_IMM_LSL(0)); - tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, - args[1], args[3], SHIFT_IMM_LSL(0)); + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[2], args[4], const_args[4]); + tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0, + args[1], args[3], const_args[3]); tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV, args[0], 0, 1); tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])], @@ -1770,11 +1908,39 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_ext16u(s, COND_AL, args[0], args[1]); break; + case INDEX_op_deposit_i32: + tcg_out_deposit(s, COND_AL, args[0], args[2], + args[3], args[4], const_args[2]); + break; + + case INDEX_op_div_i32: + tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_divu_i32: + tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]); + break; + default: tcg_abort(); } } +#ifdef CONFIG_SOFTMMU +/* Generate TB finalization at the end of block. 
*/ +void tcg_out_tb_finalize(TCGContext *s) +{ + int i; + for (i = 0; i < s->nb_qemu_ldst_labels; i++) { + TCGLabelQemuLdst *label = &s->qemu_ldst_labels[i]; + if (label->is_ld) { + tcg_out_qemu_ld_slow_path(s, label); + } else { + tcg_out_qemu_st_slow_path(s, label); + } + } +} +#endif /* SOFTMMU */ + static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, @@ -1794,12 +1960,13 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_st_i32, { "r", "r" } }, /* TODO: "r", "r", "ri" */ - { INDEX_op_add_i32, { "r", "r", "rI" } }, - { INDEX_op_sub_i32, { "r", "r", "rI" } }, + { INDEX_op_add_i32, { "r", "r", "rIN" } }, + { INDEX_op_sub_i32, { "r", "rI", "rIN" } }, { INDEX_op_mul_i32, { "r", "r", "r" } }, { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } }, - { INDEX_op_and_i32, { "r", "r", "rI" } }, - { INDEX_op_andc_i32, { "r", "r", "rI" } }, + { INDEX_op_muls2_i32, { "r", "r", "r", "r" } }, + { INDEX_op_and_i32, { "r", "r", "rIK" } }, + { INDEX_op_andc_i32, { "r", "r", "rIK" } }, { INDEX_op_or_i32, { "r", "r", "rI" } }, { INDEX_op_xor_i32, { "r", "r", "rI" } }, { INDEX_op_neg_i32, { "r", "r" } }, @@ -1811,15 +1978,14 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_rotl_i32, { "r", "r", "ri" } }, { INDEX_op_rotr_i32, { "r", "r", "ri" } }, - { INDEX_op_brcond_i32, { "r", "rI" } }, - { INDEX_op_setcond_i32, { "r", "r", "rI" } }, - { INDEX_op_movcond_i32, { "r", "r", "rI", "rI", "0" } }, + { INDEX_op_brcond_i32, { "r", "rIN" } }, + { INDEX_op_setcond_i32, { "r", "r", "rIN" } }, + { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } }, - /* TODO: "r", "r", "r", "r", "ri", "ri" */ - { INDEX_op_add2_i32, { "r", "r", "r", "r", "r", "r" } }, - { INDEX_op_sub2_i32, { "r", "r", "r", "r", "r", "r" } }, - { INDEX_op_brcond2_i32, { "r", "r", "r", "r" } }, - { INDEX_op_setcond2_i32, { "r", "r", "r", "r", "r" } }, + { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } }, + { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } }, + { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } }, + { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } }, #if TARGET_LONG_BITS == 32 { INDEX_op_qemu_ld8u, { "r", "l" } }, @@ -1832,7 +1998,7 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_qemu_st8, { "s", "s" } }, { INDEX_op_qemu_st16, { "s", "s" } }, { INDEX_op_qemu_st32, { "s", "s" } }, - { INDEX_op_qemu_st64, { "S", "S", "s" } }, + { INDEX_op_qemu_st64, { "s", "s", "s" } }, #else { INDEX_op_qemu_ld8u, { "r", "l", "l" } }, { INDEX_op_qemu_ld8s, { "r", "l", "l" } }, @@ -1844,7 +2010,7 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_qemu_st8, { "s", "s", "s" } }, { INDEX_op_qemu_st16, { "s", "s", "s" } }, { INDEX_op_qemu_st32, { "s", "s", "s" } }, - { INDEX_op_qemu_st64, { "S", "S", "s", "s" } }, + { INDEX_op_qemu_st64, { "s", "s", "s", "s" } }, #endif { INDEX_op_bswap16_i32, { "r", "r" } }, @@ -1854,16 +2020,32 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_ext16s_i32, { "r", "r" } }, { INDEX_op_ext16u_i32, { "r", "r" } }, + { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + + { INDEX_op_div_i32, { "r", "r", "r" } }, + { INDEX_op_divu_i32, { "r", "r", "r" } }, + { -1 }, }; static void tcg_target_init(TCGContext *s) { -#if !defined(CONFIG_USER_ONLY) - /* fail safe */ - if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) - tcg_abort(); -#endif +#if defined(CONFIG_GETAUXVAL) + /* Only probe for the platform and capabilities if we havn't already + determined maximum values at compile time. 
*/ +# if !defined(use_idiv_instructions) + { + unsigned long hwcap = getauxval(AT_HWCAP); + use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0; + } +# endif + if (__ARM_ARCH < 7) { + const char *pl = (const char *)getauxval(AT_PLATFORM); + if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') { + arm_arch = pl[1] - '0'; + } + } +#endif /* GETAUXVAL */ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); tcg_regset_set32(tcg_target_call_clobber_regs, 0, @@ -1876,12 +2058,10 @@ static void tcg_target_init(TCGContext *s) tcg_regset_clear(s->reserved_regs); tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R8); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC); tcg_add_target_add_op_defs(arm_op_defs); - tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf), - CPU_TEMP_BUF_NLONGS * sizeof(long)); } static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, @@ -1908,20 +2088,93 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, tcg_out_movi32(s, COND_AL, ret, arg); } +/* Compute frame size via macros, to share between tcg_target_qemu_prologue + and tcg_register_jit. */ + +#define PUSH_SIZE ((11 - 4 + 1 + 1) * sizeof(tcg_target_long)) + +#define FRAME_SIZE \ + ((PUSH_SIZE \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & -TCG_TARGET_STACK_ALIGN) + static void tcg_target_qemu_prologue(TCGContext *s) { - /* Calling convention requires us to save r4-r11 and lr; - * save also r12 to maintain stack 8-alignment. - */ + int stack_addend; - /* stmdb sp!, { r4 - r12, lr } */ - tcg_out32(s, (COND_AL << 28) | 0x092d5ff0); + /* Calling convention requires us to save r4-r11 and lr. */ + /* stmdb sp!, { r4 - r11, lr } */ + tcg_out32(s, (COND_AL << 28) | 0x092d4ff0); + + /* Reserve callee argument and tcg temp space. */ + stack_addend = FRAME_SIZE - PUSH_SIZE; + + tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK, + TCG_REG_CALL_STACK, stack_addend, 1); + tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]); tb_ret_addr = s->code_ptr; - /* ldmia sp!, { r4 - r12, pc } */ - tcg_out32(s, (COND_AL << 28) | 0x08bd9ff0); + /* Epilogue. We branch here via tb_ret_addr. */ + tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK, + TCG_REG_CALL_STACK, stack_addend, 1); + + /* ldmia sp!, { r4 - r11, pc } */ + tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0); +} + +typedef struct { + DebugFrameCIE cie; + DebugFrameFDEHeader fde; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[18]; +} DebugFrame; + +#define ELF_HOST_MACHINE EM_ARM + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +static DebugFrame debug_frame = { + .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .cie.id = -1, + .cie.version = 1, + .cie.code_align = 1, + .cie.data_align = 0x7c, /* sleb128 -4 */ + .cie.return_column = 14, + + /* Total FDE size does not include the "len" member. */ + .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + + .fde_def_cfa = { + 12, 13, /* DW_CFA_def_cfa sp, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + /* The following must match the stmdb in the prologue. 
*/ + 0x8e, 1, /* DW_CFA_offset, lr, -4 */ + 0x8b, 2, /* DW_CFA_offset, r11, -8 */ + 0x8a, 3, /* DW_CFA_offset, r10, -12 */ + 0x89, 4, /* DW_CFA_offset, r9, -16 */ + 0x88, 5, /* DW_CFA_offset, r8, -20 */ + 0x87, 6, /* DW_CFA_offset, r7, -24 */ + 0x86, 7, /* DW_CFA_offset, r6, -28 */ + 0x85, 8, /* DW_CFA_offset, r5, -32 */ + 0x84, 9, /* DW_CFA_offset, r4, -36 */ + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + debug_frame.fde.func_start = (tcg_target_long) buf; + debug_frame.fde.func_len = buf_size; + + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 98fa11b28..5cd9d6a67 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -22,6 +22,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#ifndef TCG_TARGET_ARM #define TCG_TARGET_ARM 1 #undef TCG_TARGET_WORDS_BIGENDIAN @@ -48,7 +49,12 @@ typedef enum { #define TCG_TARGET_NB_REGS 16 -#define TCG_CT_CONST_ARM 0x100 +#ifdef __ARM_ARCH_EXT_IDIV__ +#define use_idiv_instructions 1 +#else +extern bool use_idiv_instructions; +#endif + /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_R13 @@ -57,7 +63,6 @@ typedef enum { #define TCG_TARGET_CALL_STACK_OFFSET 0 /* optional instructions */ -#define TCG_TARGET_HAS_div_i32 0 #define TCG_TARGET_HAS_ext8s_i32 1 #define TCG_TARGET_HAS_ext16s_i32 1 #define TCG_TARGET_HAS_ext8u_i32 0 /* and r0, r1, #0xff */ @@ -72,8 +77,14 @@ typedef enum { #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_deposit_i32 0 +#define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_muls2_i32 1 +#define TCG_TARGET_HAS_div_i32 use_idiv_instructions +#define TCG_TARGET_HAS_rem_i32 0 + +extern bool tcg_target_deposit_valid(int ofs, int len); +#define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid enum { TCG_AREG0 = TCG_REG_R6, @@ -91,3 +102,5 @@ static inline void flush_icache_range(tcg_target_ulong start, __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg)); #endif } + +#endif diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c index de500ae18..68f77ba4d 100644 --- a/tcg/hppa/tcg-target.c +++ b/tcg/hppa/tcg-target.c @@ -824,7 +824,7 @@ static void tcg_out_brcond2(TCGContext *s, int cond, TCGArg al, TCGArg ah, tcg_out_brcond(s, TCG_COND_EQ, ah, bh, bhconst, label_index); break; case TCG_COND_NE: - tcg_out_brcond(s, TCG_COND_NE, al, bl, bhconst, label_index); + tcg_out_brcond(s, TCG_COND_NE, al, bl, blconst, label_index); tcg_out_brcond(s, TCG_COND_NE, ah, bh, bhconst, label_index); break; default: @@ -906,7 +906,7 @@ static void tcg_out_movcond(TCGContext *s, int cond, TCGArg ret, } #if defined(CONFIG_SOFTMMU) -#include "../../softmmu_defs.h" +#include "exec/softmmu_defs.h" /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ @@ -1766,28 +1766,11 @@ static void tcg_target_init(TCGContext *s) } typedef struct { - uint32_t len __attribute__((aligned((sizeof(void *))))); - uint32_t id; - uint8_t version; - char augmentation[1]; - uint8_t code_align; - uint8_t data_align; - uint8_t return_column; -} DebugFrameCIE; - -typedef struct { - uint32_t len __attribute__((aligned((sizeof(void *))))); - uint32_t cie_offset; - tcg_target_long func_start __attribute__((packed)); - tcg_target_long func_len __attribute__((packed)); - uint8_t def_cfa[4]; - uint8_t ret_ofs[3]; - uint8_t 
reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2]; -} DebugFrameFDE; - -typedef struct { DebugFrameCIE cie; - DebugFrameFDE fde; + DebugFrameFDEHeader fde; + uint8_t fde_def_cfa[4]; + uint8_t fde_ret_ofs[3]; + uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2]; } DebugFrame; #define ELF_HOST_MACHINE EM_PARISC @@ -1806,16 +1789,18 @@ static DebugFrame debug_frame = { .cie.data_align = 1, .cie.return_column = 2, - .fde.len = sizeof(DebugFrameFDE)-4, /* length after .len member */ - .fde.def_cfa = { + /* Total FDE size does not include the "len" member. */ + .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + + .fde_def_cfa = { 0x12, 30, /* DW_CFA_def_cfa_sf sp, ... */ (-FRAME_SIZE & 0x7f) | 0x80, /* ... sleb128 -FRAME_SIZE */ (-FRAME_SIZE >> 7) & 0x7f }, - .fde.ret_ofs = { + .fde_ret_ofs = { 0x11, 2, (-20 / 4) & 0x7f /* DW_CFA_offset_extended_sf r2, 20 */ }, - .fde.reg_ofs = { + .fde_reg_ofs = { /* This must match the ordering in tcg_target_callee_save_regs. */ 0x80 + 4, 0, /* DW_CFA_offset r4, 0 */ 0x80 + 5, 4, /* DW_CFA_offset r5, 4 */ diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h index f43fb419a..25467bdd4 100644 --- a/tcg/hppa/tcg-target.h +++ b/tcg/hppa/tcg-target.h @@ -22,6 +22,7 @@ * THE SOFTWARE. */ +#ifndef TCG_TARGET_HPPA #define TCG_TARGET_HPPA 1 #if TCG_TARGET_REG_BITS != 32 @@ -84,6 +85,7 @@ typedef enum { /* optional instructions */ #define TCG_TARGET_HAS_div_i32 0 +#define TCG_TARGET_HAS_rem_i32 0 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_ext8s_i32 1 #define TCG_TARGET_HAS_ext16s_i32 1 @@ -97,6 +99,7 @@ typedef enum { #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_muls2_i32 0 /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */ @@ -119,3 +122,5 @@ static inline void flush_icache_range(tcg_target_ulong start, start += 32; } } + +#endif diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 6f3ad3cea..87eeab3d3 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -97,6 +97,18 @@ static const int tcg_target_call_oarg_regs[] = { # define TCG_REG_L1 TCG_REG_EDX #endif +/* For 32-bit, we are going to attempt to determine at runtime whether cmov + is available. However, the host compiler must supply <cpuid.h>, as we're + not going to go so far as our own inline assembly. 
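(The probe itself is just a CPUID leaf-1 query. A minimal standalone sketch, assuming GCC/Clang's <cpuid.h>, which supplies __get_cpuid() and bit_CMOV; it mirrors the check this patch adds to tcg_target_init() further down.)

    #include <cpuid.h>
    #include <stdbool.h>

    static bool host_has_cmov(void)
    {
        unsigned a, b, c, d;
        /* CPUID leaf 1: EDX bit 15 reports CMOVcc support. */
        return __get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV) != 0;
    }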
*/ +#if TCG_TARGET_REG_BITS == 64 +# define have_cmov 1 +#elif defined(CONFIG_CPUID_H) +#include <cpuid.h> +static bool have_cmov; +#else +# define have_cmov 0 +#endif + static uint8_t *tb_ret_addr; static void patch_reloc(uint8_t *code_ptr, int type, @@ -943,7 +955,14 @@ static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest, TCGArg v1) { tcg_out_cmp(s, c1, c2, const_c2, 0); - tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1); + if (have_cmov) { + tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1); + } else { + int over = gen_new_label(); + tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1); + tcg_out_mov(s, TCG_TYPE_I32, dest, v1); + tcg_out_label(s, over, s->code_ptr); + } } #if TCG_TARGET_REG_BITS == 64 @@ -982,7 +1001,7 @@ static void tcg_out_jmp(TCGContext *s, tcg_target_long dest) #if defined(CONFIG_SOFTMMU) -#include "../../softmmu_defs.h" +#include "exec/softmmu_defs.h" /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ @@ -1903,40 +1922,44 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_qemu_st(s, args, 3); break; -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args, const_args, 0); + OP_32_64(mulu2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args, const_args); - break; - case INDEX_op_mulu2_i32: - tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]); + OP_32_64(muls2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]); break; - case INDEX_op_add2_i32: + OP_32_64(add2): if (const_args[4]) { - tgen_arithi(s, ARITH_ADD, args[0], args[4], 1); + tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1); } else { - tgen_arithr(s, ARITH_ADD, args[0], args[4]); + tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]); } if (const_args[5]) { - tgen_arithi(s, ARITH_ADC, args[1], args[5], 1); + tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1); } else { - tgen_arithr(s, ARITH_ADC, args[1], args[5]); + tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]); } break; - case INDEX_op_sub2_i32: + OP_32_64(sub2): if (const_args[4]) { - tgen_arithi(s, ARITH_SUB, args[0], args[4], 1); + tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1); } else { - tgen_arithr(s, ARITH_SUB, args[0], args[4]); + tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]); } if (const_args[5]) { - tgen_arithi(s, ARITH_SBB, args[1], args[5], 1); + tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1); } else { - tgen_arithr(s, ARITH_SBB, args[1], args[5]); + tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]); } break; + +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_brcond2_i32: + tcg_out_brcond2(s, args, const_args, 0); + break; + case INDEX_op_setcond2_i32: + tcg_out_setcond2(s, args, const_args); + break; #else /* TCG_TARGET_REG_BITS == 64 */ case INDEX_op_movi_i64: tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); @@ -2059,10 +2082,12 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } }, #endif -#if TCG_TARGET_REG_BITS == 32 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, + { INDEX_op_muls2_i32, { "a", "d", "a", "r" } }, { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + +#if TCG_TARGET_REG_BITS == 32 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, #else @@ -2080,7 +2105,7 @@ static const TCGTargetOpDef x86_op_defs[] = { { 
INDEX_op_st32_i64, { "ri", "r" } }, { INDEX_op_st_i64, { "re", "r" } }, - { INDEX_op_add_i64, { "r", "0", "re" } }, + { INDEX_op_add_i64, { "r", "r", "re" } }, { INDEX_op_mul_i64, { "r", "0", "re" } }, { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } }, { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } }, @@ -2113,6 +2138,11 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_deposit_i64, { "Q", "0", "Q" } }, { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } }, + + { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } }, + { INDEX_op_muls2_i64, { "a", "d", "a", "r" } }, + { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } }, + { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } }, #endif #if TCG_TARGET_REG_BITS == 64 @@ -2243,10 +2273,14 @@ static void tcg_target_qemu_prologue(TCGContext *s) static void tcg_target_init(TCGContext *s) { -#if !defined(CONFIG_USER_ONLY) - /* fail safe */ - if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) - tcg_abort(); + /* For 32-bit, 99% certainty that we're running on hardware that supports + cmov, but we still need to check. In case cmov is not available, we'll + use a small forward branch. */ +#ifndef have_cmov + { + unsigned a, b, c, d; + have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV)); + } #endif if (TCG_TARGET_REG_BITS == 64) { @@ -2278,29 +2312,15 @@ static void tcg_target_init(TCGContext *s) } typedef struct { - uint32_t len __attribute__((aligned((sizeof(void *))))); - uint32_t id; - uint8_t version; - char augmentation[1]; - uint8_t code_align; - uint8_t data_align; - uint8_t return_column; -} DebugFrameCIE; - -typedef struct { - uint32_t len __attribute__((aligned((sizeof(void *))))); - uint32_t cie_offset; - tcg_target_long func_start __attribute__((packed)); - tcg_target_long func_len __attribute__((packed)); - uint8_t def_cfa[4]; - uint8_t reg_ofs[14]; -} DebugFrameFDE; - -typedef struct { DebugFrameCIE cie; - DebugFrameFDE fde; + DebugFrameFDEHeader fde; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[14]; } DebugFrame; +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + #if !defined(__ELF__) /* Host machine without ELF. */ #elif TCG_TARGET_REG_BITS == 64 @@ -2313,13 +2333,15 @@ static DebugFrame debug_frame = { .cie.data_align = 0x78, /* sleb128 -8 */ .cie.return_column = 16, - .fde.len = sizeof(DebugFrameFDE)-4, /* length after .len member */ - .fde.def_cfa = { + /* Total FDE size does not include the "len" member. */ + .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + + .fde_def_cfa = { 12, 7, /* DW_CFA_def_cfa %rsp, ... */ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ (FRAME_SIZE >> 7) }, - .fde.reg_ofs = { + .fde_reg_ofs = { 0x90, 1, /* DW_CFA_offset, %rip, -8 */ /* The following ordering must match tcg_target_callee_save_regs. */ 0x86, 2, /* DW_CFA_offset, %rbp, -16 */ @@ -2340,13 +2362,15 @@ static DebugFrame debug_frame = { .cie.data_align = 0x7c, /* sleb128 -4 */ .cie.return_column = 8, - .fde.len = sizeof(DebugFrameFDE)-4, /* length after .len member */ - .fde.def_cfa = { + /* Total FDE size does not include the "len" member. */ + .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + + .fde_def_cfa = { 12, 4, /* DW_CFA_def_cfa %esp, ... */ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ (FRAME_SIZE >> 7) }, - .fde.reg_ofs = { + .fde_reg_ofs = { 0x88, 1, /* DW_CFA_offset, %eip, -4 */ /* The following ordering must match tcg_target_callee_save_regs. 
*/ 0x85, 2, /* DW_CFA_offset, %ebp, -8 */ @@ -2360,9 +2384,6 @@ static DebugFrame debug_frame = { #if defined(ELF_HOST_MACHINE) void tcg_register_jit(void *buf, size_t buf_size) { - /* We're expecting a 2 byte uleb128 encoded value. */ - assert(FRAME_SIZE >> 14 == 0); - debug_frame.fde.func_start = (tcg_target_long) buf; debug_frame.fde.func_len = buf_size; diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index dbc675641..e3f6bb965 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#ifndef TCG_TARGET_I386 #define TCG_TARGET_I386 1 //#define TCG_TARGET_WORDS_BIGENDIAN @@ -90,12 +91,11 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 -#if defined(__x86_64__) || defined(__i686__) -/* Use cmov only if the compiler is already doing so. */ #define TCG_TARGET_HAS_movcond_i32 1 -#else -#define TCG_TARGET_HAS_movcond_i32 0 -#endif +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 1 +#define TCG_TARGET_HAS_muls2_i32 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div2_i64 1 @@ -118,6 +118,10 @@ typedef enum { #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_add2_i64 1 +#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_mulu2_i64 1 +#define TCG_TARGET_HAS_muls2_i64 1 #endif #define TCG_TARGET_deposit_i32_valid(ofs, len) \ @@ -135,3 +139,5 @@ static inline void flush_icache_range(tcg_target_ulong start, tcg_target_ulong stop) { } + +#endif diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 06570bea3..2373d9ef7 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -1491,7 +1491,7 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret, #if defined(CONFIG_SOFTMMU) -#include "../../softmmu_defs.h" +#include "exec/softmmu_defs.h" /* Load and compare a TLB entry, and return the result in (p6, p7). R2 is loaded with the address of the addend TLB entry. diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index 91fe7a3b0..f32d5199c 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -22,6 +22,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ +#ifndef TCG_TARGET_IA64 #define TCG_TARGET_IA64 1 /* We only map the first 64 registers */ @@ -103,7 +104,9 @@ typedef enum { /* optional instructions */ #define TCG_TARGET_HAS_div_i32 0 +#define TCG_TARGET_HAS_rem_i32 0 #define TCG_TARGET_HAS_div_i64 0 +#define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_andc_i64 1 #define TCG_TARGET_HAS_bswap16_i32 1 @@ -135,6 +138,14 @@ typedef enum { #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_deposit_i64 1 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i32 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muls2_i64 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) @@ -158,3 +169,5 @@ static inline void flush_icache_range(tcg_target_ulong start, } asm volatile (";;sync.i;;srlz.i;;"); } + +#endif diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index ae2b274d8..793532ec9 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -920,7 +920,7 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, #if defined(CONFIG_SOFTMMU) -#include "../../softmmu_defs.h" +#include "exec/softmmu_defs.h" /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ @@ -1413,6 +1413,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); #endif break; + case INDEX_op_muls2_i32: + tcg_out_opc_reg(s, OPC_MULT, 0, args[2], args[3]); + tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); + tcg_out_opc_reg(s, OPC_MFHI, args[1], 0, 0); + break; case INDEX_op_mulu2_i32: tcg_out_opc_reg(s, OPC_MULTU, 0, args[2], args[3]); tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); @@ -1595,6 +1600,7 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } }, { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } }, { INDEX_op_div_i32, { "r", "rZ", "rZ" } }, { INDEX_op_divu_i32, { "r", "rZ", "rZ" } }, @@ -1611,19 +1617,29 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, +#if TCG_TARGET_HAS_rot_i32 { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, +#endif +#if TCG_TARGET_HAS_bswap16_i32 { INDEX_op_bswap16_i32, { "r", "r" } }, +#endif +#if TCG_TARGET_HAS_bswap32_i32 { INDEX_op_bswap32_i32, { "r", "r" } }, +#endif { INDEX_op_ext8s_i32, { "r", "rZ" } }, { INDEX_op_ext16s_i32, { "r", "rZ" } }, +#if TCG_TARGET_HAS_deposit_i32 { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, +#endif { INDEX_op_brcond_i32, { "rZ", "rZ" } }, +#if TCG_TARGET_HAS_movcond_i32 { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } }, +#endif { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 65b5c59e8..a438950bc 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -23,6 +23,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ +#ifndef TCG_TARGET_MIPS #define TCG_TARGET_MIPS 1 #ifdef __MIPSEB__ @@ -78,6 +79,7 @@ typedef enum { /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 +#define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_ext8s_i32 1 @@ -86,6 +88,7 @@ typedef enum { #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 +#define TCG_TARGET_HAS_muls2_i32 1 /* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */ #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ @@ -127,3 +130,5 @@ static inline void flush_icache_range(tcg_target_ulong start, { cacheflush ((void *)start, stop-start, ICACHE); } + +#endif diff --git a/tcg/optimize.c b/tcg/optimize.c index 9109b813e..b35868afb 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -46,6 +46,7 @@ struct tcg_temp_info { uint16_t prev_copy; uint16_t next_copy; tcg_target_ulong val; + tcg_target_ulong mask; }; static struct tcg_temp_info temps[TCG_MAX_TEMPS]; @@ -63,6 +64,17 @@ static void reset_temp(TCGArg temp) } } temps[temp].state = TCG_TEMP_UNDEF; + temps[temp].mask = -1; +} + +/* Reset all temporaries, given that there are NB_TEMPS of them. */ +static void reset_all_temps(int nb_temps) +{ + int i; + for (i = 0; i < nb_temps; i++) { + temps[i].state = TCG_TEMP_UNDEF; + temps[i].mask = -1; + } } static int op_bits(TCGOpcode op) @@ -139,33 +151,35 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2) static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst, TCGArg src) { - reset_temp(dst); - assert(temps[src].state != TCG_TEMP_CONST); - - if (s->temps[src].type == s->temps[dst].type) { - if (temps[src].state != TCG_TEMP_COPY) { - temps[src].state = TCG_TEMP_COPY; - temps[src].next_copy = src; - temps[src].prev_copy = src; - } - temps[dst].state = TCG_TEMP_COPY; - temps[dst].next_copy = temps[src].next_copy; - temps[dst].prev_copy = src; - temps[temps[dst].next_copy].prev_copy = dst; - temps[src].next_copy = dst; + reset_temp(dst); + temps[dst].mask = temps[src].mask; + assert(temps[src].state != TCG_TEMP_CONST); + + if (s->temps[src].type == s->temps[dst].type) { + if (temps[src].state != TCG_TEMP_COPY) { + temps[src].state = TCG_TEMP_COPY; + temps[src].next_copy = src; + temps[src].prev_copy = src; } + temps[dst].state = TCG_TEMP_COPY; + temps[dst].next_copy = temps[src].next_copy; + temps[dst].prev_copy = src; + temps[temps[dst].next_copy].prev_copy = dst; + temps[src].next_copy = dst; + } - gen_args[0] = dst; - gen_args[1] = src; + gen_args[0] = dst; + gen_args[1] = src; } static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val) { - reset_temp(dst); - temps[dst].state = TCG_TEMP_CONST; - temps[dst].val = val; - gen_args[0] = dst; - gen_args[1] = val; + reset_temp(dst); + temps[dst].state = TCG_TEMP_CONST; + temps[dst].val = val; + temps[dst].mask = val; + gen_args[0] = dst; + gen_args[1] = val; } static TCGOpcode op_to_mov(TCGOpcode op) @@ -470,6 +484,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, TCGArg *args, TCGOpDef *tcg_op_defs) { int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args; + tcg_target_ulong mask, affected; TCGOpcode op; const TCGOpDef *def; TCGArg *gen_args; @@ -482,7 +497,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, nb_temps = s->nb_temps; nb_globals = s->nb_globals; - memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); + reset_all_temps(nb_temps); nb_ops = tcg_opc_ptr - 
s->gen_opc_buf; gen_args = args; @@ -539,11 +554,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, args[5] = tcg_invert_cond(args[5]); } break; - case INDEX_op_add2_i32: + CASE_OP_32_64(add2): swap_commutative(args[0], &args[2], &args[4]); swap_commutative(args[1], &args[3], &args[5]); break; - case INDEX_op_mulu2_i32: + CASE_OP_32_64(mulu2): + CASE_OP_32_64(muls2): swap_commutative(args[0], &args[2], &args[3]); break; case INDEX_op_brcond2_i32: @@ -560,7 +576,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; } - /* Simplify expressions for "shift/rot r, 0, a => movi r, 0" */ + /* Simplify expressions for "shift/rot r, 0, a => movi r, 0", + and "sub r, 0, a => neg r, a" case. */ switch (op) { CASE_OP_32_64(shl): CASE_OP_32_64(shr): @@ -576,6 +593,37 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, continue; } break; + CASE_OP_32_64(sub): + { + TCGOpcode neg_op; + bool have_neg; + + if (temps[args[2]].state == TCG_TEMP_CONST) { + /* Proceed with possible constant folding. */ + break; + } + if (op == INDEX_op_sub_i32) { + neg_op = INDEX_op_neg_i32; + have_neg = TCG_TARGET_HAS_neg_i32; + } else { + neg_op = INDEX_op_neg_i64; + have_neg = TCG_TARGET_HAS_neg_i64; + } + if (!have_neg) { + break; + } + if (temps[args[1]].state == TCG_TEMP_CONST + && temps[args[1]].val == 0) { + s->gen_opc_buf[op_index] = neg_op; + reset_temp(args[0]); + gen_args[0] = args[0]; + gen_args[1] = args[2]; + args += 3; + gen_args += 2; + continue; + } + } + break; default: break; } @@ -612,6 +660,113 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; } + /* Simplify using known-zero bits */ + mask = -1; + affected = -1; + switch (op) { + CASE_OP_32_64(ext8s): + if ((temps[args[1]].mask & 0x80) != 0) { + break; + } + CASE_OP_32_64(ext8u): + mask = 0xff; + goto and_const; + CASE_OP_32_64(ext16s): + if ((temps[args[1]].mask & 0x8000) != 0) { + break; + } + CASE_OP_32_64(ext16u): + mask = 0xffff; + goto and_const; + case INDEX_op_ext32s_i64: + if ((temps[args[1]].mask & 0x80000000) != 0) { + break; + } + case INDEX_op_ext32u_i64: + mask = 0xffffffffU; + goto and_const; + + CASE_OP_32_64(and): + mask = temps[args[2]].mask; + if (temps[args[2]].state == TCG_TEMP_CONST) { + and_const: + affected = temps[args[1]].mask & ~mask; + } + mask = temps[args[1]].mask & mask; + break; + + CASE_OP_32_64(sar): + if (temps[args[2]].state == TCG_TEMP_CONST) { + mask = ((tcg_target_long)temps[args[1]].mask + >> temps[args[2]].val); + } + break; + + CASE_OP_32_64(shr): + if (temps[args[2]].state == TCG_TEMP_CONST) { + mask = temps[args[1]].mask >> temps[args[2]].val; + } + break; + + CASE_OP_32_64(shl): + if (temps[args[2]].state == TCG_TEMP_CONST) { + mask = temps[args[1]].mask << temps[args[2]].val; + } + break; + + CASE_OP_32_64(neg): + /* Set to 1 all bits to the left of the rightmost. 
*/ + mask = -(temps[args[1]].mask & -temps[args[1]].mask); + break; + + CASE_OP_32_64(deposit): + tmp = ((1ull << args[4]) - 1); + mask = ((temps[args[1]].mask & ~(tmp << args[3])) + | ((temps[args[2]].mask & tmp) << args[3])); + break; + + CASE_OP_32_64(or): + CASE_OP_32_64(xor): + mask = temps[args[1]].mask | temps[args[2]].mask; + break; + + CASE_OP_32_64(setcond): + mask = 1; + break; + + CASE_OP_32_64(movcond): + mask = temps[args[3]].mask | temps[args[4]].mask; + break; + + default: + break; + } + + if (mask == 0) { + assert(def->nb_oargs == 1); + s->gen_opc_buf[op_index] = op_to_movi(op); + tcg_opt_gen_movi(gen_args, args[0], 0); + args += def->nb_oargs + def->nb_iargs + def->nb_cargs; + gen_args += 2; + continue; + } + if (affected == 0) { + assert(def->nb_oargs == 1); + if (temps_are_copies(args[0], args[1])) { + s->gen_opc_buf[op_index] = INDEX_op_nop; + } else if (temps[args[1]].state != TCG_TEMP_CONST) { + s->gen_opc_buf[op_index] = op_to_mov(op); + tcg_opt_gen_mov(s, gen_args, args[0], args[1]); + gen_args += 2; + } else { + s->gen_opc_buf[op_index] = op_to_movi(op); + tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val); + gen_args += 2; + } + args += def->nb_iargs + 1; + continue; + } + /* Simplify expression for "op r, a, 0 => movi r, 0" cases */ switch (op) { CASE_OP_32_64(and): @@ -768,7 +923,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, tmp = do_constant_folding_cond(op, args[0], args[1], args[2]); if (tmp != 2) { if (tmp) { - memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); + reset_all_temps(nb_temps); s->gen_opc_buf[op_index] = INDEX_op_br; gen_args[0] = args[3]; gen_args += 1; @@ -861,7 +1016,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]); if (tmp != 2) { if (tmp) { - memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); + reset_all_temps(nb_temps); s->gen_opc_buf[op_index] = INDEX_op_br; gen_args[0] = args[5]; gen_args += 1; @@ -875,7 +1030,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, && temps[args[3]].val == 0) { /* Simplify LT/GE comparisons vs zero to a single compare vs the high word of the input. */ - memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); + reset_all_temps(nb_temps); s->gen_opc_buf[op_index] = INDEX_op_brcond_i32; gen_args[0] = args[1]; gen_args[1] = args[3]; @@ -902,6 +1057,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, /* Simplify LT/GE comparisons vs zero to a single compare vs the high word of the input. */ s->gen_opc_buf[op_index] = INDEX_op_setcond_i32; + reset_temp(args[0]); gen_args[0] = args[0]; gen_args[1] = args[2]; gen_args[2] = args[4]; @@ -938,9 +1094,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, /* Default case: we know nothing about operation (or were unable to compute the operation result) so no propagation is done. We trash everything if the operation is the end of a basic - block, otherwise we only trash the output args. */ + block, otherwise we only trash the output args. "mask" is + the non-zero bits mask for the first output arg. 
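(Worked example of the new known-zero-bits bookkeeping: after ext8u the result's mask is 0xff; a later and with a constant either keeps every possibly-set bit, in which case "affected" is 0 and the op folds to a plain mov, or clears them all, in which case "mask" is 0 and it folds to movi 0. A self-contained illustration of that arithmetic follows; fold_and_example is an illustrative name, not a QEMU helper.)

    #include <assert.h>
    #include <stdint.h>

    /* in_mask: possibly-nonzero bits of the input; c: the AND constant. */
    static void fold_and_example(uint64_t in_mask, uint64_t c,
                                 int *fold_to_zero, int *fold_to_mov)
    {
        uint64_t mask = in_mask & c;        /* possibly-nonzero result bits */
        uint64_t affected = in_mask & ~c;   /* input bits the AND may clear */
        *fold_to_zero = (mask == 0);        /* result provably 0 -> movi 0  */
        *fold_to_mov  = (affected == 0);    /* the AND is a no-op -> mov    */
    }

    int main(void)
    {
        int z, m;
        fold_and_example(0xff, 0xff00, &z, &m);   /* ext8u, then and 0xff00 */
        assert(z && !m);
        fold_and_example(0xff, 0x00ff, &z, &m);   /* ext8u, then and 0x00ff */
        assert(!z && m);
        return 0;
    }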
*/ if (def->flags & TCG_OPF_BB_END) { - memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); + reset_all_temps(nb_temps); } else { for (i = 0; i < def->nb_oargs; i++) { reset_temp(args[i]); diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index d72d39627..453ab6b58 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -549,7 +549,7 @@ static void add_qemu_ldst_label (TCGContext *s, label->label_ptr[0] = label_ptr; } -#include "../../softmmu_defs.h" +#include "exec/softmmu_defs.h" /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ @@ -1671,18 +1671,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out32 (s, DIVWU | TAB (args[0], args[1], args[2])); break; - case INDEX_op_rem_i32: - tcg_out32 (s, DIVW | TAB (0, args[1], args[2])); - tcg_out32 (s, MULLW | TAB (0, 0, args[2])); - tcg_out32 (s, SUBF | TAB (args[0], 0, args[1])); - break; - - case INDEX_op_remu_i32: - tcg_out32 (s, DIVWU | TAB (0, args[1], args[2])); - tcg_out32 (s, MULLW | TAB (0, 0, args[2])); - tcg_out32 (s, SUBF | TAB (args[0], 0, args[1])); - break; - case INDEX_op_mulu2_i32: if (args[0] == args[2] || args[0] == args[3]) { tcg_out32 (s, MULLW | TAB (0, args[2], args[3])); @@ -1992,8 +1980,6 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_mul_i32, { "r", "r", "ri" } }, { INDEX_op_div_i32, { "r", "r", "r" } }, { INDEX_op_divu_i32, { "r", "r", "r" } }, - { INDEX_op_rem_i32, { "r", "r", "r" } }, - { INDEX_op_remu_i32, { "r", "r", "r" } }, { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } }, { INDEX_op_sub_i32, { "r", "r", "ri" } }, { INDEX_op_and_i32, { "r", "r", "ri" } }, diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index ad433ae5b..b42d97cc2 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#ifndef TCG_TARGET_PPC #define TCG_TARGET_PPC 1 #define TCG_TARGET_WORDS_BIGENDIAN @@ -77,6 +78,7 @@ typedef enum { /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 +#define TCG_TARGET_HAS_rem_i32 0 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_ext8s_i32 1 #define TCG_TARGET_HAS_ext16s_i32 1 @@ -93,9 +95,12 @@ typedef enum { #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_muls2_i32 0 #define TCG_AREG0 TCG_REG_R27 #define tcg_qemu_tb_exec(env, tb_ptr) \ ((long __attribute__ ((longcall)) \ - (*)(void *, void *))code_gen_prologue)(env, tb_ptr) + (*)(void *, void *))tcg_ctx.code_gen_prologue)(env, tb_ptr) + +#endif diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 5403fc1f9..0678de204 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -22,7 +22,12 @@ * THE SOFTWARE. 
*/ -#define TCG_CT_CONST_U32 0x100 +#define TCG_CT_CONST_S16 0x100 +#define TCG_CT_CONST_U16 0x200 +#define TCG_CT_CONST_S32 0x400 +#define TCG_CT_CONST_U32 0x800 +#define TCG_CT_CONST_ZERO 0x1000 +#define TCG_CT_CONST_MONE 0x2000 static uint8_t *tb_ret_addr; @@ -40,6 +45,16 @@ static uint8_t *tb_ret_addr; #define GUEST_BASE 0 #endif +#ifdef CONFIG_GETAUXVAL +#include <sys/auxv.h> +static bool have_isa_2_06; +#define HAVE_ISA_2_06 have_isa_2_06 +#define HAVE_ISEL have_isa_2_06 +#else +#define HAVE_ISA_2_06 0 +#define HAVE_ISEL 0 +#endif + #ifdef CONFIG_USE_GUEST_BASE #define TCG_GUEST_BASE_REG 30 #else @@ -242,9 +257,24 @@ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str) tcg_regset_reset_reg (ct->u.regs, TCG_REG_R6); #endif break; - case 'Z': + case 'I': + ct->ct |= TCG_CT_CONST_S16; + break; + case 'J': + ct->ct |= TCG_CT_CONST_U16; + break; + case 'M': + ct->ct |= TCG_CT_CONST_MONE; + break; + case 'T': + ct->ct |= TCG_CT_CONST_S32; + break; + case 'U': ct->ct |= TCG_CT_CONST_U32; break; + case 'Z': + ct->ct |= TCG_CT_CONST_ZERO; + break; default: return -1; } @@ -257,19 +287,29 @@ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str) static int tcg_target_const_match (tcg_target_long val, const TCGArgConstraint *arg_ct) { - int ct; - - ct = arg_ct->ct; - if (ct & TCG_CT_CONST) + int ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { return 1; - else if ((ct & TCG_CT_CONST_U32) && (val == (uint32_t) val)) + } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { return 1; + } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } else if ((ct & TCG_CT_CONST_MONE) && val == -1) { + return 1; + } return 0; } #define OPCD(opc) ((opc)<<26) #define XO19(opc) (OPCD(19)|((opc)<<1)) -#define XO30(opc) (OPCD(30)|((opc)<<2)) +#define MD30(opc) (OPCD(30)|((opc)<<2)) +#define MDS30(opc) (OPCD(30)|((opc)<<1)) #define XO31(opc) (OPCD(31)|((opc)<<1)) #define XO58(opc) (OPCD(58)|(opc)) #define XO62(opc) (OPCD(62)|(opc)) @@ -306,15 +346,19 @@ static int tcg_target_const_match (tcg_target_long val, #define MULLI OPCD( 7) #define CMPLI OPCD( 10) #define CMPI OPCD( 11) +#define SUBFIC OPCD( 8) #define LWZU OPCD( 33) #define STWU OPCD( 37) +#define RLWIMI OPCD( 20) #define RLWINM OPCD( 21) +#define RLWNM OPCD( 23) -#define RLDICL XO30( 0) -#define RLDICR XO30( 1) -#define RLDIMI XO30( 3) +#define RLDICL MD30( 0) +#define RLDICR MD30( 1) +#define RLDIMI MD30( 3) +#define RLDCL MDS30( 8) #define BCLR XO19( 16) #define BCCTR XO19(528) @@ -329,11 +373,15 @@ static int tcg_target_const_match (tcg_target_long val, #define EXTSW XO31(986) #define ADD XO31(266) #define ADDE XO31(138) +#define ADDME XO31(234) +#define ADDZE XO31(202) #define ADDC XO31( 10) #define AND XO31( 28) #define SUBF XO31( 40) #define SUBFC XO31( 8) #define SUBFE XO31(136) +#define SUBFME XO31(232) +#define SUBFZE XO31(200) #define OR XO31(444) #define XOR XO31(316) #define MULLW XO31(235) @@ -344,16 +392,24 @@ static int tcg_target_const_match (tcg_target_long val, #define CMPL XO31( 32) #define LHBRX XO31(790) #define LWBRX XO31(534) +#define LDBRX XO31(532) #define STHBRX XO31(918) #define STWBRX XO31(662) +#define STDBRX XO31(660) #define MFSPR XO31(339) #define MTSPR XO31(467) #define SRAWI XO31(824) #define NEG XO31(104) #define MFCR XO31( 19) 
+#define MFOCRF (MFCR | (1u << 20)) #define NOR XO31(124) #define CNTLZW XO31( 26) #define CNTLZD XO31( 58) +#define ANDC XO31( 60) +#define ORC XO31(412) +#define EQV XO31(284) +#define NAND XO31(476) +#define ISEL XO31( 15) #define MULLD XO31(233) #define MULHD XO31( 73) @@ -395,17 +451,21 @@ static int tcg_target_const_match (tcg_target_long val, #define ME(e) ((e)<<1) #define BO(o) ((o)<<21) #define MB64(b) ((b)<<5) +#define FXM(b) (1 << (19 - (b))) #define LK 1 -#define TAB(t,a,b) (RT(t) | RA(a) | RB(b)) -#define SAB(s,a,b) (RS(s) | RA(a) | RB(b)) +#define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) +#define SAB(s, a, b) (RS(s) | RA(a) | RB(b)) +#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff)) +#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff)) #define BF(n) ((n)<<23) #define BI(n, c) (((c)+((n)*4))<<16) #define BT(n, c) (((c)+((n)*4))<<21) #define BA(n, c) (((c)+((n)*4))<<16) #define BB(n, c) (((c)+((n)*4))<<11) +#define BC_(n, c) (((c)+((n)*4))<<6) #define BO_COND_TRUE BO (12) #define BO_COND_FALSE BO ( 4) @@ -431,56 +491,201 @@ static const uint32_t tcg_to_bc[] = { [TCG_COND_GTU] = BC | BI (7, CR_GT) | BO_COND_TRUE, }; -static void tcg_out_mov (TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +/* The low bit here is set if the RA and RB fields must be inverted. */ +static const uint32_t tcg_to_isel[] = { + [TCG_COND_EQ] = ISEL | BC_(7, CR_EQ), + [TCG_COND_NE] = ISEL | BC_(7, CR_EQ) | 1, + [TCG_COND_LT] = ISEL | BC_(7, CR_LT), + [TCG_COND_GE] = ISEL | BC_(7, CR_LT) | 1, + [TCG_COND_LE] = ISEL | BC_(7, CR_GT) | 1, + [TCG_COND_GT] = ISEL | BC_(7, CR_GT), + [TCG_COND_LTU] = ISEL | BC_(7, CR_LT), + [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1, + [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1, + [TCG_COND_GTU] = ISEL | BC_(7, CR_GT), +}; + +static inline void tcg_out_mov(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg) { tcg_out32 (s, OR | SAB (arg, ret, arg)); } -static void tcg_out_rld (TCGContext *s, int op, int ra, int rs, int sh, int mb) +static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, + int sh, int mb) { sh = SH (sh & 0x1f) | (((sh >> 5) & 1) << 1); mb = MB64 ((mb >> 5) | ((mb << 1) & 0x3f)); tcg_out32 (s, op | RA (ra) | RS (rs) | sh | mb); } -static void tcg_out_movi32 (TCGContext *s, int ret, int32_t arg) +static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, + int sh, int mb, int me) { - if (arg == (int16_t) arg) - tcg_out32 (s, ADDI | RT (ret) | RA (0) | (arg & 0xffff)); - else { - tcg_out32 (s, ADDIS | RT (ret) | RA (0) | ((arg >> 16) & 0xffff)); - if (arg & 0xffff) - tcg_out32 (s, ORI | RS (ret) | RA (ret) | (arg & 0xffff)); - } + tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me)); +} + +static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src) +{ + tcg_out_rld(s, RLDICL, dst, src, 0, 32); +} + +static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ + tcg_out_rld(s, RLDICR, dst, src, c, 63 - c); } -static void tcg_out_movi (TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long arg) +static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) { - int32_t arg32 = arg; - arg = type == TCG_TYPE_I32 ? 
arg & 0xffffffff : arg; + tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); +} - if (arg == arg32) { - tcg_out_movi32 (s, ret, arg32); +static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg) +{ + if (arg == (int16_t) arg) { + tcg_out32(s, ADDI | TAI(ret, 0, arg)); + } else { + tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); + if (arg & 0xffff) { + tcg_out32(s, ORI | SAI(ret, ret, arg)); + } } - else { - if ((uint64_t) arg >> 32) { - uint16_t h16 = arg >> 16; - uint16_t l16 = arg; - - tcg_out_movi32 (s, ret, arg >> 32); - tcg_out_rld (s, RLDICR, ret, ret, 32, 31); - if (h16) tcg_out32 (s, ORIS | RS (ret) | RA (ret) | h16); - if (l16) tcg_out32 (s, ORI | RS (ret) | RA (ret) | l16); +} + +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, + tcg_target_long arg) +{ + if (type == TCG_TYPE_I32 || arg == (int32_t)arg) { + tcg_out_movi32(s, ret, arg); + } else if (arg == (uint32_t)arg && !(arg & 0x8000)) { + tcg_out32(s, ADDI | TAI(ret, 0, arg)); + tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); + } else { + int32_t high = arg >> 32; + tcg_out_movi32(s, ret, high); + if (high) { + tcg_out_shli64(s, ret, ret, 32); } - else { - tcg_out_movi32 (s, ret, arg32); - if (arg32 < 0) - tcg_out_rld (s, RLDICL, ret, ret, 0, 32); + if (arg & 0xffff0000) { + tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); + } + if (arg & 0xffff) { + tcg_out32(s, ORI | SAI(ret, ret, arg)); + } + } +} + +static bool mask_operand(uint32_t c, int *mb, int *me) +{ + uint32_t lsb, test; + + /* Accept a bit pattern like: + 0....01....1 + 1....10....0 + 0..01..10..0 + Keep track of the transitions. */ + if (c == 0 || c == -1) { + return false; + } + test = c; + lsb = test & -test; + test += lsb; + if (test & (test - 1)) { + return false; + } + + *me = clz32(lsb); + *mb = test ? clz32(test & -test) + 1 : 0; + return true; +} + +static bool mask64_operand(uint64_t c, int *mb, int *me) +{ + uint64_t lsb; + + if (c == 0) { + return false; + } + + lsb = c & -c; + /* Accept 1..10..0. */ + if (c == -lsb) { + *mb = 0; + *me = clz64(lsb); + return true; + } + /* Accept 0..01..1. 
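      For example, c = 0x0000ffff has lsb == 1 and (c & (c + 1)) == 0, so it
      is accepted with mb = clz64(c + 1) + 1 = 48 and me = 63, i.e. the
      right-justified 16-bit field; a value such as c = 0x0f0f is rejected
      because its set bits are not one contiguous run.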
*/ + if (lsb == 1 && (c & (c + 1)) == 0) { + *mb = clz64(c + 1) + 1; + *me = 63; + return true; + } + return false; +} + +static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) +{ + int mb, me; + + if ((c & 0xffff) == c) { + tcg_out32(s, ANDI | SAI(src, dst, c)); + return; + } else if ((c & 0xffff0000) == c) { + tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); + return; + } else if (mask_operand(c, &mb, &me)) { + tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); + } else { + tcg_out_movi(s, TCG_TYPE_I32, 0, c); + tcg_out32(s, AND | SAB(src, dst, 0)); + } +} + +static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) +{ + int mb, me; + + if ((c & 0xffff) == c) { + tcg_out32(s, ANDI | SAI(src, dst, c)); + return; + } else if ((c & 0xffff0000) == c) { + tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); + return; + } else if (mask64_operand(c, &mb, &me)) { + if (mb == 0) { + tcg_out_rld(s, RLDICR, dst, src, 0, me); + } else { + tcg_out_rld(s, RLDICL, dst, src, 0, mb); } + } else { + tcg_out_movi(s, TCG_TYPE_I64, 0, c); + tcg_out32(s, AND | SAB(src, dst, 0)); + } +} + +static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c, + int op_lo, int op_hi) +{ + if (c >> 16) { + tcg_out32(s, op_hi | SAI(src, dst, c >> 16)); + src = dst; + } + if (c & 0xffff) { + tcg_out32(s, op_lo | SAI(src, dst, c)); + src = dst; } } +static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) +{ + tcg_out_zori32(s, dst, src, c, ORI, ORIS); +} + +static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) +{ + tcg_out_zori32(s, dst, src, c, XORI, XORIS); +} + static void tcg_out_b (TCGContext *s, int mask, tcg_target_long target) { tcg_target_long disp; @@ -522,31 +727,31 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg) #endif } -static void tcg_out_ldst (TCGContext *s, int ret, int addr, - int offset, int op1, int op2) +static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, + int offset, int op1, int op2) { - if (offset == (int16_t) offset) - tcg_out32 (s, op1 | RT (ret) | RA (addr) | (offset & 0xffff)); - else { - tcg_out_movi (s, TCG_TYPE_I64, 0, offset); - tcg_out32 (s, op2 | RT (ret) | RA (addr) | RB (0)); + if (offset == (int16_t) offset) { + tcg_out32(s, op1 | TAI(ret, addr, offset)); + } else { + tcg_out_movi(s, TCG_TYPE_I64, 0, offset); + tcg_out32(s, op2 | TAB(ret, addr, 0)); } } -static void tcg_out_ldsta (TCGContext *s, int ret, int addr, - int offset, int op1, int op2) +static void tcg_out_ldsta(TCGContext *s, TCGReg ret, TCGReg addr, + int offset, int op1, int op2) { - if (offset == (int16_t) (offset & ~3)) - tcg_out32 (s, op1 | RT (ret) | RA (addr) | (offset & 0xffff)); - else { - tcg_out_movi (s, TCG_TYPE_I64, 0, offset); - tcg_out32 (s, op2 | RT (ret) | RA (addr) | RB (0)); + if (offset == (int16_t) (offset & ~3)) { + tcg_out32(s, op1 | TAI(ret, addr, offset)); + } else { + tcg_out_movi(s, TCG_TYPE_I64, 0, offset); + tcg_out32(s, op2 | TAB(ret, addr, 0)); } } #if defined (CONFIG_SOFTMMU) -#include "../../softmmu_defs.h" +#include "exec/softmmu_defs.h" /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ @@ -566,40 +771,28 @@ static const void * const qemu_st_helpers[4] = { helper_stq_mmu, }; -static void tcg_out_tlb_read (TCGContext *s, int r0, int r1, int r2, - int addr_reg, int s_bits, int offset) +static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2, + TCGReg addr_reg, int s_bits, int offset) { #if TARGET_LONG_BITS == 
32 - tcg_out_rld (s, RLDICL, addr_reg, addr_reg, 0, 32); - - tcg_out32 (s, (RLWINM - | RA (r0) - | RS (addr_reg) - | SH (32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) - | MB (32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS)) - | ME (31 - CPU_TLB_ENTRY_BITS) - ) - ); - tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (TCG_AREG0)); - tcg_out32 (s, (LWZU | RT (r1) | RA (r0) | offset)); - tcg_out32 (s, (RLWINM - | RA (r2) - | RS (addr_reg) - | SH (0) - | MB ((32 - s_bits) & 31) - | ME (31 - TARGET_PAGE_BITS) - ) - ); + tcg_out_ext32u(s, addr_reg, addr_reg); + + tcg_out_rlw(s, RLWINM, r0, addr_reg, + 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), + 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS), + 31 - CPU_TLB_ENTRY_BITS); + tcg_out32(s, ADD | TAB(r0, r0, TCG_AREG0)); + tcg_out32(s, LWZU | TAI(r1, r0, offset)); + tcg_out_rlw(s, RLWINM, r2, addr_reg, 0, + (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); #else tcg_out_rld (s, RLDICL, r0, addr_reg, 64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS); - tcg_out_rld (s, RLDICR, r0, r0, - CPU_TLB_ENTRY_BITS, - 63 - CPU_TLB_ENTRY_BITS); + tcg_out_shli64(s, r0, r0, CPU_TLB_ENTRY_BITS); - tcg_out32 (s, ADD | TAB (r0, r0, TCG_AREG0)); - tcg_out32 (s, LD_ADDR | RT (r1) | RA (r0) | offset); + tcg_out32(s, ADD | TAB(r0, r0, TCG_AREG0)); + tcg_out32(s, LD_ADDR | TAI(r1, r0, offset)); if (!s_bits) { tcg_out_rld (s, RLDICR, r2, addr_reg, 0, 63 - TARGET_PAGE_BITS); @@ -614,20 +807,44 @@ static void tcg_out_tlb_read (TCGContext *s, int r0, int r1, int r2, } #endif +static const uint32_t qemu_ldx_opc[8] = { +#ifdef TARGET_WORDS_BIGENDIAN + LBZX, LHZX, LWZX, LDX, + 0, LHAX, LWAX, LDX +#else + LBZX, LHBRX, LWBRX, LDBRX, + 0, 0, 0, LDBRX, +#endif +}; + +static const uint32_t qemu_stx_opc[4] = { +#ifdef TARGET_WORDS_BIGENDIAN + STBX, STHX, STWX, STDX +#else + STBX, STHBRX, STWBRX, STDBRX, +#endif +}; + +static const uint32_t qemu_exts_opc[4] = { + EXTSB, EXTSH, EXTSW, 0 +}; + static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) { - int addr_reg, data_reg, r0, r1, rbase, bswap; + TCGReg addr_reg, data_reg, r0, r1, rbase; + uint32_t insn, s_bits; #ifdef CONFIG_SOFTMMU - int r2, mem_index, s_bits, ir; + TCGReg r2, ir; + int mem_index; void *label1_ptr, *label2_ptr; #endif data_reg = *args++; addr_reg = *args++; + s_bits = opc & 3; #ifdef CONFIG_SOFTMMU mem_index = *args; - s_bits = opc & 3; r0 = 3; r1 = 4; @@ -652,23 +869,11 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1); - switch (opc) { - case 0|4: - tcg_out32 (s, EXTSB | RA (data_reg) | RS (3)); - break; - case 1|4: - tcg_out32 (s, EXTSH | RA (data_reg) | RS (3)); - break; - case 2|4: - tcg_out32 (s, EXTSW | RA (data_reg) | RS (3)); - break; - case 0: - case 1: - case 2: - case 3: - if (data_reg != 3) - tcg_out_mov (s, TCG_TYPE_I64, data_reg, 3); - break; + if (opc & 4) { + insn = qemu_exts_opc[s_bits]; + tcg_out32(s, insn | RA(data_reg) | RS(3)); + } else if (data_reg != 3) { + tcg_out_mov(s, TCG_TYPE_I64, data_reg, 3); } label2_ptr = s->code_ptr; tcg_out32 (s, B); @@ -679,84 +884,35 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) #endif /* r0 now contains &env->tlb_table[mem_index][index].addr_read */ - tcg_out32 (s, (LD - | RT (r0) - | RA (r0) - | (offsetof (CPUTLBEntry, addend) - - offsetof (CPUTLBEntry, addr_read)) - )); + tcg_out32(s, LD | TAI(r0, r0, + offsetof(CPUTLBEntry, addend) + - offsetof(CPUTLBEntry, addr_read))); /* r0 = env->tlb_table[mem_index][index].addend */ - tcg_out32 (s, ADD | RT (r0) | RA 
(r0) | RB (addr_reg)); + tcg_out32(s, ADD | TAB(r0, r0, addr_reg)); /* r0 = env->tlb_table[mem_index][index].addend + addr */ #else /* !CONFIG_SOFTMMU */ #if TARGET_LONG_BITS == 32 - tcg_out_rld (s, RLDICL, addr_reg, addr_reg, 0, 32); + tcg_out_ext32u(s, addr_reg, addr_reg); #endif r0 = addr_reg; r1 = 3; rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; #endif -#ifdef TARGET_WORDS_BIGENDIAN - bswap = 0; -#else - bswap = 1; -#endif - switch (opc) { - default: - case 0: - tcg_out32 (s, LBZX | TAB (data_reg, rbase, r0)); - break; - case 0|4: - tcg_out32 (s, LBZX | TAB (data_reg, rbase, r0)); - tcg_out32 (s, EXTSB | RA (data_reg) | RS (data_reg)); - break; - case 1: - if (bswap) - tcg_out32 (s, LHBRX | TAB (data_reg, rbase, r0)); - else - tcg_out32 (s, LHZX | TAB (data_reg, rbase, r0)); - break; - case 1|4: - if (bswap) { - tcg_out32 (s, LHBRX | TAB (data_reg, rbase, r0)); - tcg_out32 (s, EXTSH | RA (data_reg) | RS (data_reg)); - } - else tcg_out32 (s, LHAX | TAB (data_reg, rbase, r0)); - break; - case 2: - if (bswap) - tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0)); - else - tcg_out32 (s, LWZX | TAB (data_reg, rbase, r0)); - break; - case 2|4: - if (bswap) { - tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0)); - tcg_out32 (s, EXTSW | RA (data_reg) | RS (data_reg)); - } - else tcg_out32 (s, LWAX | TAB (data_reg, rbase, r0)); - break; - case 3: -#ifdef CONFIG_USE_GUEST_BASE - if (bswap) { - tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4); - tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0)); - tcg_out32 (s, LWBRX | TAB ( r1, rbase, r1)); - tcg_out_rld (s, RLDIMI, data_reg, r1, 32, 0); - } - else tcg_out32 (s, LDX | TAB (data_reg, rbase, r0)); -#else - if (bswap) { - tcg_out_movi32 (s, 0, 4); - tcg_out32 (s, LWBRX | RT (data_reg) | RB (r0)); - tcg_out32 (s, LWBRX | RT ( r1) | RA (r0)); - tcg_out_rld (s, RLDIMI, data_reg, r1, 32, 0); - } - else tcg_out32 (s, LD | RT (data_reg) | RA (r0)); -#endif - break; + insn = qemu_ldx_opc[opc]; + if (!HAVE_ISA_2_06 && insn == LDBRX) { + tcg_out32(s, ADDI | TAI(r1, r0, 4)); + tcg_out32(s, LWBRX | TAB(data_reg, rbase, r0)); + tcg_out32(s, LWBRX | TAB( r1, rbase, r1)); + tcg_out_rld(s, RLDIMI, data_reg, r1, 32, 0); + } else if (insn) { + tcg_out32(s, insn | TAB(data_reg, rbase, r0)); + } else { + insn = qemu_ldx_opc[s_bits]; + tcg_out32(s, insn | TAB(data_reg, rbase, r0)); + insn = qemu_exts_opc[s_bits]; + tcg_out32 (s, insn | RA(data_reg) | RS(data_reg)); } #ifdef CONFIG_SOFTMMU @@ -766,9 +922,11 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) { - int addr_reg, r0, r1, rbase, data_reg, bswap; + TCGReg addr_reg, r0, r1, rbase, data_reg; + uint32_t insn; #ifdef CONFIG_SOFTMMU - int r2, mem_index, ir; + TCGReg r2, ir; + int mem_index; void *label1_ptr, *label2_ptr; #endif @@ -817,48 +975,26 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) - offsetof (CPUTLBEntry, addr_write)) )); /* r0 = env->tlb_table[mem_index][index].addend */ - tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (addr_reg)); + tcg_out32(s, ADD | TAB(r0, r0, addr_reg)); /* r0 = env->tlb_table[mem_index][index].addend + addr */ #else /* !CONFIG_SOFTMMU */ #if TARGET_LONG_BITS == 32 - tcg_out_rld (s, RLDICL, addr_reg, addr_reg, 0, 32); + tcg_out_ext32u(s, addr_reg, addr_reg); #endif r1 = 3; r0 = addr_reg; rbase = GUEST_BASE ? 
TCG_GUEST_BASE_REG : 0; #endif -#ifdef TARGET_WORDS_BIGENDIAN - bswap = 0; -#else - bswap = 1; -#endif - switch (opc) { - case 0: - tcg_out32 (s, STBX | SAB (data_reg, rbase, r0)); - break; - case 1: - if (bswap) - tcg_out32 (s, STHBRX | SAB (data_reg, rbase, r0)); - else - tcg_out32 (s, STHX | SAB (data_reg, rbase, r0)); - break; - case 2: - if (bswap) - tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0)); - else - tcg_out32 (s, STWX | SAB (data_reg, rbase, r0)); - break; - case 3: - if (bswap) { - tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0)); - tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4); - tcg_out_rld (s, RLDICL, 0, data_reg, 32, 0); - tcg_out32 (s, STWBRX | SAB (0, rbase, r1)); - } - else tcg_out32 (s, STDX | SAB (data_reg, rbase, r0)); - break; + insn = qemu_stx_opc[opc]; + if (!HAVE_ISA_2_06 && insn == STDBRX) { + tcg_out32(s, STWBRX | SAB(data_reg, rbase, r0)); + tcg_out32(s, ADDI | TAI(r1, r0, 4)); + tcg_out_shri64(s, 0, data_reg, 32); + tcg_out32(s, STWBRX | SAB(0, rbase, r1)); + } else { + tcg_out32(s, insn | SAB(data_reg, rbase, r0)); } #ifdef CONFIG_SOFTMMU @@ -930,10 +1066,10 @@ static void tcg_target_qemu_prologue (TCGContext *s) | (i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE) ) ); - tcg_out32 (s, LD | RT (0) | RA (1) | (frame_size + 16)); - tcg_out32 (s, MTSPR | RS (0) | LR); - tcg_out32 (s, ADDI | RT (1) | RA (1) | frame_size); - tcg_out32 (s, BCLR | BO_ALWAYS); + tcg_out32(s, LD | TAI(0, 1, frame_size + 16)); + tcg_out32(s, MTSPR | RS(0) | LR); + tcg_out32(s, ADDI | TAI(1, 1, frame_size)); + tcg_out32(s, BCLR | BO_ALWAYS); } static void tcg_out_ld (TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, @@ -954,38 +1090,17 @@ static void tcg_out_st (TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, tcg_out_ldsta (s, arg, arg1, arg2, STD, STDX); } -static void ppc_addi32 (TCGContext *s, int rt, int ra, tcg_target_long si) -{ - if (!si && rt == ra) - return; - - if (si == (int16_t) si) - tcg_out32 (s, ADDI | RT (rt) | RA (ra) | (si & 0xffff)); - else { - uint16_t h = ((si >> 16) & 0xffff) + ((uint16_t) si >> 15); - tcg_out32 (s, ADDIS | RT (rt) | RA (ra) | h); - tcg_out32 (s, ADDI | RT (rt) | RA (rt) | (si & 0xffff)); - } -} - -static void ppc_addi64 (TCGContext *s, int rt, int ra, tcg_target_long si) -{ - /* XXX: suboptimal */ - if (si == (int16_t) si - || ((((uint64_t) si >> 31) == 0) && (si & 0x8000) == 0)) - ppc_addi32 (s, rt, ra, si); - else { - tcg_out_movi (s, TCG_TYPE_I64, 0, si); - tcg_out32 (s, ADD | RT (rt) | RA (ra)); - } -} - -static void tcg_out_cmp (TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, - int const_arg2, int cr, int arch64) +static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, + int const_arg2, int cr, TCGType type) { int imm; uint32_t op; + /* Simplify the comparisons below wrt CMPI. 
*/ + if (type == TCG_TYPE_I32) { + arg2 = (int32_t)arg2; + } + switch (cond) { case TCG_COND_EQ: case TCG_COND_NE: @@ -1038,96 +1153,132 @@ static void tcg_out_cmp (TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, default: tcg_abort (); } - op |= BF (cr) | (arch64 << 21); + op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); - if (imm) - tcg_out32 (s, op | RA (arg1) | (arg2 & 0xffff)); - else { + if (imm) { + tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); + } else { if (const_arg2) { - tcg_out_movi (s, TCG_TYPE_I64, 0, arg2); - tcg_out32 (s, op | RA (arg1) | RB (0)); + tcg_out_movi(s, type, 0, arg2); + arg2 = 0; } - else - tcg_out32 (s, op | RA (arg1) | RB (arg2)); + tcg_out32(s, op | RA(arg1) | RB(arg2)); } +} +static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, + TCGReg dst, TCGReg src) +{ + tcg_out32(s, (type == TCG_TYPE_I64 ? CNTLZD : CNTLZW) | RS(src) | RA(dst)); + tcg_out_shri64(s, dst, dst, type == TCG_TYPE_I64 ? 6 : 5); } -static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond, - TCGArg arg0, TCGArg arg1, TCGArg arg2, - int const_arg2) +static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src) { - int crop, sh, arg; + /* X != 0 implies X + -1 generates a carry. Extra addition + trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. */ + if (dst != src) { + tcg_out32(s, ADDIC | TAI(dst, src, -1)); + tcg_out32(s, SUBFE | TAB(dst, dst, src)); + } else { + tcg_out32(s, ADDIC | TAI(0, src, -1)); + tcg_out32(s, SUBFE | TAB(dst, 0, src)); + } +} - switch (cond) { - case TCG_COND_EQ: - if (const_arg2) { - if (!arg2) { - arg = arg1; - } - else { - arg = 0; - if ((uint16_t) arg2 == arg2) { - tcg_out32 (s, XORI | RS (arg1) | RA (0) | arg2); - } - else { - tcg_out_movi (s, type, 0, arg2); - tcg_out32 (s, XOR | SAB (arg1, 0, 0)); - } - } - } - else { - arg = 0; - tcg_out32 (s, XOR | SAB (arg1, 0, arg2)); +static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, + bool const_arg2) +{ + if (const_arg2) { + if ((uint32_t)arg2 == arg2) { + tcg_out_xori32(s, TCG_REG_R0, arg1, arg2); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2); + tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0)); } + } else { + tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2)); + } + return TCG_REG_R0; +} - if (type == TCG_TYPE_I64) { - tcg_out32 (s, CNTLZD | RS (arg) | RA (0)); - tcg_out_rld (s, RLDICL, arg0, 0, 58, 6); - } - else { - tcg_out32 (s, CNTLZW | RS (arg) | RA (0)); - tcg_out32 (s, (RLWINM - | RA (arg0) - | RS (0) - | SH (27) - | MB (5) - | ME (31) - ) - ); - } - break; +static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGArg arg0, TCGArg arg1, TCGArg arg2, + int const_arg2) +{ + int crop, sh; - case TCG_COND_NE: - if (const_arg2) { - if (!arg2) { - arg = arg1; - } - else { - arg = 0; - if ((uint16_t) arg2 == arg2) { - tcg_out32 (s, XORI | RS (arg1) | RA (0) | arg2); - } - else { - tcg_out_movi (s, type, 0, arg2); - tcg_out32 (s, XOR | SAB (arg1, 0, 0)); - } + /* Ignore high bits of a potential constant arg2. */ + if (type == TCG_TYPE_I32) { + arg2 = (uint32_t)arg2; + } + + /* Handle common and trivial cases before handling anything else. 
*/ + if (arg2 == 0) { + switch (cond) { + case TCG_COND_EQ: + tcg_out_setcond_eq0(s, type, arg0, arg1); + return; + case TCG_COND_NE: + if (type == TCG_TYPE_I32) { + tcg_out_ext32u(s, TCG_REG_R0, arg1); + arg1 = TCG_REG_R0; } + tcg_out_setcond_ne0(s, arg0, arg1); + return; + case TCG_COND_GE: + tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); + arg1 = arg0; + /* FALLTHRU */ + case TCG_COND_LT: + /* Extract the sign bit. */ + tcg_out_rld(s, RLDICL, arg0, arg1, + type == TCG_TYPE_I64 ? 1 : 33, 63); + return; + default: + break; } - else { - arg = 0; - tcg_out32 (s, XOR | SAB (arg1, 0, arg2)); - } + } - if (arg == arg1 && arg1 == arg0) { - tcg_out32 (s, ADDIC | RT (0) | RA (arg) | 0xffff); - tcg_out32 (s, SUBFE | TAB (arg0, 0, arg)); + /* If we have ISEL, we can implement everything with 3 or 4 insns. + All other cases below are also at least 3 insns, so speed up the + code generator by not considering them and always using ISEL. */ + if (HAVE_ISEL) { + int isel, tab; + + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + + isel = tcg_to_isel[cond]; + + tcg_out_movi(s, type, arg0, 1); + if (isel & 1) { + /* arg0 = (bc ? 0 : 1) */ + tab = TAB(arg0, 0, arg0); + isel &= ~1; + } else { + /* arg0 = (bc ? 1 : 0) */ + tcg_out_movi(s, type, TCG_REG_R0, 0); + tab = TAB(arg0, arg0, TCG_REG_R0); } - else { - tcg_out32 (s, ADDIC | RT (arg0) | RA (arg) | 0xffff); - tcg_out32 (s, SUBFE | TAB (arg0, arg0, arg)); + tcg_out32(s, isel | tab); + return; + } + + switch (cond) { + case TCG_COND_EQ: + arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); + tcg_out_setcond_eq0(s, type, arg0, arg1); + return; + + case TCG_COND_NE: + arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); + /* Discard the high bits only once, rather than both inputs. */ + if (type == TCG_TYPE_I32) { + tcg_out_ext32u(s, TCG_REG_R0, arg1); + arg1 = TCG_REG_R0; } - break; + tcg_out_setcond_ne0(s, arg0, arg1); + return; case TCG_COND_GT: case TCG_COND_GTU: @@ -1152,17 +1303,12 @@ static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond, sh = 31; crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_GT) | BB (7, CR_GT); crtest: - tcg_out_cmp (s, cond, arg1, arg2, const_arg2, 7, type == TCG_TYPE_I64); - if (crop) tcg_out32 (s, crop); - tcg_out32 (s, MFCR | RT (0)); - tcg_out32 (s, (RLWINM - | RA (arg0) - | RS (0) - | SH (sh) - | MB (31) - | ME (31) - ) - ); + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + if (crop) { + tcg_out32(s, crop); + } + tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); + tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31); break; default: @@ -1185,12 +1331,60 @@ static void tcg_out_bc (TCGContext *s, int bc, int label_index) } } -static void tcg_out_brcond (TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - int label_index, int arch64) +static void tcg_out_brcond(TCGContext *s, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + int label_index, TCGType type) { - tcg_out_cmp (s, cond, arg1, arg2, const_arg2, 7, arch64); - tcg_out_bc (s, tcg_to_bc[cond], label_index); + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + tcg_out_bc(s, tcg_to_bc[cond], label_index); +} + +static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, + TCGArg v2, bool const_c2) +{ + /* If for some reason both inputs are zero, don't produce bad code. 
*/ + if (v1 == 0 && v2 == 0) { + tcg_out_movi(s, type, dest, 0); + return; + } + + tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); + + if (HAVE_ISEL) { + int isel = tcg_to_isel[cond]; + + /* Swap the V operands if the operation indicates inversion. */ + if (isel & 1) { + int t = v1; + v1 = v2; + v2 = t; + isel &= ~1; + } + /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */ + if (v2 == 0) { + tcg_out_movi(s, type, 0, 0); + } + tcg_out32(s, isel | TAB(dest, v1, v2)); + } else { + if (dest == v2) { + cond = tcg_invert_cond(cond); + v2 = v1; + } else if (dest != v1) { + if (v1 == 0) { + tcg_out_movi(s, type, dest, 0); + } else { + tcg_out_mov(s, type, dest, v1); + } + } + /* Branch forward over one insn */ + tcg_out32(s, tcg_to_bc[cond] | 8); + if (v2 == 0) { + tcg_out_movi(s, type, dest, 0); + } else { + tcg_out_mov(s, type, dest, v2); + } + } } void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr) @@ -1207,6 +1401,7 @@ void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr) static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { + TCGArg a0, a1, a2; int c; switch (opc) { @@ -1295,88 +1490,123 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, break; case INDEX_op_add_i32: - if (const_args[2]) - ppc_addi32 (s, args[0], args[1], args[2]); - else - tcg_out32 (s, ADD | TAB (args[0], args[1], args[2])); + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + int32_t l, h; + do_addi_32: + l = (int16_t)a2; + h = a2 - l; + if (h) { + tcg_out32(s, ADDIS | TAI(a0, a1, h >> 16)); + a1 = a0; + } + if (l || a0 != a1) { + tcg_out32(s, ADDI | TAI(a0, a1, l)); + } + } else { + tcg_out32(s, ADD | TAB(a0, a1, a2)); + } break; case INDEX_op_sub_i32: - if (const_args[2]) - ppc_addi32 (s, args[0], args[1], -args[2]); - else - tcg_out32 (s, SUBF | TAB (args[0], args[2], args[1])); + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); + } else { + tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); + } + } else if (const_args[2]) { + a2 = -a2; + goto do_addi_32; + } else { + tcg_out32(s, SUBF | TAB(a0, a2, a1)); + } break; - case INDEX_op_and_i64: case INDEX_op_and_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - if ((args[2] & 0xffff) == args[2]) - tcg_out32 (s, ANDI | RS (args[1]) | RA (args[0]) | args[2]); - else if ((args[2] & 0xffff0000) == args[2]) - tcg_out32 (s, ANDIS | RS (args[1]) | RA (args[0]) - | ((args[2] >> 16) & 0xffff)); - else { - tcg_out_movi (s, (opc == INDEX_op_and_i32 - ? 
TCG_TYPE_I32 - : TCG_TYPE_I64), - 0, args[2]); - tcg_out32 (s, AND | SAB (args[1], args[0], 0)); - } + tcg_out_andi32(s, a0, a1, a2); + } else { + tcg_out32(s, AND | SAB(a1, a0, a2)); + } + break; + case INDEX_op_and_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_andi64(s, a0, a1, a2); + } else { + tcg_out32(s, AND | SAB(a1, a0, a2)); } - else - tcg_out32 (s, AND | SAB (args[1], args[0], args[2])); break; case INDEX_op_or_i64: case INDEX_op_or_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - if (args[2] & 0xffff) { - tcg_out32 (s, ORI | RS (args[1]) | RA (args[0]) - | (args[2] & 0xffff)); - if (args[2] >> 16) - tcg_out32 (s, ORIS | RS (args[0]) | RA (args[0]) - | ((args[2] >> 16) & 0xffff)); - } - else { - tcg_out32 (s, ORIS | RS (args[1]) | RA (args[0]) - | ((args[2] >> 16) & 0xffff)); - } + tcg_out_ori32(s, a0, a1, a2); + } else { + tcg_out32(s, OR | SAB(a1, a0, a2)); } - else - tcg_out32 (s, OR | SAB (args[1], args[0], args[2])); break; case INDEX_op_xor_i64: case INDEX_op_xor_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - if ((args[2] & 0xffff) == args[2]) - tcg_out32 (s, XORI | RS (args[1]) | RA (args[0]) - | (args[2] & 0xffff)); - else if ((args[2] & 0xffff0000) == args[2]) - tcg_out32 (s, XORIS | RS (args[1]) | RA (args[0]) - | ((args[2] >> 16) & 0xffff)); - else { - tcg_out_movi (s, (opc == INDEX_op_and_i32 - ? TCG_TYPE_I32 - : TCG_TYPE_I64), - 0, args[2]); - tcg_out32 (s, XOR | SAB (args[1], args[0], 0)); - } + tcg_out_xori32(s, a0, a1, a2); + } else { + tcg_out32(s, XOR | SAB(a1, a0, a2)); } - else - tcg_out32 (s, XOR | SAB (args[1], args[0], args[2])); + break; + case INDEX_op_andc_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_andi32(s, a0, a1, ~a2); + } else { + tcg_out32(s, ANDC | SAB(a1, a0, a2)); + } + break; + case INDEX_op_andc_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_andi64(s, a0, a1, ~a2); + } else { + tcg_out32(s, ANDC | SAB(a1, a0, a2)); + } + break; + case INDEX_op_orc_i32: + if (const_args[2]) { + tcg_out_ori32(s, args[0], args[1], ~args[2]); + break; + } + /* FALLTHRU */ + case INDEX_op_orc_i64: + tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); + break; + case INDEX_op_eqv_i32: + if (const_args[2]) { + tcg_out_xori32(s, args[0], args[1], ~args[2]); + break; + } + /* FALLTHRU */ + case INDEX_op_eqv_i64: + tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); + break; + case INDEX_op_nand_i32: + case INDEX_op_nand_i64: + tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); + break; + case INDEX_op_nor_i32: + case INDEX_op_nor_i64: + tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); break; case INDEX_op_mul_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - if (args[2] == (int16_t) args[2]) - tcg_out32 (s, MULLI | RT (args[0]) | RA (args[1]) - | (args[2] & 0xffff)); - else { - tcg_out_movi (s, TCG_TYPE_I32, 0, args[2]); - tcg_out32 (s, MULLW | TAB (args[0], args[1], 0)); - } + tcg_out32(s, MULLI | TAI(a0, a1, a2)); + } else { + tcg_out32(s, MULLW | TAB(a0, a1, a2)); } - else - tcg_out32 (s, MULLW | TAB (args[0], args[1], args[2])); break; case INDEX_op_div_i32: @@ -1387,45 +1617,19 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out32 (s, DIVWU | TAB (args[0], args[1], args[2])); break; - case INDEX_op_rem_i32: - tcg_out32 (s, DIVW | TAB (0, args[1], args[2])); - tcg_out32 (s, MULLW | TAB (0, 0, args[2])); - tcg_out32 (s, SUBF | TAB (args[0], 0, 
args[1])); - break; - - case INDEX_op_remu_i32: - tcg_out32 (s, DIVWU | TAB (0, args[1], args[2])); - tcg_out32 (s, MULLW | TAB (0, 0, args[2])); - tcg_out32 (s, SUBF | TAB (args[0], 0, args[1])); - break; - case INDEX_op_shl_i32: if (const_args[2]) { - tcg_out32 (s, (RLWINM - | RA (args[0]) - | RS (args[1]) - | SH (args[2]) - | MB (0) - | ME (31 - args[2]) - ) - ); - } - else + tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31 - args[2]); + } else { tcg_out32 (s, SLW | SAB (args[1], args[0], args[2])); + } break; case INDEX_op_shr_i32: if (const_args[2]) { - tcg_out32 (s, (RLWINM - | RA (args[0]) - | RS (args[1]) - | SH (32 - args[2]) - | MB (args[2]) - | ME (31) - ) - ); - } - else + tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], args[2], 31); + } else { tcg_out32 (s, SRW | SAB (args[1], args[0], args[2])); + } break; case INDEX_op_sar_i32: if (const_args[2]) @@ -1433,13 +1637,32 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, else tcg_out32 (s, SRAW | SAB (args[1], args[0], args[2])); break; + case INDEX_op_rotl_i32: + if (const_args[2]) { + tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); + } else { + tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) + | MB(0) | ME(31)); + } + break; + case INDEX_op_rotr_i32: + if (const_args[2]) { + tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); + } else { + tcg_out32(s, SUBFIC | TAI(0, args[2], 32)); + tcg_out32(s, RLWNM | SAB(args[1], args[0], 0) + | MB(0) | ME(31)); + } + break; case INDEX_op_brcond_i32: - tcg_out_brcond (s, args[2], args[0], args[1], const_args[1], args[3], 0); + tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], + args[3], TCG_TYPE_I32); break; case INDEX_op_brcond_i64: - tcg_out_brcond (s, args[2], args[0], args[1], const_args[1], args[3], 1); + tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], + args[3], TCG_TYPE_I64); break; case INDEX_op_neg_i32: @@ -1453,27 +1676,63 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, break; case INDEX_op_add_i64: - if (const_args[2]) - ppc_addi64 (s, args[0], args[1], args[2]); - else - tcg_out32 (s, ADD | TAB (args[0], args[1], args[2])); + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + int32_t l0, h1, h2; + do_addi_64: + /* We can always split any 32-bit signed constant into 3 pieces. + Note the positive 0x80000000 coming from the sub_i64 path, + handled with the same code we need for eg 0x7fff8000. 
*/ + assert(a2 == (int32_t)a2 || a2 == 0x80000000); + l0 = (int16_t)a2; + h1 = a2 - l0; + h2 = 0; + if (h1 < 0 && (int64_t)a2 > 0) { + h2 = 0x40000000; + h1 = a2 - h2 - l0; + } + assert((TCGArg)h2 + h1 + l0 == a2); + + if (h2) { + tcg_out32(s, ADDIS | TAI(a0, a1, h2 >> 16)); + a1 = a0; + } + if (h1) { + tcg_out32(s, ADDIS | TAI(a0, a1, h1 >> 16)); + a1 = a0; + } + if (l0 || a0 != a1) { + tcg_out32(s, ADDI | TAI(a0, a1, l0)); + } + } else { + tcg_out32(s, ADD | TAB(a0, a1, a2)); + } break; case INDEX_op_sub_i64: - if (const_args[2]) - ppc_addi64 (s, args[0], args[1], -args[2]); - else - tcg_out32 (s, SUBF | TAB (args[0], args[2], args[1])); + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); + } else { + tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); + } + } else if (const_args[2]) { + a2 = -a2; + goto do_addi_64; + } else { + tcg_out32(s, SUBF | TAB(a0, a2, a1)); + } break; case INDEX_op_shl_i64: if (const_args[2]) - tcg_out_rld (s, RLDICR, args[0], args[1], args[2], 63 - args[2]); + tcg_out_shli64(s, args[0], args[1], args[2]); else tcg_out32 (s, SLD | SAB (args[1], args[0], args[2])); break; case INDEX_op_shr_i64: if (const_args[2]) - tcg_out_rld (s, RLDICL, args[0], args[1], 64 - args[2], args[2]); + tcg_out_shri64(s, args[0], args[1], args[2]); else tcg_out32 (s, SRD | SAB (args[1], args[0], args[2])); break; @@ -1485,9 +1744,29 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, else tcg_out32 (s, SRAD | SAB (args[1], args[0], args[2])); break; + case INDEX_op_rotl_i64: + if (const_args[2]) { + tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); + } else { + tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); + } + break; + case INDEX_op_rotr_i64: + if (const_args[2]) { + tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); + } else { + tcg_out32(s, SUBFIC | TAI(0, args[2], 64)); + tcg_out32(s, RLDCL | SAB(args[1], args[0], 0) | MB64(0)); + } + break; case INDEX_op_mul_i64: - tcg_out32 (s, MULLD | TAB (args[0], args[1], args[2])); + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out32(s, MULLI | TAI(a0, a1, a2)); + } else { + tcg_out32(s, MULLD | TAB(a0, a1, a2)); + } break; case INDEX_op_div_i64: tcg_out32 (s, DIVD | TAB (args[0], args[1], args[2])); @@ -1495,16 +1774,6 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_divu_i64: tcg_out32 (s, DIVDU | TAB (args[0], args[1], args[2])); break; - case INDEX_op_rem_i64: - tcg_out32 (s, DIVD | TAB (0, args[1], args[2])); - tcg_out32 (s, MULLD | TAB (0, 0, args[2])); - tcg_out32 (s, SUBF | TAB (args[0], 0, args[1])); - break; - case INDEX_op_remu_i64: - tcg_out32 (s, DIVDU | TAB (0, args[1], args[2])); - tcg_out32 (s, MULLD | TAB (0, 0, args[2])); - tcg_out32 (s, SUBF | TAB (args[0], 0, args[1])); - break; case INDEX_op_qemu_ld8u: tcg_out_qemu_ld (s, args, 0); @@ -1556,10 +1825,6 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out32 (s, c | RS (args[1]) | RA (args[0])); break; - case INDEX_op_ext32u_i64: - tcg_out_rld (s, RLDICL, args[0], args[1], 0, 32); - break; - case INDEX_op_setcond_i32: tcg_out_setcond (s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], const_args[2]); @@ -1569,6 +1834,172 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, const_args[2]); break; + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + a0 = args[0], a1 = args[1]; + /* a1 = abcd */ + if (a0 != a1) { + /* a0 
= (a1 r<< 24) & 0xff # 000c */ + tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); + /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */ + tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23); + } else { + /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */ + tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23); + /* a0 = (a1 r<< 24) & 0xff # 000c */ + tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); + /* a0 = a0 | r0 # 00dc */ + tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0)); + } + break; + + case INDEX_op_bswap32_i32: + case INDEX_op_bswap32_i64: + /* Stolen from gcc's builtin_bswap32 */ + a1 = args[1]; + a0 = args[0] == a1 ? TCG_REG_R0 : args[0]; + + /* a1 = args[1] # abcd */ + /* a0 = rotate_left (a1, 8) # bcda */ + tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); + /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); + /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); + + if (a0 == TCG_REG_R0) { + tcg_out_mov(s, TCG_TYPE_I64, args[0], a0); + } + break; + + case INDEX_op_bswap64_i64: + a0 = args[0], a1 = args[1], a2 = 0; + if (a0 == a1) { + a0 = 0; + a2 = a1; + } + + /* a1 = # abcd efgh */ + /* a0 = rl32(a1, 8) # 0000 fghe */ + tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); + /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); + /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); + + /* a0 = rl64(a0, 32) # hgfe 0000 */ + /* a2 = rl64(a1, 32) # efgh abcd */ + tcg_out_rld(s, RLDICL, a0, a0, 32, 0); + tcg_out_rld(s, RLDICL, a2, a1, 32, 0); + + /* a0 = dep(a0, rl32(a2, 8), 0xffffffff) # hgfe bcda */ + tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31); + /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */ + tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7); + /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */ + tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23); + + if (a0 == 0) { + tcg_out_mov(s, TCG_TYPE_I64, args[0], a0); + } + break; + + case INDEX_op_deposit_i32: + if (const_args[2]) { + uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; + tcg_out_andi32(s, args[0], args[0], ~mask); + } else { + tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], + 32 - args[3] - args[4], 31 - args[3]); + } + break; + case INDEX_op_deposit_i64: + if (const_args[2]) { + uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; + tcg_out_andi64(s, args[0], args[0], ~mask); + } else { + tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], + 64 - args[3] - args[4]); + } + break; + + case INDEX_op_movcond_i32: + tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], + args[3], args[4], const_args[2]); + break; + case INDEX_op_movcond_i64: + tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], + args[3], args[4], const_args[2]); + break; + + case INDEX_op_add2_i64: + /* Note that the CA bit is defined based on the word size of the + environment. So in 64-bit mode it's always carry-out of bit 63. + The fallback code using deposit works just as well for 32-bit. */ + a0 = args[0], a1 = args[1]; + if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { + a0 = TCG_REG_R0; + } + if (const_args[4]) { + tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); + } else { + tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); + } + if (const_args[5]) { + tcg_out32(s, (args[5] ? 
ADDME : ADDZE) | RT(a1) | RA(args[3])); + } else { + tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); + } + if (a0 != args[0]) { + tcg_out_mov(s, TCG_TYPE_I64, args[0], a0); + } + break; + + case INDEX_op_sub2_i64: + a0 = args[0], a1 = args[1]; + if (a0 == args[5] || (!const_args[4] && a0 == args[4])) { + a0 = TCG_REG_R0; + } + if (const_args[2]) { + tcg_out32(s, SUBFIC | TAI(a0, args[3], args[2])); + } else { + tcg_out32(s, SUBFC | TAB(a0, args[3], args[2])); + } + if (const_args[4]) { + tcg_out32(s, (args[4] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5])); + } else { + tcg_out32(s, SUBFE | TAB(a1, args[5], args[4])); + } + if (a0 != args[0]) { + tcg_out_mov(s, TCG_TYPE_I64, args[0], a0); + } + break; + + case INDEX_op_mulu2_i64: + case INDEX_op_muls2_i64: + { + int oph = (opc == INDEX_op_mulu2_i64 ? MULHDU : MULHD); + TCGReg outl = args[0], outh = args[1]; + a0 = args[2], a1 = args[3]; + + if (outl == a0 || outl == a1) { + if (outh == a0 || outh == a1) { + outl = TCG_REG_R0; + } else { + tcg_out32(s, oph | TAB(outh, a0, a1)); + oph = 0; + } + } + tcg_out32(s, MULLD | TAB(outl, a0, a1)); + if (oph != 0) { + tcg_out32(s, oph | TAB(outh, a0, a1)); + } + if (outl != args[0]) { + tcg_out_mov(s, TCG_TYPE_I64, args[0], outl); + } + } + break; + default: tcg_dump_ops (s); tcg_abort (); @@ -1608,19 +2039,24 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_ld32s_i64, { "r", "r" } }, { INDEX_op_add_i32, { "r", "r", "ri" } }, - { INDEX_op_mul_i32, { "r", "r", "ri" } }, + { INDEX_op_mul_i32, { "r", "r", "rI" } }, { INDEX_op_div_i32, { "r", "r", "r" } }, { INDEX_op_divu_i32, { "r", "r", "r" } }, - { INDEX_op_rem_i32, { "r", "r", "r" } }, - { INDEX_op_remu_i32, { "r", "r", "r" } }, - { INDEX_op_sub_i32, { "r", "r", "ri" } }, + { INDEX_op_sub_i32, { "r", "rI", "ri" } }, { INDEX_op_and_i32, { "r", "r", "ri" } }, { INDEX_op_or_i32, { "r", "r", "ri" } }, { INDEX_op_xor_i32, { "r", "r", "ri" } }, + { INDEX_op_andc_i32, { "r", "r", "ri" } }, + { INDEX_op_orc_i32, { "r", "r", "ri" } }, + { INDEX_op_eqv_i32, { "r", "r", "ri" } }, + { INDEX_op_nand_i32, { "r", "r", "r" } }, + { INDEX_op_nor_i32, { "r", "r", "r" } }, { INDEX_op_shl_i32, { "r", "r", "ri" } }, { INDEX_op_shr_i32, { "r", "r", "ri" } }, { INDEX_op_sar_i32, { "r", "r", "ri" } }, + { INDEX_op_rotl_i32, { "r", "r", "ri" } }, + { INDEX_op_rotr_i32, { "r", "r", "ri" } }, { INDEX_op_brcond_i32, { "r", "ri" } }, { INDEX_op_brcond_i64, { "r", "ri" } }, @@ -1628,21 +2064,26 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_neg_i32, { "r", "r" } }, { INDEX_op_not_i32, { "r", "r" } }, - { INDEX_op_add_i64, { "r", "r", "ri" } }, - { INDEX_op_sub_i64, { "r", "r", "ri" } }, - { INDEX_op_and_i64, { "r", "r", "rZ" } }, - { INDEX_op_or_i64, { "r", "r", "rZ" } }, - { INDEX_op_xor_i64, { "r", "r", "rZ" } }, + { INDEX_op_add_i64, { "r", "r", "rT" } }, + { INDEX_op_sub_i64, { "r", "rI", "rT" } }, + { INDEX_op_and_i64, { "r", "r", "ri" } }, + { INDEX_op_or_i64, { "r", "r", "rU" } }, + { INDEX_op_xor_i64, { "r", "r", "rU" } }, + { INDEX_op_andc_i64, { "r", "r", "ri" } }, + { INDEX_op_orc_i64, { "r", "r", "r" } }, + { INDEX_op_eqv_i64, { "r", "r", "r" } }, + { INDEX_op_nand_i64, { "r", "r", "r" } }, + { INDEX_op_nor_i64, { "r", "r", "r" } }, { INDEX_op_shl_i64, { "r", "r", "ri" } }, { INDEX_op_shr_i64, { "r", "r", "ri" } }, { INDEX_op_sar_i64, { "r", "r", "ri" } }, + { INDEX_op_rotl_i64, { "r", "r", "ri" } }, + { INDEX_op_rotr_i64, { "r", "r", "ri" } }, - { INDEX_op_mul_i64, { "r", "r", "r" } }, + { INDEX_op_mul_i64, { "r", "r", "rI" } }, { INDEX_op_div_i64, 
{ "r", "r", "r" } }, { INDEX_op_divu_i64, { "r", "r", "r" } }, - { INDEX_op_rem_i64, { "r", "r", "r" } }, - { INDEX_op_remu_i64, { "r", "r", "r" } }, { INDEX_op_neg_i64, { "r", "r" } }, { INDEX_op_not_i64, { "r", "r" } }, @@ -1666,16 +2107,38 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_ext8s_i64, { "r", "r" } }, { INDEX_op_ext16s_i64, { "r", "r" } }, { INDEX_op_ext32s_i64, { "r", "r" } }, - { INDEX_op_ext32u_i64, { "r", "r" } }, { INDEX_op_setcond_i32, { "r", "r", "ri" } }, { INDEX_op_setcond_i64, { "r", "r", "ri" } }, + { INDEX_op_movcond_i32, { "r", "r", "ri", "rZ", "rZ" } }, + { INDEX_op_movcond_i64, { "r", "r", "ri", "rZ", "rZ" } }, + + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap16_i64, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + { INDEX_op_bswap32_i64, { "r", "r" } }, + { INDEX_op_bswap64_i64, { "r", "r" } }, + + { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, + + { INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } }, + { INDEX_op_sub2_i64, { "r", "r", "rI", "r", "rZM", "r" } }, + { INDEX_op_muls2_i64, { "r", "r", "r", "r" } }, + { INDEX_op_mulu2_i64, { "r", "r", "r", "r" } }, { -1 }, }; static void tcg_target_init (TCGContext *s) { +#ifdef CONFIG_GETAUXVAL + unsigned long hwcap = getauxval(AT_HWCAP); + if (hwcap & PPC_FEATURE_ARCH_2_06) { + have_isa_2_06 = true; + } +#endif + tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); tcg_regset_set32 (tcg_target_call_clobber_regs, 0, diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h index 97fc5c988..48fc6e2e5 100644 --- a/tcg/ppc64/tcg-target.h +++ b/tcg/ppc64/tcg-target.h @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ +#ifndef TCG_TARGET_PPC64 #define TCG_TARGET_PPC64 1 #define TCG_TARGET_WORDS_BIGENDIAN @@ -66,46 +67,60 @@ typedef enum { #define TCG_TARGET_STACK_ALIGN 16 #define TCG_TARGET_CALL_STACK_OFFSET 48 +/* optional instructions automatically implemented */ +#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */ +#define TCG_TARGET_HAS_ext16u_i32 0 +#define TCG_TARGET_HAS_ext8u_i64 0 +#define TCG_TARGET_HAS_ext16u_i64 0 +#define TCG_TARGET_HAS_ext32u_i64 0 + /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 -#define TCG_TARGET_HAS_rot_i32 0 +#define TCG_TARGET_HAS_rem_i32 0 +#define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_ext8s_i32 1 #define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 0 -#define TCG_TARGET_HAS_ext16u_i32 0 -#define TCG_TARGET_HAS_bswap16_i32 0 -#define TCG_TARGET_HAS_bswap32_i32 0 +#define TCG_TARGET_HAS_bswap16_i32 1 +#define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_neg_i32 1 -#define TCG_TARGET_HAS_andc_i32 0 -#define TCG_TARGET_HAS_orc_i32 0 -#define TCG_TARGET_HAS_eqv_i32 0 -#define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_deposit_i32 0 -#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_andc_i32 1 +#define TCG_TARGET_HAS_orc_i32 1 +#define TCG_TARGET_HAS_eqv_i32 1 +#define TCG_TARGET_HAS_nand_i32 1 +#define TCG_TARGET_HAS_nor_i32 1 +#define TCG_TARGET_HAS_deposit_i32 1 +#define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_div_i64 1 -#define TCG_TARGET_HAS_rot_i64 0 +#define TCG_TARGET_HAS_rem_i64 0 +#define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_ext8s_i64 1 #define TCG_TARGET_HAS_ext16s_i64 1 #define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 0 -#define TCG_TARGET_HAS_ext16u_i64 0 -#define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_bswap16_i64 0 -#define TCG_TARGET_HAS_bswap32_i64 0 -#define TCG_TARGET_HAS_bswap64_i64 0 +#define TCG_TARGET_HAS_bswap16_i64 1 +#define TCG_TARGET_HAS_bswap32_i64 1 +#define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_neg_i64 1 -#define TCG_TARGET_HAS_andc_i64 0 -#define TCG_TARGET_HAS_orc_i64 0 -#define TCG_TARGET_HAS_eqv_i64 0 -#define TCG_TARGET_HAS_nand_i64 0 -#define TCG_TARGET_HAS_nor_i64 0 -#define TCG_TARGET_HAS_deposit_i64 0 -#define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_andc_i64 1 +#define TCG_TARGET_HAS_orc_i64 1 +#define TCG_TARGET_HAS_eqv_i64 1 +#define TCG_TARGET_HAS_nand_i64 1 +#define TCG_TARGET_HAS_nor_i64 1 +#define TCG_TARGET_HAS_deposit_i64 1 +#define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_add2_i64 1 +#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_mulu2_i64 1 +#define TCG_TARGET_HAS_muls2_i64 1 #define TCG_AREG0 TCG_REG_R27 #define TCG_TARGET_EXTEND_ARGS 1 + +#endif diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index fd9286f52..f229f1c34 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -24,16 +24,18 @@ * THE SOFTWARE. */ +/* We only support generating code for 64-bit mode. */ +#if TCG_TARGET_REG_BITS != 64 +#error "unsupported code generation mode" +#endif + /* ??? The translation blocks produced by TCG are generally small enough to be entirely reachable with a 16-bit displacement. Leaving the option for a 32-bit displacement here Just In Case. 
*/ #define USE_LONG_BRANCHES 0 #define TCG_CT_CONST_32 0x0100 -#define TCG_CT_CONST_NEG 0x0200 -#define TCG_CT_CONST_ADDI 0x0400 #define TCG_CT_CONST_MULI 0x0800 -#define TCG_CT_CONST_ANDI 0x1000 #define TCG_CT_CONST_ORI 0x2000 #define TCG_CT_CONST_XORI 0x4000 #define TCG_CT_CONST_CMPI 0x8000 @@ -63,6 +65,7 @@ typedef enum S390Opcode { RIL_AFI = 0xc209, RIL_AGFI = 0xc208, + RIL_ALFI = 0xc20b, RIL_ALGFI = 0xc20a, RIL_BRASL = 0xc005, RIL_BRCL = 0xc004, @@ -84,6 +87,8 @@ typedef enum S390Opcode { RIL_NILF = 0xc00b, RIL_OIHF = 0xc00c, RIL_OILF = 0xc00d, + RIL_SLFI = 0xc205, + RIL_SLGFI = 0xc204, RIL_XIHF = 0xc006, RIL_XILF = 0xc007, @@ -118,8 +123,12 @@ typedef enum S390Opcode { RIE_CLGIJ = 0xec7d, RIE_CLRJ = 0xec77, RIE_CRJ = 0xec76, + RIE_RISBG = 0xec55, RRE_AGR = 0xb908, + RRE_ALGR = 0xb90a, + RRE_ALCR = 0xb998, + RRE_ALCGR = 0xb988, RRE_CGR = 0xb920, RRE_CLGR = 0xb921, RRE_DLGR = 0xb987, @@ -137,14 +146,22 @@ typedef enum S390Opcode { RRE_LRVR = 0xb91f, RRE_LRVGR = 0xb90f, RRE_LTGR = 0xb902, + RRE_MLGR = 0xb986, RRE_MSGR = 0xb90c, RRE_MSR = 0xb252, RRE_NGR = 0xb980, RRE_OGR = 0xb981, RRE_SGR = 0xb909, + RRE_SLGR = 0xb90b, + RRE_SLBR = 0xb999, + RRE_SLBGR = 0xb989, RRE_XGR = 0xb982, + RRF_LOCR = 0xb9f2, + RRF_LOCGR = 0xb9e2, + RR_AR = 0x1a, + RR_ALR = 0x1e, RR_BASR = 0x0d, RR_BCR = 0x07, RR_CLR = 0x15, @@ -156,6 +173,7 @@ typedef enum S390Opcode { RR_NR = 0x14, RR_OR = 0x16, RR_SR = 0x1b, + RR_SLR = 0x1f, RR_XR = 0x17, RSY_RLL = 0xeb1d, @@ -172,6 +190,7 @@ typedef enum S390Opcode { RXY_AY = 0xe35a, RXY_CG = 0xe320, RXY_CY = 0xe359, + RXY_LAY = 0xe371, RXY_LB = 0xe376, RXY_LG = 0xe304, RXY_LGB = 0xe377, @@ -198,6 +217,7 @@ typedef enum S390Opcode { RX_A = 0x5a, RX_C = 0x59, RX_L = 0x58, + RX_LA = 0x41, RX_LH = 0x48, RX_ST = 0x50, RX_STC = 0x42, @@ -252,9 +272,6 @@ static const int tcg_target_call_iarg_regs[] = { static const int tcg_target_call_oarg_regs[] = { TCG_REG_R2, -#if TCG_TARGET_REG_BITS == 32 - TCG_REG_R3 -#endif }; #define S390_CC_EQ 8 @@ -299,7 +316,7 @@ static const uint8_t tcg_cond_to_ltr_cond[] = { #ifdef CONFIG_SOFTMMU -#include "../../softmmu_defs.h" +#include "exec/softmmu_defs.h" /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ @@ -329,6 +346,7 @@ static uint8_t *tb_ret_addr; #define FACILITY_LONG_DISP (1ULL << (63 - 18)) #define FACILITY_EXT_IMM (1ULL << (63 - 21)) #define FACILITY_GEN_INST_EXT (1ULL << (63 - 34)) +#define FACILITY_LOAD_ON_COND (1ULL << (63 - 45)) static uint64_t facilities; @@ -376,6 +394,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) tcg_regset_set32(ct->u.regs, 0, 0xffff); tcg_regset_reset_reg (ct->u.regs, TCG_REG_R2); tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3); + tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4); break; case 'a': /* force R2 for division */ ct->ct |= TCG_CT_REG; @@ -387,21 +406,12 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) tcg_regset_clear(ct->u.regs); tcg_regset_set_reg(ct->u.regs, TCG_REG_R3); break; - case 'N': /* force immediate negate */ - ct->ct |= TCG_CT_CONST_NEG; - break; case 'W': /* force 32-bit ("word") immediate */ ct->ct |= TCG_CT_CONST_32; break; - case 'I': - ct->ct |= TCG_CT_CONST_ADDI; - break; case 'K': ct->ct |= TCG_CT_CONST_MULI; break; - case 'A': - ct->ct |= TCG_CT_CONST_ANDI; - break; case 'O': ct->ct |= TCG_CT_CONST_ORI; break; @@ -420,63 +430,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) return 0; } -/* Immediates to be used with logical AND. 
This is an optimization only, - since a full 64-bit immediate AND can always be performed with 4 sequential - NI[LH][LH] instructions. What we're looking for is immediates that we - can load efficiently, and the immediate load plus the reg-reg AND is - smaller than the sequential NI's. */ - -static int tcg_match_andi(int ct, tcg_target_ulong val) -{ - int i; - - if (facilities & FACILITY_EXT_IMM) { - if (ct & TCG_CT_CONST_32) { - /* All 32-bit ANDs can be performed with 1 48-bit insn. */ - return 1; - } - - /* Zero-extensions. */ - if (val == 0xff || val == 0xffff || val == 0xffffffff) { - return 1; - } - } else { - if (ct & TCG_CT_CONST_32) { - val = (uint32_t)val; - } else if (val == 0xffffffff) { - return 1; - } - } - - /* Try all 32-bit insns that can perform it in one go. */ - for (i = 0; i < 4; i++) { - tcg_target_ulong mask = ~(0xffffull << i*16); - if ((val & mask) == mask) { - return 1; - } - } - - /* Look for 16-bit values performing the mask. These are better - to load with LLI[LH][LH]. */ - for (i = 0; i < 4; i++) { - tcg_target_ulong mask = 0xffffull << i*16; - if ((val & mask) == val) { - return 0; - } - } - - /* Look for 32-bit values performing the 64-bit mask. These - are better to load with LLI[LH]F, or if extended immediates - not available, with a pair of LLI insns. */ - if ((ct & TCG_CT_CONST_32) == 0) { - if (val <= 0xffffffff || (val & 0xffffffff) == 0) { - return 0; - } - } - - return 1; -} - /* Immediates to be used with logical OR. This is an optimization only, since a full 64-bit immediate OR can always be performed with 4 sequential OI[LH][LH] instructions. What we're looking for is immediates that we @@ -571,25 +524,12 @@ static int tcg_target_const_match(tcg_target_long val, } /* Handle the modifiers. */ - if (ct & TCG_CT_CONST_NEG) { - val = -val; - } if (ct & TCG_CT_CONST_32) { val = (int32_t)val; } /* The following are mutually exclusive. */ - if (ct & TCG_CT_CONST_ADDI) { - /* Immediates that may be used with add. If we have the - extended-immediates facility then we have ADD IMMEDIATE - with signed and unsigned 32-bit, otherwise we have only - ADD HALFWORD IMMEDIATE with a signed 16-bit. */ - if (facilities & FACILITY_EXT_IMM) { - return val == (int32_t)val || val == (uint32_t)val; - } else { - return val == (int16_t)val; - } - } else if (ct & TCG_CT_CONST_MULI) { + if (ct & TCG_CT_CONST_MULI) { /* Immediates that may be used with multiply. If we have the general-instruction-extensions, then we have MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we have only @@ -599,8 +539,6 @@ static int tcg_target_const_match(tcg_target_long val, } else { return val == (int16_t)val; } - } else if (ct & TCG_CT_CONST_ANDI) { - return tcg_match_andi(ct, val); } else if (ct & TCG_CT_CONST_ORI) { return tcg_match_ori(ct, val); } else if (ct & TCG_CT_CONST_XORI) { @@ -625,6 +563,12 @@ static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op, tcg_out32(s, (op << 16) | (r1 << 4) | r2); } +static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op, + TCGReg r1, TCGReg r2, int m3) +{ + tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2); +} + static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2) { tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff)); @@ -770,7 +714,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, /* If we get here, both the high and low parts have non-zero bits. */ /* Recurse to load the lower 32-bits. 
*/ - tcg_out_movi(s, TCG_TYPE_I32, ret, sval); + tcg_out_movi(s, TCG_TYPE_I64, ret, uval & 0xffffffff); /* Insert data into the high 32-bits. */ uval = uval >> 31 >> 1; @@ -804,10 +748,11 @@ static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy, tcg_target_long ofs) { if (ofs < -0x80000 || ofs >= 0x80000) { - /* Combine the low 16 bits of the offset with the actual load insn; - the high 48 bits must come from an immediate load. */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs & ~0xffff); - ofs &= 0xffff; + /* Combine the low 20 bits of the offset with the actual load insn; + the high 44 bits must come from an immediate load. */ + tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low); + ofs = low; /* If we were already given an index register, add it in. */ if (index != TCG_REG_NONE) { @@ -866,6 +811,15 @@ static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs) tcg_out_ld(s, type, dest, dest, addr & 0xffff); } +static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src, + int msb, int lsb, int ofs, int z) +{ + /* Format RIE-f */ + tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src); + tcg_out16(s, (msb << 8) | (z << 7) | lsb); + tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff)); +} + static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) { if (facilities & FACILITY_EXT_IMM) { @@ -956,30 +910,37 @@ static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src) tcg_out_insn(s, RRE, LLGFR, dest, src); } -static inline void tgen32_addi(TCGContext *s, TCGReg dest, int32_t val) +/* Accept bit patterns like these: + 0....01....1 + 1....10....0 + 1..10..01..1 + 0..01..10..0 + Copied from gcc sources. */ +static inline bool risbg_mask(uint64_t c) { - if (val == (int16_t)val) { - tcg_out_insn(s, RI, AHI, dest, val); - } else { - tcg_out_insn(s, RIL, AFI, dest, val); - } + uint64_t lsb; + /* We don't change the number of transitions by inverting, + so make sure we start with the LSB zero. */ + if (c & 1) { + c = ~c; + } + /* Reject all zeros or all ones. */ + if (c == 0) { + return false; + } + /* Find the first transition. */ + lsb = c & -c; + /* Invert to look for a second transition. */ + c = ~c; + /* Erase the first transition. */ + c &= -lsb; + /* Find the second transition, if any. */ + lsb = c & -c; + /* Match if all the bits are 1's, or if c is zero. */ + return c == -lsb; } -static inline void tgen64_addi(TCGContext *s, TCGReg dest, int64_t val) -{ - if (val == (int16_t)val) { - tcg_out_insn(s, RI, AGHI, dest, val); - } else if (val == (int32_t)val) { - tcg_out_insn(s, RIL, AGFI, dest, val); - } else if (val == (uint32_t)val) { - tcg_out_insn(s, RIL, ALGFI, dest, val); - } else { - tcg_abort(); - } - -} - -static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val) +static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) { static const S390Opcode ni_insns[4] = { RI_NILL, RI_NILH, RI_NIHL, RI_NIHH @@ -987,63 +948,64 @@ static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val) static const S390Opcode nif_insns[2] = { RIL_NILF, RIL_NIHF }; - + uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull); int i; - /* Look for no-op. */ - if (val == -1) { - return; - } - /* Look for the zero-extensions. 
*/ - if (val == 0xffffffff) { + if ((val & valid) == 0xffffffff) { tgen_ext32u(s, dest, dest); return; } - if (facilities & FACILITY_EXT_IMM) { - if (val == 0xff) { + if ((val & valid) == 0xff) { tgen_ext8u(s, TCG_TYPE_I64, dest, dest); return; } - if (val == 0xffff) { + if ((val & valid) == 0xffff) { tgen_ext16u(s, TCG_TYPE_I64, dest, dest); return; } + } - /* Try all 32-bit insns that can perform it in one go. */ - for (i = 0; i < 4; i++) { - tcg_target_ulong mask = ~(0xffffull << i*16); - if ((val & mask) == mask) { - tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16); - return; - } + /* Try all 32-bit insns that can perform it in one go. */ + for (i = 0; i < 4; i++) { + tcg_target_ulong mask = ~(0xffffull << i*16); + if (((val | ~valid) & mask) == mask) { + tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16); + return; } + } - /* Try all 48-bit insns that can perform it in one go. */ - if (facilities & FACILITY_EXT_IMM) { - for (i = 0; i < 2; i++) { - tcg_target_ulong mask = ~(0xffffffffull << i*32); - if ((val & mask) == mask) { - tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); - return; - } + /* Try all 48-bit insns that can perform it in one go. */ + if (facilities & FACILITY_EXT_IMM) { + for (i = 0; i < 2; i++) { + tcg_target_ulong mask = ~(0xffffffffull << i*32); + if (((val | ~valid) & mask) == mask) { + tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); + return; } } + } + if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) { + int msb, lsb; + if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) { + /* Achieve wraparound by swapping msb and lsb. */ + msb = 63 - ctz64(~val); + lsb = clz64(~val) + 1; + } else { + msb = clz64(val); + lsb = 63 - ctz64(val); + } + tcg_out_risbg(s, dest, dest, msb, lsb, 0, 1); + return; + } - /* Perform the AND via sequential modifications to the high and low - parts. Do this via recursion to handle 16-bit vs 32-bit masks in - each half. */ - tgen64_andi(s, dest, val | 0xffffffff00000000ull); - tgen64_andi(s, dest, val | 0x00000000ffffffffull); + /* Fall back to loading the constant. */ + tcg_out_movi(s, type, TCG_TMP0, val); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, NR, dest, TCG_TMP0); } else { - /* With no extended-immediate facility, just emit the sequence. 
*/ - for (i = 0; i < 4; i++) { - tcg_target_ulong mask = 0xffffull << i*16; - if ((val & mask) != mask) { - tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16); - } - } + tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0); } } @@ -1156,9 +1118,9 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, } static void tgen_setcond(TCGContext *s, TCGType type, TCGCond c, - TCGReg dest, TCGReg r1, TCGArg c2, int c2const) + TCGReg dest, TCGReg c1, TCGArg c2, int c2const) { - int cc = tgen_cmp(s, type, c, r1, c2, c2const); + int cc = tgen_cmp(s, type, c, c1, c2, c2const); /* Emit: r1 = 1; if (cc) goto over; r1 = 0; over: */ tcg_out_movi(s, type, dest, 1); @@ -1166,6 +1128,36 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond c, tcg_out_movi(s, type, dest, 0); } +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, + TCGReg c1, TCGArg c2, int c2const, TCGReg r3) +{ + int cc; + if (facilities & FACILITY_LOAD_ON_COND) { + cc = tgen_cmp(s, type, c, c1, c2, c2const); + tcg_out_insn(s, RRF, LOCGR, dest, r3, cc); + } else { + c = tcg_invert_cond(c); + cc = tgen_cmp(s, type, c, c1, c2, c2const); + + /* Emit: if (cc) goto over; dest = r3; over: */ + tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); + tcg_out_insn(s, RRE, LGR, dest, r3); + } +} + +bool tcg_target_deposit_valid(int ofs, int len) +{ + return (facilities & FACILITY_GEN_INST_EXT) != 0; +} + +static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src, + int ofs, int len) +{ + int lsb = (63 - ofs); + int msb = lsb - (len - 1); + tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0); +} + static void tgen_gotoi(TCGContext *s, int cc, tcg_target_long dest) { tcg_target_long off = (dest - (tcg_target_long)s->code_ptr) >> 1; @@ -1239,7 +1231,7 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, int cc; if (facilities & FACILITY_GEN_INST_EXT) { - bool is_unsigned = (c > TCG_COND_GT); + bool is_unsigned = is_unsigned_cond(c); bool in_range; S390Opcode opc; @@ -1405,37 +1397,29 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data, } #if defined(CONFIG_SOFTMMU) -static void tgen64_andi_tmp(TCGContext *s, TCGReg dest, tcg_target_ulong val) -{ - if (tcg_match_andi(0, val)) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val); - tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0); - } else { - tgen64_andi(s, dest, val); - } -} - -static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg, - TCGReg addr_reg, int mem_index, int opc, - uint16_t **label2_ptr_p, int is_store) +static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg, + TCGReg addr_reg, int mem_index, int opc, + uint16_t **label2_ptr_p, int is_store) { - const TCGReg arg0 = TCG_REG_R2; - const TCGReg arg1 = TCG_REG_R3; + const TCGReg arg0 = tcg_target_call_iarg_regs[0]; + const TCGReg arg1 = tcg_target_call_iarg_regs[1]; + const TCGReg arg2 = tcg_target_call_iarg_regs[2]; + const TCGReg arg3 = tcg_target_call_iarg_regs[3]; int s_bits = opc & 3; uint16_t *label1_ptr; tcg_target_long ofs; if (TARGET_LONG_BITS == 32) { - tgen_ext32u(s, arg0, addr_reg); + tgen_ext32u(s, arg1, addr_reg); } else { - tcg_out_mov(s, TCG_TYPE_I64, arg0, addr_reg); + tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg); } - tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, TCG_REG_NONE, + tcg_out_sh64(s, RSY_SRLG, arg2, addr_reg, TCG_REG_NONE, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tgen64_andi_tmp(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - tgen64_andi_tmp(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + tgen_andi(s, TCG_TYPE_I64, arg1, 
TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + tgen_andi(s, TCG_TYPE_I64, arg2, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); if (is_store) { ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); @@ -1445,15 +1429,15 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg, assert(ofs < 0x80000); if (TARGET_LONG_BITS == 32) { - tcg_out_mem(s, RX_C, RXY_CY, arg0, arg1, TCG_AREG0, ofs); + tcg_out_mem(s, RX_C, RXY_CY, arg1, arg2, TCG_AREG0, ofs); } else { - tcg_out_mem(s, 0, RXY_CG, arg0, arg1, TCG_AREG0, ofs); + tcg_out_mem(s, 0, RXY_CG, arg1, arg2, TCG_AREG0, ofs); } if (TARGET_LONG_BITS == 32) { - tgen_ext32u(s, arg0, addr_reg); + tgen_ext32u(s, arg1, addr_reg); } else { - tcg_out_mov(s, TCG_TYPE_I64, arg0, addr_reg); + tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg); } label1_ptr = (uint16_t*)s->code_ptr; @@ -1467,56 +1451,42 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg, for the calling convention. */ switch (opc) { case LD_UINT8: - tgen_ext8u(s, TCG_TYPE_I64, arg1, data_reg); + tgen_ext8u(s, TCG_TYPE_I64, arg2, data_reg); break; case LD_UINT16: - tgen_ext16u(s, TCG_TYPE_I64, arg1, data_reg); + tgen_ext16u(s, TCG_TYPE_I64, arg2, data_reg); break; case LD_UINT32: - tgen_ext32u(s, arg1, data_reg); + tgen_ext32u(s, arg2, data_reg); break; case LD_UINT64: - tcg_out_mov(s, TCG_TYPE_I64, arg1, data_reg); + tcg_out_mov(s, TCG_TYPE_I64, arg2, data_reg); break; default: tcg_abort(); } - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index); - /* XXX/FIXME: suboptimal */ - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], - tcg_target_call_iarg_regs[2]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], - tcg_target_call_iarg_regs[1]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], - tcg_target_call_iarg_regs[0]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], - TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, arg3, mem_index); + tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0); tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]); } else { - tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index); - /* XXX/FIXME: suboptimal */ - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], - tcg_target_call_iarg_regs[1]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], - tcg_target_call_iarg_regs[0]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], - TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index); + tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0); tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]); /* sign extension */ switch (opc) { case LD_INT8: - tgen_ext8s(s, TCG_TYPE_I64, data_reg, arg0); + tgen_ext8s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); break; case LD_INT16: - tgen_ext16s(s, TCG_TYPE_I64, data_reg, arg0); + tgen_ext16s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); break; case LD_INT32: - tgen_ext32s(s, data_reg, arg0); + tgen_ext32s(s, data_reg, TCG_REG_R2); break; default: /* unsigned -> just copy */ - tcg_out_mov(s, TCG_TYPE_I64, data_reg, arg0); + tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); break; } } @@ -1533,7 +1503,9 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg, ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend); assert(ofs < 0x80000); - tcg_out_mem(s, 0, RXY_AG, arg0, arg1, TCG_AREG0, ofs); + tcg_out_mem(s, 0, RXY_AG, arg1, arg2, TCG_AREG0, ofs); + + return arg1; } static void tcg_finish_qemu_ldst(TCGContext* s, uint16_t *label2_ptr) @@ -1579,10 +1551,10 @@ static void tcg_out_qemu_ld(TCGContext* s, const TCGArg* args, int opc) #if 
defined(CONFIG_SOFTMMU) mem_index = *args; - tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index, - opc, &label2_ptr, 0); + addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index, + opc, &label2_ptr, 0); - tcg_out_qemu_ld_direct(s, opc, data_reg, TCG_REG_R2, TCG_REG_NONE, 0); + tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0); tcg_finish_qemu_ldst(s, label2_ptr); #else @@ -1608,10 +1580,10 @@ static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, int opc) #if defined(CONFIG_SOFTMMU) mem_index = *args; - tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index, - opc, &label2_ptr, 1); + addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index, + opc, &label2_ptr, 1); - tcg_out_qemu_st_direct(s, opc, data_reg, TCG_REG_R2, TCG_REG_NONE, 0); + tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0); tcg_finish_qemu_ldst(s, label2_ptr); #else @@ -1620,19 +1592,15 @@ static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, int opc) #endif } -#if TCG_TARGET_REG_BITS == 64 # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ case glue(glue(INDEX_op_,x),_i64) -#else -# define OP_32_64(x) \ - case glue(glue(INDEX_op_,x),_i32) -#endif static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { S390Opcode op; + TCGArg a0, a1, a2; switch (opc) { case INDEX_op_exit_tb: @@ -1708,23 +1676,38 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_add_i32: + a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; if (const_args[2]) { - tgen32_addi(s, args[0], args[2]); + do_addi_32: + if (a0 == a1) { + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, AHI, a0, a2); + break; + } + if (facilities & FACILITY_EXT_IMM) { + tcg_out_insn(s, RIL, AFI, a0, a2); + break; + } + } + tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, AR, a0, a2); } else { - tcg_out_insn(s, RR, AR, args[0], args[2]); + tcg_out_insn(s, RX, LA, a0, a1, a2, 0); } break; case INDEX_op_sub_i32: + a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; if (const_args[2]) { - tgen32_addi(s, args[0], -args[2]); - } else { - tcg_out_insn(s, RR, SR, args[0], args[2]); + a2 = -a2; + goto do_addi_32; } + tcg_out_insn(s, RR, SR, args[0], args[2]); break; case INDEX_op_and_i32: if (const_args[2]) { - tgen64_andi(s, args[0], args[2] | 0xffffffff00000000ull); + tgen_andi(s, TCG_TYPE_I32, args[0], args[2]); } else { tcg_out_insn(s, RR, NR, args[0], args[2]); } @@ -1824,6 +1807,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_insn(s, RRE, LRVR, args[0], args[1]); break; + case INDEX_op_add2_i32: + /* ??? Make use of ALFI. */ + tcg_out_insn(s, RR, ALR, args[0], args[4]); + tcg_out_insn(s, RRE, ALCR, args[1], args[5]); + break; + case INDEX_op_sub2_i32: + /* ??? Make use of SLFI. 
*/ + tcg_out_insn(s, RR, SLR, args[0], args[4]); + tcg_out_insn(s, RRE, SLBR, args[1], args[5]); + break; + case INDEX_op_br: tgen_branch(s, S390_CC_ALWAYS, args[0]); break; @@ -1836,6 +1830,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], const_args[2]); break; + case INDEX_op_movcond_i32: + tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], + args[2], const_args[2], args[3]); + break; case INDEX_op_qemu_ld8u: tcg_out_qemu_ld(s, args, LD_UINT8); @@ -1870,7 +1868,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_qemu_st(s, args, LD_UINT64); break; -#if TCG_TARGET_REG_BITS == 64 case INDEX_op_mov_i64: tcg_out_mov(s, TCG_TYPE_I64, args[0], args[1]); break; @@ -1899,15 +1896,39 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_add_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - tgen64_addi(s, args[0], args[2]); + do_addi_64: + if (a0 == a1) { + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, AGHI, a0, a2); + break; + } + if (facilities & FACILITY_EXT_IMM) { + if (a2 == (int32_t)a2) { + tcg_out_insn(s, RIL, AGFI, a0, a2); + break; + } else if (a2 == (uint32_t)a2) { + tcg_out_insn(s, RIL, ALGFI, a0, a2); + break; + } else if (-a2 == (uint32_t)-a2) { + tcg_out_insn(s, RIL, SLGFI, a0, -a2); + break; + } + } + } + tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RRE, AGR, a0, a2); } else { - tcg_out_insn(s, RRE, AGR, args[0], args[2]); + tcg_out_insn(s, RX, LA, a0, a1, a2, 0); } break; case INDEX_op_sub_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - tgen64_addi(s, args[0], -args[2]); + a2 = -a2; + goto do_addi_64; } else { tcg_out_insn(s, RRE, SGR, args[0], args[2]); } @@ -1915,7 +1936,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_and_i64: if (const_args[2]) { - tgen64_andi(s, args[0], args[2]); + tgen_andi(s, TCG_TYPE_I64, args[0], args[2]); } else { tcg_out_insn(s, RRE, NGR, args[0], args[2]); } @@ -1964,6 +1985,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_divu2_i64: tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]); break; + case INDEX_op_mulu2_i64: + tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]); + break; case INDEX_op_shl_i64: op = RSY_SLLG; @@ -2020,6 +2044,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tgen_ext32u(s, args[0], args[1]); break; + case INDEX_op_add2_i64: + /* ??? Make use of ALGFI and SLGFI. */ + tcg_out_insn(s, RRE, ALGR, args[0], args[4]); + tcg_out_insn(s, RRE, ALCGR, args[1], args[5]); + break; + case INDEX_op_sub2_i64: + /* ??? Make use of ALGFI and SLGFI. 
*/ + tcg_out_insn(s, RRE, SLGR, args[0], args[4]); + tcg_out_insn(s, RRE, SLBGR, args[1], args[5]); + break; + case INDEX_op_brcond_i64: tgen_brcond(s, TCG_TYPE_I64, args[2], args[0], args[1], const_args[1], args[3]); @@ -2028,6 +2063,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], const_args[2]); break; + case INDEX_op_movcond_i64: + tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], + args[2], const_args[2], args[3]); + break; case INDEX_op_qemu_ld32u: tcg_out_qemu_ld(s, args, LD_UINT32); @@ -2035,7 +2074,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_qemu_ld32s: tcg_out_qemu_ld(s, args, LD_INT32); break; -#endif /* TCG_TARGET_REG_BITS == 64 */ + + OP_32_64(deposit): + tgen_deposit(s, args[0], args[2], args[3], args[4]); + break; default: fprintf(stderr,"unimplemented opc 0x%x\n",opc); @@ -2061,14 +2103,14 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_st16_i32, { "r", "r" } }, { INDEX_op_st_i32, { "r", "r" } }, - { INDEX_op_add_i32, { "r", "0", "rWI" } }, - { INDEX_op_sub_i32, { "r", "0", "rWNI" } }, + { INDEX_op_add_i32, { "r", "r", "ri" } }, + { INDEX_op_sub_i32, { "r", "0", "ri" } }, { INDEX_op_mul_i32, { "r", "0", "rK" } }, { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } }, { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } }, - { INDEX_op_and_i32, { "r", "0", "rWA" } }, + { INDEX_op_and_i32, { "r", "0", "ri" } }, { INDEX_op_or_i32, { "r", "0", "rWO" } }, { INDEX_op_xor_i32, { "r", "0", "rWX" } }, @@ -2089,8 +2131,13 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_bswap16_i32, { "r", "r" } }, { INDEX_op_bswap32_i32, { "r", "r" } }, + { INDEX_op_add2_i32, { "r", "r", "0", "1", "r", "r" } }, + { INDEX_op_sub2_i32, { "r", "r", "0", "1", "r", "r" } }, + { INDEX_op_brcond_i32, { "r", "rWC" } }, { INDEX_op_setcond_i32, { "r", "r", "rWC" } }, + { INDEX_op_movcond_i32, { "r", "r", "rWC", "r", "0" } }, + { INDEX_op_deposit_i32, { "r", "0", "r" } }, { INDEX_op_qemu_ld8u, { "r", "L" } }, { INDEX_op_qemu_ld8s, { "r", "L" } }, @@ -2104,7 +2151,6 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_qemu_st32, { "L", "L" } }, { INDEX_op_qemu_st64, { "L", "L" } }, -#if defined(__s390x__) { INDEX_op_mov_i64, { "r", "r" } }, { INDEX_op_movi_i64, { "r" } }, @@ -2121,14 +2167,15 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_st32_i64, { "r", "r" } }, { INDEX_op_st_i64, { "r", "r" } }, - { INDEX_op_add_i64, { "r", "0", "rI" } }, - { INDEX_op_sub_i64, { "r", "0", "rNI" } }, + { INDEX_op_add_i64, { "r", "r", "ri" } }, + { INDEX_op_sub_i64, { "r", "0", "ri" } }, { INDEX_op_mul_i64, { "r", "0", "rK" } }, { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } }, { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } }, + { INDEX_op_mulu2_i64, { "b", "a", "0", "r" } }, - { INDEX_op_and_i64, { "r", "0", "rA" } }, + { INDEX_op_and_i64, { "r", "0", "ri" } }, { INDEX_op_or_i64, { "r", "0", "rO" } }, { INDEX_op_xor_i64, { "r", "0", "rX" } }, @@ -2152,12 +2199,16 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_bswap32_i64, { "r", "r" } }, { INDEX_op_bswap64_i64, { "r", "r" } }, + { INDEX_op_add2_i64, { "r", "r", "0", "1", "r", "r" } }, + { INDEX_op_sub2_i64, { "r", "r", "0", "1", "r", "r" } }, + { INDEX_op_brcond_i64, { "r", "rC" } }, { INDEX_op_setcond_i64, { "r", "r", "rC" } }, + { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } }, + { INDEX_op_deposit_i64, { "r", "0", "r" } }, { INDEX_op_qemu_ld32u, { "r", "L" } }, { INDEX_op_qemu_ld32s, 
{ "r", "L" } }, -#endif { -1 }, }; @@ -2273,13 +2324,6 @@ static void query_facilities(void) static void tcg_target_init(TCGContext *s) { -#if !defined(CONFIG_USER_ONLY) - /* fail safe */ - if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) { - tcg_abort(); - } -#endif - query_facilities(); tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); @@ -2302,17 +2346,24 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); tcg_add_target_add_op_defs(s390_op_defs); - tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf), - CPU_TEMP_BUF_NLONGS * sizeof(long)); } static void tcg_target_qemu_prologue(TCGContext *s) { + tcg_target_long frame_size; + /* stmg %r6,%r15,48(%r15) (save registers) */ tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48); - /* aghi %r15,-160 (stack frame) */ - tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -160); + /* aghi %r15,-frame_size */ + frame_size = TCG_TARGET_CALL_STACK_OFFSET; + frame_size += TCG_STATIC_CALL_ARGS_SIZE; + frame_size += CPU_TEMP_BUF_NLONGS * sizeof(long); + tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -frame_size); + + tcg_set_frame(s, TCG_REG_CALL_STACK, + TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET, + CPU_TEMP_BUF_NLONGS * sizeof(long)); if (GUEST_BASE >= 0x80000) { tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); @@ -2325,8 +2376,9 @@ static void tcg_target_qemu_prologue(TCGContext *s) tb_ret_addr = s->code_ptr; - /* lmg %r6,%r15,208(%r15) (restore registers) */ - tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 208); + /* lmg %r6,%r15,fs+48(%r15) (restore registers) */ + tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, + frame_size + 48); /* br %r14 (return) */ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14); diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index a0181aef7..42ca36c0e 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ +#ifndef TCG_TARGET_S390 #define TCG_TARGET_S390 1 #define TCG_TARGET_WORDS_BIGENDIAN @@ -62,10 +63,13 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_deposit_i32 0 -#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_deposit_i32 1 +#define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_muls2_i32 0 -#if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_ext8s_i64 1 @@ -84,14 +88,21 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 -#define TCG_TARGET_HAS_deposit_i64 0 -#define TCG_TARGET_HAS_movcond_i64 0 -#endif +#define TCG_TARGET_HAS_deposit_i64 1 +#define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_add2_i64 1 +#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_mulu2_i64 1 +#define TCG_TARGET_HAS_muls2_i64 0 + +extern bool tcg_target_deposit_valid(int ofs, int len); +#define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid +#define TCG_TARGET_deposit_i64_valid tcg_target_deposit_valid /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_R15 #define TCG_TARGET_STACK_ALIGN 8 -#define TCG_TARGET_CALL_STACK_OFFSET 0 +#define TCG_TARGET_CALL_STACK_OFFSET 160 #define TCG_TARGET_EXTEND_ARGS 1 @@ -103,3 +114,5 @@ static inline void flush_icache_range(tcg_target_ulong start, tcg_target_ulong stop) { } + +#endif diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index f14664787..5bfd29c3b 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -776,6 +776,7 @@ static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret, break; } } +#endif static void tcg_out_addsub2(TCGContext *s, TCGArg rl, TCGArg rh, TCGArg al, TCGArg ah, TCGArg bl, int blconst, @@ -792,7 +793,6 @@ static void tcg_out_addsub2(TCGContext *s, TCGArg rl, TCGArg rh, tcg_out_arithc(s, rh, ah, bh, bhconst, oph); tcg_out_mov(s, TCG_TYPE_I32, rl, tmp); } -#endif /* Generate global QEMU prologue and epilogue code */ static void tcg_target_qemu_prologue(TCGContext *s) @@ -831,7 +831,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) #if defined(CONFIG_SOFTMMU) -#include "../../softmmu_defs.h" +#include "exec/softmmu_defs.h" /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ @@ -1327,6 +1327,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, args[3], const_args[3], args[4], const_args[4]); break; +#endif + case INDEX_op_add2_i32: tcg_out_addsub2(s, args[0], args[1], args[2], args[3], args[4], const_args[4], args[5], const_args[5], @@ -1342,7 +1344,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, ARITH_UMUL); tcg_out_rdy(s, args[1]); break; -#endif case INDEX_op_qemu_ld8u: tcg_out_qemu_ld(s, args, 0); @@ -1511,10 +1512,11 @@ static const TCGTargetOpDef sparc_op_defs[] = { #if TCG_TARGET_REG_BITS == 32 { INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } }, { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rJ", "rJ" } }, +#endif + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } }, -#endif #if TCG_TARGET_REG_BITS == 64 { INDEX_op_mov_i64, { "r", "r" } }, @@ -1645,28 +1647,11 @@ static 
void tcg_target_init(TCGContext *s) #endif typedef struct { - uint32_t len __attribute__((aligned((sizeof(void *))))); - uint32_t id; - uint8_t version; - char augmentation[1]; - uint8_t code_align; - uint8_t data_align; - uint8_t return_column; -} DebugFrameCIE; - -typedef struct { - uint32_t len __attribute__((aligned((sizeof(void *))))); - uint32_t cie_offset; - tcg_target_long func_start __attribute__((packed)); - tcg_target_long func_len __attribute__((packed)); - uint8_t def_cfa[TCG_TARGET_REG_BITS == 64 ? 4 : 2]; - uint8_t win_save; - uint8_t ret_save[3]; -} DebugFrameFDE; - -typedef struct { DebugFrameCIE cie; - DebugFrameFDE fde; + DebugFrameFDEHeader fde; + uint8_t fde_def_cfa[TCG_TARGET_REG_BITS == 64 ? 4 : 2]; + uint8_t fde_win_save; + uint8_t fde_ret_save[3]; } DebugFrame; static DebugFrame debug_frame = { @@ -1677,8 +1662,10 @@ static DebugFrame debug_frame = { .cie.data_align = -sizeof(void *) & 0x7f, .cie.return_column = 15, /* o7 */ - .fde.len = sizeof(DebugFrameFDE)-4, /* length after .len member */ - .fde.def_cfa = { + /* Total FDE size does not include the "len" member. */ + .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + + .fde_def_cfa = { #if TCG_TARGET_REG_BITS == 64 12, 30, /* DW_CFA_def_cfa i6, 2047 */ (2047 & 0x7f) | 0x80, (2047 >> 7) @@ -1686,8 +1673,8 @@ static DebugFrame debug_frame = { 13, 30 /* DW_CFA_def_cfa_register i6 */ #endif }, - .fde.win_save = 0x2d, /* DW_CFA_GNU_window_save */ - .fde.ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */ + .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */ + .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */ }; void tcg_register_jit(void *buf, size_t buf_size) diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index 0e7d398a4..dab52d717 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ +#ifndef TCG_TARGET_SPARC #define TCG_TARGET_SPARC 1 #define TCG_TARGET_WORDS_BIGENDIAN @@ -85,6 +86,7 @@ typedef enum { /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 +#define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_rot_i32 0 #define TCG_TARGET_HAS_ext8s_i32 0 #define TCG_TARGET_HAS_ext16s_i32 0 @@ -101,9 +103,14 @@ typedef enum { #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 1 +#define TCG_TARGET_HAS_muls2_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div_i64 1 +#define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_rot_i64 0 #define TCG_TARGET_HAS_ext8s_i64 0 #define TCG_TARGET_HAS_ext16s_i64 0 @@ -123,6 +130,10 @@ typedef enum { #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 #endif #define TCG_AREG0 TCG_REG_I0 @@ -138,3 +149,5 @@ static inline void flush_icache_range(tcg_target_ulong start, for (; p < stop; p += 8) __asm__ __volatile__("flush\t%0" : : "r" (p)); } + +#endif diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 0b3cb0be3..364964d8d 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -731,8 +731,14 @@ static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_div_i32) { + if (TCG_TARGET_HAS_rem_i32) { tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2); + } else if (TCG_TARGET_HAS_div_i32) { + TCGv_i32 t0 = tcg_temp_new_i32(); + tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2); + tcg_gen_mul_i32(t0, t0, arg2); + tcg_gen_sub_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); } else if (TCG_TARGET_HAS_div2_i32) { TCGv_i32 t0 = tcg_temp_new_i32(); tcg_gen_sari_i32(t0, arg1, 31); @@ -769,8 +775,14 @@ static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_div_i32) { + if (TCG_TARGET_HAS_rem_i32) { tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2); + } else if (TCG_TARGET_HAS_div_i32) { + TCGv_i32 t0 = tcg_temp_new_i32(); + tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2); + tcg_gen_mul_i32(t0, t0, arg2); + tcg_gen_sub_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); } else if (TCG_TARGET_HAS_div2_i32) { TCGv_i32 t0 = tcg_temp_new_i32(); tcg_gen_movi_i32(t0, 0); @@ -1361,8 +1373,14 @@ static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_div_i64) { + if (TCG_TARGET_HAS_rem_i64) { tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2); + } else if (TCG_TARGET_HAS_div_i64) { + TCGv_i64 t0 = tcg_temp_new_i64(); + tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2); + tcg_gen_mul_i64(t0, t0, arg2); + tcg_gen_sub_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); } else if (TCG_TARGET_HAS_div2_i64) { TCGv_i64 t0 = tcg_temp_new_i64(); tcg_gen_sari_i64(t0, arg1, 63); @@ -1399,8 +1417,14 @@ static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_div_i64) { + if (TCG_TARGET_HAS_rem_i64) { tcg_gen_op3_i64(INDEX_op_remu_i64, 
ret, arg1, arg2); + } else if (TCG_TARGET_HAS_div_i64) { + TCGv_i64 t0 = tcg_temp_new_i64(); + tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2); + tcg_gen_mul_i64(t0, t0, arg2); + tcg_gen_sub_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); } else if (TCG_TARGET_HAS_div2_i64) { TCGv_i64 t0 = tcg_temp_new_i64(); tcg_gen_movi_i64(t0, 0); @@ -2188,9 +2212,9 @@ static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, #if TCG_TARGET_REG_BITS == 32 if (ofs >= 32) { - tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_LOW(arg2), ofs - 32, len); + tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); return; } if (ofs + len <= 32) { @@ -2246,6 +2270,26 @@ static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low, tcg_gen_deposit_i64(dest, low, high, 32, 32); } +static inline void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg) +{ +#if TCG_TARGET_REG_BITS == 32 + tcg_gen_mov_i32(lo, TCGV_LOW(arg)); + tcg_gen_mov_i32(hi, TCGV_HIGH(arg)); +#else + TCGv_i64 t0 = tcg_temp_new_i64(); + tcg_gen_trunc_i64_i32(lo, arg); + tcg_gen_shri_i64(t0, arg, 32); + tcg_gen_trunc_i64_i32(hi, t0); + tcg_temp_free_i64(t0); +#endif +} + +static inline void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg) +{ + tcg_gen_ext32u_i64(lo, arg); + tcg_gen_shri_i64(hi, arg, 32); +} + static inline void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2) @@ -2312,6 +2356,204 @@ static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, #endif } +static inline void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, + TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh) +{ + if (TCG_TARGET_HAS_add2_i32) { + tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh); + /* Allow the optimizer room to replace add2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_concat_i32_i64(t0, al, ah); + tcg_gen_concat_i32_i64(t1, bl, bh); + tcg_gen_add_i64(t0, t0, t1); + tcg_gen_extr_i64_i32(rl, rh, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + +static inline void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, + TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh) +{ + if (TCG_TARGET_HAS_sub2_i32) { + tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh); + /* Allow the optimizer room to replace sub2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_concat_i32_i64(t0, al, ah); + tcg_gen_concat_i32_i64(t1, bl, bh); + tcg_gen_sub_i64(t0, t0, t1); + tcg_gen_extr_i64_i32(rl, rh, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + +static inline void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, + TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCG_TARGET_HAS_mulu2_i32) { + tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); + /* Allow the optimizer room to replace mulu2 with two moves. 
*/ + tcg_gen_op0(INDEX_op_nop); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(t0, arg1); + tcg_gen_extu_i32_i64(t1, arg2); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_extr_i64_i32(rl, rh, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + +static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, + TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCG_TARGET_HAS_muls2_i32) { + tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); + /* Allow the optimizer room to replace muls2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_mulu2_i32) { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); + tcg_gen_op4_i32(INDEX_op_mulu2_i32, t0, t1, arg1, arg2); + /* Allow the optimizer room to replace mulu2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + /* Adjust for negative inputs. */ + tcg_gen_sari_i32(t2, arg1, 31); + tcg_gen_sari_i32(t3, arg2, 31); + tcg_gen_and_i32(t2, t2, arg2); + tcg_gen_and_i32(t3, t3, arg1); + tcg_gen_sub_i32(rh, t1, t2); + tcg_gen_sub_i32(rh, rh, t3); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_ext_i32_i64(t0, arg1); + tcg_gen_ext_i32_i64(t1, arg2); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_extr_i64_i32(rl, rh, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + +static inline void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, + TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) +{ + if (TCG_TARGET_HAS_add2_i64) { + tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh); + /* Allow the optimizer room to replace add2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_add_i64(t0, al, bl); + tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al); + tcg_gen_add_i64(rh, ah, bh); + tcg_gen_add_i64(rh, rh, t1); + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + +static inline void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, + TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) +{ + if (TCG_TARGET_HAS_sub2_i64) { + tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh); + /* Allow the optimizer room to replace sub2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_sub_i64(t0, al, bl); + tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl); + tcg_gen_sub_i64(rh, ah, bh); + tcg_gen_sub_i64(rh, rh, t1); + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + +static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, + TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (TCG_TARGET_HAS_mulu2_i64) { + tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); + /* Allow the optimizer room to replace mulu2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else if (TCG_TARGET_HAS_mulu2_i64) { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + TCGv_i64 t3 = tcg_temp_new_i64(); + tcg_gen_op4_i64(INDEX_op_mulu2_i64, t0, t1, arg1, arg2); + /* Allow the optimizer room to replace mulu2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + /* Adjust for negative inputs. 
*/ + tcg_gen_sari_i64(t2, arg1, 63); + tcg_gen_sari_i64(t3, arg2, 63); + tcg_gen_and_i64(t2, t2, arg2); + tcg_gen_and_i64(t3, t3, arg1); + tcg_gen_sub_i64(rh, t1, t2); + tcg_gen_sub_i64(rh, rh, t3); + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + tcg_temp_free_i64(t3); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + int sizemask = 0; + /* Return value and both arguments are 64-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 1, 0); + sizemask |= tcg_gen_sizemask(1, 1, 0); + sizemask |= tcg_gen_sizemask(2, 1, 0); + tcg_gen_mul_i64(t0, arg1, arg2); + tcg_gen_helper64(tcg_helper_muluh_i64, sizemask, rh, arg1, arg2); + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + } +} + +static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, + TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (TCG_TARGET_HAS_muls2_i64) { + tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); + /* Allow the optimizer room to replace muls2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + tcg_gen_mul_i64(t0, arg1, arg2); + tcg_gen_helper64(tcg_helper_mulsh_i64, sizemask, rh, arg1, arg2); + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + } +} + /***************************************/ /* QEMU specific operations. Their type depend on the QEMU CPU type. */ @@ -2329,6 +2571,7 @@ static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, #define tcg_gen_qemu_ldst_op tcg_gen_op3i_i32 #define tcg_gen_qemu_ldst_op_i64 tcg_gen_qemu_ldst_op_i64_i32 #define TCGV_UNUSED(x) TCGV_UNUSED_I32(x) +#define TCGV_IS_UNUSED(x) TCGV_IS_UNUSED_I32(x) #define TCGV_EQUAL(a, b) TCGV_EQUAL_I32(a, b) #else #define TCGv TCGv_i64 @@ -2340,6 +2583,7 @@ static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, #define tcg_gen_qemu_ldst_op tcg_gen_op3i_i64 #define tcg_gen_qemu_ldst_op_i64 tcg_gen_qemu_ldst_op_i64_i64 #define TCGV_UNUSED(x) TCGV_UNUSED_I64(x) +#define TCGV_IS_UNUSED(x) TCGV_IS_UNUSED_I64(x) #define TCGV_EQUAL(a, b) TCGV_EQUAL_I64(a, b) #endif @@ -2623,6 +2867,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_bswap32_tl tcg_gen_bswap32_i64 #define tcg_gen_bswap64_tl tcg_gen_bswap64_i64 #define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64 +#define tcg_gen_extr_i64_tl tcg_gen_extr32_i64 #define tcg_gen_andc_tl tcg_gen_andc_i64 #define tcg_gen_eqv_tl tcg_gen_eqv_i64 #define tcg_gen_nand_tl tcg_gen_nand_i64 @@ -2636,6 +2881,10 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_const_tl tcg_const_i64 #define tcg_const_local_tl tcg_const_local_i64 #define tcg_gen_movcond_tl tcg_gen_movcond_i64 +#define tcg_gen_add2_tl tcg_gen_add2_i64 +#define tcg_gen_sub2_tl tcg_gen_sub2_i64 +#define tcg_gen_mulu2_tl tcg_gen_mulu2_i64 +#define tcg_gen_muls2_tl tcg_gen_muls2_i64 #else #define tcg_gen_movi_tl tcg_gen_movi_i32 #define tcg_gen_mov_tl tcg_gen_mov_i32 @@ -2695,6 +2944,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i32 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64 +#define tcg_gen_extr_tl_i64 tcg_gen_extr_i32_i64 #define tcg_gen_andc_tl tcg_gen_andc_i32 #define tcg_gen_eqv_tl tcg_gen_eqv_i32 #define 
tcg_gen_nand_tl tcg_gen_nand_i32 @@ -2708,6 +2958,10 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_const_tl tcg_const_i32 #define tcg_const_local_tl tcg_const_local_i32 #define tcg_gen_movcond_tl tcg_gen_movcond_i32 +#define tcg_gen_add2_tl tcg_gen_add2_i32 +#define tcg_gen_sub2_tl tcg_gen_sub2_i32 +#define tcg_gen_mulu2_tl tcg_gen_mulu2_i32 +#define tcg_gen_muls2_tl tcg_gen_muls2_i32 #endif #if TCG_TARGET_REG_BITS == 32 diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 965106341..a8af5b96a 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -27,20 +27,24 @@ */ /* predefined ops */ -DEF(end, 0, 0, 0, 0) /* must be kept first */ -DEF(nop, 0, 0, 0, 0) -DEF(nop1, 0, 0, 1, 0) -DEF(nop2, 0, 0, 2, 0) -DEF(nop3, 0, 0, 3, 0) -DEF(nopn, 0, 0, 1, 0) /* variable number of parameters */ +DEF(end, 0, 0, 0, TCG_OPF_NOT_PRESENT) /* must be kept first */ +DEF(nop, 0, 0, 0, TCG_OPF_NOT_PRESENT) +DEF(nop1, 0, 0, 1, TCG_OPF_NOT_PRESENT) +DEF(nop2, 0, 0, 2, TCG_OPF_NOT_PRESENT) +DEF(nop3, 0, 0, 3, TCG_OPF_NOT_PRESENT) -DEF(discard, 1, 0, 0, 0) +/* variable number of parameters */ +DEF(nopn, 0, 0, 1, TCG_OPF_NOT_PRESENT) + +DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT) +DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) + +/* variable number of parameters */ +DEF(call, 0, 1, 2, TCG_OPF_CALL_CLOBBER) -DEF(set_label, 0, 0, 1, TCG_OPF_BB_END) -DEF(call, 0, 1, 2, TCG_OPF_CALL_CLOBBER) /* variable number of parameters */ DEF(br, 0, 0, 1, TCG_OPF_BB_END) -#define IMPL(X) (X ? 0 : TCG_OPF_NOT_PRESENT) +#define IMPL(X) (__builtin_constant_p(X) && !(X) ? TCG_OPF_NOT_PRESENT : 0) #if TCG_TARGET_REG_BITS == 32 # define IMPL64 TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT #else @@ -66,8 +70,8 @@ DEF(sub_i32, 1, 2, 0, 0) DEF(mul_i32, 1, 2, 0, 0) DEF(div_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32)) DEF(divu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32)) -DEF(rem_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32)) -DEF(remu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32)) +DEF(rem_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32)) +DEF(remu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32)) DEF(div2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32)) DEF(divu2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32)) DEF(and_i32, 1, 2, 0, 0) @@ -83,10 +87,11 @@ DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32)) DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END) -DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_REG_BITS == 32)) -DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_REG_BITS == 32)) +DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32)) +DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32)) +DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32)) +DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32)) DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32)) -DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_REG_BITS == 32)) DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32)) DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32)) @@ -125,8 +130,8 @@ DEF(sub_i64, 1, 2, 0, IMPL64) DEF(mul_i64, 1, 2, 0, IMPL64) DEF(div_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64)) DEF(divu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64)) -DEF(rem_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64)) -DEF(remu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64)) +DEF(rem_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64)) +DEF(remu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64)) DEF(div2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64)) DEF(divu2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64)) DEF(and_i64, 1, 2, 0, IMPL64) 
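
As an aside to the multiword multiply expansions above: the muls2 fallback that goes through mulu2 recovers the signed high word by subtracting arg2 from the unsigned high word when arg1 is negative, and arg1 when arg2 is negative (the sari/and/sub sequence following "Adjust for negative inputs"). A minimal standalone C check of that identity, illustrative only and not part of this patch (the muls2_high_via_mulu2 helper is invented for the example), might look like this:

#include <assert.h>
#include <stdint.h>

/* Illustrative only, not QEMU code: signed high word computed from an
   unsigned 32x32->64 multiply, mirroring the sari/and/sub adjustment
   in the muls2_i32 fallback. */
static int32_t muls2_high_via_mulu2(int32_t a, int32_t b)
{
    uint32_t hi = (uint32_t)(((uint64_t)(uint32_t)a * (uint32_t)b) >> 32);
    hi -= (a < 0 ? (uint32_t)b : 0);   /* t2 = sari(a, 31) & b; rh -= t2 */
    hi -= (b < 0 ? (uint32_t)a : 0);   /* t3 = sari(b, 31) & a; rh -= t3 */
    return (int32_t)hi;
}

int main(void)
{
    const int32_t v[] = { 0, 1, -1, 2, -2, 123456789, -123456789,
                          INT32_MAX, INT32_MIN };
    const int n = sizeof(v) / sizeof(v[0]);

    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            int64_t full = (int64_t)v[i] * v[j];   /* reference product */
            assert(muls2_high_via_mulu2(v[i], v[j]) == (int32_t)(full >> 32));
        }
    }
    return 0;
}

The same identity, widened to 64-bit words, is what the corresponding _i64 sequence computes.
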
@@ -158,11 +163,16 @@ DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64)) DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64)) DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64)) +DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64)) +DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64)) +DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64)) +DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64)) + /* QEMU specific */ #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS -DEF(debug_insn_start, 0, 0, 2, 0) +DEF(debug_insn_start, 0, 0, 2, TCG_OPF_NOT_PRESENT) #else -DEF(debug_insn_start, 0, 0, 1, 0) +DEF(debug_insn_start, 0, 0, 1, TCG_OPF_NOT_PRESENT) #endif DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END) DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END) diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h index 5615b133e..a1ebef9f9 100644 --- a/tcg/tcg-runtime.h +++ b/tcg/tcg-runtime.h @@ -12,7 +12,9 @@ int64_t tcg_helper_shr_i64(int64_t arg1, int64_t arg2); int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2); int64_t tcg_helper_div_i64(int64_t arg1, int64_t arg2); int64_t tcg_helper_rem_i64(int64_t arg1, int64_t arg2); +int64_t tcg_helper_mulsh_i64(int64_t arg1, int64_t arg2); uint64_t tcg_helper_divu_i64(uint64_t arg1, uint64_t arg2); uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2); +uint64_t tcg_helper_muluh_i64(uint64_t arg1, uint64_t arg2); #endif @@ -37,9 +37,9 @@ #endif #include "qemu-common.h" -#include "cache-utils.h" -#include "host-utils.h" -#include "qemu-timer.h" +#include "qemu/cache-utils.h" +#include "qemu/host-utils.h" +#include "qemu/timer.h" /* Note: the long term plan is to reduce the dependancies on the QEMU CPU definitions. Currently they are used for qemu_ld/st @@ -68,6 +68,24 @@ static void tcg_target_qemu_prologue(TCGContext *s); static void patch_reloc(uint8_t *code_ptr, int type, tcg_target_long value, tcg_target_long addend); +/* The CIE and FDE header definitions will be common to all hosts. 
*/ +typedef struct { + uint32_t len __attribute__((aligned((sizeof(void *))))); + uint32_t id; + uint8_t version; + char augmentation[1]; + uint8_t code_align; + uint8_t data_align; + uint8_t return_column; +} DebugFrameCIE; + +typedef struct QEMU_PACKED { + uint32_t len __attribute__((aligned((sizeof(void *))))); + uint32_t cie_offset; + tcg_target_long func_start; + tcg_target_long func_len; +} DebugFrameFDEHeader; + static void tcg_register_jit_int(void *buf, size_t size, void *debug_frame, size_t debug_frame_size) __attribute__((unused)); @@ -263,11 +281,21 @@ void tcg_context_init(TCGContext *s) void tcg_prologue_init(TCGContext *s) { /* init global prologue and epilogue */ - s->code_buf = code_gen_prologue; + s->code_buf = s->code_gen_prologue; s->code_ptr = s->code_buf; tcg_target_qemu_prologue(s); flush_icache_range((tcg_target_ulong)s->code_buf, (tcg_target_ulong)s->code_ptr); + +#ifdef DEBUG_DISAS + if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { + size_t size = s->code_ptr - s->code_buf; + qemu_log("PROLOGUE: [size=%zu]\n", size); + log_disas(s->code_buf, size); + qemu_log("\n"); + qemu_log_flush(); + } +#endif } void tcg_set_frame(TCGContext *s, int reg, @@ -800,7 +828,6 @@ static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, int buf_size, assert(idx >= 0 && idx < s->nb_temps); ts = &s->temps[idx]; - assert(ts); if (idx < s->nb_globals) { pstrcpy(buf, buf_size, ts->name); } else { @@ -1151,9 +1178,7 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs) i = 0; for (op = 0; op < ARRAY_SIZE(tcg_op_defs); op++) { const TCGOpDef *def = &tcg_op_defs[op]; - if (op < INDEX_op_call - || op == INDEX_op_debug_insn_start - || (def->flags & TCG_OPF_NOT_PRESENT)) { + if (def->flags & TCG_OPF_NOT_PRESENT) { /* Wrong entry in op definitions? */ if (def->used) { fprintf(stderr, "Invalid op definition for %s\n", def->name); @@ -1218,7 +1243,7 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, static void tcg_liveness_analysis(TCGContext *s) { int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops; - TCGOpcode op; + TCGOpcode op, op_new; TCGArg *args; const TCGOpDef *def; uint8_t *dead_temps, *mem_temps; @@ -1325,7 +1350,17 @@ static void tcg_liveness_analysis(TCGContext *s) break; case INDEX_op_add2_i32: + op_new = INDEX_op_add_i32; + goto do_addsub2; case INDEX_op_sub2_i32: + op_new = INDEX_op_sub_i32; + goto do_addsub2; + case INDEX_op_add2_i64: + op_new = INDEX_op_add_i64; + goto do_addsub2; + case INDEX_op_sub2_i64: + op_new = INDEX_op_sub_i64; + do_addsub2: args -= 6; nb_iargs = 4; nb_oargs = 2; @@ -1338,12 +1373,7 @@ static void tcg_liveness_analysis(TCGContext *s) goto do_remove; } /* Create the single operation plus nop. 
*/ - if (op == INDEX_op_add2_i32) { - op = INDEX_op_add_i32; - } else { - op = INDEX_op_sub_i32; - } - s->gen_opc_buf[op_index] = op; + s->gen_opc_buf[op_index] = op = op_new; args[1] = args[2]; args[2] = args[4]; assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); @@ -1355,6 +1385,13 @@ static void tcg_liveness_analysis(TCGContext *s) goto do_not_remove; case INDEX_op_mulu2_i32: + case INDEX_op_muls2_i32: + op_new = INDEX_op_mul_i32; + goto do_mul2; + case INDEX_op_mulu2_i64: + case INDEX_op_muls2_i64: + op_new = INDEX_op_mul_i64; + do_mul2: args -= 4; nb_iargs = 2; nb_oargs = 2; @@ -1363,7 +1400,7 @@ static void tcg_liveness_analysis(TCGContext *s) if (dead_temps[args[0]] && !mem_temps[args[0]]) { goto do_remove; } - s->gen_opc_buf[op_index] = op = INDEX_op_mul_i32; + s->gen_opc_buf[op_index] = op = op_new; args[1] = args[2]; args[2] = args[3]; assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); @@ -2638,9 +2675,9 @@ static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, img->sym[1].st_size = buf_size; img->di.cu_low_pc = buf; - img->di.cu_high_pc = buf_size; + img->di.cu_high_pc = buf + buf_size; img->di.fn_low_pc = buf; - img->di.fn_high_pc = buf_size; + img->di.fn_high_pc = buf + buf_size; #ifdef DEBUG_JIT /* Enable this block to be able to debug the ELF image file creation. @@ -57,9 +57,10 @@ typedef uint64_t TCGRegSet; #error unsupported #endif -/* Turn some undef macros into false macros. */ #if TCG_TARGET_REG_BITS == 32 +/* Turn some undef macros into false macros. */ #define TCG_TARGET_HAS_div_i64 0 +#define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_div2_i64 0 #define TCG_TARGET_HAS_rot_i64 0 #define TCG_TARGET_HAS_ext8s_i64 0 @@ -80,6 +81,14 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 +/* Turn some undef macros into true macros. */ +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 1 #endif #ifndef TCG_TARGET_deposit_i32_valid @@ -94,11 +103,13 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_div2_i32 0 #elif defined(TCG_TARGET_HAS_div2_i32) #define TCG_TARGET_HAS_div_i32 0 +#define TCG_TARGET_HAS_rem_i32 0 #endif #if defined(TCG_TARGET_HAS_div_i64) #define TCG_TARGET_HAS_div2_i64 0 #elif defined(TCG_TARGET_HAS_div2_i64) #define TCG_TARGET_HAS_div_i64 0 +#define TCG_TARGET_HAS_rem_i64 0 #endif typedef enum TCGOpcode { @@ -270,6 +281,9 @@ typedef int TCGv_i64; #define TCGV_UNUSED_I32(x) x = MAKE_TCGV_I32(-1) #define TCGV_UNUSED_I64(x) x = MAKE_TCGV_I64(-1) +#define TCGV_IS_UNUSED_I32(x) (GET_TCGV_I32(x) == -1) +#define TCGV_IS_UNUSED_I64(x) (GET_TCGV_I64(x) == -1) + /* call flags */ /* Helper does not read globals (either directly or through an exception). It implies TCG_CALL_NO_WRITE_GLOBALS. */ @@ -290,8 +304,8 @@ typedef int TCGv_i64; #define TCG_CALL_DUMMY_TCGV MAKE_TCGV_I32(-1) #define TCG_CALL_DUMMY_ARG ((TCGArg)(-1)) -/* Conditions. Note that these are layed out for easy manipulation by - the the functions below: +/* Conditions. 
Note that these are laid out for easy manipulation by + the functions below: bit 0 is used for inverting; bit 1 is signed, bit 2 is unsigned, @@ -455,6 +469,20 @@ struct TCGContext { uint16_t *gen_opc_ptr; TCGArg *gen_opparam_ptr; + target_ulong gen_opc_pc[OPC_BUF_SIZE]; + uint16_t gen_opc_icount[OPC_BUF_SIZE]; + uint8_t gen_opc_instr_start[OPC_BUF_SIZE]; + + /* Code generation */ + int code_gen_max_blocks; + uint8_t *code_gen_prologue; + uint8_t *code_gen_buffer; + size_t code_gen_buffer_size; + /* threshold to flush the translated code buffer */ + size_t code_gen_buffer_max_size; + uint8_t *code_gen_ptr; + + TBContext tb_ctx; #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) /* labels info for qemu_ld/st IRs @@ -568,7 +596,8 @@ enum { TCG_OPF_SIDE_EFFECTS = 0x04, /* Instruction operands are 64-bits (otherwise 32-bits). */ TCG_OPF_64BIT = 0x08, - /* Instruction is optional and not implemented by the host. */ + /* Instruction is optional and not implemented by the host, or insn + is generic and should not be implemented by the host. */ TCG_OPF_NOT_PRESENT = 0x10, }; @@ -652,12 +681,58 @@ TCGv_i64 tcg_const_i64(int64_t val); TCGv_i32 tcg_const_local_i32(int32_t val); TCGv_i64 tcg_const_local_i64(int64_t val); -extern uint8_t *code_gen_prologue; +/** + * tcg_qemu_tb_exec: + * @env: CPUArchState * for the CPU + * @tb_ptr: address of generated code for the TB to execute + * + * Start executing code from a given translation block. + * Where translation blocks have been linked, execution + * may proceed from the given TB into successive ones. + * Control eventually returns only when some action is needed + * from the top-level loop: either control must pass to a TB + * which has not yet been directly linked, or an asynchronous + * event such as an interrupt needs handling. + * + * The return value is a pointer to the next TB to execute + * (if known; otherwise zero). This pointer is assumed to be + * 4-aligned, and the bottom two bits are used to return further + * information: + * 0, 1: the link between this TB and the next is via the specified + * TB index (0 or 1). That is, we left the TB via (the equivalent + * of) "goto_tb <index>". The main loop uses this to determine + * how to link the TB just executed to the next. + * 2: we are using instruction counting code generation, and we + * did not start executing this TB because the instruction counter + * would hit zero midway through it. In this case the next-TB pointer + * returned is the TB we were about to execute, and the caller must + * arrange to execute the remaining count of instructions. + * 3: we stopped because the CPU's exit_request flag was set + * (usually meaning that there is an interrupt that needs to be + * handled). The next-TB pointer returned is the TB we were + * about to execute when we noticed the pending exit request. + * + * If the bottom two bits indicate an exit-via-index then the CPU + * state is correctly synchronised and ready for execution of the next + * TB (and in particular the guest PC is the address to execute next). + * Otherwise, we gave up on execution of this TB before it started, and + * the caller must fix up the CPU state by calling cpu_pc_from_tb() + * with the next-TB pointer we return. + * + * Note that TCG targets may use a different definition of tcg_qemu_tb_exec + * to this default (which just calls the prologue.code emitted by + * tcg_target_qemu_prologue()).
+ */ +#define TB_EXIT_MASK 3 +#define TB_EXIT_IDX0 0 +#define TB_EXIT_IDX1 1 +#define TB_EXIT_ICOUNT_EXPIRED 2 +#define TB_EXIT_REQUESTED 3 -/* TCG targets may use a different definition of tcg_qemu_tb_exec. */ #if !defined(tcg_qemu_tb_exec) # define tcg_qemu_tb_exec(env, tb_ptr) \ - ((tcg_target_ulong (*)(void *, void *))code_gen_prologue)(env, tb_ptr) + ((tcg_target_ulong (*)(void *, void *))tcg_ctx.code_gen_prologue)(env, \ + tb_ptr) #endif void tcg_register_jit(void *buf, size_t buf_size); diff --git a/tcg/tci/README b/tcg/tci/README index 6ac1ac99d..dc57f076b 100644 --- a/tcg/tci/README +++ b/tcg/tci/README @@ -52,7 +52,7 @@ The only difference from running QEMU with TCI to running without TCI should be speed. Especially during development of TCI, it was very useful to compare runs with and without TCI. Create /tmp/qemu.log by - qemu-system-i386 -d in_asm,op_opt,cpu -singlestep + qemu-system-i386 -d in_asm,op_opt,cpu -D /tmp/qemu.log -singlestep once with interpreter and once without interpreter and compare the resulting qemu.log files. This is also useful to see the effects of additional diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c index 1707169ea..e118bc717 100644 --- a/tcg/tci/tcg-target.c +++ b/tcg/tci/tcg-target.c @@ -34,20 +34,9 @@ tcg_abort(); \ } while (0) -/* Single bit n. */ -#define BIT(n) (1 << (n)) - /* Bitfield n...m (in 32 bit value). */ #define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m) -/* Used for function call generation. */ -#define TCG_REG_CALL_STACK TCG_REG_R4 -#define TCG_TARGET_STACK_ALIGN 16 -#define TCG_TARGET_CALL_STACK_OFFSET 0 - -/* TODO: documentation. */ -static uint8_t *tb_ret_addr; - /* Macros used in tcg_target_op_defs. */ #define R "r" #define RI "ri" @@ -513,7 +502,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, tcg_out_op_t(s, INDEX_op_ld_i64); tcg_out_r(s, ret); tcg_out_r(s, arg1); - assert(arg2 == (uint32_t)arg2); + assert(arg2 == (int32_t)arg2); tcg_out32(s, arg2); #else TODO(); @@ -636,7 +625,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_st_i64: tcg_out_r(s, args[0]); tcg_out_r(s, args[1]); - assert(args[2] == (uint32_t)args[2]); + assert(args[2] == (int32_t)args[2]); tcg_out32(s, args[2]); break; case INDEX_op_add_i32: @@ -888,7 +877,7 @@ static void tcg_target_init(TCGContext *s) #if defined(CONFIG_DEBUG_TCG_INTERPRETER) const char *envval = getenv("DEBUG_TCG"); if (envval) { - cpu_set_log(strtol(envval, NULL, 0)); + qemu_set_log(strtol(envval, NULL, 0)); } #endif @@ -904,15 +893,19 @@ static void tcg_target_init(TCGContext *s) /* TODO: Which registers should be set here? */ tcg_regset_set32(tcg_target_call_clobber_regs, 0, BIT(TCG_TARGET_NB_REGS) - 1); + tcg_regset_clear(s->reserved_regs); tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); tcg_add_target_add_op_defs(tcg_target_op_defs); - tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf), + + /* We use negative offsets from "sp" so that we can distinguish + stores that might pretend to be call arguments. */ + tcg_set_frame(s, TCG_REG_CALL_STACK, + -CPU_TEMP_BUF_NLONGS * sizeof(long), CPU_TEMP_BUF_NLONGS * sizeof(long)); } /* Generate global QEMU prologue and epilogue code. 
*/ -static void tcg_target_qemu_prologue(TCGContext *s) +static inline void tcg_target_qemu_prologue(TCGContext *s) { - tb_ret_addr = s->code_ptr; } diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index a832f5cf5..d7fc14eb1 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -59,9 +59,8 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -/* Not more than one of the next two defines must be 1. */ #define TCG_TARGET_HAS_div_i32 1 -#define TCG_TARGET_HAS_div2_i32 0 +#define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_ext8s_i32 1 #define TCG_TARGET_HAS_ext16s_i32 1 #define TCG_TARGET_HAS_ext8u_i32 1 @@ -76,15 +75,15 @@ #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_muls2_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_deposit_i64 1 -/* Not more than one of the next two defines must be 1. */ #define TCG_TARGET_HAS_div_i64 0 -#define TCG_TARGET_HAS_div2_i64 0 +#define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_ext8s_i64 1 #define TCG_TARGET_HAS_ext16s_i64 1 #define TCG_TARGET_HAS_ext32s_i64 1 @@ -100,6 +99,14 @@ #define TCG_TARGET_HAS_orc_i64 0 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 + +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 0 #endif /* TCG_TARGET_REG_BITS == 64 */ /* Number of registers available. @@ -118,7 +125,6 @@ typedef enum { TCG_REG_R5, TCG_REG_R6, TCG_REG_R7, - TCG_AREG0 = TCG_REG_R7, #if TCG_TARGET_NB_REGS >= 16 TCG_REG_R8, TCG_REG_R9, @@ -151,6 +157,13 @@ typedef enum { TCG_CONST = UINT8_MAX } TCGReg; +#define TCG_AREG0 (TCG_TARGET_NB_REGS - 2) + +/* Used for function call generation. */ +#define TCG_REG_CALL_STACK (TCG_TARGET_NB_REGS - 1) +#define TCG_TARGET_CALL_STACK_OFFSET 0 +#define TCG_TARGET_STACK_ALIGN 16 + void tci_disas(uint8_t opc); tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr);
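
As an aside to the tcg_qemu_tb_exec() documentation added above: a top-level loop that receives the returned value has to split it into the TB pointer and the two-bit exit reason. The sketch below is illustrative only, not QEMU code; it restates the TB_EXIT_* values from tcg.h so that it compiles on its own, and the printf calls stand in for the real linking and fix-up work the loop would do:

#include <stdint.h>
#include <stdio.h>

/* TB_EXIT_* values as added to tcg.h by this patch. */
#define TB_EXIT_MASK            3
#define TB_EXIT_IDX0            0
#define TB_EXIT_IDX1            1
#define TB_EXIT_ICOUNT_EXPIRED  2
#define TB_EXIT_REQUESTED       3

/* Sketch: classify a tcg_qemu_tb_exec() return value.  The low two bits
   encode the exit reason; the remaining bits are the 4-aligned TB pointer
   described in the comment above. */
static void handle_tb_exit(uintptr_t ret)
{
    void *tb = (void *)(ret & ~(uintptr_t)TB_EXIT_MASK);

    switch (ret & TB_EXIT_MASK) {
    case TB_EXIT_IDX0:
    case TB_EXIT_IDX1:
        /* We left via goto_tb 0/1: 'tb' is the next TB, and the loop
           can patch that jump slot of the TB just executed to reach it. */
        printf("link jump slot %d to TB %p\n", (int)(ret & TB_EXIT_MASK), tb);
        break;
    case TB_EXIT_ICOUNT_EXPIRED:
        /* The instruction counter would hit zero inside 'tb'; the caller
           must execute the remaining instruction count some other way. */
        printf("icount expired before TB %p\n", tb);
        break;
    case TB_EXIT_REQUESTED:
        /* exit_request was set (e.g. a pending interrupt): fix up the
           CPU state before doing anything else. */
        printf("exit requested before TB %p\n", tb);
        break;
    }
}

int main(void)
{
    /* Feed a few synthetic return values through the decoder. */
    handle_tb_exit(0x1000 | TB_EXIT_IDX1);
    handle_tb_exit(0x2000 | TB_EXIT_ICOUNT_EXPIRED);
    handle_tb_exit(0x3000 | TB_EXIT_REQUESTED);
    return 0;
}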