author     SeokYeon Hwang <syeon.hwang@samsung.com>  2013-12-11 10:38:35 +0900
committer  SeokYeon Hwang <syeon.hwang@samsung.com>  2013-12-11 14:12:09 +0900
commit     1a81500c243dfe21a1c348d24b116b6315c66c83 (patch)
tree       8164064245de0479a0b5ae5b026719e81420de49 /tcg
parent     34668d898e6581ddc47865e6de2d30b55ef47031 (diff)
parent     0e7b9f06a6cc032be6ca2ac55a27592abd374179 (diff)
download   qemu-1a81500c243dfe21a1c348d24b116b6315c66c83.tar.gz
           qemu-1a81500c243dfe21a1c348d24b116b6315c66c83.tar.bz2
           qemu-1a81500c243dfe21a1c348d24b116b6315c66c83.zip
Merge branch 'upstream-1.7' into tizen_qemu_1.7
Signed-off-by: SeokYeon Hwang <syeon.hwang@samsung.com>
Conflicts:
	VERSION
	block/cow.c
	block/raw-win32.c
	block/stream.c
	block/vmdk.c
	blockdev.c
	exec.c
	hw/i386/pc_piix.c
	hw/scsi/scsi-bus.c
	include/qom/cpu.h
	include/sysemu/kvm.h
	qemu-img.c
	tcg/tcg.c
	tcg/tcg.h
	vl.c
Change-Id: Ib8de93ad2c05150934e17e63d7f8e90ffdfccc62
Diffstat (limited to 'tcg')
-rw-r--r--  tcg/README                |   43
-rw-r--r--  tcg/aarch64/tcg-target.c  |   59
-rw-r--r--  tcg/aarch64/tcg-target.h  |    9
-rw-r--r--  tcg/arm/tcg-target.c      |  816
-rw-r--r--  tcg/arm/tcg-target.h      |   13
-rw-r--r--  tcg/hppa/tcg-target.c     | 1829
-rw-r--r--  tcg/hppa/tcg-target.h     |  126
-rw-r--r--  tcg/i386/tcg-target.c     |  842
-rw-r--r--  tcg/i386/tcg-target.h     |   19
-rw-r--r--  tcg/ia64/tcg-target.c     |  778
-rw-r--r--  tcg/ia64/tcg-target.h     |   13
-rw-r--r--  tcg/mips/tcg-target.c     |  259
-rw-r--r--  tcg/mips/tcg-target.h     |   59
-rw-r--r--  tcg/optimize.c            |   55
-rw-r--r--  tcg/ppc/tcg-target.c      |  696
-rw-r--r--  tcg/ppc/tcg-target.h      |    6
-rw-r--r--  tcg/ppc64/tcg-target.c    | 1195
-rw-r--r--  tcg/ppc64/tcg-target.h    |   10
-rw-r--r--  tcg/s390/tcg-target.c     |   15
-rw-r--r--  tcg/s390/tcg-target.h     |    9
-rw-r--r--  tcg/sparc/tcg-target.c    |   14
-rw-r--r--  tcg/sparc/tcg-target.h    |   26
-rw-r--r--  tcg/tcg-be-ldst.h         |   90
-rw-r--r--  tcg/tcg-be-null.h         |   43
-rw-r--r--  tcg/tcg-op.h              |  281
-rw-r--r--  tcg/tcg-opc.h             |  100
-rw-r--r--  tcg/tcg.c                 |  445
-rw-r--r--  tcg/tcg.h                 |  244
-rw-r--r--  tcg/tci/tcg-target.c      |   16
-rw-r--r--  tcg/tci/tcg-target.h      |   20
30 files changed, 3289 insertions(+), 4841 deletions(-)
diff --git a/tcg/README b/tcg/README
index 063aeb95ea..f1782123b7 100644
--- a/tcg/README
+++ b/tcg/README
@@ -412,30 +412,25 @@ current TB was linked to this TB. Otherwise execute the next
 instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued
 at most once with each slot index per TB.
 
-* qemu_ld8u t0, t1, flags
-qemu_ld8s t0, t1, flags
-qemu_ld16u t0, t1, flags
-qemu_ld16s t0, t1, flags
-qemu_ld32 t0, t1, flags
-qemu_ld32u t0, t1, flags
-qemu_ld32s t0, t1, flags
-qemu_ld64 t0, t1, flags
-
-Load data at the QEMU CPU address t1 into t0. t1 has the QEMU CPU address
-type. 'flags' contains the QEMU memory index (selects user or kernel access)
-for example.
-
-Note that "qemu_ld32" implies a 32-bit result, while "qemu_ld32u" and
-"qemu_ld32s" imply a 64-bit result appropriately extended from 32 bits.
-
-* qemu_st8 t0, t1, flags
-qemu_st16 t0, t1, flags
-qemu_st32 t0, t1, flags
-qemu_st64 t0, t1, flags
-
-Store the data t0 at the QEMU CPU Address t1. t1 has the QEMU CPU
-address type. 'flags' contains the QEMU memory index (selects user or
-kernel access) for example.
+* qemu_ld_i32/i64 t0, t1, flags, memidx
+* qemu_st_i32/i64 t0, t1, flags, memidx
+
+Load data at the guest address t1 into t0, or store data in t0 at guest
+address t1.  The _i32/_i64 size applies to the size of the input/output
+register t0 only.  The address t1 is always sized according to the guest,
+and the width of the memory operation is controlled by flags.
+
+Both t0 and t1 may be split into little-endian ordered pairs of registers
+if dealing with 64-bit quantities on a 32-bit host.
+
+The memidx selects the qemu tlb index to use (e.g. user or kernel access).
+The flags are the TCGMemOp bits, selecting the sign, width, and endianness
+of the memory access.
+
+For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
+64-bit memory access specified in flags.
+
 *********
 
 Note 1: Some shortcuts are defined when the last operand is known to be
 a constant (e.g. addi for add, movi for mov).
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 41a17f8a62..04d7ae328d 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -10,6 +10,7 @@
  * See the COPYING file in the top-level directory for details.
  */
 
+#include "tcg-be-ldst.h"
 #include "qemu/bitops.h"
 
 #ifndef NDEBUG
@@ -88,7 +89,7 @@ static inline void reloc_pc19(void *code_ptr, tcg_target_long target)
 }
 
 static inline void patch_reloc(uint8_t *code_ptr, int type,
-                               tcg_target_long value, tcg_target_long addend)
+                               intptr_t value, intptr_t addend)
 {
     value += addend;
 
@@ -423,14 +424,14 @@ static inline void tcg_out_mov(TCGContext *s,
 }
 
 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
-                              TCGReg arg1, tcg_target_long arg2)
+                              TCGReg arg1, intptr_t arg2)
 {
     tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_LD,
                  arg, arg1, arg2);
 }
 
 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
-                              TCGReg arg1, tcg_target_long arg2)
+                              TCGReg arg1, intptr_t arg2)
 {
     tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_ST,
                  arg, arg1, arg2);
@@ -778,24 +779,24 @@ static inline void tcg_out_nop(TCGContext *s)
 }
 
 #ifdef CONFIG_SOFTMMU
-#include "exec/softmmu_defs.h"
-
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
-   int mmu_idx) */
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ *                                     int mmu_idx, uintptr_t ra)
+ */
 static const void * const qemu_ld_helpers[4] = {
-    helper_ldb_mmu,
-    helper_ldw_mmu,
-    helper_ldl_mmu,
-    helper_ldq_mmu,
+    helper_ret_ldub_mmu,
+    helper_ret_lduw_mmu,
+    helper_ret_ldul_mmu,
+    helper_ret_ldq_mmu,
 };
 
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
-   uintxx_t val, int mmu_idx) */
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
 static const void * const qemu_st_helpers[4] = {
-    helper_stb_mmu,
-    helper_stw_mmu,
-    helper_stl_mmu,
-    helper_stq_mmu,
+    helper_ret_stb_mmu,
+    helper_ret_stw_mmu,
+    helper_ret_stl_mmu,
+    helper_ret_stq_mmu,
 };
 
 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
@@ -804,6 +805,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
     tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X3, (tcg_target_long)lb->raddr);
     tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
                  (tcg_target_long)qemu_ld_helpers[lb->opc & 3]);
     tcg_out_callr(s, TCG_REG_TMP);
@@ -824,6 +826,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
     tcg_out_movr(s, 1, TCG_REG_X2, lb->datalo_reg);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X4, (tcg_target_long)lb->raddr);
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
                 (tcg_target_long)qemu_st_helpers[lb->opc & 3]);
     tcg_out_callr(s, TCG_REG_TMP);
@@ -832,33 +835,13 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     tcg_out_goto(s, (tcg_target_long)lb->raddr);
 }
 
-void tcg_out_tb_finalize(TCGContext *s)
-{
-    int i;
-    for (i = 0; i < s->nb_qemu_ldst_labels; i++) {
-        TCGLabelQemuLdst *label = &s->qemu_ldst_labels[i];
-        if (label->is_ld) {
-            tcg_out_qemu_ld_slow_path(s, label);
-        } else {
-            tcg_out_qemu_st_slow_path(s, label);
-        }
-    }
-}
-
 static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
                                 TCGReg data_reg, TCGReg addr_reg,
                                 int mem_index,
                                 uint8_t *raddr, uint8_t *label_ptr)
 {
-    int idx;
-    TCGLabelQemuLdst *label;
-
-    if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) {
-        tcg_abort();
-    }
+    TCGLabelQemuLdst *label = new_ldst_label(s);
 
-    idx = s->nb_qemu_ldst_labels++;
-    label = &s->qemu_ldst_labels[idx];
     label->is_ld = is_ld;
     label->opc = opc;
     label->datalo_reg = data_reg;
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 51e50920b2..82ad919518 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -61,6 +61,8 @@ typedef enum {
 #define TCG_TARGET_HAS_sub2_i32         0
 #define TCG_TARGET_HAS_mulu2_i32        0
 #define TCG_TARGET_HAS_muls2_i32        0
+#define TCG_TARGET_HAS_muluh_i32        0
+#define TCG_TARGET_HAS_mulsh_i32        0
 
 #define TCG_TARGET_HAS_div_i64          0
 #define TCG_TARGET_HAS_rem_i64          0
@@ -87,13 +89,16 @@ typedef enum {
 #define TCG_TARGET_HAS_sub2_i64         0
 #define TCG_TARGET_HAS_mulu2_i64        0
 #define TCG_TARGET_HAS_muls2_i64        0
+#define TCG_TARGET_HAS_muluh_i64        0
+#define TCG_TARGET_HAS_mulsh_i64        0
 
 enum {
     TCG_AREG0 = TCG_REG_X19,
 };
 
-static inline void flush_icache_range(tcg_target_ulong start,
-                                      tcg_target_ulong stop)
+#define TCG_TARGET_HAS_new_ldst         0
+
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
     __builtin___clear_cache((char *)start, (char *)stop);
 }
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 6c4854dbb0..e93a4a237b 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -22,6 +22,8 @@
  * THE SOFTWARE.
  */
 
+#include "tcg-be-ldst.h"
+
 /* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise.  */
 #ifndef __ARM_ARCH
 # if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
@@ -108,21 +110,21 @@ static const int tcg_target_call_oarg_regs[2] = {
 
 #define TCG_REG_TMP  TCG_REG_R12
 
-static inline void reloc_abs32(void *code_ptr, tcg_target_long target)
+static inline void reloc_abs32(void *code_ptr, intptr_t target)
 {
     *(uint32_t *) code_ptr = target;
 }
 
-static inline void reloc_pc24(void *code_ptr, tcg_target_long target)
+static inline void reloc_pc24(void *code_ptr, intptr_t target)
 {
-    uint32_t offset = ((target - ((tcg_target_long) code_ptr + 8)) >> 2);
+    uint32_t offset = ((target - ((intptr_t)code_ptr + 8)) >> 2);
 
     *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & ~0xffffff)
                              | (offset & 0xffffff);
 }
 
 static void patch_reloc(uint8_t *code_ptr, int type,
-                        tcg_target_long value, tcg_target_long addend)
+                        intptr_t value, intptr_t addend)
 {
     switch (type) {
     case R_ARM_ABS32:
@@ -175,24 +177,16 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         ct->ct |= TCG_CT_REG;
         tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
 #ifdef CONFIG_SOFTMMU
-        /* r0-r2 will be overwritten when reading the tlb entry,
+        /* r0-r2,lr will be overwritten when reading the tlb entry,
            so don't use these. */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
-#endif
-        break;
-    case 'L':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
-#ifdef CONFIG_SOFTMMU
-        /* r1 is still needed to load data_reg or data_reg2,
-           so don't use it. */
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
 #endif
         break;
 
-    /* qemu_st address & data_reg */
+    /* qemu_st address & data */
     case 's':
         ct->ct |= TCG_CT_REG;
         tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
@@ -207,6 +201,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         /* Avoid clashes with registers being used for helper args */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
 #endif
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
 #endif
         break;
 
@@ -320,6 +315,9 @@ typedef enum {
     INSN_STRB_REG  = 0x06400000,
 
     INSN_LDRD_IMM  = 0x004000d0,
+    INSN_LDRD_REG  = 0x000000d0,
+    INSN_STRD_IMM  = 0x004000f0,
+    INSN_STRD_REG  = 0x000000f0,
 } ARMInsn;
 
 #define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00)
@@ -379,13 +377,17 @@ static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
     /* We pay attention here to not modify the branch target by skipping
        the corresponding bytes. This ensure that caches and memory are
        kept coherent during retranslation. */
-#ifdef HOST_WORDS_BIGENDIAN
-    tcg_out8(s, (cond << 4) | 0x0a);
-    s->code_ptr += 3;
-#else
     s->code_ptr += 3;
     tcg_out8(s, (cond << 4) | 0x0a);
-#endif
+}
+
+static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
+{
+    /* We pay attention here to not modify the branch target by skipping
+       the corresponding bytes. This ensure that caches and memory are
+       kept coherent during retranslation. */
+    s->code_ptr += 3;
+    tcg_out8(s, (cond << 4) | 0x0b);
 }
 
 static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
@@ -810,6 +812,30 @@ static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
     tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
 }
 
+static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
+                                  TCGReg rn, int imm8)
+{
+    tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
+}
+
+static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
+                                  TCGReg rn, TCGReg rm)
+{
+    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
+                                  TCGReg rn, int imm8)
+{
+    tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
+}
+
+static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
+                                  TCGReg rn, TCGReg rm)
+{
+    tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
+}
+
 /* Register pre-increment with base writeback.  */
 static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
                                     TCGReg rn, TCGReg rm)
@@ -975,34 +1001,27 @@ static inline void tcg_out_st8(TCGContext *s, int cond,
         tcg_out_st8_12(s, cond, rd, rn, offset);
 }
 
-/* The _goto case is normally between TBs within the same code buffer,
- * and with the code buffer limited to 16MB we shouldn't need the long
- * case.
- *
- * .... except to the prologue that is in its own buffer.
+/* The _goto case is normally between TBs within the same code buffer, and
+ * with the code buffer limited to 16MB we wouldn't need the long case.
+ * But we also use it for the tail-call to the qemu_ld/st helpers, which does.
  */
 static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr)
 {
-    int32_t val;
+    int32_t disp = addr - (tcg_target_long) s->code_ptr;
 
-    if (addr & 1) {
-        /* goto to a Thumb destination isn't supported */
-        tcg_abort();
+    if ((addr & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
+        tcg_out_b(s, cond, disp);
+        return;
     }
 
-    val = addr - (tcg_target_long) s->code_ptr;
-    if (val - 8 < 0x01fffffd && val - 8 > -0x01fffffd)
-        tcg_out_b(s, cond, val);
-    else {
-        if (cond == COND_AL) {
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
-            tcg_out32(s, addr);
-        } else {
-            tcg_out_movi32(s, cond, TCG_REG_TMP, val - 8);
-            tcg_out_dat_reg(s, cond, ARITH_ADD,
-                            TCG_REG_PC, TCG_REG_PC,
-                            TCG_REG_TMP, SHIFT_IMM_LSL(0));
+    tcg_out_movi32(s, cond, TCG_REG_TMP, addr);
+    if (use_armv5t_instructions) {
+        tcg_out_bx(s, cond, TCG_REG_TMP);
+    } else {
+        if (addr & 1) {
+            tcg_abort();
         }
+        tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
     }
 }
 
@@ -1057,25 +1076,37 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)
 }
 
 #ifdef CONFIG_SOFTMMU
-
-#include "exec/softmmu_defs.h"
-
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
-   int mmu_idx) */
-static const void * const qemu_ld_helpers[4] = {
-    helper_ldb_mmu,
-    helper_ldw_mmu,
-    helper_ldl_mmu,
-    helper_ldq_mmu,
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ *                                     int mmu_idx, uintptr_t ra)
+ */
+static const void * const qemu_ld_helpers[16] = {
+    [MO_UB]   = helper_ret_ldub_mmu,
+    [MO_SB]   = helper_ret_ldsb_mmu,
+
+    [MO_LEUW] = helper_le_lduw_mmu,
+    [MO_LEUL] = helper_le_ldul_mmu,
+    [MO_LEQ]  = helper_le_ldq_mmu,
+    [MO_LESW] = helper_le_ldsw_mmu,
+    [MO_LESL] = helper_le_ldul_mmu,
+
+    [MO_BEUW] = helper_be_lduw_mmu,
+    [MO_BEUL] = helper_be_ldul_mmu,
+    [MO_BEQ]  = helper_be_ldq_mmu,
+    [MO_BESW] = helper_be_ldsw_mmu,
+    [MO_BESL] = helper_be_ldul_mmu,
 };
 
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
-   uintxx_t val, int mmu_idx) */
-static const void * const qemu_st_helpers[4] = {
-    helper_stb_mmu,
-    helper_stw_mmu,
-    helper_stl_mmu,
-    helper_stq_mmu,
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
+static const void * const qemu_st_helpers[16] = {
+    [MO_UB]   = helper_ret_stb_mmu,
+    [MO_LEUW] = helper_le_stw_mmu,
+    [MO_LEUL] = helper_le_stl_mmu,
+    [MO_LEQ]  = helper_le_stq_mmu,
+    [MO_BEUW] = helper_be_stw_mmu,
+    [MO_BEUL] = helper_be_stl_mmu,
+    [MO_BEQ]  = helper_be_stq_mmu,
 };
 
 /* Helper routines for marshalling helper function arguments into
@@ -1119,53 +1150,62 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
     if (argreg & 1) {
         argreg++;
     }
-    argreg = tcg_out_arg_reg32(s, argreg, arglo);
-    argreg = tcg_out_arg_reg32(s, argreg, arghi);
-    return argreg;
+    if (use_armv6_instructions && argreg >= 4
+        && (arglo & 1) == 0 && arghi == arglo + 1) {
+        tcg_out_strd_8(s, COND_AL, arglo,
+                       TCG_REG_CALL_STACK, (argreg - 4) * 4);
+        return argreg + 2;
+    } else {
+        argreg = tcg_out_arg_reg32(s, argreg, arglo);
+        argreg = tcg_out_arg_reg32(s, argreg, arghi);
+        return argreg;
+    }
 }
 
 #define TLB_SHIFT	(CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
 
-/* Load and compare a TLB entry, leaving the flags set.  Leaves R2 pointing
-   to the tlb entry.  Clobbers R1 and TMP.  */
+/* We're expecting to use an 8-bit immediate and to mask.  */
+QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
+
+/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
+   Using the offset of the second entry in the last tlb table ensures
+   that we can index all of the elements of the first entry.  */
+QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
+                  > 0xffff);
+
+/* Load and compare a TLB entry, leaving the flags set.  Returns the register
+   containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */
 
-static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
-                             int s_bits, int tlb_offset)
+static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
+                               TCGMemOp s_bits, int mem_index, bool is_load)
 {
     TCGReg base = TCG_AREG0;
+    int cmp_off =
+        (is_load
+         ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
+         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
+    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
 
     /* Should generate something like the following:
-     * pre-v7:
-     *   shr    tmp, addr_reg, #TARGET_PAGE_BITS                  (1)
-     *   add    r2, env, #off & 0xff00
+     *   shr    tmp, addrlo, #TARGET_PAGE_BITS                    (1)
+     *   add    r2, env, #high
      *   and    r0, tmp, #(CPU_TLB_SIZE - 1)                      (2)
     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS               (3)
-     *   ldr    r0, [r2, #off & 0xff]!                            (4)
-     *   tst    addr_reg, #s_mask
-     *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS                    (5)
-     *
-     * v7 (not implemented yet):
-     *   ubfx   r2, addr_reg, #TARGET_PAGE_BITS, #CPU_TLB_BITS    (1)
-     *   movw   tmp, #~TARGET_PAGE_MASK & ~s_mask
-     *   movw   r0, #off
-     *   add    r2, env, r2, lsl #CPU_TLB_ENTRY_BITS              (2)
-     *   bic    tmp, addr_reg, tmp
-     *   ldr    r0, [r2, r0]!                                     (3)
-     *   cmp    r0, tmp                                           (4)
+     *   ldr    r0, [r2, #cmp]                                    (4)
+     *   tst    addrlo, #s_mask
+     *   ldr    r2, [r2, #add]                                    (5)
+     *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS
     */
-# if CPU_TLB_BITS > 8
-#  error
-# endif
     tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
                     0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
 
-    /* We assume that the offset is contained within 16 bits.  */
-    assert((tlb_offset & ~0xffff) == 0);
-    if (tlb_offset > 0xff) {
+    /* We checked that the offset is contained within 16 bits above.  */
+    if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
-                        (24 << 7) | (tlb_offset >> 8));
-        tlb_offset &= 0xff;
+                        (24 << 7) | (cmp_off >> 8));
         base = TCG_REG_R2;
+        add_off -= cmp_off & 0xff00;
+        cmp_off &= 0xff;
     }
 
     tcg_out_dat_imm(s, COND_AL, ARITH_AND,
@@ -1177,14 +1217,11 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
        but due to how the pointer needs setting up, ldm isn't useful.
       Base arm5 doesn't have ldrd, but armv5te does.  */
     if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
-        tcg_out_memop_8(s, COND_AL, INSN_LDRD_IMM, TCG_REG_R0,
-                        TCG_REG_R2, tlb_offset, 1, 1);
+        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
     } else {
-        tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R0,
-                         TCG_REG_R2, tlb_offset, 1, 1);
+        tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
         if (TARGET_LONG_BITS == 64) {
-            tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R1,
-                             TCG_REG_R2, 4, 1, 0);
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
         }
     }
 
@@ -1194,6 +1231,9 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                         0, addrlo, (1 << s_bits) - 1);
     }
 
+    /* Load the tlb addend.  */
+    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
+
     tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
                     TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
 
@@ -1201,31 +1241,26 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
         tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
                         TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
     }
+
+    return TCG_REG_R2;
 }
 
 /* Record the context of a call to the out of line helper code for the slow
    path for a load or store, so that we can later generate the correct
   helper code.  */
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
-                                int data_reg, int data_reg2, int addrlo_reg,
-                                int addrhi_reg, int mem_index,
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
+                                TCGReg datalo, TCGReg datahi, TCGReg addrlo,
+                                TCGReg addrhi, int mem_index,
                                 uint8_t *raddr, uint8_t *label_ptr)
 {
-    int idx;
-    TCGLabelQemuLdst *label;
-
-    if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) {
-        tcg_abort();
-    }
+    TCGLabelQemuLdst *label = new_ldst_label(s);
 
-    idx = s->nb_qemu_ldst_labels++;
-    label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx];
     label->is_ld = is_ld;
     label->opc = opc;
-    label->datalo_reg = data_reg;
-    label->datahi_reg = data_reg2;
-    label->addrlo_reg = addrlo_reg;
-    label->addrhi_reg = addrhi_reg;
+    label->datalo_reg = datalo;
+    label->datahi_reg = datahi;
+    label->addrlo_reg = addrlo;
+    label->addrhi_reg = addrhi;
     label->mem_index = mem_index;
     label->raddr = raddr;
     label->label_ptr[0] = label_ptr;
@@ -1233,8 +1268,9 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
 
 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-    TCGReg argreg, data_reg, data_reg2;
-    uint8_t *start;
+    TCGReg argreg, datalo, datahi;
+    TCGMemOp opc = lb->opc;
+    uintptr_t func;
 
     reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
 
@@ -1245,46 +1281,46 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
         argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
     }
     argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index);
-    tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[lb->opc & 3]);
+    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
 
-    data_reg = lb->datalo_reg;
-    data_reg2 = lb->datahi_reg;
+    /* For armv6 we can use the canonical unsigned helpers and minimize
+       icache usage.  For pre-armv6, use the signed helpers since we do
+       not have a single insn sign-extend.  */
+    if (use_armv6_instructions) {
+        func = (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN];
+    } else {
+        func = (uintptr_t)qemu_ld_helpers[opc];
+        if (opc & MO_SIGN) {
+            opc = MO_UL;
+        }
+    }
+    tcg_out_call(s, func);
 
-    start = s->code_ptr;
-    switch (lb->opc) {
-    case 0 | 4:
-        tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0);
+    datalo = lb->datalo_reg;
+    datahi = lb->datahi_reg;
+    switch (opc & MO_SSIZE) {
+    case MO_SB:
+        tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
         break;
-    case 1 | 4:
-        tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0);
+    case MO_SW:
+        tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
         break;
-    case 0:
-    case 1:
-    case 2:
     default:
-        tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
-        break;
-    case 3:
-        tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
-        tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
-        break;
-    }
-
-    /* For GETPC_LDST in exec-all.h, we architect exactly 2 insns between
-       the call and the branch back to straight-line code.  Note that the
-       moves above could be elided by register allocation, nor do we know
-       which code alternative we chose for extension.  */
-    switch (s->code_ptr - start) {
-    case 0:
-        tcg_out_nop(s);
-        /* FALLTHRU */
-    case 4:
-        tcg_out_nop(s);
-        /* FALLTHRU */
-    case 8:
+        tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
+        break;
+    case MO_Q:
+        if (datalo != TCG_REG_R1) {
+            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
+            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
+        } else if (datahi != TCG_REG_R0) {
+            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
+            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
+        } else {
+            tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
+            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
+            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
+        }
         break;
-    default:
-        abort();
     }
 
     tcg_out_goto(s, COND_AL, (tcg_target_long)lb->raddr);
@@ -1292,7 +1328,8 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 
 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-    TCGReg argreg, data_reg, data_reg2;
+    TCGReg argreg, datalo, datahi;
+    TCGMemOp opc = lb->opc;
 
     reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
 
@@ -1304,293 +1341,311 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
         argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
     }
 
-    data_reg = lb->datalo_reg;
-    data_reg2 = lb->datahi_reg;
-    switch (lb->opc) {
-    case 0:
-        argreg = tcg_out_arg_reg8(s, argreg, data_reg);
+    datalo = lb->datalo_reg;
+    datahi = lb->datahi_reg;
+    switch (opc & MO_SIZE) {
+    case MO_8:
+        argreg = tcg_out_arg_reg8(s, argreg, datalo);
         break;
-    case 1:
-        argreg = tcg_out_arg_reg16(s, argreg, data_reg);
+    case MO_16:
        argreg = tcg_out_arg_reg16(s, argreg, datalo);
         break;
-    case 2:
-        argreg = tcg_out_arg_reg32(s, argreg, data_reg);
+    case MO_32:
+    default:
+        argreg = tcg_out_arg_reg32(s, argreg, datalo);
         break;
-    case 3:
-        argreg = tcg_out_arg_reg64(s, argreg, data_reg, data_reg2);
+    case MO_64:
+        argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
         break;
     }
 
     argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index);
-    tcg_out_call(s, (tcg_target_long) qemu_st_helpers[lb->opc & 3]);
+    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
 
-    /* For GETPC_LDST in exec-all.h, we architect exactly 2 insns between
-       the call and the branch back to straight-line code.  */
-    tcg_out_nop(s);
-    tcg_out_nop(s);
-    tcg_out_goto(s, COND_AL, (tcg_target_long)lb->raddr);
+    /* Tail-call to the helper, which will return to the fast path.  */
+    tcg_out_goto(s, COND_AL, (uintptr_t)qemu_st_helpers[opc]);
 }
 #endif /* SOFTMMU */
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
+                                         TCGReg datalo, TCGReg datahi,
+                                         TCGReg addrlo, TCGReg addend)
 {
-    TCGReg addr_reg, data_reg, data_reg2;
-    bool bswap;
-#ifdef CONFIG_SOFTMMU
-    int mem_index, s_bits;
-    TCGReg addr_reg2;
-    uint8_t *label_ptr;
-#endif
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 1;
-#else
-    bswap = 0;
-#endif
-
-    data_reg = *args++;
-    data_reg2 = (opc == 3 ? *args++ : 0);
-    addr_reg = *args++;
-#ifdef CONFIG_SOFTMMU
-    addr_reg2 = (TARGET_LONG_BITS == 64 ? *args++ : 0);
-    mem_index = *args;
-    s_bits = opc & 3;
-
-    tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits,
-                     offsetof(CPUArchState, tlb_table[mem_index][0].addr_read));
-
-    label_ptr = s->code_ptr;
-    tcg_out_b_noaddr(s, COND_NE);
-
-    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
-                    offsetof(CPUTLBEntry, addend)
-                    - offsetof(CPUTLBEntry, addr_read));
+    TCGMemOp bswap = opc & MO_BSWAP;
 
-    switch (opc) {
-    case 0:
-        tcg_out_ld8_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+    switch (opc & MO_SSIZE) {
+    case MO_UB:
+        tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
         break;
-    case 0 | 4:
-        tcg_out_ld8s_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+    case MO_SB:
+        tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
        break;
-    case 1:
-        tcg_out_ld16u_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+    case MO_UW:
+        tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
         if (bswap) {
-            tcg_out_bswap16(s, COND_AL, data_reg, data_reg);
+            tcg_out_bswap16(s, COND_AL, datalo, datalo);
         }
         break;
-    case 1 | 4:
+    case MO_SW:
         if (bswap) {
-            tcg_out_ld16u_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
-            tcg_out_bswap16s(s, COND_AL, data_reg, data_reg);
+            tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
+            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
         } else {
-            tcg_out_ld16s_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+            tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
         }
         break;
-    case 2:
+    case MO_UL:
     default:
-        tcg_out_ld32_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+        tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
         if (bswap) {
-            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
+            tcg_out_bswap32(s, COND_AL, datalo, datalo);
         }
         break;
-    case 3:
-        if (bswap) {
-            tcg_out_ld32_rwb(s, COND_AL, data_reg2, TCG_REG_R1, addr_reg);
-            tcg_out_ld32_12(s, COND_AL, data_reg, TCG_REG_R1, 4);
-            tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2);
-            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
-        } else {
-            tcg_out_ld32_rwb(s, COND_AL, data_reg, TCG_REG_R1, addr_reg);
-            tcg_out_ld32_12(s, COND_AL, data_reg2, TCG_REG_R1, 4);
+    case MO_Q:
+        {
+            TCGReg dl = (bswap ? datahi : datalo);
+            TCGReg dh = (bswap ? datalo : datahi);
+
+            if (use_armv6_instructions && (dl & 1) == 0 && dh == dl + 1) {
+                tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
+            } else if (dl != addend) {
+                tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
+                tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
+            } else {
+                tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
+                                addend, addrlo, SHIFT_IMM_LSL(0));
+                tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
+                tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
+            }
+            if (bswap) {
+                tcg_out_bswap32(s, COND_AL, dl, dl);
+                tcg_out_bswap32(s, COND_AL, dh, dh);
+            }
         }
         break;
     }
+}
 
-    add_qemu_ldst_label(s, 1, opc, data_reg, data_reg2, addr_reg, addr_reg2,
-                        mem_index, s->code_ptr, label_ptr);
-#else /* !CONFIG_SOFTMMU */
-    if (GUEST_BASE) {
-        uint32_t offset = GUEST_BASE;
-        int i, rot;
-
-        while (offset) {
-            i = ctz32(offset) & ~1;
-            rot = ((32 - i) << 7) & 0xf00;
+static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
+                                          TCGReg datalo, TCGReg datahi,
+                                          TCGReg addrlo)
+{
+    TCGMemOp bswap = opc & MO_BSWAP;
 
-            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_TMP, addr_reg,
-                            ((offset >> i) & 0xff) | rot);
-            addr_reg = TCG_REG_TMP;
-            offset &= ~(0xff << i);
-        }
-    }
-    switch (opc) {
-    case 0:
-        tcg_out_ld8_12(s, COND_AL, data_reg, addr_reg, 0);
+    switch (opc & MO_SSIZE) {
+    case MO_UB:
+        tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
         break;
-    case 0 | 4:
-        tcg_out_ld8s_8(s, COND_AL, data_reg, addr_reg, 0);
+    case MO_SB:
+        tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
         break;
-    case 1:
-        tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
+    case MO_UW:
+        tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
         if (bswap) {
-            tcg_out_bswap16(s, COND_AL, data_reg, data_reg);
+            tcg_out_bswap16(s, COND_AL, datalo, datalo);
        }
         break;
-    case 1 | 4:
+    case MO_SW:
         if (bswap) {
-            tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
-            tcg_out_bswap16s(s, COND_AL, data_reg, data_reg);
+            tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
+            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
         } else {
-            tcg_out_ld16s_8(s, COND_AL, data_reg, addr_reg, 0);
+            tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
         }
         break;
-    case 2:
+    case MO_UL:
     default:
-        tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, 0);
+        tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
         if (bswap) {
-            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
+            tcg_out_bswap32(s, COND_AL, datalo, datalo);
         }
         break;
-    case 3:
-        /* TODO: use block load -
-         * check that data_reg2 > data_reg or the other way */
-        if (data_reg == addr_reg) {
-            tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
-            tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
-        } else {
-            tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
-            tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
-        }
-        if (bswap) {
-            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
-            tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2);
+    case MO_Q:
+        {
+            TCGReg dl = (bswap ? datahi : datalo);
+            TCGReg dh = (bswap ? datalo : datahi);
+
+            if (use_armv6_instructions && (dl & 1) == 0 && dh == dl + 1) {
+                tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
+            } else if (dl == addrlo) {
+                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
+                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
+            } else {
+                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
+                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
+            }
+            if (bswap) {
+                tcg_out_bswap32(s, COND_AL, dl, dl);
+                tcg_out_bswap32(s, COND_AL, dh, dh);
+            }
         }
         break;
     }
-#endif
 }
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 {
-    TCGReg addr_reg, data_reg, data_reg2;
-    bool bswap;
+    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
+    TCGMemOp opc;
 #ifdef CONFIG_SOFTMMU
-    int mem_index, s_bits;
-    TCGReg addr_reg2;
+    int mem_index;
+    TCGReg addend;
     uint8_t *label_ptr;
 #endif
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 1;
-#else
-    bswap = 0;
-#endif
 
-    data_reg = *args++;
-    data_reg2 = (opc == 3 ? *args++ : 0);
-    addr_reg = *args++;
+    datalo = *args++;
+    datahi = (is64 ? *args++ : 0);
+    addrlo = *args++;
+    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+    opc = *args++;
+
 #ifdef CONFIG_SOFTMMU
-    addr_reg2 = (TARGET_LONG_BITS == 64 ? *args++ : 0);
     mem_index = *args;
-    s_bits = opc & 3;
-
-    tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits,
-                     offsetof(CPUArchState,
-                              tlb_table[mem_index][0].addr_write));
+    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1);
 
+    /* This a conditional BL only to load a pointer within this opcode into LR
+       for the slow path.  We will not be using the value for a tail call.  */
     label_ptr = s->code_ptr;
-    tcg_out_b_noaddr(s, COND_NE);
+    tcg_out_bl_noaddr(s, COND_NE);
 
-    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
-                    offsetof(CPUTLBEntry, addend)
-                    - offsetof(CPUTLBEntry, addr_write));
+    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
 
-    switch (opc) {
-    case 0:
-        tcg_out_st8_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+    add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi,
+                        mem_index, s->code_ptr, label_ptr);
+#else /* !CONFIG_SOFTMMU */
+    if (GUEST_BASE) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
+        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
+    } else {
+        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
+    }
+#endif
+}
+
+static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
+                                         TCGReg datalo, TCGReg datahi,
+                                         TCGReg addrlo, TCGReg addend)
+{
+    TCGMemOp bswap = opc & MO_BSWAP;
+
+    switch (opc & MO_SIZE) {
+    case MO_8:
+        tcg_out_st8_r(s, cond, datalo, addrlo, addend);
         break;
-    case 1:
+    case MO_16:
         if (bswap) {
-            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, data_reg);
-            tcg_out_st16_r(s, COND_AL, TCG_REG_R0, addr_reg, TCG_REG_R1);
+            tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
+            tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
         } else {
-            tcg_out_st16_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+            tcg_out_st16_r(s, cond, datalo, addrlo, addend);
         }
         break;
-    case 2:
+    case MO_32:
     default:
         if (bswap) {
-            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
-            tcg_out_st32_r(s, COND_AL, TCG_REG_R0, addr_reg, TCG_REG_R1);
+            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
+            tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
        } else {
-            tcg_out_st32_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+            tcg_out_st32_r(s, cond, datalo, addrlo, addend);
         }
         break;
-    case 3:
+    case MO_64:
         if (bswap) {
-            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg2);
-            tcg_out_st32_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R1, addr_reg);
-            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
-            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R1, 4);
+            tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
+            tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
+            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
+            tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
+        } else if (use_armv6_instructions
+                   && (datalo & 1) == 0 && datahi == datalo + 1) {
+            tcg_out_strd_r(s, cond, datalo, addrlo, addend);
         } else {
-            tcg_out_st32_rwb(s, COND_AL, data_reg, TCG_REG_R1, addr_reg);
-            tcg_out_st32_12(s, COND_AL, data_reg2, TCG_REG_R1, 4);
+            tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
+            tcg_out_st32_12(s, cond, datahi, addend, 4);
         }
         break;
     }
+}
 
-    add_qemu_ldst_label(s, 0, opc, data_reg, data_reg2, addr_reg, addr_reg2,
-                        mem_index, s->code_ptr, label_ptr);
-#else /* !CONFIG_SOFTMMU */
-    if (GUEST_BASE) {
-        uint32_t offset = GUEST_BASE;
-        int i;
-        int rot;
-
-        while (offset) {
-            i = ctz32(offset) & ~1;
-            rot = ((32 - i) << 7) & 0xf00;
-
-            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R1, addr_reg,
-                            ((offset >> i) & 0xff) | rot);
-            addr_reg = TCG_REG_R1;
-            offset &= ~(0xff << i);
-        }
-    }
-    switch (opc) {
-    case 0:
-        tcg_out_st8_12(s, COND_AL, data_reg, addr_reg, 0);
+static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
+                                          TCGReg datalo, TCGReg datahi,
+                                          TCGReg addrlo)
+{
+    TCGMemOp bswap = opc & MO_BSWAP;
+
+    switch (opc & MO_SIZE) {
+    case MO_8:
+        tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
         break;
-    case 1:
+    case MO_16:
         if (bswap) {
-            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, data_reg);
-            tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addr_reg, 0);
+            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
+            tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
         } else {
-            tcg_out_st16_8(s, COND_AL, data_reg, addr_reg, 0);
+            tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
        }
         break;
-    case 2:
+    case MO_32:
     default:
         if (bswap) {
-            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
-            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 0);
+            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
+            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
         } else {
-            tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0);
+            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
         }
         break;
-    case 3:
-        /* TODO: use block store -
-         * check that data_reg2 > data_reg or the other way */
+    case MO_64:
         if (bswap) {
-            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg2);
-            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 0);
-            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
-            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 4);
+            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
+            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
+            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
+            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
+        } else if (use_armv6_instructions
+                   && (datalo & 1) == 0 && datahi == datalo + 1) {
+            tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
         } else {
-            tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0);
-            tcg_out_st32_12(s, COND_AL, data_reg2, addr_reg, 4);
+            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
+            tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
         }
         break;
     }
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
+{
+    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
+    TCGMemOp opc;
+#ifdef CONFIG_SOFTMMU
+    int mem_index;
+    TCGReg addend;
+    uint8_t *label_ptr;
+#endif
+
+    datalo = *args++;
+    datahi = (is64 ? *args++ : 0);
+    addrlo = *args++;
+    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+    opc = *args++;
+
+#ifdef CONFIG_SOFTMMU
+    mem_index = *args;
+    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0);
+
+    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
+
+    /* The conditional call must come last, as we're going to return here.  */
+    label_ptr = s->code_ptr;
+    tcg_out_bl_noaddr(s, COND_NE);
+
+    add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi,
+                        mem_index, s->code_ptr, label_ptr);
+#else /* !CONFIG_SOFTMMU */
+    if (GUEST_BASE) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
+        tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
+                              datahi, addrlo, TCG_REG_TMP);
+    } else {
+        tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
+    }
 #endif
 }
 
@@ -1859,37 +1914,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                         ARITH_MOV, args[0], 0, 0);
         break;
 
-    case INDEX_op_qemu_ld8u:
+    case INDEX_op_qemu_ld_i32:
         tcg_out_qemu_ld(s, args, 0);
         break;
-    case INDEX_op_qemu_ld8s:
-        tcg_out_qemu_ld(s, args, 0 | 4);
-        break;
-    case INDEX_op_qemu_ld16u:
+    case INDEX_op_qemu_ld_i64:
         tcg_out_qemu_ld(s, args, 1);
         break;
-    case INDEX_op_qemu_ld16s:
-        tcg_out_qemu_ld(s, args, 1 | 4);
-        break;
-    case INDEX_op_qemu_ld32:
-        tcg_out_qemu_ld(s, args, 2);
-        break;
-    case INDEX_op_qemu_ld64:
-        tcg_out_qemu_ld(s, args, 3);
-        break;
-
-    case INDEX_op_qemu_st8:
+    case INDEX_op_qemu_st_i32:
         tcg_out_qemu_st(s, args, 0);
         break;
-    case INDEX_op_qemu_st16:
+    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);
         break;
-    case INDEX_op_qemu_st32:
-        tcg_out_qemu_st(s, args, 2);
-        break;
-    case INDEX_op_qemu_st64:
-        tcg_out_qemu_st(s, args, 3);
-        break;
 
     case INDEX_op_bswap16_i32:
         tcg_out_bswap16(s, COND_AL, args[0], args[1]);
@@ -1925,22 +1961,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     }
 }
 
-#ifdef CONFIG_SOFTMMU
-/* Generate TB finalization at the end of block.  */
-void tcg_out_tb_finalize(TCGContext *s)
-{
-    int i;
-    for (i = 0; i < s->nb_qemu_ldst_labels; i++) {
-        TCGLabelQemuLdst *label = &s->qemu_ldst_labels[i];
-        if (label->is_ld) {
-            tcg_out_qemu_ld_slow_path(s, label);
-        } else {
-            tcg_out_qemu_st_slow_path(s, label);
-        }
-    }
-}
-#endif /* SOFTMMU */
-
 static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
@@ -1988,29 +2008,15 @@ static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },
 
 #if TARGET_LONG_BITS == 32
-    { INDEX_op_qemu_ld8u, { "r", "l" } },
-    { INDEX_op_qemu_ld8s, { "r", "l" } },
-    { INDEX_op_qemu_ld16u, { "r", "l" } },
-    { INDEX_op_qemu_ld16s, { "r", "l" } },
-    { INDEX_op_qemu_ld32, { "r", "l" } },
-    { INDEX_op_qemu_ld64, { "L", "L", "l" } },
-
-    { INDEX_op_qemu_st8, { "s", "s" } },
-    { INDEX_op_qemu_st16, { "s", "s" } },
-    { INDEX_op_qemu_st32, { "s", "s" } },
-    { INDEX_op_qemu_st64, { "s", "s", "s" } },
+    { INDEX_op_qemu_ld_i32, { "r", "l" } },
+    { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
+    { INDEX_op_qemu_st_i32, { "s", "s" } },
+    { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
#else
-    { INDEX_op_qemu_ld8u, { "r", "l", "l" } },
-    { INDEX_op_qemu_ld8s, { "r", "l", "l" } },
-    { INDEX_op_qemu_ld16u, { "r", "l", "l" } },
-    { INDEX_op_qemu_ld16s, { "r", "l", "l" } },
-    { INDEX_op_qemu_ld32, { "r", "l", "l" } },
-    { INDEX_op_qemu_ld64, { "L", "L", "l", "l" } },
-
-    { INDEX_op_qemu_st8, { "s", "s", "s" } },
-    { INDEX_op_qemu_st16, { "s", "s", "s" } },
-    { INDEX_op_qemu_st32, { "s", "s", "s" } },
-    { INDEX_op_qemu_st64, { "s", "s", "s", "s" } },
+    { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
+    { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } },
+    { INDEX_op_qemu_st_i32, { "s", "s", "s" } },
+    { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } },
 #endif
 
     { INDEX_op_bswap16_i32, { "r", "r" } },
@@ -2065,13 +2071,13 @@ static void tcg_target_init(TCGContext *s)
 }
 
 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
-                              TCGReg arg1, tcg_target_long arg2)
+                              TCGReg arg1, intptr_t arg2)
 {
     tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
 }
 
 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
-                              TCGReg arg1, tcg_target_long arg2)
+                              TCGReg arg1, intptr_t arg2)
 {
     tcg_out_st32(s, COND_AL, arg, arg1, arg2);
 }
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 5cd9d6a679..3746b6e298 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -80,9 +80,13 @@ extern bool use_idiv_instructions;
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_muls2_i32        1
+#define TCG_TARGET_HAS_muluh_i32        0
+#define TCG_TARGET_HAS_mulsh_i32        0
 #define TCG_TARGET_HAS_div_i32          use_idiv_instructions
 #define TCG_TARGET_HAS_rem_i32          0
 
+#define TCG_TARGET_HAS_new_ldst         1
+
 extern bool tcg_target_deposit_valid(int ofs, int len);
 #define TCG_TARGET_deposit_i32_valid  tcg_target_deposit_valid
 
@@ -90,15 +94,14 @@ enum {
     TCG_AREG0 = TCG_REG_R6,
 };
 
-static inline void flush_icache_range(tcg_target_ulong start,
-                                      tcg_target_ulong stop)
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
 #if QEMU_GNUC_PREREQ(4, 1)
     __builtin___clear_cache((char *) start, (char *) stop);
 #else
-    register unsigned long _beg __asm ("a1") = start;
-    register unsigned long _end __asm ("a2") = stop;
-    register unsigned long _flg __asm ("a3") = 0;
+    register uintptr_t _beg __asm("a1") = start;
+    register uintptr_t _end __asm("a2") = stop;
+    register uintptr_t _flg __asm("a3") = 0;
     __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg));
 #endif
 }
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
deleted file mode 100644
index 68f77ba4dd..0000000000
--- a/tcg/hppa/tcg-target.c
+++ /dev/null
@@ -1,1829 +0,0 @@
-/*
- * Tiny Code Generator for QEMU
- *
- * Copyright (c) 2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef NDEBUG
-static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
-    "%r0", "%r1", "%rp", "%r3", "%r4", "%r5", "%r6", "%r7",
-    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
-    "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
-    "%r24", "%r25", "%r26", "%dp", "%ret0", "%ret1", "%sp", "%r31",
-};
-#endif
-
-/* This is an 8 byte temp slot in the stack frame.  */
-#define STACK_TEMP_OFS -16
-
-#ifdef CONFIG_USE_GUEST_BASE
-#define TCG_GUEST_BASE_REG TCG_REG_R16
-#else
-#define TCG_GUEST_BASE_REG TCG_REG_R0
-#endif
-
-static const int tcg_target_reg_alloc_order[] = {
-    TCG_REG_R4,
-    TCG_REG_R5,
-    TCG_REG_R6,
-    TCG_REG_R7,
-    TCG_REG_R8,
-    TCG_REG_R9,
-    TCG_REG_R10,
-    TCG_REG_R11,
-    TCG_REG_R12,
-    TCG_REG_R13,
-
-    TCG_REG_R17,
-    TCG_REG_R14,
-    TCG_REG_R15,
-    TCG_REG_R16,
-
-    TCG_REG_R26,
-    TCG_REG_R25,
-    TCG_REG_R24,
-    TCG_REG_R23,
-
-    TCG_REG_RET0,
-    TCG_REG_RET1,
-};
-
-static const int tcg_target_call_iarg_regs[4] = {
-    TCG_REG_R26,
-    TCG_REG_R25,
-    TCG_REG_R24,
-    TCG_REG_R23,
-};
-
-static const int tcg_target_call_oarg_regs[2] = {
-    TCG_REG_RET0,
-    TCG_REG_RET1,
-};
-
-/* True iff val fits a signed field of width BITS.  */
-static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
-{
-    return (val << ((sizeof(tcg_target_long) * 8 - bits))
-            >> (sizeof(tcg_target_long) * 8 - bits)) == val;
-}
-
-/* True iff depi can be used to compute (reg | MASK).
-   Accept a bit pattern like:
-      0....01....1
-      1....10....0
-      0..01..10..0
-   Copied from gcc sources.  */
-static inline int or_mask_p(tcg_target_ulong mask)
-{
-    if (mask == 0 || mask == -1) {
-        return 0;
-    }
-    mask += mask & -mask;
-    return (mask & (mask - 1)) == 0;
-}
-
-/* True iff depi or extru can be used to compute (reg & mask).
-   Accept a bit pattern like these:
-      0....01....1
-      1....10....0
-      1..10..01..1
-   Copied from gcc sources.  */
-static inline int and_mask_p(tcg_target_ulong mask)
-{
-    return or_mask_p(~mask);
-}
-
-static int low_sign_ext(int val, int len)
-{
-    return (((val << 1) & ~(-1u << len)) | ((val >> (len - 1)) & 1));
-}
-
-static int reassemble_12(int as12)
-{
-    return (((as12 & 0x800) >> 11) |
-            ((as12 & 0x400) >> 8) |
-            ((as12 & 0x3ff) << 3));
-}
-
-static int reassemble_17(int as17)
-{
-    return (((as17 & 0x10000) >> 16) |
-            ((as17 & 0x0f800) << 5) |
-            ((as17 & 0x00400) >> 8) |
-            ((as17 & 0x003ff) << 3));
-}
-
-static int reassemble_21(int as21)
-{
-    return (((as21 & 0x100000) >> 20) |
-            ((as21 & 0x0ffe00) >> 8) |
-            ((as21 & 0x000180) << 7) |
-            ((as21 & 0x00007c) << 14) |
-            ((as21 & 0x000003) << 12));
-}
-
-/* ??? Bizzarely, there is no PCREL12F relocation type.  I guess all
-   such relocations are simply fully handled by the assembler.  */
-#define R_PARISC_PCREL12F R_PARISC_NONE
-
-static void patch_reloc(uint8_t *code_ptr, int type,
-                        tcg_target_long value, tcg_target_long addend)
-{
-    uint32_t *insn_ptr = (uint32_t *)code_ptr;
-    uint32_t insn = *insn_ptr;
-    tcg_target_long pcrel;
-
-    value += addend;
-    pcrel = (value - ((tcg_target_long)code_ptr + 8)) >> 2;
-
-    switch (type) {
-    case R_PARISC_PCREL12F:
-        assert(check_fit_tl(pcrel, 12));
-        /* ??? We assume all patches are forward.  See tcg_out_brcond
-           re setting the NUL bit on the branch and eliding the nop.  */
-        assert(pcrel >= 0);
-        insn &= ~0x1ffdu;
-        insn |= reassemble_12(pcrel);
-        break;
-    case R_PARISC_PCREL17F:
-        assert(check_fit_tl(pcrel, 17));
-        insn &= ~0x1f1ffdu;
-        insn |= reassemble_17(pcrel);
-        break;
-    default:
-        tcg_abort();
-    }
-
-    *insn_ptr = insn;
-}
-
-/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
-{
-    const char *ct_str;
-
-    ct_str = *pct_str;
-    switch (ct_str[0]) {
-    case 'r':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
-        break;
-    case 'L': /* qemu_ld/st constraint */
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R26);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R25);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R24);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R23);
-        break;
-    case 'Z':
-        ct->ct |= TCG_CT_CONST_0;
-        break;
-    case 'I':
-        ct->ct |= TCG_CT_CONST_S11;
-        break;
-    case 'J':
-        ct->ct |= TCG_CT_CONST_S5;
-        break;
-    case 'K':
-        ct->ct |= TCG_CT_CONST_MS11;
-        break;
-    case 'M':
-        ct->ct |= TCG_CT_CONST_AND;
-        break;
-    case 'O':
-        ct->ct |= TCG_CT_CONST_OR;
-        break;
-    default:
-        return -1;
-    }
-    ct_str++;
-    *pct_str = ct_str;
-    return 0;
-}
-
-/* test if a constant matches the constraint */
-static int tcg_target_const_match(tcg_target_long val,
-                                  const TCGArgConstraint *arg_ct)
-{
-    int ct = arg_ct->ct;
-    if (ct & TCG_CT_CONST) {
-        return 1;
-    } else if (ct & TCG_CT_CONST_0) {
-        return val == 0;
-    } else if (ct & TCG_CT_CONST_S5) {
-        return check_fit_tl(val, 5);
-    } else if (ct & TCG_CT_CONST_S11) {
-        return check_fit_tl(val, 11);
-    } else if (ct & TCG_CT_CONST_MS11) {
-        return check_fit_tl(-val, 11);
-    } else if (ct & TCG_CT_CONST_AND) {
-        return and_mask_p(val);
-    } else if (ct & TCG_CT_CONST_OR) {
-        return or_mask_p(val);
-    }
-    return 0;
-}
-
-#define INSN_OP(x)       ((x) << 26)
-#define INSN_EXT3BR(x)   ((x) << 13)
-#define INSN_EXT3SH(x)   ((x) << 10)
-#define INSN_EXT4(x)     ((x) << 6)
-#define INSN_EXT5(x)     (x)
-#define INSN_EXT6(x)     ((x) << 6)
-#define INSN_EXT7(x)     ((x) << 6)
-#define INSN_EXT8A(x)    ((x) << 6)
-#define INSN_EXT8B(x)    ((x) << 5)
-#define INSN_T(x)        (x)
-#define INSN_R1(x)       ((x) << 16)
-#define INSN_R2(x)       ((x) << 21)
-#define INSN_DEP_LEN(x)  (32 - (x))
-#define INSN_SHDEP_CP(x) ((31 - (x)) << 5)
-#define INSN_SHDEP_P(x)  ((x) << 5)
-#define INSN_COND(x)     ((x) << 13)
-#define INSN_IM11(x)     low_sign_ext(x, 11)
-#define INSN_IM14(x)     low_sign_ext(x, 14)
-#define INSN_IM5(x)      (low_sign_ext(x, 5) << 16)
-
-#define COND_NEVER   0
-#define COND_EQ      1
-#define COND_LT      2
-#define COND_LE      3
-#define COND_LTU     4
-#define COND_LEU     5
-#define COND_SV      6
-#define COND_OD      7
-#define COND_FALSE   8
-
-#define INSN_ADD	(INSN_OP(0x02) | INSN_EXT6(0x18))
-#define INSN_ADDC	(INSN_OP(0x02) | INSN_EXT6(0x1c))
-#define INSN_ADDI	(INSN_OP(0x2d))
-#define INSN_ADDIL	(INSN_OP(0x0a))
-#define INSN_ADDL	(INSN_OP(0x02) | INSN_EXT6(0x28))
-#define INSN_AND	(INSN_OP(0x02) | INSN_EXT6(0x08))
-#define INSN_ANDCM	(INSN_OP(0x02) | INSN_EXT6(0x00))
-#define INSN_COMCLR	(INSN_OP(0x02) | INSN_EXT6(0x22))
-#define INSN_COMICLR	(INSN_OP(0x24))
-#define INSN_DEP	(INSN_OP(0x35) | INSN_EXT3SH(3))
-#define INSN_DEPI	(INSN_OP(0x35) | INSN_EXT3SH(7))
-#define INSN_EXTRS	(INSN_OP(0x34) | INSN_EXT3SH(7))
-#define INSN_EXTRU	(INSN_OP(0x34) | INSN_EXT3SH(6))
-#define INSN_LDIL	(INSN_OP(0x08))
-#define INSN_LDO	(INSN_OP(0x0d))
-#define INSN_MTCTL	(INSN_OP(0x00) | INSN_EXT8B(0xc2))
-#define INSN_OR		(INSN_OP(0x02) | INSN_EXT6(0x09))
-#define INSN_SHD	(INSN_OP(0x34) | INSN_EXT3SH(2))
-#define INSN_SUB	(INSN_OP(0x02) | INSN_EXT6(0x10))
-#define INSN_SUBB	(INSN_OP(0x02) | INSN_EXT6(0x14))
-#define INSN_SUBI	(INSN_OP(0x25))
-#define INSN_VEXTRS	(INSN_OP(0x34) | INSN_EXT3SH(5))
-#define INSN_VEXTRU	(INSN_OP(0x34) | INSN_EXT3SH(4))
-#define INSN_VSHD	(INSN_OP(0x34) | INSN_EXT3SH(0))
-#define INSN_XOR	(INSN_OP(0x02) | INSN_EXT6(0x0a))
-#define INSN_ZDEP	(INSN_OP(0x35) | INSN_EXT3SH(2))
-#define INSN_ZVDEP	(INSN_OP(0x35) | INSN_EXT3SH(0))
-
-#define INSN_BL         (INSN_OP(0x3a) | INSN_EXT3BR(0))
-#define INSN_BL_N       (INSN_OP(0x3a) | INSN_EXT3BR(0) | 2)
-#define INSN_BLR        (INSN_OP(0x3a) | INSN_EXT3BR(2))
-#define INSN_BV         (INSN_OP(0x3a) | INSN_EXT3BR(6))
-#define INSN_BV_N       (INSN_OP(0x3a) | INSN_EXT3BR(6) | 2)
-#define INSN_BLE_SR4    (INSN_OP(0x39) | (1 << 13))
-
-#define INSN_LDB        (INSN_OP(0x10))
-#define INSN_LDH        (INSN_OP(0x11))
-#define INSN_LDW        (INSN_OP(0x12))
-#define INSN_LDWM       (INSN_OP(0x13))
-#define INSN_FLDDS      (INSN_OP(0x0b) | INSN_EXT4(0) | (1 << 12))
-
-#define INSN_LDBX	(INSN_OP(0x03) | INSN_EXT4(0))
-#define INSN_LDHX	(INSN_OP(0x03) | INSN_EXT4(1))
-#define INSN_LDWX       (INSN_OP(0x03) | INSN_EXT4(2))
-
-#define INSN_STB        (INSN_OP(0x18))
-#define INSN_STH        (INSN_OP(0x19))
-#define INSN_STW        (INSN_OP(0x1a))
-#define INSN_STWM       (INSN_OP(0x1b))
-#define INSN_FSTDS      (INSN_OP(0x0b) | INSN_EXT4(8) | (1 << 12))
-
-#define INSN_COMBT      (INSN_OP(0x20))
-#define INSN_COMBF      (INSN_OP(0x22))
-#define INSN_COMIBT     (INSN_OP(0x21))
-#define INSN_COMIBF     (INSN_OP(0x23))
-
-/* supplied by libgcc */
-extern void *__canonicalize_funcptr_for_compare(const void *);
-
-static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
-{
-    /* PA1.1 defines COPY as OR r,0,t; PA2.0 defines COPY as LDO 0(r),t
-       but hppa-dis.c is unaware of this definition */
-    if (ret != arg) {
-        tcg_out32(s, INSN_OR | INSN_T(ret) | INSN_R1(arg)
-                  | INSN_R2(TCG_REG_R0));
-    }
-}
-
-static void tcg_out_movi(TCGContext *s, TCGType type,
-                         TCGReg ret, tcg_target_long arg)
-{
-    if (check_fit_tl(arg, 14)) {
-        tcg_out32(s, INSN_LDO | INSN_R1(ret)
-                  | INSN_R2(TCG_REG_R0) | INSN_IM14(arg));
-    } else {
-        uint32_t hi, lo;
-        hi = arg >> 11;
-        lo = arg & 0x7ff;
-
-        tcg_out32(s, INSN_LDIL | INSN_R2(ret) | reassemble_21(hi));
-        if (lo) {
-            tcg_out32(s, INSN_LDO | INSN_R1(ret)
-                      | INSN_R2(ret) | INSN_IM14(lo));
-        }
-    }
-}
-
-static void tcg_out_ldst(TCGContext *s, int ret, int addr,
-                         tcg_target_long offset, int op)
-{
-    if (!check_fit_tl(offset, 14)) {
-        uint32_t hi, lo, op;
-
-        hi = offset >> 11;
-        lo = offset & 0x7ff;
-
-        if (addr == TCG_REG_R0) {
-            op = INSN_LDIL | INSN_R2(TCG_REG_R1);
-        } else {
-            op = INSN_ADDIL | INSN_R2(addr);
-        }
-        tcg_out32(s, op | reassemble_21(hi));
-
-        addr = TCG_REG_R1;
-        offset = lo;
-    }
-
-    if (ret != addr || offset != 0 || op != INSN_LDO) {
-        tcg_out32(s, op | INSN_R1(ret) | INSN_R2(addr) | INSN_IM14(offset));
-    }
-}
-
-/* This function is required by tcg.c.  */
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
-                              TCGReg arg1, tcg_target_long arg2)
-{
-    tcg_out_ldst(s, ret, arg1, arg2, INSN_LDW);
-}
-
-/* This function is required by tcg.c.  */
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg ret,
-                              TCGReg arg1, tcg_target_long arg2)
-{
-    tcg_out_ldst(s, ret, arg1, arg2, INSN_STW);
-}
-
-static void tcg_out_ldst_index(TCGContext *s, int data,
-                               int base, int index, int op)
-{
-    tcg_out32(s, op | INSN_T(data) | INSN_R1(index) | INSN_R2(base));
-}
-
-static inline void tcg_out_addi2(TCGContext *s, int ret, int arg1,
-                                 tcg_target_long val)
-{
-    tcg_out_ldst(s, ret, arg1, val, INSN_LDO);
-}
-
-/* This function is required by tcg.c.  */
-static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
-{
-    tcg_out_addi2(s, reg, reg, val);
-}
-
-static inline void tcg_out_arith(TCGContext *s, int t, int r1, int r2, int op)
-{
-    tcg_out32(s, op | INSN_T(t) | INSN_R1(r1) | INSN_R2(r2));
-}
-
-static inline void tcg_out_arithi(TCGContext *s, int t, int r1,
-                                  tcg_target_long val, int op)
-{
-    assert(check_fit_tl(val, 11));
-    tcg_out32(s, op | INSN_R1(t) | INSN_R2(r1) | INSN_IM11(val));
-}
-
-static inline void tcg_out_nop(TCGContext *s)
-{
-    tcg_out_arith(s, TCG_REG_R0, TCG_REG_R0, TCG_REG_R0, INSN_OR);
-}
-
-static inline void tcg_out_mtctl_sar(TCGContext *s, int arg)
-{
-    tcg_out32(s, INSN_MTCTL | INSN_R2(11) | INSN_R1(arg));
-}
-
-/* Extract LEN bits at position OFS from ARG and place in RET.
-   Note that here the bit ordering is reversed from the PA-RISC
-   standard, such that the right-most bit is 0.  */
-static inline void tcg_out_extr(TCGContext *s, int ret, int arg,
-                                unsigned ofs, unsigned len, int sign)
-{
-    assert(ofs < 32 && len <= 32 - ofs);
-    tcg_out32(s, (sign ? INSN_EXTRS : INSN_EXTRU)
-              | INSN_R1(ret) | INSN_R2(arg)
-              | INSN_SHDEP_P(31 - ofs) | INSN_DEP_LEN(len));
-}
-
-/* Likewise with OFS interpreted little-endian.  */
-static inline void tcg_out_dep(TCGContext *s, int ret, int arg,
-                               unsigned ofs, unsigned len)
-{
-    assert(ofs < 32 && len <= 32 - ofs);
-    tcg_out32(s, INSN_DEP | INSN_R2(ret) | INSN_R1(arg)
-              | INSN_SHDEP_CP(31 - ofs) | INSN_DEP_LEN(len));
-}
-
-static inline void tcg_out_depi(TCGContext *s, int ret, int arg,
-                                unsigned ofs, unsigned len)
-{
-    assert(ofs < 32 && len <= 32 - ofs);
-    tcg_out32(s, INSN_DEPI | INSN_R2(ret) | INSN_IM5(arg)
-              | INSN_SHDEP_CP(31 - ofs) | INSN_DEP_LEN(len));
-}
-
-static inline void tcg_out_shd(TCGContext *s, int ret, int hi, int lo,
-                               unsigned count)
-{
-    assert(count < 32);
-    tcg_out32(s, INSN_SHD | INSN_R1(hi) | INSN_R2(lo) | INSN_T(ret)
-              | INSN_SHDEP_CP(count));
-}
-
-static void tcg_out_vshd(TCGContext *s, int ret, int hi, int lo, int creg)
-{
-    tcg_out_mtctl_sar(s, creg);
-    tcg_out32(s, INSN_VSHD | INSN_T(ret) | INSN_R1(hi) | INSN_R2(lo));
-}
-
-static void tcg_out_ori(TCGContext *s, int ret, int arg, tcg_target_ulong m)
-{
-    int bs0, bs1;
-
-    /* Note that the argument is constrained to match or_mask_p.  */
-    for (bs0 = 0; bs0 < 32; bs0++) {
-        if ((m & (1u << bs0)) != 0) {
-            break;
-        }
-    }
-    for (bs1 = bs0; bs1 < 32; bs1++) {
-        if ((m & (1u << bs1)) == 0) {
-            break;
-        }
-    }
-    assert(bs1 == 32 || (1ul << bs1) > m);
-
-    tcg_out_mov(s, TCG_TYPE_I32, ret, arg);
-    tcg_out_depi(s, ret, -1, bs0, bs1 - bs0);
-}
-
-static void tcg_out_andi(TCGContext *s, int ret, int arg, tcg_target_ulong m)
-{
-    int ls0, ls1, ms0;
-
-    /* Note that the argument is constrained to match and_mask_p.  */
-    for (ls0 = 0; ls0 < 32; ls0++) {
-        if ((m & (1u << ls0)) == 0) {
-            break;
-        }
-    }
-    for (ls1 = ls0; ls1 < 32; ls1++) {
-        if ((m & (1u << ls1)) != 0) {
-            break;
-        }
-    }
-    for (ms0 = ls1; ms0 < 32; ms0++) {
-        if ((m & (1u << ms0)) == 0) {
-            break;
-        }
-    }
-    assert (ms0 == 32);
-
-    if (ls1 == 32) {
-        tcg_out_extr(s, ret, arg, 0, ls0, 0);
-    } else {
-        tcg_out_mov(s, TCG_TYPE_I32, ret, arg);
-        tcg_out_depi(s, ret, 0, ls0, ls1 - ls0);
-    }
-}
-
-static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg)
-{
-    tcg_out_extr(s, ret, arg, 0, 8, 1);
-}
-
-static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg)
-{
-    tcg_out_extr(s, ret, arg, 0, 16, 1);
-}
-
-static void tcg_out_shli(TCGContext *s, int ret, int arg, int count)
-{
-    count &= 31;
-    tcg_out32(s, INSN_ZDEP | INSN_R2(ret) | INSN_R1(arg)
-              | INSN_SHDEP_CP(31 - count) | INSN_DEP_LEN(32 - count));
-}
-
-static void tcg_out_shl(TCGContext *s, int ret, int arg, int creg)
-{
-    tcg_out_arithi(s, TCG_REG_R20, creg, 31, INSN_SUBI);
-    tcg_out_mtctl_sar(s, TCG_REG_R20);
-    tcg_out32(s, INSN_ZVDEP | INSN_R2(ret) | INSN_R1(arg) | INSN_DEP_LEN(32));
-}
-
-static void tcg_out_shri(TCGContext *s, int ret, int arg, int count)
-{
-    count &= 31;
-    tcg_out_extr(s, ret, arg, count, 32 - count, 0);
-}
-
-static void tcg_out_shr(TCGContext *s, int ret, int arg, int creg)
-{
-    tcg_out_vshd(s, ret, TCG_REG_R0, arg, creg);
-}
-
-static void tcg_out_sari(TCGContext *s, int ret, int arg, int count)
-{
-    count &= 31;
-    tcg_out_extr(s, ret, arg, count, 32 - count, 1);
-}
-
-static void tcg_out_sar(TCGContext *s, int ret, int arg, int creg)
-{
-    tcg_out_arithi(s, TCG_REG_R20, creg, 31, INSN_SUBI);
-    tcg_out_mtctl_sar(s, TCG_REG_R20);
-    tcg_out32(s, INSN_VEXTRS | INSN_R1(ret) | INSN_R2(arg) | INSN_DEP_LEN(32));
-}
-
-static void tcg_out_rotli(TCGContext *s, int ret, int arg, int count)
-{
-    count &= 31;
-    tcg_out_shd(s, ret, arg, arg, 32 - count);
-}
-
-static void tcg_out_rotl(TCGContext *s, int ret, int arg, int creg)
-{
-    tcg_out_arithi(s, TCG_REG_R20, creg, 32, INSN_SUBI);
-    tcg_out_vshd(s, ret, arg, arg, TCG_REG_R20);
-}
-
-static void tcg_out_rotri(TCGContext *s, int ret, int arg, int count)
-{
-    count &= 31;
-    tcg_out_shd(s, ret, arg, arg, count);
-}
-
-static void tcg_out_rotr(TCGContext *s, int ret, int arg, int creg)
-{
-    tcg_out_vshd(s, ret, arg, arg, creg);
-}
-
-static void tcg_out_bswap16(TCGContext *s, int ret, int arg, int sign)
-{
-    if (ret != arg) {
-        tcg_out_mov(s, TCG_TYPE_I32, ret, arg); /* arg =  xxAB */
-    }
-    tcg_out_dep(s, ret, ret, 16, 8);            /* ret =  xBAB */
-    tcg_out_extr(s, ret, ret, 8, 16, sign);     /* ret =  ..BA */
-}
-
-static void tcg_out_bswap32(TCGContext *s, int ret, int arg, int temp)
-{
-                                         /* arg =  ABCD */
-    tcg_out_rotri(s, temp, arg, 16);     /* temp = CDAB */
-    tcg_out_dep(s, temp, temp, 16, 8);   /* temp = CBAB */
-    tcg_out_shd(s, ret, arg, temp, 8);   /* ret =  DCBA */
-}
-
-static void tcg_out_call(TCGContext *s, const void *func)
-{
-    tcg_target_long val, hi, lo, disp;
-
-    val = (uint32_t)__canonicalize_funcptr_for_compare(func);
-    disp = (val - ((tcg_target_long)s->code_ptr + 8)) >> 2;
-
-    if (check_fit_tl(disp, 17)) {
-        tcg_out32(s, INSN_BL_N | INSN_R2(TCG_REG_RP) | reassemble_17(disp));
-    } else {
-        hi = val >> 11;
-        lo = val & 0x7ff;
-
-        tcg_out32(s, INSN_LDIL | INSN_R2(TCG_REG_R20) | reassemble_21(hi));
-        tcg_out32(s, INSN_BLE_SR4 | INSN_R2(TCG_REG_R20)
-                  | reassemble_17(lo >> 2));
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_RP, TCG_REG_R31);
-    }
-}
-
-static void tcg_out_xmpyu(TCGContext *s, int retl, int reth,
-                          int arg1, int arg2)
-{
-    /* Store both words into the stack for copy to the FPU.  */
-    tcg_out_ldst(s, arg1, TCG_REG_CALL_STACK, STACK_TEMP_OFS, INSN_STW);
-    tcg_out_ldst(s, arg2, TCG_REG_CALL_STACK, STACK_TEMP_OFS + 4, INSN_STW);
-
-    /* Load both words into the FPU at the same time.  We get away
-       with this because we can address the left and right half of the
-       FPU registers individually once loaded.  */
-    /* fldds stack_temp(sp),fr22 */
-    tcg_out32(s, INSN_FLDDS | INSN_R2(TCG_REG_CALL_STACK)
-              | INSN_IM5(STACK_TEMP_OFS) | INSN_T(22));
-
-    /* xmpyu fr22r,fr22,fr22 */
-    tcg_out32(s, 0x3ad64796);
-
-    /* Store the 64-bit result back into the stack.  */
-    /* fstds stack_temp(sp),fr22 */
-    tcg_out32(s, INSN_FSTDS | INSN_R2(TCG_REG_CALL_STACK)
-              | INSN_IM5(STACK_TEMP_OFS) | INSN_T(22));
-
-    /* Load the pieces of the result that the caller requested.  */
-    if (reth) {
-        tcg_out_ldst(s, reth, TCG_REG_CALL_STACK, STACK_TEMP_OFS, INSN_LDW);
-    }
-    if (retl) {
-        tcg_out_ldst(s, retl, TCG_REG_CALL_STACK, STACK_TEMP_OFS + 4,
-                     INSN_LDW);
-    }
-}
-
-static void tcg_out_add2(TCGContext *s, int destl, int desth,
-                         int al, int ah, int bl, int bh, int blconst)
-{
-    int tmp = (destl == ah || destl == bh ? TCG_REG_R20 : destl);
-
-    if (blconst) {
-        tcg_out_arithi(s, tmp, al, bl, INSN_ADDI);
-    } else {
-        tcg_out_arith(s, tmp, al, bl, INSN_ADD);
-    }
-    tcg_out_arith(s, desth, ah, bh, INSN_ADDC);
-
-    tcg_out_mov(s, TCG_TYPE_I32, destl, tmp);
-}
-
-static void tcg_out_sub2(TCGContext *s, int destl, int desth, int al, int ah,
-                         int bl, int bh, int alconst, int blconst)
-{
-    int tmp = (destl == ah || destl == bh ? TCG_REG_R20 : destl);
-
-    if (alconst) {
-        if (blconst) {
-            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, bl);
-            bl = TCG_REG_R20;
-        }
-        tcg_out_arithi(s, tmp, bl, al, INSN_SUBI);
-    } else if (blconst) {
-        tcg_out_arithi(s, tmp, al, -bl, INSN_ADDI);
-    } else {
-        tcg_out_arith(s, tmp, al, bl, INSN_SUB);
-    }
-    tcg_out_arith(s, desth, ah, bh, INSN_SUBB);
-
-    tcg_out_mov(s, TCG_TYPE_I32, destl, tmp);
-}
-
-static void tcg_out_branch(TCGContext *s, int label_index, int nul)
-{
-    TCGLabel *l = &s->labels[label_index];
-    uint32_t op = nul ? INSN_BL_N : INSN_BL;
-
-    if (l->has_value) {
-        tcg_target_long val = l->u.value;
-
-        val -= (tcg_target_long)s->code_ptr + 8;
-        val >>= 2;
-        assert(check_fit_tl(val, 17));
-
-        tcg_out32(s, op | reassemble_17(val));
-    } else {
-        /* We need to keep the offset unchanged for retranslation.  */
-        uint32_t old_insn = *(uint32_t *)s->code_ptr;
-
-        tcg_out_reloc(s, s->code_ptr, R_PARISC_PCREL17F, label_index, 0);
-        tcg_out32(s, op | (old_insn & 0x1f1ffdu));
-    }
-}
-
-static const uint8_t tcg_cond_to_cmp_cond[] =
-{
-    [TCG_COND_EQ] = COND_EQ,
-    [TCG_COND_NE] = COND_EQ | COND_FALSE,
-    [TCG_COND_LT] = COND_LT,
-    [TCG_COND_GE] = COND_LT | COND_FALSE,
-    [TCG_COND_LE] = COND_LE,
-    [TCG_COND_GT] = COND_LE | COND_FALSE,
-    [TCG_COND_LTU] = COND_LTU,
-    [TCG_COND_GEU] = COND_LTU | COND_FALSE,
-    [TCG_COND_LEU] = COND_LEU,
-    [TCG_COND_GTU] = COND_LEU | COND_FALSE,
-};
-
-static void tcg_out_brcond(TCGContext *s, int cond, TCGArg c1,
-                           TCGArg c2, int c2const, int label_index)
-{
-    TCGLabel *l = &s->labels[label_index];
-    int op, pacond;
-
-    /* Note that COMIB operates as if the immediate is the first
-       operand.  We model brcond with the immediate in the second
-       to better match what targets are likely to give us.  For
-       consistency, model COMB with reversed operands as well.  */
-    pacond = tcg_cond_to_cmp_cond[tcg_swap_cond(cond)];
-
-    if (c2const) {
-        op = (pacond & COND_FALSE ?
INSN_COMIBF : INSN_COMIBT); - op |= INSN_IM5(c2); - } else { - op = (pacond & COND_FALSE ? INSN_COMBF : INSN_COMBT); - op |= INSN_R1(c2); - } - op |= INSN_R2(c1); - op |= INSN_COND(pacond & 7); - - if (l->has_value) { - tcg_target_long val = l->u.value; - - val -= (tcg_target_long)s->code_ptr + 8; - val >>= 2; - assert(check_fit_tl(val, 12)); - - /* ??? Assume that all branches to defined labels are backward. - Which means that if the nul bit is set, the delay slot is - executed if the branch is taken, and not executed in fallthru. */ - tcg_out32(s, op | reassemble_12(val)); - tcg_out_nop(s); - } else { - /* We need to keep the offset unchanged for retranslation. */ - uint32_t old_insn = *(uint32_t *)s->code_ptr; - - tcg_out_reloc(s, s->code_ptr, R_PARISC_PCREL12F, label_index, 0); - /* ??? Assume that all branches to undefined labels are forward. - Which means that if the nul bit is set, the delay slot is - not executed if the branch is taken, which is what we want. */ - tcg_out32(s, op | 2 | (old_insn & 0x1ffdu)); - } -} - -static void tcg_out_comclr(TCGContext *s, int cond, TCGArg ret, - TCGArg c1, TCGArg c2, int c2const) -{ - int op, pacond; - - /* Note that COMICLR operates as if the immediate is the first - operand. We model setcond with the immediate in the second - to better match what targets are likely to give us. For - consistency, model COMCLR with reversed operands as well. */ - pacond = tcg_cond_to_cmp_cond[tcg_swap_cond(cond)]; - - if (c2const) { - op = INSN_COMICLR | INSN_R2(c1) | INSN_R1(ret) | INSN_IM11(c2); - } else { - op = INSN_COMCLR | INSN_R2(c1) | INSN_R1(c2) | INSN_T(ret); - } - op |= INSN_COND(pacond & 7); - op |= pacond & COND_FALSE ? 1 << 12 : 0; - - tcg_out32(s, op); -} - -static void tcg_out_brcond2(TCGContext *s, int cond, TCGArg al, TCGArg ah, - TCGArg bl, int blconst, TCGArg bh, int bhconst, - int label_index) -{ - switch (cond) { - case TCG_COND_EQ: - tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, al, bl, blconst); - tcg_out_brcond(s, TCG_COND_EQ, ah, bh, bhconst, label_index); - break; - case TCG_COND_NE: - tcg_out_brcond(s, TCG_COND_NE, al, bl, blconst, label_index); - tcg_out_brcond(s, TCG_COND_NE, ah, bh, bhconst, label_index); - break; - default: - tcg_out_brcond(s, tcg_high_cond(cond), ah, bh, bhconst, label_index); - tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, ah, bh, bhconst); - tcg_out_brcond(s, tcg_unsigned_cond(cond), - al, bl, blconst, label_index); - break; - } -} - -static void tcg_out_setcond(TCGContext *s, int cond, TCGArg ret, - TCGArg c1, TCGArg c2, int c2const) -{ - tcg_out_comclr(s, tcg_invert_cond(cond), ret, c1, c2, c2const); - tcg_out_movi(s, TCG_TYPE_I32, ret, 1); -} - -static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret, - TCGArg al, TCGArg ah, TCGArg bl, int blconst, - TCGArg bh, int bhconst) -{ - int scratch = TCG_REG_R20; - - /* Note that the low parts are fully consumed before scratch is set. */ - if (ret != ah && (bhconst || ret != bh)) { - scratch = ret; - } - - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_NE: - tcg_out_setcond(s, cond, scratch, al, bl, blconst); - tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst); - tcg_out_movi(s, TCG_TYPE_I32, scratch, cond == TCG_COND_NE); - break; - - case TCG_COND_GE: - case TCG_COND_GEU: - case TCG_COND_LT: - case TCG_COND_LTU: - /* Optimize compares with low part zero. 
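With bl known to be zero, the double-word identity A < B <=> ah < bh || (ah == bh && al <u bl) loses its second clause, since al <u 0 can never hold, so a single setcond on the high parts already gives the full answer; the GE/GEU/LT/LTU forms all reduce the same way.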
*/ - if (bl == 0) { - tcg_out_setcond(s, cond, ret, ah, bh, bhconst); - return; - } - /* FALLTHRU */ - - case TCG_COND_LE: - case TCG_COND_LEU: - case TCG_COND_GT: - case TCG_COND_GTU: - /* <= : ah < bh | (ah == bh && al <= bl) */ - tcg_out_setcond(s, tcg_unsigned_cond(cond), scratch, al, bl, blconst); - tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst); - tcg_out_movi(s, TCG_TYPE_I32, scratch, 0); - tcg_out_comclr(s, tcg_invert_cond(tcg_high_cond(cond)), - TCG_REG_R0, ah, bh, bhconst); - tcg_out_movi(s, TCG_TYPE_I32, scratch, 1); - break; - - default: - tcg_abort(); - } - - tcg_out_mov(s, TCG_TYPE_I32, ret, scratch); -} - -static void tcg_out_movcond(TCGContext *s, int cond, TCGArg ret, - TCGArg c1, TCGArg c2, int c2const, - TCGArg v1, int v1const) -{ - tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, c1, c2, c2const); - if (v1const) { - tcg_out_movi(s, TCG_TYPE_I32, ret, v1); - } else { - tcg_out_mov(s, TCG_TYPE_I32, ret, v1); - } -} - -#if defined(CONFIG_SOFTMMU) -#include "exec/softmmu_defs.h" - -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ -static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, -}; - -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ -static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, -}; - -/* Load and compare a TLB entry, and branch if TLB miss. OFFSET is set to - the offset of the first ADDR_READ or ADDR_WRITE member of the appropriate - TLB for the memory index. The return value is the offset from ENV - contained in R1 afterward (to be used when loading ADDEND); if the - return value is 0, R1 is not used. */ - -static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo, - int addrhi, int s_bits, int lab_miss, int offset) -{ - int ret; - - /* Extracting the index into the TLB. The "normal C operation" is - r1 = addr_reg >> TARGET_PAGE_BITS; - r1 &= CPU_TLB_SIZE - 1; - r1 <<= CPU_TLB_ENTRY_BITS; - What this does is extract CPU_TLB_BITS beginning at TARGET_PAGE_BITS - and place them at CPU_TLB_ENTRY_BITS. We can combine the first two - operations with an EXTRU. Unfortunately, the current value of - CPU_TLB_ENTRY_BITS is > 3, so we can't merge that shift with the - add that follows. */ - tcg_out_extr(s, r1, addrlo, TARGET_PAGE_BITS, CPU_TLB_BITS, 0); - tcg_out_shli(s, r1, r1, CPU_TLB_ENTRY_BITS); - tcg_out_arith(s, r1, r1, TCG_AREG0, INSN_ADDL); - - /* Make sure that both the addr_{read,write} and addend can be - read with a 14-bit offset from the same base register. */ - if (check_fit_tl(offset + CPU_TLB_SIZE, 14)) { - ret = 0; - } else { - ret = (offset + 0x400) & ~0x7ff; - offset = ret - offset; - tcg_out_addi2(s, TCG_REG_R1, r1, ret); - r1 = TCG_REG_R1; - } - - /* Load the entry from the computed slot. */ - if (TARGET_LONG_BITS == 64) { - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R23, r1, offset); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset + 4); - } else { - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset); - } - - /* Compute the value that ought to appear in the TLB for a hit, namely, - the page of the address. We include the low N bits of the address - to catch unaligned accesses and force them onto the slow path. Do - this computation after having issued the load from the TLB slot to - give the load time to complete. 
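For illustration, with 4K pages a 32-bit access (s_bits == 2) produces the mask 0xfffff003: an aligned address masks down to exactly its page base, which is what the TLB fill code stores in addr_read/addr_write, while a misaligned address keeps one of the low bits set, fails the comparison below, and takes the slow path.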
*/ - tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - - /* If not equal, jump to lab_miss. */ - if (TARGET_LONG_BITS == 64) { - tcg_out_brcond2(s, TCG_COND_NE, TCG_REG_R20, TCG_REG_R23, - r0, 0, addrhi, 0, lab_miss); - } else { - tcg_out_brcond(s, TCG_COND_NE, TCG_REG_R20, r0, 0, lab_miss); - } - - return ret; -} - -static int tcg_out_arg_reg32(TCGContext *s, int argno, TCGArg v, bool vconst) -{ - if (argno < 4) { - if (vconst) { - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v); - } else { - tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v); - } - } else { - if (vconst && v != 0) { - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, v); - v = TCG_REG_R20; - } - tcg_out_st(s, TCG_TYPE_I32, v, TCG_REG_CALL_STACK, - TCG_TARGET_CALL_STACK_OFFSET - ((argno - 3) * 4)); - } - return argno + 1; -} - -static int tcg_out_arg_reg64(TCGContext *s, int argno, TCGArg vl, TCGArg vh) -{ - /* 64-bit arguments must go in even reg pairs and stack slots. */ - if (argno & 1) { - argno++; - } - argno = tcg_out_arg_reg32(s, argno, vl, false); - argno = tcg_out_arg_reg32(s, argno, vh, false); - return argno; -} -#endif - -static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo_reg, int datahi_reg, - int addr_reg, int addend_reg, int opc) -{ -#ifdef TARGET_WORDS_BIGENDIAN - const int bswap = 0; -#else - const int bswap = 1; -#endif - - switch (opc) { - case 0: - tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDBX); - break; - case 0 | 4: - tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDBX); - tcg_out_ext8s(s, datalo_reg, datalo_reg); - break; - case 1: - tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDHX); - if (bswap) { - tcg_out_bswap16(s, datalo_reg, datalo_reg, 0); - } - break; - case 1 | 4: - tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDHX); - if (bswap) { - tcg_out_bswap16(s, datalo_reg, datalo_reg, 1); - } else { - tcg_out_ext16s(s, datalo_reg, datalo_reg); - } - break; - case 2: - tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDWX); - if (bswap) { - tcg_out_bswap32(s, datalo_reg, datalo_reg, TCG_REG_R20); - } - break; - case 3: - if (bswap) { - int t = datahi_reg; - datahi_reg = datalo_reg; - datalo_reg = t; - } - /* We can't access the low-part with a reg+reg addressing mode, - so perform the addition now and use reg_ofs addressing mode. */ - if (addend_reg != TCG_REG_R0) { - tcg_out_arith(s, TCG_REG_R20, addr_reg, addend_reg, INSN_ADD); - addr_reg = TCG_REG_R20; - } - /* Make sure not to clobber the base register. */ - if (datahi_reg == addr_reg) { - tcg_out_ldst(s, datalo_reg, addr_reg, 4, INSN_LDW); - tcg_out_ldst(s, datahi_reg, addr_reg, 0, INSN_LDW); - } else { - tcg_out_ldst(s, datahi_reg, addr_reg, 0, INSN_LDW); - tcg_out_ldst(s, datalo_reg, addr_reg, 4, INSN_LDW); - } - if (bswap) { - tcg_out_bswap32(s, datalo_reg, datalo_reg, TCG_REG_R20); - tcg_out_bswap32(s, datahi_reg, datahi_reg, TCG_REG_R20); - } - break; - default: - tcg_abort(); - } -} - -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) -{ - int datalo_reg = *args++; - /* Note that datahi_reg is only used for 64-bit loads. */ - int datahi_reg = (opc == 3 ? *args++ : TCG_REG_R0); - int addrlo_reg = *args++; - -#if defined(CONFIG_SOFTMMU) - /* Note that addrhi_reg is only used for 64-bit guests. */ - int addrhi_reg = (TARGET_LONG_BITS == 64 ? 
*args++ : TCG_REG_R0); - int mem_index = *args; - int lab1, lab2, argno, offset; - - lab1 = gen_new_label(); - lab2 = gen_new_label(); - - offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read); - offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, - addrhi_reg, opc & 3, lab1, offset); - - /* TLB Hit. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, - (offset ? TCG_REG_R1 : TCG_REG_R25), - offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset); - tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, - TCG_REG_R20, opc); - tcg_out_branch(s, lab2, 1); - - /* TLB Miss. */ - /* label1: */ - tcg_out_label(s, lab1, s->code_ptr); - - argno = 0; - argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false); - if (TARGET_LONG_BITS == 64) { - argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg); - } else { - argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false); - } - argno = tcg_out_arg_reg32(s, argno, mem_index, true); - - tcg_out_call(s, qemu_ld_helpers[opc & 3]); - - switch (opc) { - case 0: - tcg_out_andi(s, datalo_reg, TCG_REG_RET0, 0xff); - break; - case 0 | 4: - tcg_out_ext8s(s, datalo_reg, TCG_REG_RET0); - break; - case 1: - tcg_out_andi(s, datalo_reg, TCG_REG_RET0, 0xffff); - break; - case 1 | 4: - tcg_out_ext16s(s, datalo_reg, TCG_REG_RET0); - break; - case 2: - case 2 | 4: - tcg_out_mov(s, TCG_TYPE_I32, datalo_reg, TCG_REG_RET0); - break; - case 3: - tcg_out_mov(s, TCG_TYPE_I32, datahi_reg, TCG_REG_RET0); - tcg_out_mov(s, TCG_TYPE_I32, datalo_reg, TCG_REG_RET1); - break; - default: - tcg_abort(); - } - - /* label2: */ - tcg_out_label(s, lab2, s->code_ptr); -#else - tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, - (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_R0), opc); -#endif -} - -static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, - int datahi_reg, int addr_reg, int opc) -{ -#ifdef TARGET_WORDS_BIGENDIAN - const int bswap = 0; -#else - const int bswap = 1; -#endif - - switch (opc) { - case 0: - tcg_out_ldst(s, datalo_reg, addr_reg, 0, INSN_STB); - break; - case 1: - if (bswap) { - tcg_out_bswap16(s, TCG_REG_R20, datalo_reg, 0); - datalo_reg = TCG_REG_R20; - } - tcg_out_ldst(s, datalo_reg, addr_reg, 0, INSN_STH); - break; - case 2: - if (bswap) { - tcg_out_bswap32(s, TCG_REG_R20, datalo_reg, TCG_REG_R20); - datalo_reg = TCG_REG_R20; - } - tcg_out_ldst(s, datalo_reg, addr_reg, 0, INSN_STW); - break; - case 3: - if (bswap) { - tcg_out_bswap32(s, TCG_REG_R20, datalo_reg, TCG_REG_R20); - tcg_out_bswap32(s, TCG_REG_R23, datahi_reg, TCG_REG_R23); - datahi_reg = TCG_REG_R20; - datalo_reg = TCG_REG_R23; - } - tcg_out_ldst(s, datahi_reg, addr_reg, 0, INSN_STW); - tcg_out_ldst(s, datalo_reg, addr_reg, 4, INSN_STW); - break; - default: - tcg_abort(); - } - -} - -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) -{ - int datalo_reg = *args++; - /* Note that datahi_reg is only used for 64-bit loads. */ - int datahi_reg = (opc == 3 ? *args++ : TCG_REG_R0); - int addrlo_reg = *args++; - -#if defined(CONFIG_SOFTMMU) - /* Note that addrhi_reg is only used for 64-bit guests. */ - int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0); - int mem_index = *args; - int lab1, lab2, argno, next, offset; - - lab1 = gen_new_label(); - lab2 = gen_new_label(); - - offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); - offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, - addrhi_reg, opc, lab1, offset); - - /* TLB Hit. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, - (offset ? 
TCG_REG_R1 : TCG_REG_R25),
-               offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
-
-    /* There are no indexed stores, so we must do this addition explicitly.
-       Careful to avoid R20, which is used for the bswaps to follow. */
-    tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_REG_R20, INSN_ADDL);
-    tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, TCG_REG_R31, opc);
-    tcg_out_branch(s, lab2, 1);
-
-    /* TLB Miss. */
-    /* label1: */
-    tcg_out_label(s, lab1, s->code_ptr);
-
-    argno = 0;
-    argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
-    if (TARGET_LONG_BITS == 64) {
-        argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
-    } else {
-        argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
-    }
-
-    next = (argno < 4 ? tcg_target_call_iarg_regs[argno] : TCG_REG_R20);
-    switch(opc) {
-    case 0:
-        tcg_out_andi(s, next, datalo_reg, 0xff);
-        argno = tcg_out_arg_reg32(s, argno, next, false);
-        break;
-    case 1:
-        tcg_out_andi(s, next, datalo_reg, 0xffff);
-        argno = tcg_out_arg_reg32(s, argno, next, false);
-        break;
-    case 2:
-        argno = tcg_out_arg_reg32(s, argno, datalo_reg, false);
-        break;
-    case 3:
-        argno = tcg_out_arg_reg64(s, argno, datalo_reg, datahi_reg);
-        break;
-    default:
-        tcg_abort();
-    }
-    argno = tcg_out_arg_reg32(s, argno, mem_index, true);
-
-    tcg_out_call(s, qemu_st_helpers[opc]);
-
-    /* label2: */
-    tcg_out_label(s, lab2, s->code_ptr);
-#else
-    /* There are no indexed stores, so if GUEST_BASE is set we must do
-       the add explicitly.  Careful to avoid R20, which is used for the
-       bswaps to follow. */
-    if (GUEST_BASE != 0) {
-        tcg_out_arith(s, TCG_REG_R31, addrlo_reg,
-                      TCG_GUEST_BASE_REG, INSN_ADDL);
-        addrlo_reg = TCG_REG_R31;
-    }
-    tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, addrlo_reg, opc);
-#endif
-}
-
-static void tcg_out_exit_tb(TCGContext *s, TCGArg arg)
-{
-    if (!check_fit_tl(arg, 14)) {
-        uint32_t hi, lo;
-        hi = arg & ~0x7ff;
-        lo = arg & 0x7ff;
-        if (lo) {
-            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RET0, hi);
-            tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_R18));
-            tcg_out_addi(s, TCG_REG_RET0, lo);
-            return;
-        }
-        arg = hi;
-    }
-    tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_R18));
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RET0, arg);
-}
-
-static void tcg_out_goto_tb(TCGContext *s, TCGArg arg)
-{
-    if (s->tb_jmp_offset) {
-        /* direct jump method */
-        fprintf(stderr, "goto_tb direct\n");
-        tcg_abort();
-    } else {
-        /* indirect jump method */
-        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, TCG_REG_R0,
-                   (tcg_target_long)(s->tb_next + arg));
-        tcg_out32(s, INSN_BV_N | INSN_R2(TCG_REG_R20));
-    }
-    s->tb_next_offset[arg] = s->code_ptr - s->code_buf;
-}
-
-static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
-                              const int *const_args)
-{
-    switch (opc) {
-    case INDEX_op_exit_tb:
-        tcg_out_exit_tb(s, args[0]);
-        break;
-    case INDEX_op_goto_tb:
-        tcg_out_goto_tb(s, args[0]);
-        break;
-
-    case INDEX_op_call:
-        if (const_args[0]) {
-            tcg_out_call(s, (void *)args[0]);
-        } else {
-            /* ??? FIXME: the value in the register in args[0] is almost
-               certainly a procedure descriptor, not a code address.  We
-               probably need to use the millicode $$dyncall routine. 
*/ - tcg_abort(); - } - break; - - case INDEX_op_br: - tcg_out_branch(s, args[0], 1); - break; - - case INDEX_op_movi_i32: - tcg_out_movi(s, TCG_TYPE_I32, args[0], (uint32_t)args[1]); - break; - - case INDEX_op_ld8u_i32: - tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDB); - break; - case INDEX_op_ld8s_i32: - tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDB); - tcg_out_ext8s(s, args[0], args[0]); - break; - case INDEX_op_ld16u_i32: - tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDH); - break; - case INDEX_op_ld16s_i32: - tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDH); - tcg_out_ext16s(s, args[0], args[0]); - break; - case INDEX_op_ld_i32: - tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDW); - break; - - case INDEX_op_st8_i32: - tcg_out_ldst(s, args[0], args[1], args[2], INSN_STB); - break; - case INDEX_op_st16_i32: - tcg_out_ldst(s, args[0], args[1], args[2], INSN_STH); - break; - case INDEX_op_st_i32: - tcg_out_ldst(s, args[0], args[1], args[2], INSN_STW); - break; - - case INDEX_op_add_i32: - if (const_args[2]) { - tcg_out_addi2(s, args[0], args[1], args[2]); - } else { - tcg_out_arith(s, args[0], args[1], args[2], INSN_ADDL); - } - break; - - case INDEX_op_sub_i32: - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1] - args[2]); - } else { - /* Recall that SUBI is a reversed subtract. */ - tcg_out_arithi(s, args[0], args[2], args[1], INSN_SUBI); - } - } else if (const_args[2]) { - tcg_out_addi2(s, args[0], args[1], -args[2]); - } else { - tcg_out_arith(s, args[0], args[1], args[2], INSN_SUB); - } - break; - - case INDEX_op_and_i32: - if (const_args[2]) { - tcg_out_andi(s, args[0], args[1], args[2]); - } else { - tcg_out_arith(s, args[0], args[1], args[2], INSN_AND); - } - break; - - case INDEX_op_or_i32: - if (const_args[2]) { - tcg_out_ori(s, args[0], args[1], args[2]); - } else { - tcg_out_arith(s, args[0], args[1], args[2], INSN_OR); - } - break; - - case INDEX_op_xor_i32: - tcg_out_arith(s, args[0], args[1], args[2], INSN_XOR); - break; - - case INDEX_op_andc_i32: - if (const_args[2]) { - tcg_out_andi(s, args[0], args[1], ~args[2]); - } else { - tcg_out_arith(s, args[0], args[1], args[2], INSN_ANDCM); - } - break; - - case INDEX_op_shl_i32: - if (const_args[2]) { - tcg_out_shli(s, args[0], args[1], args[2]); - } else { - tcg_out_shl(s, args[0], args[1], args[2]); - } - break; - - case INDEX_op_shr_i32: - if (const_args[2]) { - tcg_out_shri(s, args[0], args[1], args[2]); - } else { - tcg_out_shr(s, args[0], args[1], args[2]); - } - break; - - case INDEX_op_sar_i32: - if (const_args[2]) { - tcg_out_sari(s, args[0], args[1], args[2]); - } else { - tcg_out_sar(s, args[0], args[1], args[2]); - } - break; - - case INDEX_op_rotl_i32: - if (const_args[2]) { - tcg_out_rotli(s, args[0], args[1], args[2]); - } else { - tcg_out_rotl(s, args[0], args[1], args[2]); - } - break; - - case INDEX_op_rotr_i32: - if (const_args[2]) { - tcg_out_rotri(s, args[0], args[1], args[2]); - } else { - tcg_out_rotr(s, args[0], args[1], args[2]); - } - break; - - case INDEX_op_mul_i32: - tcg_out_xmpyu(s, args[0], TCG_REG_R0, args[1], args[2]); - break; - case INDEX_op_mulu2_i32: - tcg_out_xmpyu(s, args[0], args[1], args[2], args[3]); - break; - - case INDEX_op_bswap16_i32: - tcg_out_bswap16(s, args[0], args[1], 0); - break; - case INDEX_op_bswap32_i32: - tcg_out_bswap32(s, args[0], args[1], TCG_REG_R20); - break; - - case INDEX_op_not_i32: - tcg_out_arithi(s, args[0], args[1], -1, INSN_SUBI); - break; - case INDEX_op_ext8s_i32: - tcg_out_ext8s(s, 
args[0], args[1]); - break; - case INDEX_op_ext16s_i32: - tcg_out_ext16s(s, args[0], args[1]); - break; - - case INDEX_op_brcond_i32: - tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]); - break; - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args[4], args[0], args[1], - args[2], const_args[2], - args[3], const_args[3], args[5]); - break; - - case INDEX_op_setcond_i32: - tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args[5], args[0], args[1], args[2], - args[3], const_args[3], args[4], const_args[4]); - break; - - case INDEX_op_movcond_i32: - tcg_out_movcond(s, args[5], args[0], args[1], args[2], const_args[2], - args[3], const_args[3]); - break; - - case INDEX_op_add2_i32: - tcg_out_add2(s, args[0], args[1], args[2], args[3], - args[4], args[5], const_args[4]); - break; - - case INDEX_op_sub2_i32: - tcg_out_sub2(s, args[0], args[1], args[2], args[3], - args[4], args[5], const_args[2], const_args[4]); - break; - - case INDEX_op_deposit_i32: - if (const_args[2]) { - tcg_out_depi(s, args[0], args[2], args[3], args[4]); - } else { - tcg_out_dep(s, args[0], args[2], args[3], args[4]); - } - break; - - case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld(s, args, 0); - break; - case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, args, 0 | 4); - break; - case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld(s, args, 1); - break; - case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, args, 1 | 4); - break; - case INDEX_op_qemu_ld32: - tcg_out_qemu_ld(s, args, 2); - break; - case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, 3); - break; - - case INDEX_op_qemu_st8: - tcg_out_qemu_st(s, args, 0); - break; - case INDEX_op_qemu_st16: - tcg_out_qemu_st(s, args, 1); - break; - case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, args, 2); - break; - case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, 3); - break; - - default: - fprintf(stderr, "unknown opcode 0x%x\n", opc); - tcg_abort(); - } -} - -static const TCGTargetOpDef hppa_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - - { INDEX_op_call, { "ri" } }, - { INDEX_op_br, { } }, - - { INDEX_op_mov_i32, { "r", "r" } }, - { INDEX_op_movi_i32, { "r" } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "rZ", "r" } }, - { INDEX_op_st16_i32, { "rZ", "r" } }, - { INDEX_op_st_i32, { "rZ", "r" } }, - - { INDEX_op_add_i32, { "r", "rZ", "ri" } }, - { INDEX_op_sub_i32, { "r", "rI", "ri" } }, - { INDEX_op_and_i32, { "r", "rZ", "rM" } }, - { INDEX_op_or_i32, { "r", "rZ", "rO" } }, - { INDEX_op_xor_i32, { "r", "rZ", "rZ" } }, - /* Note that the second argument will be inverted, which means - we want a constant whose inversion matches M, and that O = ~M. - See the implementation of and_mask_p. 
*/ - { INDEX_op_andc_i32, { "r", "rZ", "rO" } }, - - { INDEX_op_mul_i32, { "r", "r", "r" } }, - { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } }, - - { INDEX_op_shl_i32, { "r", "r", "ri" } }, - { INDEX_op_shr_i32, { "r", "r", "ri" } }, - { INDEX_op_sar_i32, { "r", "r", "ri" } }, - { INDEX_op_rotl_i32, { "r", "r", "ri" } }, - { INDEX_op_rotr_i32, { "r", "r", "ri" } }, - - { INDEX_op_bswap16_i32, { "r", "r" } }, - { INDEX_op_bswap32_i32, { "r", "r" } }, - { INDEX_op_not_i32, { "r", "r" } }, - - { INDEX_op_ext8s_i32, { "r", "r" } }, - { INDEX_op_ext16s_i32, { "r", "r" } }, - - { INDEX_op_brcond_i32, { "rZ", "rJ" } }, - { INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } }, - - { INDEX_op_setcond_i32, { "r", "rZ", "rI" } }, - { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rI", "rI" } }, - - /* ??? We can actually support a signed 14-bit arg3, but we - only have existing constraints for a signed 11-bit. */ - { INDEX_op_movcond_i32, { "r", "rZ", "rI", "rI", "0" } }, - - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rI", "rZ" } }, - { INDEX_op_sub2_i32, { "r", "r", "rI", "rZ", "rK", "rZ" } }, - - { INDEX_op_deposit_i32, { "r", "0", "rJ" } }, - -#if TARGET_LONG_BITS == 32 - { INDEX_op_qemu_ld8u, { "r", "L" } }, - { INDEX_op_qemu_ld8s, { "r", "L" } }, - { INDEX_op_qemu_ld16u, { "r", "L" } }, - { INDEX_op_qemu_ld16s, { "r", "L" } }, - { INDEX_op_qemu_ld32, { "r", "L" } }, - { INDEX_op_qemu_ld64, { "r", "r", "L" } }, - - { INDEX_op_qemu_st8, { "LZ", "L" } }, - { INDEX_op_qemu_st16, { "LZ", "L" } }, - { INDEX_op_qemu_st32, { "LZ", "L" } }, - { INDEX_op_qemu_st64, { "LZ", "LZ", "L" } }, -#else - { INDEX_op_qemu_ld8u, { "r", "L", "L" } }, - { INDEX_op_qemu_ld8s, { "r", "L", "L" } }, - { INDEX_op_qemu_ld16u, { "r", "L", "L" } }, - { INDEX_op_qemu_ld16s, { "r", "L", "L" } }, - { INDEX_op_qemu_ld32, { "r", "L", "L" } }, - { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } }, - - { INDEX_op_qemu_st8, { "LZ", "L", "L" } }, - { INDEX_op_qemu_st16, { "LZ", "L", "L" } }, - { INDEX_op_qemu_st32, { "LZ", "L", "L" } }, - { INDEX_op_qemu_st64, { "LZ", "LZ", "L", "L" } }, -#endif - { -1 }, -}; - -static int tcg_target_callee_save_regs[] = { - /* R2, the return address register, is saved specially - in the caller's frame. */ - /* R3, the frame pointer, is not currently modified. */ - TCG_REG_R4, - TCG_REG_R5, - TCG_REG_R6, - TCG_REG_R7, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, - TCG_REG_R11, - TCG_REG_R12, - TCG_REG_R13, - TCG_REG_R14, - TCG_REG_R15, - TCG_REG_R16, - TCG_REG_R17, /* R17 is the global env. */ - TCG_REG_R18 -}; - -#define FRAME_SIZE ((-TCG_TARGET_CALL_STACK_OFFSET \ - + TCG_TARGET_STATIC_CALL_ARGS_SIZE \ - + ARRAY_SIZE(tcg_target_callee_save_regs) * 4 \ - + CPU_TEMP_BUF_NLONGS * sizeof(long) \ - + TCG_TARGET_STACK_ALIGN - 1) \ - & -TCG_TARGET_STACK_ALIGN) - -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int frame_size, i; - - frame_size = FRAME_SIZE; - - /* The return address is stored in the caller's frame. */ - tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_RP, TCG_REG_CALL_STACK, -20); - - /* Allocate stack frame, saving the first register at the same time. */ - tcg_out_ldst(s, tcg_target_callee_save_regs[0], - TCG_REG_CALL_STACK, frame_size, INSN_STWM); - - /* Save all callee saved registers. */ - for (i = 1; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { - tcg_out_st(s, TCG_TYPE_PTR, tcg_target_callee_save_regs[i], - TCG_REG_CALL_STACK, -frame_size + i * 4); - } - - /* Record the location of the TCG temps. 
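tcg_set_frame below places them in the slots immediately above the last callee-save register; FRAME_SIZE above accounts for the fixed call-argument area, the fifteen 4-byte save slots and the CPU_TEMP_BUF_NLONGS temp longs, rounded up to the 64-byte stack alignment, with the frame growing upward per TCG_TARGET_STACK_GROWSUP.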
*/ - tcg_set_frame(s, TCG_REG_CALL_STACK, -frame_size + i * 4, - CPU_TEMP_BUF_NLONGS * sizeof(long)); - -#ifdef CONFIG_USE_GUEST_BASE - if (GUEST_BASE != 0) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); - tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); - } -#endif - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - - /* Jump to TB, and adjust R18 to be the return address. */ - tcg_out32(s, INSN_BLE_SR4 | INSN_R2(tcg_target_call_iarg_regs[1])); - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R18, TCG_REG_R31); - - /* Restore callee saved registers. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_RP, TCG_REG_CALL_STACK, - -frame_size - 20); - for (i = 1; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { - tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_callee_save_regs[i], - TCG_REG_CALL_STACK, -frame_size + i * 4); - } - - /* Deallocate stack frame and return. */ - tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_RP)); - tcg_out_ldst(s, tcg_target_callee_save_regs[0], - TCG_REG_CALL_STACK, -frame_size, INSN_LDWM); -} - -static void tcg_target_init(TCGContext *s) -{ - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); - - tcg_regset_clear(tcg_target_call_clobber_regs); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R20); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R21); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R22); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R23); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R24); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R25); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R26); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RET0); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RET1); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* hardwired to zero */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* addil target */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_RP); /* link register */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R3); /* frame pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R18); /* return pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R19); /* clobbered w/o pic */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R20); /* reserved */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_DP); /* data pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); /* stack pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R31); /* ble link reg */ - - tcg_add_target_add_op_defs(hppa_op_defs); -} - -typedef struct { - DebugFrameCIE cie; - DebugFrameFDEHeader fde; - uint8_t fde_def_cfa[4]; - uint8_t fde_ret_ofs[3]; - uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2]; -} DebugFrame; - -#define ELF_HOST_MACHINE EM_PARISC -#define ELF_HOST_FLAGS EFA_PARISC_1_1 - -/* ??? BFD (and thus GDB) wants very much to distinguish between HPUX - and other extensions. We don't really care, but if we don't set this - to *something* then the object file won't be properly matched. */ -#define ELF_OSABI ELFOSABI_LINUX - -static DebugFrame debug_frame = { - .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .cie.id = -1, - .cie.version = 1, - .cie.code_align = 1, - .cie.data_align = 1, - .cie.return_column = 2, - - /* Total FDE size does not include the "len" member. 
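(In the DWARF .debug_frame encoding a length field counts only the bytes that follow it, which is why the sizeof/offsetof expression below starts counting at cie_offset, the first field after len.)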
*/ - .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), - - .fde_def_cfa = { - 0x12, 30, /* DW_CFA_def_cfa_sf sp, ... */ - (-FRAME_SIZE & 0x7f) | 0x80, /* ... sleb128 -FRAME_SIZE */ - (-FRAME_SIZE >> 7) & 0x7f - }, - .fde_ret_ofs = { - 0x11, 2, (-20 / 4) & 0x7f /* DW_CFA_offset_extended_sf r2, 20 */ - }, - .fde_reg_ofs = { - /* This must match the ordering in tcg_target_callee_save_regs. */ - 0x80 + 4, 0, /* DW_CFA_offset r4, 0 */ - 0x80 + 5, 4, /* DW_CFA_offset r5, 4 */ - 0x80 + 6, 8, /* DW_CFA_offset r6, 8 */ - 0x80 + 7, 12, /* ... */ - 0x80 + 8, 16, - 0x80 + 9, 20, - 0x80 + 10, 24, - 0x80 + 11, 28, - 0x80 + 12, 32, - 0x80 + 13, 36, - 0x80 + 14, 40, - 0x80 + 15, 44, - 0x80 + 16, 48, - 0x80 + 17, 52, - 0x80 + 18, 56, - } -}; - -void tcg_register_jit(void *buf, size_t buf_size) -{ - debug_frame.fde.func_start = (tcg_target_long) buf; - debug_frame.fde.func_len = buf_size; - - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h deleted file mode 100644 index 25467bdd43..0000000000 --- a/tcg/hppa/tcg-target.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#ifndef TCG_TARGET_HPPA -#define TCG_TARGET_HPPA 1 - -#if TCG_TARGET_REG_BITS != 32 -#error unsupported -#endif - -#define TCG_TARGET_WORDS_BIGENDIAN - -#define TCG_TARGET_NB_REGS 32 - -typedef enum { - TCG_REG_R0 = 0, - TCG_REG_R1, - TCG_REG_RP, - TCG_REG_R3, - TCG_REG_R4, - TCG_REG_R5, - TCG_REG_R6, - TCG_REG_R7, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, - TCG_REG_R11, - TCG_REG_R12, - TCG_REG_R13, - TCG_REG_R14, - TCG_REG_R15, - TCG_REG_R16, - TCG_REG_R17, - TCG_REG_R18, - TCG_REG_R19, - TCG_REG_R20, - TCG_REG_R21, - TCG_REG_R22, - TCG_REG_R23, - TCG_REG_R24, - TCG_REG_R25, - TCG_REG_R26, - TCG_REG_DP, - TCG_REG_RET0, - TCG_REG_RET1, - TCG_REG_SP, - TCG_REG_R31, -} TCGReg; - -#define TCG_CT_CONST_0 0x0100 -#define TCG_CT_CONST_S5 0x0200 -#define TCG_CT_CONST_S11 0x0400 -#define TCG_CT_CONST_MS11 0x0800 -#define TCG_CT_CONST_AND 0x1000 -#define TCG_CT_CONST_OR 0x2000 - -/* used for function call generation */ -#define TCG_REG_CALL_STACK TCG_REG_SP -#define TCG_TARGET_STACK_ALIGN 64 -#define TCG_TARGET_CALL_STACK_OFFSET -48 -#define TCG_TARGET_STATIC_CALL_ARGS_SIZE 8*4 -#define TCG_TARGET_CALL_ALIGN_ARGS 1 -#define TCG_TARGET_STACK_GROWSUP - -/* optional instructions */ -#define TCG_TARGET_HAS_div_i32 0 -#define TCG_TARGET_HAS_rem_i32 0 -#define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_bswap16_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 -#define TCG_TARGET_HAS_orc_i32 0 -#define TCG_TARGET_HAS_eqv_i32 0 -#define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_deposit_i32 1 -#define TCG_TARGET_HAS_movcond_i32 1 -#define TCG_TARGET_HAS_muls2_i32 0 - -/* optional instructions automatically implemented */ -#define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */ -#define TCG_TARGET_HAS_ext8u_i32 0 /* and rd, rs, 0xff */ -#define TCG_TARGET_HAS_ext16u_i32 0 /* and rd, rs, 0xffff */ - -#define TCG_AREG0 TCG_REG_R17 - - -static inline void flush_icache_range(tcg_target_ulong start, - tcg_target_ulong stop) -{ - start &= ~31; - while (start <= stop) { - asm volatile ("fdc 0(%0)\n\t" - "sync\n\t" - "fic 0(%%sr4, %0)\n\t" - "sync" - : : "r"(start) : "memory"); - start += 32; - } -} - -#endif diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 3d87876734..587e11ee5b 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -22,6 +22,8 @@ * THE SOFTWARE. */ +#include "tcg-be-ldst.h" + #ifndef NDEBUG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #if TCG_TARGET_REG_BITS == 64 @@ -115,7 +117,7 @@ static bool have_cmov; static uint8_t *tb_ret_addr; static void patch_reloc(uint8_t *code_ptr, int type, - tcg_target_long value, tcg_target_long addend) + intptr_t value, intptr_t addend) { value += addend; switch(type) { @@ -193,11 +195,11 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) /* qemu_ld/st address constraint */ case 'L': ct->ct |= TCG_CT_REG; -#if TCG_TARGET_REG_BITS == 64 + if (TCG_TARGET_REG_BITS == 64) { tcg_regset_set32(ct->u.regs, 0, 0xffff); -#else + } else { tcg_regset_set32(ct->u.regs, 0, 0xff); -#endif + } tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0); tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1); break; @@ -433,8 +435,7 @@ static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm) that will follow the instruction. 
*/ static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm, - int index, int shift, - tcg_target_long offset) + int index, int shift, intptr_t offset) { int mod, len; @@ -442,8 +443,8 @@ static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm, if (TCG_TARGET_REG_BITS == 64) { /* Try for a rip-relative addressing mode. This has replaced the 32-bit-mode absolute addressing encoding. */ - tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm; - tcg_target_long disp = offset - pc; + intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm; + intptr_t disp = offset - pc; if (disp == (int32_t)disp) { tcg_out_opc(s, opc, r, 0, 0); tcg_out8(s, (LOWREGMASK(r) << 3) | 5); @@ -517,7 +518,7 @@ static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm, /* A simplification of the above with no index or shift. */ static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, - int rm, tcg_target_long offset) + int rm, intptr_t offset) { tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset); } @@ -544,20 +545,34 @@ static inline void tcg_out_mov(TCGContext *s, TCGType type, static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg) { + tcg_target_long diff; + if (arg == 0) { tgen_arithr(s, ARITH_XOR, ret, ret); return; - } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) { + } + if (arg == (uint32_t)arg || type == TCG_TYPE_I32) { tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0); tcg_out32(s, arg); - } else if (arg == (int32_t)arg) { + return; + } + if (arg == (int32_t)arg) { tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret); tcg_out32(s, arg); - } else { - tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0); - tcg_out32(s, arg); - tcg_out32(s, arg >> 31 >> 1); + return; + } + + /* Try a 7 byte pc-relative lea before the 10 byte movq. */ + diff = arg - ((uintptr_t)s->code_ptr + 7); + if (diff == (int32_t)diff) { + tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0); + tcg_out8(s, (LOWREGMASK(ret) << 3) | 5); + tcg_out32(s, diff); + return; } + + tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0); + tcg_out64(s, arg); } static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) @@ -584,19 +599,27 @@ static inline void tcg_out_pop(TCGContext *s, int reg) } static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, - TCGReg arg1, tcg_target_long arg2) + TCGReg arg1, intptr_t arg2) { int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0); tcg_out_modrm_offset(s, opc, ret, arg1, arg2); } static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, tcg_target_long arg2) + TCGReg arg1, intptr_t arg2) { int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0); tcg_out_modrm_offset(s, opc, arg, arg1, arg2); } +static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base, + tcg_target_long ofs, tcg_target_long val) +{ + int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm_offset(s, opc, 0, base, ofs); + tcg_out32(s, val); +} + static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count) { /* Propagate an opcode prefix, such as P_DATA16. 
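The prefix travels in the SUBOPC argument, so a caller can fold in, say, P_DATA16 and have the 0x66 operand-size prefix emitted along with the shift; that is presumably how the 16-bit rotate used by the bswap16 paths (rolw_8) gets its encoding.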
*/ @@ -738,7 +761,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small) TCGLabel *l = &s->labels[label_index]; if (l->has_value) { - val = l->u.value - (tcg_target_long)s->code_ptr; + val = l->u.value - (intptr_t)s->code_ptr; val1 = val - 2; if ((int8_t)val1 == val1) { if (opc == -1) { @@ -978,9 +1001,9 @@ static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest, } #endif -static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest) +static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest) { - tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5; + intptr_t disp = dest - (intptr_t)s->code_ptr - 5; if (disp == (int32_t)disp) { tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0); @@ -992,54 +1015,47 @@ static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest) } } -static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest) +static inline void tcg_out_calli(TCGContext *s, uintptr_t dest) { tcg_out_branch(s, 1, dest); } -static void tcg_out_jmp(TCGContext *s, tcg_target_long dest) +static void tcg_out_jmp(TCGContext *s, uintptr_t dest) { tcg_out_branch(s, 0, dest); } #if defined(CONFIG_SOFTMMU) - -#include "exec/softmmu_defs.h" - -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ -static const void *qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + * int mmu_idx, uintptr_t ra) + */ +static const void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, }; -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ -static const void *qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ +static const void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, }; -static void add_qemu_ldst_label(TCGContext *s, - int is_ld, - int opc, - int data_reg, - int data_reg2, - int addrlo_reg, - int addrhi_reg, - int mem_index, - uint8_t *raddr, - uint8_t **label_ptr); - /* Perform the TLB load and compare. Inputs: - ADDRLO_IDX contains the index into ARGS of the low part of the - address; the high part of the address is at ADDR_LOW_IDX+1. + ADDRLO and ADDRHI contain the low and high part of the address. MEM_INDEX and S_BITS are the memory context and log2 size of the load. @@ -1057,41 +1073,52 @@ static void add_qemu_ldst_label(TCGContext *s, First argument register is clobbered. 
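As background for the tables above: TCGMemOp packs an access into four bits, MO_SIZE in bits 0-1, MO_SIGN in bit 2 and MO_BSWAP in bit 3 (with MO_LE/MO_BE derived from the host byte order), which is why sixteen slots suffice and why the load path indexes with opc & ~MO_SIGN, signedness being fixed up after the helper returns.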
*/ -static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx, - int mem_index, int s_bits, - const TCGArg *args, +static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, + int mem_index, TCGMemOp s_bits, uint8_t **label_ptr, int which) { - const int addrlo = args[addrlo_idx]; - const int r0 = TCG_REG_L0; - const int r1 = TCG_REG_L1; - TCGType type = TCG_TYPE_I32; - int rexw = 0; + const TCGReg r0 = TCG_REG_L0; + const TCGReg r1 = TCG_REG_L1; + TCGType ttype = TCG_TYPE_I32; + TCGType htype = TCG_TYPE_I32; + int trexw = 0, hrexw = 0; - if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) { - type = TCG_TYPE_I64; - rexw = P_REXW; + if (TCG_TARGET_REG_BITS == 64) { + if (TARGET_LONG_BITS == 64) { + ttype = TCG_TYPE_I64; + trexw = P_REXW; + } + if (TCG_TYPE_PTR == TCG_TYPE_I64) { + htype = TCG_TYPE_I64; + hrexw = P_REXW; + } } - tcg_out_mov(s, type, r0, addrlo); - tcg_out_mov(s, type, r1, addrlo); + tcg_out_mov(s, htype, r0, addrlo); + tcg_out_mov(s, ttype, r1, addrlo); - tcg_out_shifti(s, SHIFT_SHR + rexw, r0, + tcg_out_shifti(s, SHIFT_SHR + hrexw, r0, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tgen_arithi(s, ARITH_AND + rexw, r1, + tgen_arithi(s, ARITH_AND + trexw, r1, TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0); - tgen_arithi(s, ARITH_AND + rexw, r0, + tgen_arithi(s, ARITH_AND + hrexw, r0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); - tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r0, TCG_AREG0, r0, 0, + tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0, offsetof(CPUArchState, tlb_table[mem_index][0]) + which); /* cmp 0(r0), r1 */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r1, r0, 0); + tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0); - tcg_out_mov(s, type, r1, addrlo); + /* Prepare for both the fast path add of the tlb addend, and the slow + path function argument setup. There are two cases worth note: + For 32-bit guest and x86_64 host, MOVL zero-extends the guest address + before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ + copies the entire guest address for the slow path, while truncation + for the 32-bit host happens with the fastpath ADDL below. */ + tcg_out_mov(s, ttype, r1, addrlo); /* jne slow_path */ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); @@ -1100,7 +1127,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx, if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { /* cmp 4(r0), addrhi */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r0, 4); + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4); /* jne slow_path */ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); @@ -1111,9 +1138,185 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx, /* TLB Hit. 
*/ /* add addend(r0), r1 */ - tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r1, r0, + tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0, offsetof(CPUTLBEntry, addend) - which); } + +/* + * Record the context of a call to the out of line helper code for the slow path + * for a load or store, so that we can later generate the correct helper code + */ +static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addrhi, + int mem_index, uint8_t *raddr, + uint8_t **label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = datalo; + label->datahi_reg = datahi; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = label_ptr[0]; + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + label->label_ptr[1] = label_ptr[1]; + } +} + +/* + * Generate code for the slow path for a load at the end of block + */ +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOp opc = l->opc; + TCGReg data_reg; + uint8_t **label_ptr = &l->label_ptr[0]; + + /* resolve label address */ + *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4); + } + + if (TCG_TARGET_REG_BITS == 32) { + int ofs = 0; + + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); + ofs += 4; + + tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); + ofs += 4; + + if (TARGET_LONG_BITS == 64) { + tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); + ofs += 4; + } + + tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index); + ofs += 4; + + tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr); + } else { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + /* The second argument is already loaded with addrlo. */ + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], + l->mem_index); + tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3], + (uintptr_t)l->raddr); + } + + tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN]); + + data_reg = l->datalo_reg; + switch (opc & MO_SSIZE) { + case MO_SB: + tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW); + break; + case MO_SW: + tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW); + break; +#if TCG_TARGET_REG_BITS == 64 + case MO_SL: + tcg_out_ext32s(s, data_reg, TCG_REG_EAX); + break; +#endif + case MO_UB: + case MO_UW: + /* Note that the helpers have zero-extended to tcg_target_long. 
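so the MO_UB and MO_UW cases can simply fall through to the plain 32-bit move out of EAX below, and only the signed cases above need an explicit sign extension.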
*/
+    case MO_UL:
+        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+        break;
+    case MO_Q:
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
+        } else if (data_reg == TCG_REG_EDX) {
+            /* xchg %edx, %eax */
+            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
+            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
+        } else {
+            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
+        }
+        break;
+    default:
+        tcg_abort();
+    }
+
+    /* Jump to the code corresponding to next IR of qemu_ld */
+    tcg_out_jmp(s, (uintptr_t)l->raddr);
+}
+
+/*
+ * Generate code for the slow path for a store at the end of block
+ */
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    TCGMemOp opc = l->opc;
+    TCGMemOp s_bits = opc & MO_SIZE;
+    uint8_t **label_ptr = &l->label_ptr[0];
+    TCGReg retaddr;
+
+    /* resolve label address */
+    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
+    }
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        int ofs = 0;
+
+        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
+        ofs += 4;
+
+        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
+        ofs += 4;
+
+        if (TARGET_LONG_BITS == 64) {
+            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
+            ofs += 4;
+        }
+
+        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
+        ofs += 4;
+
+        if (s_bits == MO_64) {
+            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
+            ofs += 4;
+        }
+
+        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
+        ofs += 4;
+
+        retaddr = TCG_REG_EAX;
+        tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
+        tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
+    } else {
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+        /* The second argument is already loaded with addrlo. */
+        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+                    tcg_target_call_iarg_regs[2], l->datalo_reg);
+        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                     l->mem_index);
+
+        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
+            retaddr = tcg_target_call_iarg_regs[4];
+            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
+        } else {
+            retaddr = TCG_REG_RAX;
+            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
+            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);
+        }
+    }
+
+    /* "Tail call" to the helper, with the return address back inline. 
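Pushing RETADDR and then jumping means the helper's own return pops l->raddr and resumes the translated code directly after the original store, exactly as if the helper had been called from that point, with no extra code needed after the jump.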
*/ + tcg_out_push(s, retaddr); + tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[opc]); +} #elif defined(__x86_64__) && defined(__linux__) # include <asm/prctl.h> # include <sys/prctl.h> @@ -1132,29 +1335,26 @@ static inline void setup_guest_base_seg(void) static inline void setup_guest_base_seg(void) { } #endif /* SOFTMMU */ -static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi, - int base, tcg_target_long ofs, int seg, - int sizeop) +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, intptr_t ofs, int seg, + TCGMemOp memop) { -#ifdef TARGET_WORDS_BIGENDIAN - const int bswap = 1; -#else - const int bswap = 0; -#endif - switch (sizeop) { - case 0: + const TCGMemOp bswap = memop & MO_BSWAP; + + switch (memop & MO_SSIZE) { + case MO_UB: tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs); break; - case 0 | 4: + case MO_SB: tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs); break; - case 1: + case MO_UW: tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs); if (bswap) { tcg_out_rolw_8(s, datalo); } break; - case 1 | 4: + case MO_SW: if (bswap) { tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs); tcg_out_rolw_8(s, datalo); @@ -1164,14 +1364,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi, datalo, base, ofs); } break; - case 2: + case MO_UL: tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs); if (bswap) { tcg_out_bswap32(s, datalo); } break; #if TCG_TARGET_REG_BITS == 64 - case 2 | 4: + case MO_SL: if (bswap) { tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs); tcg_out_bswap32(s, datalo); @@ -1181,7 +1381,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi, } break; #endif - case 3: + case MO_Q: if (TCG_TARGET_REG_BITS == 64) { tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg, datalo, base, ofs); @@ -1219,48 +1419,40 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi, /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and EAX. It will be useful once fixed registers globals are less common. */ -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, - int opc) +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) { - int data_reg, data_reg2 = 0; - int addrlo_idx; + TCGReg datalo, datahi, addrlo; + TCGReg addrhi __attribute__((unused)); + TCGMemOp opc; #if defined(CONFIG_SOFTMMU) - int mem_index, s_bits; + int mem_index; + TCGMemOp s_bits; uint8_t *label_ptr[2]; #endif - data_reg = args[0]; - addrlo_idx = 1; - if (TCG_TARGET_REG_BITS == 32 && opc == 3) { - data_reg2 = args[1]; - addrlo_idx = 2; - } + datalo = *args++; + datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0); + opc = *args++; #if defined(CONFIG_SOFTMMU) - mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)]; - s_bits = opc & 3; + mem_index = *args++; + s_bits = opc & MO_SIZE; - tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args, + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, label_ptr, offsetof(CPUTLBEntry, addr_read)); /* TLB Hit. 
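TCG_REG_L1 now holds the host address, i.e. the guest address plus the TLB addend loaded above, so the access can be issued directly.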
*/ - tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc); + tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); /* Record the current context of a load into ldst label */ - add_qemu_ldst_label(s, - 1, - opc, - data_reg, - data_reg2, - args[addrlo_idx], - args[addrlo_idx + 1], - mem_index, - s->code_ptr, - label_ptr); + add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi, + mem_index, s->code_ptr, label_ptr); #else { int32_t offset = GUEST_BASE; - int base = args[addrlo_idx]; + TCGReg base = addrlo; int seg = 0; /* ??? We assume all operations have left us with register contents @@ -1278,32 +1470,35 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, offset = 0; } - tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, seg, opc); + tcg_out_qemu_ld_direct(s, datalo, datahi, base, offset, seg, opc); } #endif } -static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, - int base, tcg_target_long ofs, int seg, - int sizeop) +static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, intptr_t ofs, int seg, + TCGMemOp memop) { -#ifdef TARGET_WORDS_BIGENDIAN - const int bswap = 1; -#else - const int bswap = 0; -#endif + const TCGMemOp bswap = memop & MO_BSWAP; + /* ??? Ideally we wouldn't need a scratch register. For user-only, we could perform the bswap twice to restore the original value instead of moving to the scratch. But as it is, the L constraint means that TCG_REG_L0 is definitely free here. */ - const int scratch = TCG_REG_L0; - switch (sizeop) { - case 0: + switch (memop & MO_SIZE) { + case MO_8: + /* In 32-bit mode, 8-bit stores can only happen from [abcd]x. Use the scratch register if necessary. */ + if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + datalo = scratch; + } tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, datalo, base, ofs); break; - case 1: + case MO_16: if (bswap) { tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); tcg_out_rolw_8(s, scratch); @@ -1312,7 +1507,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg, datalo, base, ofs); break; - case 2: + case MO_32: if (bswap) { tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); tcg_out_bswap32(s, scratch); @@ -1320,7 +1515,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, } tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs); break; - case 3: + case MO_64: if (TCG_TARGET_REG_BITS == 64) { if (bswap) { tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo); @@ -1346,48 +1541,40 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, } } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, - int opc) +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) { - int data_reg, data_reg2 = 0; - int addrlo_idx; + TCGReg datalo, datahi, addrlo; + TCGReg addrhi __attribute__((unused)); + TCGMemOp opc; #if defined(CONFIG_SOFTMMU) - int mem_index, s_bits; + int mem_index; + TCGMemOp s_bits; uint8_t *label_ptr[2]; #endif - data_reg = args[0]; - addrlo_idx = 1; - if (TCG_TARGET_REG_BITS == 32 && opc == 3) { - data_reg2 = args[1]; - addrlo_idx = 2; - } + datalo = *args++; + datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ?
*args++ : 0); + opc = *args++; #if defined(CONFIG_SOFTMMU) - mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)]; - s_bits = opc; + mem_index = *args++; + s_bits = opc & MO_SIZE; - tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args, + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, label_ptr, offsetof(CPUTLBEntry, addr_write)); /* TLB Hit. */ - tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc); + tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); /* Record the current context of a store into ldst label */ - add_qemu_ldst_label(s, - 0, - opc, - data_reg, - data_reg2, - args[addrlo_idx], - args[addrlo_idx + 1], - mem_index, - s->code_ptr, - label_ptr); + add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi, + mem_index, s->code_ptr, label_ptr); #else { int32_t offset = GUEST_BASE; - int base = args[addrlo_idx]; + TCGReg base = addrlo; int seg = 0; /* ??? We assume all operations have left us with register contents @@ -1405,266 +1592,11 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, offset = 0; } - tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, seg, opc); + tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc); } #endif } -#if defined(CONFIG_SOFTMMU) -/* - * Record the context of a call to the out of line helper code for the slow path - * for a load or store, so that we can later generate the correct helper code - */ -static void add_qemu_ldst_label(TCGContext *s, - int is_ld, - int opc, - int data_reg, - int data_reg2, - int addrlo_reg, - int addrhi_reg, - int mem_index, - uint8_t *raddr, - uint8_t **label_ptr) -{ - int idx; - TCGLabelQemuLdst *label; - - if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) { - tcg_abort(); - } - - idx = s->nb_qemu_ldst_labels++; - label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx]; - label->is_ld = is_ld; - label->opc = opc; - label->datalo_reg = data_reg; - label->datahi_reg = data_reg2; - label->addrlo_reg = addrlo_reg; - label->addrhi_reg = addrhi_reg; - label->mem_index = mem_index; - label->raddr = raddr; - label->label_ptr[0] = label_ptr[0]; - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - label->label_ptr[1] = label_ptr[1]; - } -} - -/* - * Generate code for the slow path for a load at the end of block - */ -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label) -{ - int s_bits; - int opc = label->opc; - int mem_index = label->mem_index; -#if TCG_TARGET_REG_BITS == 32 - int stack_adjust; - int addrlo_reg = label->addrlo_reg; - int addrhi_reg = label->addrhi_reg; -#endif - int data_reg = label->datalo_reg; - int data_reg2 = label->datahi_reg; - uint8_t *raddr = label->raddr; - uint8_t **label_ptr = &label->label_ptr[0]; - - s_bits = opc & 3; - - /* resolve label address */ - *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4); - } - -#if TCG_TARGET_REG_BITS == 32 - tcg_out_pushi(s, mem_index); - stack_adjust = 4; - if (TARGET_LONG_BITS == 64) { - tcg_out_push(s, addrhi_reg); - stack_adjust += 4; - } - tcg_out_push(s, addrlo_reg); - stack_adjust += 4; - tcg_out_push(s, TCG_AREG0); - stack_adjust += 4; -#else - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0); - /* The second argument is already loaded with addrlo. 
*/ - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index); -#endif - - /* Code generation of qemu_ld/st's slow path calling MMU helper - - PRE_PROC ... - call MMU helper - jmp POST_PROC (2b) : short forward jump <- GETRA() - jmp next_code (5b) : dummy long backward jump which is never executed - POST_PROC ... : do post-processing <- GETRA() + 7 - jmp next_code : jump to the code corresponding to next IR of qemu_ld/st - */ - - tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]); - - /* Jump to post-processing code */ - tcg_out8(s, OPC_JMP_short); - tcg_out8(s, 5); - /* Dummy backward jump having information of fast path'pc for MMU helpers */ - tcg_out8(s, OPC_JMP_long); - *(int32_t *)s->code_ptr = (int32_t)(raddr - s->code_ptr - 4); - s->code_ptr += 4; - -#if TCG_TARGET_REG_BITS == 32 - if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) { - /* Pop and discard. This is 2 bytes smaller than the add. */ - tcg_out_pop(s, TCG_REG_ECX); - } else if (stack_adjust != 0) { - tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust); - } -#endif - - switch(opc) { - case 0 | 4: - tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW); - break; - case 1 | 4: - tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW); - break; - case 0: - tcg_out_ext8u(s, data_reg, TCG_REG_EAX); - break; - case 1: - tcg_out_ext16u(s, data_reg, TCG_REG_EAX); - break; - case 2: - tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); - break; -#if TCG_TARGET_REG_BITS == 64 - case 2 | 4: - tcg_out_ext32s(s, data_reg, TCG_REG_EAX); - break; -#endif - case 3: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX); - } else if (data_reg == TCG_REG_EDX) { - /* xchg %edx, %eax */ - tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0); - tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX); - } else { - tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); - tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX); - } - break; - default: - tcg_abort(); - } - - /* Jump to the code corresponding to next IR of qemu_st */ - tcg_out_jmp(s, (tcg_target_long)raddr); -} - -/* - * Generate code for the slow path for a store at the end of block - */ -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *label) -{ - int s_bits; - int stack_adjust; - int opc = label->opc; - int mem_index = label->mem_index; - int data_reg = label->datalo_reg; -#if TCG_TARGET_REG_BITS == 32 - int data_reg2 = label->datahi_reg; - int addrlo_reg = label->addrlo_reg; - int addrhi_reg = label->addrhi_reg; -#endif - uint8_t *raddr = label->raddr; - uint8_t **label_ptr = &label->label_ptr[0]; - - s_bits = opc & 3; - - /* resolve label address */ - *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4); - } - -#if TCG_TARGET_REG_BITS == 32 - tcg_out_pushi(s, mem_index); - stack_adjust = 4; - if (opc == 3) { - tcg_out_push(s, data_reg2); - stack_adjust += 4; - } - tcg_out_push(s, data_reg); - stack_adjust += 4; - if (TARGET_LONG_BITS == 64) { - tcg_out_push(s, addrhi_reg); - stack_adjust += 4; - } - tcg_out_push(s, addrlo_reg); - stack_adjust += 4; - tcg_out_push(s, TCG_AREG0); - stack_adjust += 4; -#else - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0); - /* The second argument is already loaded with addrlo. */ - tcg_out_mov(s, (opc == 3 ? 
TCG_TYPE_I64 : TCG_TYPE_I32), - tcg_target_call_iarg_regs[2], data_reg); - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], mem_index); - stack_adjust = 0; -#endif - - /* Code generation of qemu_ld/st's slow path calling MMU helper - - PRE_PROC ... - call MMU helper - jmp POST_PROC (2b) : short forward jump <- GETRA() - jmp next_code (5b) : dummy long backward jump which is never executed - POST_PROC ... : do post-processing <- GETRA() + 7 - jmp next_code : jump to the code corresponding to next IR of qemu_ld/st - */ - - tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]); - - /* Jump to post-processing code */ - tcg_out8(s, OPC_JMP_short); - tcg_out8(s, 5); - /* Dummy backward jump having information of fast path'pc for MMU helpers */ - tcg_out8(s, OPC_JMP_long); - *(int32_t *)s->code_ptr = (int32_t)(raddr - s->code_ptr - 4); - s->code_ptr += 4; - - if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) { - /* Pop and discard. This is 2 bytes smaller than the add. */ - tcg_out_pop(s, TCG_REG_ECX); - } else if (stack_adjust != 0) { - tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust); - } - - /* Jump to the code corresponding to next IR of qemu_st */ - tcg_out_jmp(s, (tcg_target_long)raddr); -} - -/* - * Generate TB finalization at the end of block - */ -void tcg_out_tb_finalize(TCGContext *s) -{ - int i; - TCGLabelQemuLdst *label; - - /* qemu_ld/st slow paths */ - for (i = 0; i < s->nb_qemu_ldst_labels; i++) { - label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[i]; - if (label->is_ld) { - tcg_out_qemu_ld_slow_path(s, label); - } else { - tcg_out_qemu_st_slow_path(s, label); - } - } -} -#endif /* CONFIG_SOFTMMU */ - static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { @@ -1683,7 +1615,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch(opc) { case INDEX_op_exit_tb: tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]); - tcg_out_jmp(s, (tcg_target_long) tb_ret_addr); + tcg_out_jmp(s, (uintptr_t)tb_ret_addr); break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { @@ -1694,7 +1626,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } else { /* indirect jump method */ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1, - (tcg_target_long)(s->tb_next + args[0])); + (intptr_t)(s->tb_next + args[0])); } s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; break; @@ -1890,40 +1822,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_ext16u(s, args[0], args[1]); break; - case INDEX_op_qemu_ld8u: + case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, args, 0); break; - case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, args, 0 | 4); - break; - case INDEX_op_qemu_ld16u: + case INDEX_op_qemu_ld_i64: tcg_out_qemu_ld(s, args, 1); break; - case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, args, 1 | 4); - break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_qemu_ld32u: -#endif - case INDEX_op_qemu_ld32: - tcg_out_qemu_ld(s, args, 2); - break; - case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, 3); - break; - - case INDEX_op_qemu_st8: + case INDEX_op_qemu_st_i32: tcg_out_qemu_st(s, args, 0); break; - case INDEX_op_qemu_st16: + case INDEX_op_qemu_st_i64: tcg_out_qemu_st(s, args, 1); break; - case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, args, 2); - break; - case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, 3); - break; OP_32_64(mulu2): tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); @@ -1982,9 +1892,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_st(s, TCG_TYPE_I64, 
args[0], args[1], args[2]); } break; - case INDEX_op_qemu_ld32s: - tcg_out_qemu_ld(s, args, 2 | 4); - break; case INDEX_op_brcond_i64: tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1], @@ -2149,43 +2056,20 @@ static const TCGTargetOpDef x86_op_defs[] = { #endif #if TCG_TARGET_REG_BITS == 64 - { INDEX_op_qemu_ld8u, { "r", "L" } }, - { INDEX_op_qemu_ld8s, { "r", "L" } }, - { INDEX_op_qemu_ld16u, { "r", "L" } }, - { INDEX_op_qemu_ld16s, { "r", "L" } }, - { INDEX_op_qemu_ld32, { "r", "L" } }, - { INDEX_op_qemu_ld32u, { "r", "L" } }, - { INDEX_op_qemu_ld32s, { "r", "L" } }, - { INDEX_op_qemu_ld64, { "r", "L" } }, - - { INDEX_op_qemu_st8, { "L", "L" } }, - { INDEX_op_qemu_st16, { "L", "L" } }, - { INDEX_op_qemu_st32, { "L", "L" } }, - { INDEX_op_qemu_st64, { "L", "L" } }, + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "L", "L" } }, + { INDEX_op_qemu_ld_i64, { "r", "L" } }, + { INDEX_op_qemu_st_i64, { "L", "L" } }, #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS - { INDEX_op_qemu_ld8u, { "r", "L" } }, - { INDEX_op_qemu_ld8s, { "r", "L" } }, - { INDEX_op_qemu_ld16u, { "r", "L" } }, - { INDEX_op_qemu_ld16s, { "r", "L" } }, - { INDEX_op_qemu_ld32, { "r", "L" } }, - { INDEX_op_qemu_ld64, { "r", "r", "L" } }, - - { INDEX_op_qemu_st8, { "cb", "L" } }, - { INDEX_op_qemu_st16, { "L", "L" } }, - { INDEX_op_qemu_st32, { "L", "L" } }, - { INDEX_op_qemu_st64, { "L", "L", "L" } }, + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "L", "L" } }, + { INDEX_op_qemu_ld_i64, { "r", "r", "L" } }, + { INDEX_op_qemu_st_i64, { "L", "L", "L" } }, #else - { INDEX_op_qemu_ld8u, { "r", "L", "L" } }, - { INDEX_op_qemu_ld8s, { "r", "L", "L" } }, - { INDEX_op_qemu_ld16u, { "r", "L", "L" } }, - { INDEX_op_qemu_ld16s, { "r", "L", "L" } }, - { INDEX_op_qemu_ld32, { "r", "L", "L" } }, - { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } }, - - { INDEX_op_qemu_st8, { "cb", "L", "L" } }, - { INDEX_op_qemu_st16, { "L", "L", "L" } }, - { INDEX_op_qemu_st32, { "L", "L", "L" } }, - { INDEX_op_qemu_st64, { "L", "L", "L", "L" } }, + { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, + { INDEX_op_qemu_st_i32, { "L", "L", "L" } }, + { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } }, + { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, #endif { -1 }, }; @@ -2387,7 +2271,7 @@ static DebugFrame debug_frame = { #if defined(ELF_HOST_MACHINE) void tcg_register_jit(void *buf, size_t buf_size) { - debug_frame.fde.func_start = (tcg_target_long) buf; + debug_frame.fde.func_start = (uintptr_t)buf; debug_frame.fde.func_len = buf_size; tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index e3f6bb965f..92c0fcd36d 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -24,12 +24,14 @@ #ifndef TCG_TARGET_I386 #define TCG_TARGET_I386 1 -//#define TCG_TARGET_WORDS_BIGENDIAN +#undef TCG_TARGET_WORDS_BIGENDIAN -#if TCG_TARGET_REG_BITS == 64 -# define TCG_TARGET_NB_REGS 16 +#ifdef __x86_64__ +# define TCG_TARGET_REG_BITS 64 +# define TCG_TARGET_NB_REGS 16 #else -# define TCG_TARGET_NB_REGS 8 +# define TCG_TARGET_REG_BITS 32 +# define TCG_TARGET_NB_REGS 8 #endif typedef enum { @@ -96,6 +98,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div2_i64 1 @@ -122,8 +126,12 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i64 1 #define 
TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_muls2_i64 1 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 #endif +#define TCG_TARGET_HAS_new_ldst 1 + #define TCG_TARGET_deposit_i32_valid(ofs, len) \ (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \ ((ofs) == 0 && (len) == 16)) @@ -135,8 +143,7 @@ typedef enum { # define TCG_AREG0 TCG_REG_EBP #endif -static inline void flush_icache_range(tcg_target_ulong start, - tcg_target_ulong stop) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) { } diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 2373d9ef79..2d8e00cd94 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -23,6 +23,8 @@ * THE SOFTWARE. */ +#include "tcg-be-null.h" + /* * Register definitions */ @@ -107,7 +109,6 @@ enum { }; static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R33, TCG_REG_R35, TCG_REG_R36, TCG_REG_R37, @@ -224,6 +225,7 @@ enum { OPC_BR_CALL_SPTK_MANY_B5 = 0x02100001000ull, OPC_BR_RET_SPTK_MANY_B4 = 0x00108001100ull, OPC_BRL_SPTK_MANY_X3 = 0x18000001000ull, + OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull, OPC_CMP_LT_A6 = 0x18000000000ull, OPC_CMP_LTU_A6 = 0x1a000000000ull, OPC_CMP_EQ_A6 = 0x1c000000000ull, @@ -261,6 +263,7 @@ enum { OPC_MOV_I_I26 = 0x00150000000ull, OPC_MOVL_X2 = 0x0c000000000ull, OPC_OR_A1 = 0x10070000000ull, + OPC_OR_A3 = 0x10170000000ull, OPC_SETF_EXP_M18 = 0x0c748000000ull, OPC_SETF_SIG_M18 = 0x0c708000000ull, OPC_SHL_I7 = 0x0f240000000ull, @@ -279,9 +282,13 @@ enum { OPC_UNPACK4_L_I2 = 0x0f860000000ull, OPC_XMA_L_F2 = 0x1d000000000ull, OPC_XOR_A1 = 0x10078000000ull, + OPC_XOR_A3 = 0x10178000000ull, OPC_ZXT1_I29 = 0x00080000000ull, OPC_ZXT2_I29 = 0x00088000000ull, OPC_ZXT4_I29 = 0x00090000000ull, + + INSN_NOP_M = OPC_NOP_M48, /* nop.m 0 */ + INSN_NOP_I = OPC_NOP_I18, /* nop.i 0 */ }; static inline uint64_t tcg_opc_a1(int qp, uint64_t opc, int r1, @@ -579,6 +586,8 @@ static inline uint64_t tcg_opc_l3(uint64_t imm) return (imm & 0x07fffffffff00000ull) >> 18; } +#define tcg_opc_l4 tcg_opc_l3 + static inline uint64_t tcg_opc_m1(int qp, uint64_t opc, int r1, int r3) { return opc @@ -663,21 +672,30 @@ static inline uint64_t tcg_opc_x3(int qp, uint64_t opc, uint64_t imm) | (qp & 0x3f); } +static inline uint64_t tcg_opc_x4(int qp, uint64_t opc, int b1, uint64_t imm) +{ + return opc + | ((imm & 0x0800000000000000ull) >> 23) /* i */ + | ((imm & 0x00000000000fffffull) << 13) /* imm20b */ + | ((b1 & 0x7) << 6) + | (qp & 0x3f); +} + /* * Relocations */ -static inline void reloc_pcrel21b (void *pc, tcg_target_long target) +static inline void reloc_pcrel21b(void *pc, intptr_t target) { uint64_t imm; int64_t disp; int slot; - slot = (tcg_target_long) pc & 3; - pc = (void *)((tcg_target_long) pc & ~3); + slot = (intptr_t)pc & 3; + pc = (void *)((intptr_t)pc & ~3); - disp = target - (tcg_target_long) pc; + disp = target - (intptr_t)pc; imm = (uint64_t) disp >> 4; switch(slot) { @@ -728,12 +746,12 @@ static inline uint64_t get_reloc_pcrel21b (void *pc) } } -static inline void reloc_pcrel60b (void *pc, tcg_target_long target) +static inline void reloc_pcrel60b(void *pc, intptr_t target) { int64_t disp; uint64_t imm; - disp = target - (tcg_target_long) pc; + disp = target - (intptr_t)pc; imm = (uint64_t) disp >> 4; *(uint64_t *)(pc + 8) = (*(uint64_t *)(pc + 8) & 0xf700000fff800000ull) @@ -759,7 +777,7 @@ static inline uint64_t get_reloc_pcrel60b (void *pc) static void patch_reloc(uint8_t *code_ptr, int type, - tcg_target_long value, tcg_target_long addend) + intptr_t 
value, intptr_t addend) { value += addend; switch (type) { @@ -849,20 +867,31 @@ static inline void tcg_out_bundle(TCGContext *s, int template, s->code_ptr += 16; } +static inline uint64_t tcg_opc_mov_a(int qp, TCGReg dst, TCGReg src) +{ + return tcg_opc_a4(qp, OPC_ADDS_A4, dst, 0, src); +} + static inline void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { tcg_out_bundle(s, mmI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, 0, arg)); + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_mov_a(TCG_REG_P0, ret, arg)); +} + +static inline uint64_t tcg_opc_movi_a(int qp, TCGReg dst, int64_t src) +{ + assert(src == sextract64(src, 0, 22)); + return tcg_opc_a5(qp, OPC_ADDL_A5, dst, src, TCG_REG_R0); } static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg reg, tcg_target_long arg) { tcg_out_bundle(s, mLX, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_l2 (arg), tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, reg, arg)); } @@ -875,8 +904,8 @@ static void tcg_out_br(TCGContext *s, int label_index) the existing value and using it again. This ensure that caches and memory are kept coherent during retranslation. */ tcg_out_bundle(s, mmB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, + INSN_NOP_M, tcg_opc_b1 (TCG_REG_P0, OPC_BR_SPTK_MANY_B1, get_reloc_pcrel21b(s->code_ptr + 2))); @@ -888,7 +917,23 @@ static void tcg_out_br(TCGContext *s, int label_index) } } -static inline void tcg_out_call(TCGContext *s, TCGArg addr) +static inline void tcg_out_calli(TCGContext *s, uintptr_t addr) +{ + /* Look through the function descriptor. */ + uintptr_t disp, *desc = (uintptr_t *)addr; + tcg_out_bundle(s, mlx, + INSN_NOP_M, + tcg_opc_l2 (desc[1]), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, desc[1])); + disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4; + tcg_out_bundle(s, mLX, + INSN_NOP_M, + tcg_opc_l4 (disp), + tcg_opc_x4 (TCG_REG_P0, OPC_BRL_CALL_SPTK_MANY_X4, + TCG_REG_B0, disp)); +} + +static inline void tcg_out_callr(TCGContext *s, TCGReg addr) { tcg_out_bundle(s, MmI, tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, addr), @@ -897,7 +942,7 @@ static inline void tcg_out_call(TCGContext *s, TCGArg addr) TCG_REG_B6, TCG_REG_R2, 0)); tcg_out_bundle(s, mmB, tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R3), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_b5 (TCG_REG_P0, OPC_BR_CALL_SPTK_MANY_B5, TCG_REG_B0, TCG_REG_B6)); } @@ -913,7 +958,7 @@ static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg) imm = (uint64_t)disp >> 4; tcg_out_bundle(s, mLX, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_l3 (imm), tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm)); } @@ -930,12 +975,12 @@ static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg) tcg_out_bundle(s, MmI, tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, TCG_REG_R2), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R2, 0)); tcg_out_bundle(s, mmB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, + INSN_NOP_M, tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); } @@ -945,12 +990,12 @@ static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg) static inline void tcg_out_jmp(TCGContext *s, TCGArg addr) { tcg_out_bundle(s, mmI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + 
INSN_NOP_M, + INSN_NOP_M, tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, addr, 0)); tcg_out_bundle(s, mmB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, + INSN_NOP_M, tcg_opc_b4(TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); } @@ -962,14 +1007,14 @@ static inline void tcg_out_ld_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R2, arg2, arg1), tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_I); } else { tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2); tcg_out_bundle(s, MmI, tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, TCG_REG_R2, arg1), tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_I); } } @@ -981,19 +1026,19 @@ static inline void tcg_out_st_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R2, arg2, arg1), tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_I); } else { tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2); tcg_out_bundle(s, MmI, tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, TCG_REG_R2, arg1), tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_I); } } static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, tcg_target_long arg2) + TCGReg arg1, intptr_t arg2) { if (type == TCG_TYPE_I32) { tcg_out_ld_rel(s, OPC_LD4_M1, arg, arg1, arg2); @@ -1003,7 +1048,7 @@ static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, } static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, tcg_target_long arg2) + TCGReg arg1, intptr_t arg2) { if (type == TCG_TYPE_I32) { tcg_out_st_rel(s, OPC_ST4_M4, arg, arg1, arg2); @@ -1012,32 +1057,59 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, } } -static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGArg ret, - TCGArg arg1, int const_arg1, +static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3, + TCGReg ret, TCGArg arg1, int const_arg1, TCGArg arg2, int const_arg2) { - uint64_t opc1, opc2; + uint64_t opc1 = 0, opc2 = 0, opc3 = 0; + if (const_arg2 && arg2 != 0) { + opc2 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R3, arg2); + arg2 = TCG_REG_R3; + } if (const_arg1 && arg1 != 0) { - opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, - TCG_REG_R2, arg1, TCG_REG_R0); - arg1 = TCG_REG_R2; - } else { - opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0); + if (opc_a3 && arg1 == (int8_t)arg1) { + opc3 = tcg_opc_a3(TCG_REG_P0, opc_a3, ret, arg1, arg2); + } else { + opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, arg1); + arg1 = TCG_REG_R2; + } + } + if (opc3 == 0) { + opc3 = tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2); } - if (const_arg2 && arg2 != 0) { - opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, - TCG_REG_R3, arg2, TCG_REG_R0); - arg2 = TCG_REG_R3; + tcg_out_bundle(s, (opc1 || opc2 ? mII : miI), + opc1 ? opc1 : INSN_NOP_M, + opc2 ? 
opc2 : INSN_NOP_I, + opc3); +} + +static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2 && arg2 == sextract64(arg2, 0, 14)) { + tcg_out_bundle(s, mmI, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, arg2, arg1)); } else { - opc2 = tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0); + tcg_out_alu(s, OPC_ADD_A1, 0, ret, arg1, 0, arg2, const_arg2); } +} - tcg_out_bundle(s, mII, - opc1, - opc2, - tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2)); +static inline void tcg_out_sub(TCGContext *s, TCGReg ret, TCGArg arg1, + int const_arg1, TCGArg arg2, int const_arg2) +{ + if (!const_arg1 && const_arg2 && -arg2 == sextract64(-arg2, 0, 14)) { + tcg_out_bundle(s, mmI, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, -arg2, arg1)); + } else { + tcg_out_alu(s, OPC_SUB_A1, OPC_SUB_A3, ret, + arg1, const_arg1, arg2, const_arg2); + } } static inline void tcg_out_eqv(TCGContext *s, TCGArg ret, @@ -1045,7 +1117,7 @@ static inline void tcg_out_eqv(TCGContext *s, TCGArg ret, TCGArg arg2, int const_arg2) { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_a1 (TCG_REG_P0, OPC_XOR_A1, ret, arg1, arg2), tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); } @@ -1055,7 +1127,7 @@ static inline void tcg_out_nand(TCGContext *s, TCGArg ret, TCGArg arg2, int const_arg2) { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, ret, arg1, arg2), tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); } @@ -1065,7 +1137,7 @@ static inline void tcg_out_nor(TCGContext *s, TCGArg ret, TCGArg arg2, int const_arg2) { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, arg2), tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); } @@ -1075,7 +1147,7 @@ static inline void tcg_out_orc(TCGContext *s, TCGArg ret, TCGArg arg2, int const_arg2) { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, TCG_REG_R2, -1, arg2), tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, TCG_REG_R2)); } @@ -1086,16 +1158,16 @@ static inline void tcg_out_mul(TCGContext *s, TCGArg ret, tcg_out_bundle(s, mmI, tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F6, arg1), tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F7, arg2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_I); tcg_out_bundle(s, mmF, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, + INSN_NOP_M, tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6, TCG_REG_F7, TCG_REG_F0)); tcg_out_bundle(s, miI, tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, ret, TCG_REG_F6), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_I, + INSN_NOP_I); } static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1, @@ -1103,8 +1175,8 @@ static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1, { if (const_arg2) { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_M, + INSN_NOP_I, tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11, ret, arg1, arg2, 31 - arg2)); } else { @@ -1122,14 +1194,14 @@ static inline void tcg_out_sar_i64(TCGContext *s, TCGArg ret, TCGArg arg1, { if (const_arg2) { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), 
+ INSN_NOP_M, + INSN_NOP_I, tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11, ret, arg1, arg2, 63 - arg2)); } else { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_M, + INSN_NOP_I, tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, arg1, arg2)); } } @@ -1139,13 +1211,13 @@ static inline void tcg_out_shl_i32(TCGContext *s, TCGArg ret, TCGArg arg1, { if (const_arg2) { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_M, + INSN_NOP_I, tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg1, 63 - arg2, 31 - arg2)); } else { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R2, 0x1f, arg2), tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret, @@ -1158,14 +1230,14 @@ static inline void tcg_out_shl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, { if (const_arg2) { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_M, + INSN_NOP_I, tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg1, 63 - arg2, 63 - arg2)); } else { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_M, + INSN_NOP_I, tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret, arg1, arg2)); } @@ -1176,8 +1248,8 @@ static inline void tcg_out_shr_i32(TCGContext *s, TCGArg ret, TCGArg arg1, { if (const_arg2) { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_M, + INSN_NOP_I, tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, arg1, arg2, 31 - arg2)); } else { @@ -1195,14 +1267,14 @@ static inline void tcg_out_shr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, { if (const_arg2) { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_M, + INSN_NOP_I, tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, arg1, arg2, 63 - arg2)); } else { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_M, + INSN_NOP_I, tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, arg1, arg2)); } @@ -1213,20 +1285,20 @@ static inline void tcg_out_rotl_i32(TCGContext *s, TCGArg ret, TCGArg arg1, { if (const_arg2) { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, TCG_REG_R2, arg1, arg1), tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, TCG_REG_R2, 32 - arg2, 31)); } else { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, TCG_REG_R2, arg1, arg1), tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, 0x1f, arg2)); tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R3, 0x20, TCG_REG_R3), tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, @@ -1239,8 +1311,8 @@ static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, { if (const_arg2) { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_M, + INSN_NOP_I, tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1, arg1, 0x40 - arg2)); } else { @@ -1252,8 +1324,8 @@ static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R2, arg1, TCG_REG_R2)); tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - 
tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_M, + INSN_NOP_I, tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, TCG_REG_R2, TCG_REG_R3)); } @@ -1264,7 +1336,7 @@ static inline void tcg_out_rotr_i32(TCGContext *s, TCGArg ret, TCGArg arg1, { if (const_arg2) { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, TCG_REG_R2, arg1, arg1), tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, @@ -1285,8 +1357,8 @@ static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, { if (const_arg2) { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_M, + INSN_NOP_I, tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1, arg1, arg2)); } else { @@ -1298,44 +1370,63 @@ static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R2, arg1, TCG_REG_R2)); tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_M, + INSN_NOP_I, tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, TCG_REG_R2, TCG_REG_R3)); } } +static const uint64_t opc_ext_i29[8] = { + OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0, + OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 +}; + +static inline uint64_t tcg_opc_ext_i(int qp, TCGMemOp opc, TCGReg d, TCGReg s) +{ + if ((opc & MO_SIZE) == MO_64) { + return tcg_opc_mov_a(qp, d, s); + } else { + return tcg_opc_i29(qp, opc_ext_i29[opc & MO_SSIZE], d, s); + } +} + static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29, TCGArg ret, TCGArg arg) { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_M, + INSN_NOP_I, tcg_opc_i29(TCG_REG_P0, opc_i29, ret, arg)); } +static inline uint64_t tcg_opc_bswap64_i(int qp, TCGReg d, TCGReg s) +{ + return tcg_opc_i3(qp, OPC_MUX1_I3, d, s, 0xb); +} + static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg) { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 15, 15), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb)); + tcg_opc_bswap64_i(TCG_REG_P0, ret, ret)); } static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg) { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 31, 31), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb)); + tcg_opc_bswap64_i(TCG_REG_P0, ret, ret)); } static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg) { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb)); + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, ret, arg)); } static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1, @@ -1355,8 +1446,7 @@ static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1, } else { /* Otherwise, load any constant into a temporary. Do this into the first I slot to help out with cross-unit delays. */ - i1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, - TCG_REG_R2, a2, TCG_REG_R0); + i1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, a2); a2 = TCG_REG_R2; } } @@ -1364,8 +1454,8 @@ static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1, i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1); } tcg_out_bundle(s, (i1 ? 
mII : miI), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - i1 ? i1 : tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_M, + i1 ? i1 : INSN_NOP_I, i2); } @@ -1411,38 +1501,16 @@ static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1, } } -static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1, - int const_arg1, TCGArg arg2, int const_arg2, - int label_index, int cmp4) +static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, + TCGReg arg2, int label_index, int cmp4) { TCGLabel *l = &s->labels[label_index]; - uint64_t opc1, opc2; - if (const_arg1 && arg1 != 0) { - opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2, - arg1, TCG_REG_R0); - arg1 = TCG_REG_R2; - } else { - opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0); - } - - if (const_arg2 && arg2 != 0) { - opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R3, - arg2, TCG_REG_R0); - arg2 = TCG_REG_R3; - } else { - opc2 = tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0); - } - - tcg_out_bundle(s, mII, - opc1, - opc2, - tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4)); - tcg_out_bundle(s, mmB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_b1 (TCG_REG_P6, OPC_BR_DPTK_FEW_B1, - get_reloc_pcrel21b(s->code_ptr + 2))); + tcg_out_bundle(s, miB, + INSN_NOP_M, + tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), + tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1, + get_reloc_pcrel21b(s->code_ptr + 2))); if (l->has_value) { reloc_pcrel21b((s->code_ptr - 16) + 2, l->u.value); @@ -1457,8 +1525,8 @@ static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret, { tcg_out_bundle(s, MmI, tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), - tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, 1, TCG_REG_R0), - tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, 0, TCG_REG_R0)); + tcg_opc_movi_a(TCG_REG_P6, ret, 1), + tcg_opc_movi_a(TCG_REG_P7, ret, 0)); } static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret, @@ -1469,18 +1537,18 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret, uint64_t opc1, opc2; if (const_v1) { - opc1 = tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, v1, TCG_REG_R0); + opc1 = tcg_opc_movi_a(TCG_REG_P6, ret, v1); } else if (ret == v1) { - opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0); + opc1 = INSN_NOP_M; } else { - opc1 = tcg_opc_a4(TCG_REG_P6, OPC_ADDS_A4, ret, 0, v1); + opc1 = tcg_opc_mov_a(TCG_REG_P6, ret, v1); } if (const_v2) { - opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0); + opc2 = tcg_opc_movi_a(TCG_REG_P7, ret, v2); } else if (ret == v2) { - opc2 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0); + opc2 = INSN_NOP_I; } else { - opc2 = tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, ret, 0, v2); + opc2 = tcg_opc_mov_a(TCG_REG_P7, ret, v2); } tcg_out_bundle(s, MmI, @@ -1490,18 +1558,15 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret, } #if defined(CONFIG_SOFTMMU) - -#include "exec/softmmu_defs.h" - /* Load and compare a TLB entry, and return the result in (p6, p7). R2 is loaded with the address of the addend TLB entry. R57 is loaded with the address, zero extended on 32-bit targets.
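The callers then use these predicates to guard the two paths: P6 qualifies the fast path and P7 the slow path.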
*/ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg, - int s_bits, uint64_t offset_rw, + TCGMemOp s_bits, uint64_t offset_rw, uint64_t offset_addend) { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2, addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1), tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2, @@ -1510,12 +1575,9 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg, tcg_out_bundle(s, mII, tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2, offset_rw, TCG_REG_R2), -#if TARGET_LONG_BITS == 32 - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R57, addr_reg), -#else - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R57, - 0, addr_reg), -#endif + tcg_opc_ext_i(TCG_REG_P0, + TARGET_LONG_BITS == 32 ? MO_UL : MO_Q, + TCG_REG_R57, addr_reg), tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, TCG_REG_R2, TCG_AREG0)); tcg_out_bundle(s, mII, @@ -1539,23 +1601,20 @@ static const void * const qemu_ld_helpers[4] = { helper_ldq_mmu, }; -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, + TCGMemOp opc) { - int addr_reg, data_reg, mem_index, s_bits, bswap; - uint64_t opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 }; - uint64_t opc_ext_i29[8] = { OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0, - OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 }; + static const uint64_t opc_ld_m1[4] = { + OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 + }; + int addr_reg, data_reg, mem_index; + TCGMemOp s_bits, bswap; data_reg = *args++; addr_reg = *args++; mem_index = *args; - s_bits = opc & 3; - -#ifdef TARGET_WORDS_BIGENDIAN - bswap = 1; -#else - bswap = 0; -#endif + s_bits = opc & MO_SIZE; + bswap = opc & MO_BSWAP; /* Read the TLB entry */ tcg_out_qemu_tlb(s, addr_reg, s_bits, @@ -1564,8 +1623,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) /* P6 is the fast path, and P7 the slow path */ tcg_out_bundle(s, mLX, - tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, - TCG_REG_R56, 0, TCG_AREG0), + tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]), tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2, (tcg_target_long) qemu_ld_helpers[s_bits])); @@ -1576,14 +1634,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) TCG_REG_R3, TCG_REG_R57), tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R3, 0)); - if (bswap && s_bits == 1) { + if (bswap && s_bits == MO_16) { tcg_out_bundle(s, MmI, tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], TCG_REG_R8, TCG_REG_R3), tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, TCG_REG_R8, TCG_REG_R8, 15, 15)); - } else if (bswap && s_bits == 2) { + } else if (bswap && s_bits == MO_32) { tcg_out_bundle(s, MmI, tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], TCG_REG_R8, TCG_REG_R3), @@ -1595,38 +1653,26 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], TCG_REG_R8, TCG_REG_R3), tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_I); } - if (!bswap || s_bits == 0) { + if (!bswap) { tcg_out_bundle(s, miB, - tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58, - mem_index, TCG_REG_R0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index), + INSN_NOP_I, tcg_opc_b5 
(TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, TCG_REG_B0, TCG_REG_B6)); } else { tcg_out_bundle(s, miB, - tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58, - mem_index, TCG_REG_R0), - tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3, - TCG_REG_R8, TCG_REG_R8, 0xb), + tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index), + tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8), tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, TCG_REG_B0, TCG_REG_B6)); } - if (opc == 3) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, - data_reg, 0, TCG_REG_R8)); - } else { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc], - data_reg, TCG_REG_R8)); - } + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8)); } /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, @@ -1638,32 +1684,30 @@ static const void * const qemu_st_helpers[4] = { helper_stq_mmu, }; -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, + TCGMemOp opc) { - int addr_reg, data_reg, mem_index, bswap; - uint64_t opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 }; + static const uint64_t opc_st_m4[4] = { + OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 + }; + int addr_reg, data_reg, mem_index; + TCGMemOp s_bits; data_reg = *args++; addr_reg = *args++; mem_index = *args; + s_bits = opc & MO_SIZE; -#ifdef TARGET_WORDS_BIGENDIAN - bswap = 1; -#else - bswap = 0; -#endif - - tcg_out_qemu_tlb(s, addr_reg, opc, + tcg_out_qemu_tlb(s, addr_reg, s_bits, offsetof(CPUArchState, tlb_table[mem_index][0].addr_write), offsetof(CPUArchState, tlb_table[mem_index][0].addend)); /* P6 is the fast path, and P7 the slow path */ tcg_out_bundle(s, mLX, - tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, - TCG_REG_R56, 0, TCG_AREG0), - tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[opc]), + tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), + tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[s_bits]), tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2, - (tcg_target_long) qemu_st_helpers[opc])); + (tcg_target_long) qemu_st_helpers[s_bits])); tcg_out_bundle(s, MmI, tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3, TCG_REG_R2, 8), @@ -1672,150 +1716,145 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R3, 0)); - if (!bswap || opc == 0) { + switch (opc) { + case MO_8: + case MO_16: + case MO_32: + case MO_64: tcg_out_bundle(s, mii, tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58, - 0, data_reg), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); - } else if (opc == 1) { + tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), + INSN_NOP_I); + break; + + case MO_16 | MO_BSWAP: tcg_out_bundle(s, miI, tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_I, tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, TCG_REG_R2, data_reg, 15, 15)); tcg_out_bundle(s, miI, - tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58, - 0, data_reg), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3, - TCG_REG_R2, TCG_REG_R2, 0xb)); + tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, 
TCG_REG_R2)); data_reg = TCG_REG_R2; - } else if (opc == 2) { + break; + + case MO_32 | MO_BSWAP: tcg_out_bundle(s, miI, tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_I, tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, TCG_REG_R2, data_reg, 31, 31)); tcg_out_bundle(s, miI, - tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58, - 0, data_reg), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3, - TCG_REG_R2, TCG_REG_R2, 0xb)); + tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2)); data_reg = TCG_REG_R2; - } else if (opc == 3) { + break; + + case MO_64 | MO_BSWAP: tcg_out_bundle(s, miI, tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58, - 0, data_reg), - tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3, - TCG_REG_R2, data_reg, 0xb)); + tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), + tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, data_reg)); data_reg = TCG_REG_R2; + break; + + default: + tcg_abort(); } tcg_out_bundle(s, miB, - tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc], + tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits], data_reg, TCG_REG_R3), - tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59, - mem_index, TCG_REG_R0), + tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index), tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, TCG_REG_B0, TCG_REG_B6)); } #else /* !CONFIG_SOFTMMU */ -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, + TCGMemOp opc) { static uint64_t const opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 }; - static uint64_t const opc_sxt_i29[4] = { - OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 - }; - int addr_reg, data_reg, s_bits, bswap; + int addr_reg, data_reg; + TCGMemOp s_bits, bswap; data_reg = *args++; addr_reg = *args++; - s_bits = opc & 3; - -#ifdef TARGET_WORDS_BIGENDIAN - bswap = 1; -#else - bswap = 0; -#endif + s_bits = opc & MO_SIZE; + bswap = opc & MO_BSWAP; #if TARGET_LONG_BITS == 32 if (GUEST_BASE != 0) { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R3, addr_reg), tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, TCG_GUEST_BASE_REG, TCG_REG_R3)); } else { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, addr_reg), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_I); } - if (!bswap || s_bits == 0) { - if (s_bits == opc) { + if (!bswap) { + if (!(opc & MO_SIGN)) { tcg_out_bundle(s, miI, tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], data_reg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_I, + INSN_NOP_I); } else { tcg_out_bundle(s, mII, tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], data_reg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits], - data_reg, data_reg)); + INSN_NOP_I, + tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); } - } else if (s_bits == 3) { + } else if (s_bits == MO_64) { tcg_out_bundle(s, mII, tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], data_reg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - data_reg, data_reg, 0xb)); + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); } else { - if (s_bits == 1) { + if 
(s_bits == MO_16) { tcg_out_bundle(s, mII, tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], data_reg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_I, tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, data_reg, data_reg, 15, 15)); } else { tcg_out_bundle(s, mII, tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], data_reg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + INSN_NOP_I, tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, data_reg, data_reg, 31, 31)); } - if (opc == s_bits) { + if (!(opc & MO_SIGN)) { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - data_reg, data_reg, 0xb)); + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); } else { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - data_reg, data_reg, 0xb), - tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits], - data_reg, data_reg)); + INSN_NOP_M, + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg), + tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); } } #else @@ -1825,157 +1864,149 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) TCG_GUEST_BASE_REG, addr_reg), tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], data_reg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_I); } else { tcg_out_bundle(s, mmI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], data_reg, addr_reg), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_I); } - if (bswap && s_bits == 1) { + if (bswap && s_bits == MO_16) { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, data_reg, data_reg, 15, 15), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - data_reg, data_reg, 0xb)); - } else if (bswap && s_bits == 2) { + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); + } else if (bswap && s_bits == MO_32) { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, data_reg, data_reg, 31, 31), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - data_reg, data_reg, 0xb)); - } else if (bswap && s_bits == 3) { + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); + } else if (bswap && s_bits == MO_64) { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - data_reg, data_reg, 0xb)); + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); } - if (s_bits != opc) { + if (opc & MO_SIGN) { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits], - data_reg, data_reg)); + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); } #endif } -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, + TCGMemOp opc) { static uint64_t const opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 }; - int addr_reg, data_reg, bswap; + int addr_reg, data_reg; #if TARGET_LONG_BITS == 64 uint64_t add_guest_base; #endif + TCGMemOp s_bits, bswap; data_reg = *args++; addr_reg = *args++; - -#ifdef TARGET_WORDS_BIGENDIAN - bswap = 1; -#else - bswap = 0; -#endif + s_bits = opc & MO_SIZE; + bswap = opc & MO_BSWAP; #if TARGET_LONG_BITS == 32 if (GUEST_BASE != 0) { 
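/* Zero-extend the 32-bit guest address into R3, then add the guest base to form the host address in R2. */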
tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R3, addr_reg), tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, TCG_GUEST_BASE_REG, TCG_REG_R3)); } else { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, addr_reg), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_I); } if (bswap) { - if (opc == 1) { + if (s_bits == MO_16) { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3, data_reg, 15, 15), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - TCG_REG_R3, TCG_REG_R3, 0xb)); + tcg_opc_bswap64_i(TCG_REG_P0, + TCG_REG_R3, TCG_REG_R3)); data_reg = TCG_REG_R3; - } else if (opc == 2) { + } else if (s_bits == MO_32) { tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3, data_reg, 31, 31), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - TCG_REG_R3, TCG_REG_R3, 0xb)); + tcg_opc_bswap64_i(TCG_REG_P0, + TCG_REG_R3, TCG_REG_R3)); data_reg = TCG_REG_R3; - } else if (opc == 3) { + } else if (s_bits == MO_64) { tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - TCG_REG_R3, data_reg, 0xb)); + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg)); data_reg = TCG_REG_R3; } } tcg_out_bundle(s, mmI, - tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc], + tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], data_reg, TCG_REG_R2), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_M, + INSN_NOP_I); #else if (GUEST_BASE != 0) { add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, TCG_GUEST_BASE_REG, addr_reg); addr_reg = TCG_REG_R2; } else { - add_guest_base = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0); + add_guest_base = INSN_NOP_M; } - if (!bswap || opc == 0) { + if (!bswap) { tcg_out_bundle(s, (GUEST_BASE ? 
MmI : mmI), add_guest_base, - tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc], + tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], data_reg, addr_reg), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_I); } else { - if (opc == 1) { + if (s_bits == MO_16) { tcg_out_bundle(s, mII, add_guest_base, tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3, data_reg, 15, 15), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - TCG_REG_R3, TCG_REG_R3, 0xb)); + tcg_opc_bswap64_i(TCG_REG_P0, + TCG_REG_R3, TCG_REG_R3)); data_reg = TCG_REG_R3; - } else if (opc == 2) { + } else if (s_bits == MO_32) { tcg_out_bundle(s, mII, add_guest_base, tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3, data_reg, 31, 31), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - TCG_REG_R3, TCG_REG_R3, 0xb)); + tcg_opc_bswap64_i(TCG_REG_P0, + TCG_REG_R3, TCG_REG_R3)); data_reg = TCG_REG_R3; - } else if (opc == 3) { + } else if (s_bits == MO_64) { tcg_out_bundle(s, miI, add_guest_base, - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - TCG_REG_R3, data_reg, 0xb)); + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg)); data_reg = TCG_REG_R3; } tcg_out_bundle(s, miI, - tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc], + tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], data_reg, addr_reg), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + INSN_NOP_I, + INSN_NOP_I); } #endif } @@ -1993,7 +2024,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_br(s, args[0]); break; case INDEX_op_call: - tcg_out_call(s, args[0]); + if (likely(const_args[0])) { + tcg_out_calli(s, args[0]); + } else { + tcg_out_callr(s, args[0]); + } break; case INDEX_op_goto_tb: tcg_out_goto_tb(s, args[0]); @@ -2053,24 +2088,23 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_add_i32: case INDEX_op_add_i64: - tcg_out_alu(s, OPC_ADD_A1, args[0], args[1], const_args[1], - args[2], const_args[2]); + tcg_out_add(s, args[0], args[1], args[2], const_args[2]); break; case INDEX_op_sub_i32: case INDEX_op_sub_i64: - tcg_out_alu(s, OPC_SUB_A1, args[0], args[1], const_args[1], - args[2], const_args[2]); + tcg_out_sub(s, args[0], args[1], const_args[1], args[2], const_args[2]); break; case INDEX_op_and_i32: case INDEX_op_and_i64: - tcg_out_alu(s, OPC_AND_A1, args[0], args[1], const_args[1], - args[2], const_args[2]); + /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */ + tcg_out_alu(s, OPC_AND_A1, OPC_AND_A3, args[0], + args[2], const_args[2], args[1], const_args[1]); break; case INDEX_op_andc_i32: case INDEX_op_andc_i64: - tcg_out_alu(s, OPC_ANDCM_A1, args[0], args[1], const_args[1], - args[2], const_args[2]); + tcg_out_alu(s, OPC_ANDCM_A1, OPC_ANDCM_A3, args[0], + args[1], const_args[1], args[2], const_args[2]); break; case INDEX_op_eqv_i32: case INDEX_op_eqv_i64: @@ -2089,8 +2123,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_or_i32: case INDEX_op_or_i64: - tcg_out_alu(s, OPC_OR_A1, args[0], args[1], const_args[1], - args[2], const_args[2]); + /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */ + tcg_out_alu(s, OPC_OR_A1, OPC_OR_A3, args[0], + args[2], const_args[2], args[1], const_args[1]); break; case INDEX_op_orc_i32: case INDEX_op_orc_i64: @@ -2099,8 +2134,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_xor_i32: case INDEX_op_xor_i64: - tcg_out_alu(s, OPC_XOR_A1, args[0], args[1], const_args[1], - args[2], const_args[2]); + /* TCG expects arg2 constant; A3 expects arg1 constant. 
Swap. */ + tcg_out_alu(s, OPC_XOR_A1, OPC_XOR_A3, args[0], + args[2], const_args[2], args[1], const_args[1]); break; case INDEX_op_mul_i32: @@ -2181,12 +2217,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_brcond_i32: - tcg_out_brcond(s, args[2], args[0], const_args[0], - args[1], const_args[1], args[3], 1); + tcg_out_brcond(s, args[2], args[0], args[1], args[3], 1); break; case INDEX_op_brcond_i64: - tcg_out_brcond(s, args[2], args[0], const_args[0], - args[1], const_args[1], args[3], 0); + tcg_out_brcond(s, args[2], args[0], args[1], args[3], 0); break; case INDEX_op_setcond_i32: tcg_out_setcond(s, args[3], args[0], args[1], args[2], 1); @@ -2204,39 +2238,39 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld(s, args, 0); + tcg_out_qemu_ld(s, args, MO_UB); break; case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, args, 0 | 4); + tcg_out_qemu_ld(s, args, MO_SB); break; case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld(s, args, 1); + tcg_out_qemu_ld(s, args, MO_TEUW); break; case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, args, 1 | 4); + tcg_out_qemu_ld(s, args, MO_TESW); break; case INDEX_op_qemu_ld32: case INDEX_op_qemu_ld32u: - tcg_out_qemu_ld(s, args, 2); + tcg_out_qemu_ld(s, args, MO_TEUL); break; case INDEX_op_qemu_ld32s: - tcg_out_qemu_ld(s, args, 2 | 4); + tcg_out_qemu_ld(s, args, MO_TESL); break; case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, 3); + tcg_out_qemu_ld(s, args, MO_TEQ); break; case INDEX_op_qemu_st8: - tcg_out_qemu_st(s, args, 0); + tcg_out_qemu_st(s, args, MO_UB); break; case INDEX_op_qemu_st16: - tcg_out_qemu_st(s, args, 1); + tcg_out_qemu_st(s, args, MO_TEUW); break; case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, args, 2); + tcg_out_qemu_st(s, args, MO_TEUL); break; case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, 3); + tcg_out_qemu_st(s, args, MO_TEQ); break; default: @@ -2246,7 +2280,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_br, { } }, - { INDEX_op_call, { "r" } }, + { INDEX_op_call, { "ri" } }, { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, @@ -2262,7 +2296,7 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_st16_i32, { "rZ", "r" } }, { INDEX_op_st_i32, { "rZ", "r" } }, - { INDEX_op_add_i32, { "r", "rI", "rI" } }, + { INDEX_op_add_i32, { "r", "rZ", "rI" } }, { INDEX_op_sub_i32, { "r", "rI", "rI" } }, { INDEX_op_and_i32, { "r", "rI", "rI" } }, @@ -2290,7 +2324,7 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_bswap16_i32, { "r", "rZ" } }, { INDEX_op_bswap32_i32, { "r", "rZ" } }, - { INDEX_op_brcond_i32, { "rI", "rI" } }, + { INDEX_op_brcond_i32, { "rZ", "rZ" } }, { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } }, @@ -2309,7 +2343,7 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_st32_i64, { "rZ", "r" } }, { INDEX_op_st_i64, { "rZ", "r" } }, - { INDEX_op_add_i64, { "r", "rI", "rI" } }, + { INDEX_op_add_i64, { "r", "rZ", "rI" } }, { INDEX_op_sub_i64, { "r", "rI", "rI" } }, { INDEX_op_and_i64, { "r", "rI", "rI" } }, @@ -2340,7 +2374,7 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_bswap32_i64, { "r", "rZ" } }, { INDEX_op_bswap64_i64, { "r", "rZ" } }, - { INDEX_op_brcond_i64, { "rI", "rI" } }, + { INDEX_op_brcond_i64, { "rZ", "rZ" } }, { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } }, { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } }, @@ -2385,8 +2419,7 @@ static void 
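The swap comments above reflect an ia64 encoding quirk: the A3 (immediate) ALU form carries its 8-bit constant in the first source slot, while TCG canonicalizes constants into the second operand, so commutative ops exchange the two before emission. A sketch of that shuffle; emit_a1/emit_a3 are hypothetical stand-ins for the real tcg_opc_a1/tcg_opc_a3 encoders:

    #include <stdio.h>

    static void emit_a1(int opc, int dest, int r1, int r2)    /* reg, reg */
    {
        printf("A1 %d: r%d = r%d, r%d\n", opc, dest, r1, r2);
    }

    static void emit_a3(int opc, int dest, int imm8, int r2)  /* imm, reg */
    {
        printf("A3 %d: r%d = %d, r%d\n", opc, dest, imm8, r2);
    }

    /* Assumes at most one operand is constant, as after constant folding. */
    static void emit_commutative(int opc_a1, int opc_a3, int dest,
                                 long arg1, int const1, long arg2, int const2)
    {
        if (const2 && !const1) {        /* move the constant into slot 1 */
            long t = arg1;
            arg1 = arg2;
            arg2 = t;
            const1 = 1;
        }
        if (const1) {
            emit_a3(opc_a3, dest, (int)arg1, (int)arg2);
        } else {
            emit_a1(opc_a1, dest, (int)arg1, (int)arg2);
        }
    }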
tcg_target_qemu_prologue(TCGContext *s) tcg_out_bundle(s, miI, tcg_opc_m34(TCG_REG_P0, OPC_ALLOC_M34, TCG_REG_R34, 32, 24, 0), - tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, - TCG_AREG0, 0, TCG_REG_R32), + INSN_NOP_I, tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R33, 0)); @@ -2394,7 +2427,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) an ADDL in the M slot of the next bundle. */ if (GUEST_BASE != 0) { tcg_out_bundle(s, mlx, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_l2 (GUEST_BASE), tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_GUEST_BASE_REG, GUEST_BASE)); @@ -2405,19 +2438,19 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R12, -frame_size, TCG_REG_R12), tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, - TCG_REG_R32, TCG_REG_B0), + TCG_REG_R33, TCG_REG_B0), tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); /* epilogue */ tb_ret_addr = s->code_ptr; tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, - TCG_REG_B0, TCG_REG_R32, 0), + TCG_REG_B0, TCG_REG_R33, 0), tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R12, frame_size, TCG_REG_R12)); tcg_out_bundle(s, miB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + INSN_NOP_M, tcg_opc_i26(TCG_REG_P0, OPC_MOV_I_I26, TCG_REG_PFS, TCG_REG_R34), tcg_opc_b4 (TCG_REG_P0, OPC_BR_RET_SPTK_MANY_B4, @@ -2470,16 +2503,17 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(s->reserved_regs, TCG_REG_R3); /* internal use */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R12); /* stack pointer */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R32); /* return address */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R33); /* return address */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R34); /* PFS */ - /* The following 3 are not in use, are call-saved, but *not* saved + /* The following 4 are not in use, are call-saved, but *not* saved by the prologue. Therefore we cannot use them without modifying the prologue. There doesn't seem to be any good reason to use these as opposed to the windowed registers. 
*/ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R4); tcg_regset_set_reg(s->reserved_regs, TCG_REG_R5); tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R7); tcg_add_target_add_op_defs(ia64_op_defs); } diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index f32d5199cb..52a939c946 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -92,6 +92,8 @@ typedef enum { TCG_REG_R61, TCG_REG_R62, TCG_REG_R63, + + TCG_AREG0 = TCG_REG_R32, } TCGReg; #define TCG_CT_CONST_ZERO 0x100 @@ -146,6 +148,12 @@ typedef enum { #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_mulsh_i64 0 + +#define TCG_TARGET_HAS_new_ldst 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) @@ -156,10 +164,7 @@ typedef enum { #define TCG_TARGET_HAS_not_i32 0 /* xor r1, -1, r3 */ #define TCG_TARGET_HAS_not_i64 0 /* xor r1, -1, r3 */ -#define TCG_AREG0 TCG_REG_R7 - -static inline void flush_icache_range(tcg_target_ulong start, - tcg_target_ulong stop) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) { start = start & ~(32UL - 1UL); stop = (stop + (32UL - 1UL)) & ~(32UL - 1UL); diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 793532ec95..40551cdcb5 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -24,6 +24,8 @@ * THE SOFTWARE. */ +#include "tcg-be-null.h" + #if defined(TCG_TARGET_WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN) # define TCG_NEED_BSWAP 0 #else @@ -108,33 +110,33 @@ static const TCGReg tcg_target_call_oarg_regs[2] = { static uint8_t *tb_ret_addr; -static inline uint32_t reloc_lo16_val (void *pc, tcg_target_long target) +static inline uint32_t reloc_lo16_val(void *pc, intptr_t target) { return target & 0xffff; } -static inline void reloc_lo16 (void *pc, tcg_target_long target) +static inline void reloc_lo16(void *pc, intptr_t target) { *(uint32_t *) pc = (*(uint32_t *) pc & ~0xffff) | reloc_lo16_val(pc, target); } -static inline uint32_t reloc_hi16_val (void *pc, tcg_target_long target) +static inline uint32_t reloc_hi16_val(void *pc, intptr_t target) { return (target >> 16) & 0xffff; } -static inline void reloc_hi16 (void *pc, tcg_target_long target) +static inline void reloc_hi16(void *pc, intptr_t target) { *(uint32_t *) pc = (*(uint32_t *) pc & ~0xffff) | reloc_hi16_val(pc, target); } -static inline uint32_t reloc_pc16_val (void *pc, tcg_target_long target) +static inline uint32_t reloc_pc16_val(void *pc, intptr_t target) { int32_t disp; - disp = target - (tcg_target_long) pc - 4; + disp = target - (intptr_t)pc - 4; if (disp != (disp << 14) >> 14) { tcg_abort (); } @@ -157,14 +159,14 @@ static inline uint32_t reloc_26_val (void *pc, tcg_target_long target) return (target >> 2) & 0x3ffffff; } -static inline void reloc_pc26 (void *pc, tcg_target_long target) +static inline void reloc_pc26(void *pc, intptr_t target) { *(uint32_t *) pc = (*(uint32_t *) pc & ~0x3ffffff) | reloc_26_val(pc, target); } static void patch_reloc(uint8_t *code_ptr, int type, - tcg_target_long value, tcg_target_long addend) + intptr_t value, intptr_t addend) { value += addend; switch(type) { @@ -422,83 +424,83 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg) { -#if 
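The intptr_t flush_icache_range above rounds the span out to the whole 32-byte lines the flush operates on. The rounding idiom in isolation (line must be a power of two):

    #include <stdint.h>

    static inline void align_to_lines(uintptr_t *start, uintptr_t *stop,
                                      uintptr_t line)
    {
        *start &= ~(line - 1);                       /* round start down */
        *stop = (*stop + line - 1) & ~(line - 1);    /* round stop up    */
    }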
defined(__mips_isa_rev) && (__mips_isa_rev >= 2) - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); -#else - /* ret and arg can't be register at */ - if (ret == TCG_REG_AT || arg == TCG_REG_AT) { - tcg_abort(); - } + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + } else { + /* ret and arg can't be register at */ + if (ret == TCG_REG_AT || arg == TCG_REG_AT) { + tcg_abort(); + } - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); -#endif + tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); + tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + } } static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) { -#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); - tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); -#else - /* ret and arg can't be register at */ - if (ret == TCG_REG_AT || arg == TCG_REG_AT) { - tcg_abort(); - } + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); + } else { + /* ret and arg can't be register at */ + if (ret == TCG_REG_AT || arg == TCG_REG_AT) { + tcg_abort(); + } - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); -#endif + tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); + tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + } } static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg) { -#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); - tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); -#else - /* ret and arg must be different and can't be register at */ - if (ret == arg || ret == TCG_REG_AT || arg == TCG_REG_AT) { - tcg_abort(); - } + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); + } else { + /* ret and arg must be different and can't be register at */ + if (ret == arg || ret == TCG_REG_AT || arg == TCG_REG_AT) { + tcg_abort(); + } - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 24); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 24); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, arg, 0xff00); - tcg_out_opc_sa(s, OPC_SLL, TCG_REG_AT, TCG_REG_AT, 8); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, arg, 0xff00); + tcg_out_opc_sa(s, OPC_SLL, TCG_REG_AT, TCG_REG_AT, 8); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); -#endif + tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff00); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + } } static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg) { -#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) - 
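When WSBH/ROTR are unavailable, the pre-R2 fallbacks above synthesize the swaps from shifts, masks, and ORs. C equivalents of the two emitted sequences (models of the generated code; the 16-bit one assumes a zero-extended input, as the callers guarantee):

    #include <stdint.h>

    static inline uint32_t bswap16_fallback(uint32_t x)   /* SRL/SLL/ANDI/OR */
    {
        return ((x << 8) & 0xff00) | (x >> 8);
    }

    static inline uint32_t bswap32_fallback(uint32_t x)
    {
        return (x << 24)
             | ((x & 0x0000ff00) << 8)
             | ((x >> 8) & 0x0000ff00)
             | (x >> 24);
    }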
tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg); -#else - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - tcg_out_opc_sa(s, OPC_SRA, ret, ret, 24); -#endif + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg); + } else { + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); + tcg_out_opc_sa(s, OPC_SRA, ret, ret, 24); + } } static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) { -#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) - tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg); -#else - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 16); - tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); -#endif + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg); + } else { + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 16); + tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); + } } static inline void tcg_out_ldst(TCGContext *s, int opc, TCGArg arg, @@ -514,13 +516,13 @@ static inline void tcg_out_ldst(TCGContext *s, int opc, TCGArg arg, } static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, tcg_target_long arg2) + TCGReg arg1, intptr_t arg2) { tcg_out_ldst(s, OPC_LW, arg, arg1, arg2); } static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, tcg_target_long arg2) + TCGReg arg1, intptr_t arg2) { tcg_out_ldst(s, OPC_SW, arg, arg1, arg2); } @@ -919,9 +921,6 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, } #if defined(CONFIG_SOFTMMU) - -#include "exec/softmmu_defs.h" - /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ static const void * const qemu_ld_helpers[4] = { @@ -1406,12 +1405,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_mov(s, TCG_TYPE_I32, args[0], TCG_REG_AT); break; case INDEX_op_mul_i32: -#if defined(__mips_isa_rev) && (__mips_isa_rev >= 1) - tcg_out_opc_reg(s, OPC_MUL, args[0], args[1], args[2]); -#else - tcg_out_opc_reg(s, OPC_MULT, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); -#endif + if (use_mips32_instructions) { + tcg_out_opc_reg(s, OPC_MUL, args[0], args[1], args[2]); + } else { + tcg_out_opc_reg(s, OPC_MULT, 0, args[1], args[2]); + tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); + } break; case INDEX_op_muls2_i32: tcg_out_opc_reg(s, OPC_MULT, 0, args[2], args[3]); @@ -1423,6 +1422,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); tcg_out_opc_reg(s, OPC_MFHI, args[1], 0, 0); break; + case INDEX_op_mulsh_i32: + tcg_out_opc_reg(s, OPC_MULT, 0, args[1], args[2]); + tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); + break; + case INDEX_op_muluh_i32: + tcg_out_opc_reg(s, OPC_MULTU, 0, args[1], args[2]); + tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); + break; case INDEX_op_div_i32: tcg_out_opc_reg(s, OPC_DIV, 0, args[1], args[2]); tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); @@ -1506,20 +1513,19 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; - /* The bswap routines do not work on non-R2 CPU. In that case - we let TCG generating the corresponding code. 
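Two of the patterns above, in portable C: the non-R2 ext8s/ext16s shift pair (SLL n; SRA n), and the new mulsh/muluh ops, which widen the multiply and read the high word back from HI. Two's-complement arithmetic right shift is assumed, as on MIPS:

    #include <stdint.h>

    static inline int32_t ext8s_model(int32_t x)               /* SLL 24; SRA 24 */
    {
        return (int32_t)((uint32_t)x << 24) >> 24;
    }

    static inline int32_t mulsh32_model(int32_t a, int32_t b)  /* MULT; MFHI */
    {
        return (int32_t)(((int64_t)a * b) >> 32);
    }

    static inline uint32_t muluh32_model(uint32_t a, uint32_t b) /* MULTU; MFHI */
    {
        return (uint32_t)(((uint64_t)a * b) >> 32);
    }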
*/ case INDEX_op_bswap16_i32: - tcg_out_bswap16(s, args[0], args[1]); + tcg_out_opc_reg(s, OPC_WSBH, args[0], 0, args[1]); break; case INDEX_op_bswap32_i32: - tcg_out_bswap32(s, args[0], args[1]); + tcg_out_opc_reg(s, OPC_WSBH, args[0], 0, args[1]); + tcg_out_opc_sa(s, OPC_ROTR, args[0], args[0], 16); break; case INDEX_op_ext8s_i32: - tcg_out_ext8s(s, args[0], args[1]); + tcg_out_opc_reg(s, OPC_SEB, args[0], 0, args[1]); break; case INDEX_op_ext16s_i32: - tcg_out_ext16s(s, args[0], args[1]); + tcg_out_opc_reg(s, OPC_SEH, args[0], 0, args[1]); break; case INDEX_op_deposit_i32: @@ -1602,6 +1608,8 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } }, { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } }, + { INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_muluh_i32, { "r", "rZ", "rZ" } }, { INDEX_op_div_i32, { "r", "rZ", "rZ" } }, { INDEX_op_divu_i32, { "r", "rZ", "rZ" } }, { INDEX_op_rem_i32, { "r", "rZ", "rZ" } }, @@ -1617,29 +1625,19 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, -#if TCG_TARGET_HAS_rot_i32 { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, -#endif -#if TCG_TARGET_HAS_bswap16_i32 { INDEX_op_bswap16_i32, { "r", "r" } }, -#endif -#if TCG_TARGET_HAS_bswap32_i32 { INDEX_op_bswap32_i32, { "r", "r" } }, -#endif { INDEX_op_ext8s_i32, { "r", "rZ" } }, { INDEX_op_ext16s_i32, { "r", "rZ" } }, -#if TCG_TARGET_HAS_deposit_i32 { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, -#endif { INDEX_op_brcond_i32, { "rZ", "rZ" } }, -#if TCG_TARGET_HAS_movcond_i32 { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } }, -#endif { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, @@ -1688,6 +1686,86 @@ static int tcg_target_callee_save_regs[] = { TCG_REG_RA, /* should be last for ABI compliance */ }; +/* The Linux kernel doesn't provide any information about the available + instruction set. Probe it using a signal handler. */ + +#include <signal.h> + +#ifndef use_movnz_instructions +bool use_movnz_instructions = false; +#endif + +#ifndef use_mips32_instructions +bool use_mips32_instructions = false; +#endif + +#ifndef use_mips32r2_instructions +bool use_mips32r2_instructions = false; +#endif + +static volatile sig_atomic_t got_sigill; + +static void sigill_handler(int signo, siginfo_t *si, void *data) +{ + /* Skip the faulty instruction */ + ucontext_t *uc = (ucontext_t *)data; + uc->uc_mcontext.pc += 4; + + got_sigill = 1; +} + +static void tcg_target_detect_isa(void) +{ + struct sigaction sa_old, sa_new; + + memset(&sa_new, 0, sizeof(sa_new)); + sa_new.sa_flags = SA_SIGINFO; + sa_new.sa_sigaction = sigill_handler; + sigaction(SIGILL, &sa_new, &sa_old); + + /* Probe for movn/movz, necessary to implement movcond. */ +#ifndef use_movnz_instructions + got_sigill = 0; + asm volatile(".set push\n" + ".set mips32\n" + "movn $zero, $zero, $zero\n" + "movz $zero, $zero, $zero\n" + ".set pop\n" + : : : ); + use_movnz_instructions = !got_sigill; +#endif + + /* Probe for MIPS32 instructions. As no subsetting is allowed + by the specification, it is only necessary to probe for one + of the instructions. 
*/ +#ifndef use_mips32_instructions + got_sigill = 0; + asm volatile(".set push\n" + ".set mips32\n" + "mul $zero, $zero\n" + ".set pop\n" + : : : ); + use_mips32_instructions = !got_sigill; +#endif + + /* Probe for MIPS32r2 instructions if MIPS32 instructions are + available. As no subsetting is allowed by the specification, + it is only necessary to probe for one of the instructions. */ +#ifndef use_mips32r2_instructions + if (use_mips32_instructions) { + got_sigill = 0; + asm volatile(".set push\n" + ".set mips32r2\n" + "seb $zero, $zero\n" + ".set pop\n" + : : : ); + use_mips32r2_instructions = !got_sigill; + } +#endif + + sigaction(SIGILL, &sa_old, NULL); +} + /* Generate global QEMU prologue and epilogue code */ static void tcg_target_qemu_prologue(TCGContext *s) { @@ -1727,6 +1805,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) static void tcg_target_init(TCGContext *s) { + tcg_target_detect_isa(); tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32], 0xffffffff); tcg_regset_set(tcg_target_call_clobber_regs, (1 << TCG_REG_V0) | diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index a438950bc1..683c6af8b9 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -77,40 +77,52 @@ typedef enum { #define TCG_TARGET_CALL_STACK_OFFSET 16 #define TCG_TARGET_CALL_ALIGN_ARGS 1 +/* MOVN/MOVZ instructions detection */ +#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ + defined(_MIPS_ARCH_LOONGSON2E) || defined(_MIPS_ARCH_LOONGSON2F) || \ + defined(_MIPS_ARCH_MIPS4) +#define use_movnz_instructions 1 +#else +extern bool use_movnz_instructions; +#endif + +/* MIPS32 instruction set detection */ +#if defined(__mips_isa_rev) && (__mips_isa_rev >= 1) +#define use_mips32_instructions 1 +#else +extern bool use_mips32_instructions; +#endif + +/* MIPS32R2 instruction set detection */ +#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) +#define use_mips32r2_instructions 1 +#else +extern bool use_mips32r2_instructions; +#endif + /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_nor_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 #define TCG_TARGET_HAS_andc_i32 0 #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_muls2_i32 1 +#define TCG_TARGET_HAS_muluh_i32 1 +#define TCG_TARGET_HAS_mulsh_i32 1 -/* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */ -#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ - defined(_MIPS_ARCH_LOONGSON2E) || defined(_MIPS_ARCH_LOONGSON2F) || \ - defined(_MIPS_ARCH_MIPS4) -#define TCG_TARGET_HAS_movcond_i32 1 -#else -#define TCG_TARGET_HAS_movcond_i32 0 -#endif +/* optional instructions detected at runtime */ +#define TCG_TARGET_HAS_movcond_i32 use_movnz_instructions +#define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions +#define TCG_TARGET_HAS_bswap32_i32 use_mips32r2_instructions +#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions +#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions +#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions +#define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions -/* optional instructions only implemented on MIPS32R2 */ -#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) -#define TCG_TARGET_HAS_bswap16_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_deposit_i32 1 -#else -#define 
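The probe pairs with the macros added to tcg-target.h: when the compiler already targets the ISA level, the flag is a compile-time 1 and every runtime check folds away; otherwise it is a real variable filled in by the SIGILL probe. The shape of that idiom, with placeholder names:

    #include <stdbool.h>

    /* BUILD_TARGETS_FEATURE_X / use_feature_x are placeholders only. */
    #if defined(BUILD_TARGETS_FEATURE_X)
    #define use_feature_x 1      /* constant: the else-branch is dead code */
    #else
    extern bool use_feature_x;   /* set once by the startup probe */
    #endif

Call sites then stay uniform either way: if (use_feature_x) { fast path } else { fallback }.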
TCG_TARGET_HAS_bswap16_i32 0 -#define TCG_TARGET_HAS_bswap32_i32 0 -#define TCG_TARGET_HAS_rot_i32 0 -#define TCG_TARGET_HAS_deposit_i32 0 -#endif +#define TCG_TARGET_HAS_new_ldst 0 /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */ @@ -125,8 +137,7 @@ typedef enum { #include <sys/cachectl.h> #endif -static inline void flush_icache_range(tcg_target_ulong start, - tcg_target_ulong stop) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) { cacheflush ((void *)start, stop-start, ICACHE); } diff --git a/tcg/optimize.c b/tcg/optimize.c index b35868afbc..89e2d6a3b3 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -198,6 +198,8 @@ static TCGOpcode op_to_mov(TCGOpcode op) static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) { + uint64_t l64, h64; + switch (op) { CASE_OP_32_64(add): return x + y; @@ -236,20 +238,16 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) return (int64_t)x >> (int64_t)y; case INDEX_op_rotr_i32: - x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y); - return x; + return ror32(x, y); case INDEX_op_rotr_i64: - x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y); - return x; + return ror64(x, y); case INDEX_op_rotl_i32: - x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y)); - return x; + return rol32(x, y); case INDEX_op_rotl_i64: - x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y)); - return x; + return rol64(x, y); CASE_OP_32_64(not): return ~x; @@ -290,6 +288,37 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) case INDEX_op_ext32u_i64: return (uint32_t)x; + case INDEX_op_muluh_i32: + return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; + case INDEX_op_mulsh_i32: + return ((int64_t)(int32_t)x * (int32_t)y) >> 32; + + case INDEX_op_muluh_i64: + mulu64(&l64, &h64, x, y); + return h64; + case INDEX_op_mulsh_i64: + muls64(&l64, &h64, x, y); + return h64; + + case INDEX_op_div_i32: + /* Avoid crashing on divide by zero, otherwise undefined. */ + return (int32_t)x / ((int32_t)y ? : 1); + case INDEX_op_divu_i32: + return (uint32_t)x / ((uint32_t)y ? : 1); + case INDEX_op_div_i64: + return (int64_t)x / ((int64_t)y ? : 1); + case INDEX_op_divu_i64: + return (uint64_t)x / ((uint64_t)y ? : 1); + + case INDEX_op_rem_i32: + return (int32_t)x % ((int32_t)y ? : 1); + case INDEX_op_remu_i32: + return (uint32_t)x % ((uint32_t)y ? : 1); + case INDEX_op_rem_i64: + return (int64_t)x % ((int64_t)y ? : 1); + case INDEX_op_remu_i64: + return (uint64_t)x % ((uint64_t)y ? 
: 1); + default: fprintf(stderr, "Unrecognized operation %d in do_constant_folding.\n", op); @@ -531,6 +560,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(eqv): CASE_OP_32_64(nand): CASE_OP_32_64(nor): + CASE_OP_32_64(muluh): + CASE_OP_32_64(mulsh): swap_commutative(args[0], &args[1], &args[2]); break; CASE_OP_32_64(brcond): @@ -771,6 +802,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, switch (op) { CASE_OP_32_64(and): CASE_OP_32_64(mul): + CASE_OP_32_64(muluh): + CASE_OP_32_64(mulsh): if ((temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == 0)) { s->gen_opc_buf[op_index] = op_to_movi(op); @@ -882,6 +915,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(eqv): CASE_OP_32_64(nand): CASE_OP_32_64(nor): + CASE_OP_32_64(muluh): + CASE_OP_32_64(mulsh): + CASE_OP_32_64(div): + CASE_OP_32_64(divu): + CASE_OP_32_64(rem): + CASE_OP_32_64(remu): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST) { s->gen_opc_buf[op_index] = op_to_movi(op); diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 453ab6b580..dc2c2df890 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -22,6 +22,8 @@ * THE SOFTWARE. */ +#include "tcg-be-ldst.h" + static uint8_t *tb_ret_addr; #if defined _CALL_DARWIN || defined __APPLE__ @@ -204,7 +206,7 @@ static void reloc_pc14 (void *pc, tcg_target_long target) } static void patch_reloc(uint8_t *code_ptr, int type, - tcg_target_long value, tcg_target_long addend) + intptr_t value, intptr_t addend) { value += addend; switch (type) { @@ -450,7 +452,9 @@ static const uint32_t tcg_to_bc[] = { static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { - tcg_out32 (s, OR | SAB (arg, ret, arg)); + if (ret != arg) { + tcg_out32(s, OR | SAB(arg, ret, arg)); + } } static void tcg_out_movi(TCGContext *s, TCGType type, @@ -490,7 +494,8 @@ static void tcg_out_b (TCGContext *s, int mask, tcg_target_long target) } } -static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg) +static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg, + int lk) { #ifdef _CALL_AIX int reg; @@ -504,14 +509,14 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg) tcg_out32 (s, LWZ | RT (0) | RA (reg)); tcg_out32 (s, MTSPR | RA (0) | CTR); tcg_out32 (s, LWZ | RT (2) | RA (reg) | 4); - tcg_out32 (s, BCCTR | BO_ALWAYS | LK); + tcg_out32 (s, BCCTR | BO_ALWAYS | lk); #else if (const_arg) { - tcg_out_b (s, LK, arg); + tcg_out_b (s, lk, arg); } else { tcg_out32 (s, MTSPR | RS (arg) | LR); - tcg_out32 (s, BCLR | BO_ALWAYS | LK); + tcg_out32 (s, BCLR | BO_ALWAYS | lk); } #endif } @@ -520,7 +525,7 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg) static void add_qemu_ldst_label (TCGContext *s, int is_ld, - int opc, + TCGMemOp opc, int data_reg, int data_reg2, int addrlo_reg, @@ -529,15 +534,8 @@ static void add_qemu_ldst_label (TCGContext *s, uint8_t *raddr, uint8_t *label_ptr) { - int idx; - TCGLabelQemuLdst *label; - - if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) { - tcg_abort(); - } + TCGLabelQemuLdst *label = new_ldst_label(s); - idx = s->nb_qemu_ldst_labels++; - label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx]; label->is_ld = is_ld; label->opc = opc; label->datalo_reg = data_reg; @@ -549,444 +547,347 @@ static void add_qemu_ldst_label (TCGContext *s, label->label_ptr[0] = label_ptr; } -#include "exec/softmmu_defs.h" - 
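The division folds above lean on the GNU "x ? : y" extension, which reuses the tested value: (y ? : 1) reads as (y != 0 ? y : 1). A guest division by a folded constant zero is undefined anyway, so substituting 1 merely keeps the translator itself from trapping. Portable spelling of the same guard:

    #include <stdint.h>

    static int32_t fold_div_i32(int32_t x, int32_t y)
    {
        return x / (y ? y : 1);    /* never divides by zero at fold time */
    }

    static int32_t fold_rem_i32(int32_t x, int32_t y)
    {
        return x % (y ? y : 1);
    }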
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ -static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + * int mmu_idx, uintptr_t ra) + */ +static const void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, }; -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ -static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ +static const void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, }; -static void *ld_trampolines[4]; -static void *st_trampolines[4]; +static void *ld_trampolines[16]; +static void *st_trampolines[16]; -static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2, - int addr_reg, int addr_reg2, int s_bits, - int offset1, int offset2, uint8_t **label_ptr) +/* Perform the TLB load and compare. Branches to the slow path, placing the + address of the branch in *LABEL_PTR. Loads the addend of the TLB into R0. + Clobbers R1 and R2. */ + +static void tcg_out_tlb_check(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2, + TCGReg addrlo, TCGReg addrhi, TCGMemOp s_bits, + int mem_index, int is_load, uint8_t **label_ptr) { + int cmp_off = + (is_load + ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); uint16_t retranst; + TCGReg base = TCG_AREG0; + + /* Extract the page index, shifted into place for tlb index. */ + tcg_out32(s, (RLWINM + | RA(r0) + | RS(addrlo) + | SH(32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) + | MB(32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS)) + | ME(31 - CPU_TLB_ENTRY_BITS))); + + /* Compensate for very large offsets. */ + if (add_off >= 0x8000) { + /* Most target env are smaller than 32k; none are larger than 64k. + Simplify the logic here merely to offset by 0x7ff0, giving us a + range just shy of 64k. Check this assumption. */ + QEMU_BUILD_BUG_ON(offsetof(CPUArchState, + tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ff0 + 0x7fff); + tcg_out32(s, ADDI | RT(r1) | RA(base) | 0x7ff0); + base = r1; + cmp_off -= 0x7ff0; + add_off -= 0x7ff0; + } - tcg_out32 (s, (RLWINM - | RA (r0) - | RS (addr_reg) - | SH (32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) - | MB (32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS)) - | ME (31 - CPU_TLB_ENTRY_BITS) - ) - ); - tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (TCG_AREG0)); - tcg_out32 (s, (LWZU - | RT (r1) - | RA (r0) - | offset1 - ) - ); - tcg_out32 (s, (RLWINM - | RA (r2) - | RS (addr_reg) - | SH (0) - | MB ((32 - s_bits) & 31) - | ME (31 - TARGET_PAGE_BITS) - ) - ); + /* Clear the non-page, non-alignment bits from the address. 
*/ + tcg_out32(s, (RLWINM + | RA(r2) + | RS(addrlo) + | SH(0) + | MB((32 - s_bits) & 31) + | ME(31 - TARGET_PAGE_BITS))); - tcg_out32 (s, CMP | BF (7) | RA (r2) | RB (r1)); -#if TARGET_LONG_BITS == 64 - tcg_out32 (s, LWZ | RT (r1) | RA (r0) | 4); - tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1)); - tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ)); -#endif - *label_ptr = s->code_ptr; - retranst = ((uint16_t *) s->code_ptr)[1] & ~3; - tcg_out32 (s, BC | BI (7, CR_EQ) | retranst | BO_COND_FALSE); + tcg_out32(s, ADD | RT(r0) | RA(r0) | RB(base)); + base = r0; - /* r0 now contains &env->tlb_table[mem_index][index].addr_x */ - tcg_out32 (s, (LWZ - | RT (r0) - | RA (r0) - | offset2 - ) - ); - /* r0 = env->tlb_table[mem_index][index].addend */ - tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (addr_reg)); - /* r0 = env->tlb_table[mem_index][index].addend + addr */ + /* Load the tlb comparator. */ + tcg_out32(s, LWZ | RT(r1) | RA(base) | (cmp_off & 0xffff)); + tcg_out32(s, CMP | BF(7) | RA(r2) | RB(r1)); + + if (TARGET_LONG_BITS == 64) { + tcg_out32(s, LWZ | RT(r1) | RA(base) | ((cmp_off + 4) & 0xffff)); + } + + /* Load the tlb addend for use on the fast path. + Do this asap to minimize load delay. */ + tcg_out32(s, LWZ | RT(r0) | RA(base) | (add_off & 0xffff)); + + if (TARGET_LONG_BITS == 64) { + tcg_out32(s, CMP | BF(6) | RA(addrhi) | RB(r1)); + tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); + } + + /* Use a conditional branch-and-link so that we load a pointer to + somewhere within the current opcode, for passing on to the helper. + This address cannot be used for a tail call, but it's shorter + than forming an address from scratch. */ + *label_ptr = s->code_ptr; + retranst = ((uint16_t *) s->code_ptr)[1] & ~3; + tcg_out32(s, BC | BI(7, CR_EQ) | retranst | BO_COND_FALSE | LK); } #endif -static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) { - int addr_reg, data_reg, data_reg2, r0, r1, rbase, bswap; + TCGReg addrlo, datalo, datahi, rbase, addrhi __attribute__((unused)); + TCGMemOp opc, bswap; #ifdef CONFIG_SOFTMMU - int mem_index, s_bits, r2, addr_reg2; + int mem_index; uint8_t *label_ptr; #endif - data_reg = *args++; - if (opc == 3) - data_reg2 = *args++; - else - data_reg2 = 0; - addr_reg = *args++; + datalo = *args++; + datahi = (is64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); + opc = *args++; + bswap = opc & MO_BSWAP; #ifdef CONFIG_SOFTMMU -#if TARGET_LONG_BITS == 64 - addr_reg2 = *args++; -#else - addr_reg2 = 0; -#endif mem_index = *args; - s_bits = opc & 3; - r0 = 3; - r1 = 4; - r2 = 0; - rbase = 0; - - tcg_out_tlb_check ( - s, r0, r1, r2, addr_reg, addr_reg2, s_bits, - offsetof (CPUArchState, tlb_table[mem_index][0].addr_read), - offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_read), - &label_ptr - ); + tcg_out_tlb_check(s, TCG_REG_R3, TCG_REG_R4, TCG_REG_R0, addrlo, + addrhi, opc & MO_SIZE, mem_index, 0, &label_ptr); + rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ - r0 = addr_reg; - r1 = 3; rbase = GUEST_BASE ? 
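The first RLWINM in the rewritten tcg_out_tlb_check does the whole TLB-index computation in one rotate-and-mask: extract the virtual page number, mask it to CPU_TLB_BITS, and scale it by the entry size. Roughly, in plain C (geometry constants as in QEMU):

    #include <stdint.h>

    static inline uint32_t tlb_entry_offset(uint32_t addr,
                                            unsigned target_page_bits,
                                            unsigned cpu_tlb_bits,
                                            unsigned cpu_tlb_entry_bits)
    {
        uint32_t index = (addr >> target_page_bits)
                       & ((1u << cpu_tlb_bits) - 1);
        return index << cpu_tlb_entry_bits;  /* byte offset into the table */
    }

The 0x7ff0 bias just above compensates for env offsets that exceed the 16-bit signed displacement field of LWZ.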
TCG_GUEST_BASE_REG : 0; #endif -#ifdef TARGET_WORDS_BIGENDIAN - bswap = 0; -#else - bswap = 1; -#endif - - switch (opc) { + switch (opc & MO_SSIZE) { default: - case 0: - tcg_out32 (s, LBZX | TAB (data_reg, rbase, r0)); + case MO_UB: + tcg_out32(s, LBZX | TAB(datalo, rbase, addrlo)); break; - case 0|4: - tcg_out32 (s, LBZX | TAB (data_reg, rbase, r0)); - tcg_out32 (s, EXTSB | RA (data_reg) | RS (data_reg)); + case MO_SB: + tcg_out32(s, LBZX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, EXTSB | RA(datalo) | RS(datalo)); break; - case 1: - if (bswap) - tcg_out32 (s, LHBRX | TAB (data_reg, rbase, r0)); - else - tcg_out32 (s, LHZX | TAB (data_reg, rbase, r0)); + case MO_UW: + tcg_out32(s, (bswap ? LHBRX : LHZX) | TAB(datalo, rbase, addrlo)); break; - case 1|4: + case MO_SW: if (bswap) { - tcg_out32 (s, LHBRX | TAB (data_reg, rbase, r0)); - tcg_out32 (s, EXTSH | RA (data_reg) | RS (data_reg)); + tcg_out32(s, LHBRX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, EXTSH | RA(datalo) | RS(datalo)); + } else { + tcg_out32(s, LHAX | TAB(datalo, rbase, addrlo)); } - else tcg_out32 (s, LHAX | TAB (data_reg, rbase, r0)); break; - case 2: - if (bswap) - tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0)); - else - tcg_out32 (s, LWZX | TAB (data_reg, rbase, r0)); + case MO_UL: + tcg_out32(s, (bswap ? LWBRX : LWZX) | TAB(datalo, rbase, addrlo)); break; - case 3: + case MO_Q: if (bswap) { - tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4); - tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0)); - tcg_out32 (s, LWBRX | TAB (data_reg2, rbase, r1)); - } - else { -#ifdef CONFIG_USE_GUEST_BASE - tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4); - tcg_out32 (s, LWZX | TAB (data_reg2, rbase, r0)); - tcg_out32 (s, LWZX | TAB (data_reg, rbase, r1)); -#else - if (r0 == data_reg2) { - tcg_out32 (s, LWZ | RT (0) | RA (r0)); - tcg_out32 (s, LWZ | RT (data_reg) | RA (r0) | 4); - tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 0); - } - else { - tcg_out32 (s, LWZ | RT (data_reg2) | RA (r0)); - tcg_out32 (s, LWZ | RT (data_reg) | RA (r0) | 4); - } -#endif + tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); + tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0)); + } else if (rbase != 0) { + tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); + tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo)); + tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0)); + } else if (addrlo == datahi) { + tcg_out32(s, LWZ | RT(datalo) | RA(addrlo) | 4); + tcg_out32(s, LWZ | RT(datahi) | RA(addrlo)); + } else { + tcg_out32(s, LWZ | RT(datahi) | RA(addrlo)); + tcg_out32(s, LWZ | RT(datalo) | RA(addrlo) | 4); } break; } #ifdef CONFIG_SOFTMMU - add_qemu_ldst_label (s, - 1, - opc, - data_reg, - data_reg2, - addr_reg, - addr_reg2, - mem_index, - s->code_ptr, - label_ptr); + add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, + addrhi, mem_index, s->code_ptr, label_ptr); #endif } -static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) { - int addr_reg, r0, r1, data_reg, data_reg2, bswap, rbase; + TCGReg addrlo, datalo, datahi, rbase, addrhi __attribute__((unused)); + TCGMemOp opc, bswap, s_bits; #ifdef CONFIG_SOFTMMU - int mem_index, r2, addr_reg2; + int mem_index; uint8_t *label_ptr; #endif - data_reg = *args++; - if (opc == 3) - data_reg2 = *args++; - else - data_reg2 = 0; - addr_reg = *args++; + datalo = *args++; + datahi = (is64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TARGET_LONG_BITS == 64 ? 
*args++ : 0); + opc = *args++; + bswap = opc & MO_BSWAP; + s_bits = opc & MO_SIZE; #ifdef CONFIG_SOFTMMU -#if TARGET_LONG_BITS == 64 - addr_reg2 = *args++; -#else - addr_reg2 = 0; -#endif mem_index = *args; - r0 = 3; - r1 = 4; - r2 = 0; - rbase = 0; - - tcg_out_tlb_check ( - s, r0, r1, r2, addr_reg, addr_reg2, opc & 3, - offsetof (CPUArchState, tlb_table[mem_index][0].addr_write), - offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_write), - &label_ptr - ); + tcg_out_tlb_check(s, TCG_REG_R3, TCG_REG_R4, TCG_REG_R0, addrlo, + addrhi, s_bits, mem_index, 0, &label_ptr); + rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ - r0 = addr_reg; - r1 = 3; rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; #endif -#ifdef TARGET_WORDS_BIGENDIAN - bswap = 0; -#else - bswap = 1; -#endif - switch (opc) { - case 0: - tcg_out32 (s, STBX | SAB (data_reg, rbase, r0)); + switch (s_bits) { + case MO_8: + tcg_out32(s, STBX | SAB(datalo, rbase, addrlo)); break; - case 1: - if (bswap) - tcg_out32 (s, STHBRX | SAB (data_reg, rbase, r0)); - else - tcg_out32 (s, STHX | SAB (data_reg, rbase, r0)); + case MO_16: + tcg_out32(s, (bswap ? STHBRX : STHX) | SAB(datalo, rbase, addrlo)); break; - case 2: - if (bswap) - tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0)); - else - tcg_out32 (s, STWX | SAB (data_reg, rbase, r0)); + case MO_32: + default: + tcg_out32(s, (bswap ? STWBRX : STWX) | SAB(datalo, rbase, addrlo)); break; - case 3: + case MO_64: if (bswap) { - tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4); - tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0)); - tcg_out32 (s, STWBRX | SAB (data_reg2, rbase, r1)); - } - else { -#ifdef CONFIG_USE_GUEST_BASE - tcg_out32 (s, STWX | SAB (data_reg2, rbase, r0)); - tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4); - tcg_out32 (s, STWX | SAB (data_reg, rbase, r1)); -#else - tcg_out32 (s, STW | RS (data_reg2) | RA (r0)); - tcg_out32 (s, STW | RS (data_reg) | RA (r0) | 4); -#endif + tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); + tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); + tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0)); + } else if (rbase != 0) { + tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); + tcg_out32(s, STWX | SAB(datahi, rbase, addrlo)); + tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0)); + } else { + tcg_out32(s, STW | RS(datahi) | RA(addrlo)); + tcg_out32(s, STW | RS(datalo) | RA(addrlo) | 4); } break; } #ifdef CONFIG_SOFTMMU - add_qemu_ldst_label (s, - 0, - opc, - data_reg, - data_reg2, - addr_reg, - addr_reg2, - mem_index, - s->code_ptr, - label_ptr); + add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi, + mem_index, s->code_ptr, label_ptr); #endif } #if defined(CONFIG_SOFTMMU) -static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label) +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - int s_bits; - int ir; - int opc = label->opc; - int mem_index = label->mem_index; - int data_reg = label->datalo_reg; - int data_reg2 = label->datahi_reg; - int addr_reg = label->addrlo_reg; - uint8_t *raddr = label->raddr; - uint8_t **label_ptr = &label->label_ptr[0]; - - s_bits = opc & 3; - - /* resolve label address */ - reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr); - - /* slow path */ - ir = 4; -#if TARGET_LONG_BITS == 32 - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); -#else + TCGReg ir, datalo, datahi; + TCGMemOp opc = l->opc; + + reloc_pc14 (l->label_ptr[0], (uintptr_t)s->code_ptr); + + ir = TCG_REG_R4; + if (TARGET_LONG_BITS == 32) { + tcg_out_mov(s, TCG_TYPE_I32, ir++, 
l->addrlo_reg); + } else { #ifdef TCG_TARGET_CALL_ALIGN_ARGS - ir |= 1; -#endif - tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg); - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); + ir |= 1; #endif - tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index); - tcg_out_call (s, (tcg_target_long) ld_trampolines[s_bits], 1); - tcg_out32 (s, (tcg_target_long) raddr); - switch (opc) { - case 0|4: - tcg_out32 (s, EXTSB | RA (data_reg) | RS (3)); - break; - case 1|4: - tcg_out32 (s, EXTSH | RA (data_reg) | RS (3)); - break; - case 0: - case 1: - case 2: - if (data_reg != 3) - tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3); - break; - case 3: - if (data_reg == 3) { - if (data_reg2 == 4) { - tcg_out_mov (s, TCG_TYPE_I32, 0, 4); - tcg_out_mov (s, TCG_TYPE_I32, 4, 3); - tcg_out_mov (s, TCG_TYPE_I32, 3, 0); - } - else { - tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3); - tcg_out_mov (s, TCG_TYPE_I32, 3, 4); - } - } - else { - if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4); - if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3); + tcg_out_mov(s, TCG_TYPE_I32, ir++, l->addrhi_reg); + tcg_out_mov(s, TCG_TYPE_I32, ir++, l->addrlo_reg); + } + tcg_out_movi(s, TCG_TYPE_I32, ir++, l->mem_index); + tcg_out32(s, MFSPR | RT(ir++) | LR); + tcg_out_b(s, LK, (uintptr_t)ld_trampolines[opc & ~MO_SIGN]); + + datalo = l->datalo_reg; + switch (opc & MO_SSIZE) { + case MO_SB: + tcg_out32(s, EXTSB | RA(datalo) | RS(TCG_REG_R3)); + break; + case MO_SW: + tcg_out32(s, EXTSH | RA(datalo) | RS(TCG_REG_R3)); + break; + default: + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R3); + break; + case MO_Q: + datahi = l->datahi_reg; + if (datalo != TCG_REG_R3) { + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4); + tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); + } else if (datahi != TCG_REG_R4) { + tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4); + } else { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, TCG_REG_R4); + tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R0); } break; } - /* Jump to the code corresponding to next IR of qemu_st */ - tcg_out_b (s, 0, (tcg_target_long) raddr); + tcg_out_b (s, 0, (uintptr_t)l->raddr); } -static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label) +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - int ir; - int opc = label->opc; - int mem_index = label->mem_index; - int data_reg = label->datalo_reg; - int data_reg2 = label->datahi_reg; - int addr_reg = label->addrlo_reg; - uint8_t *raddr = label->raddr; - uint8_t **label_ptr = &label->label_ptr[0]; - - /* resolve label address */ - reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr); - - /* slow path */ - ir = 4; -#if TARGET_LONG_BITS == 32 - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); -#else + TCGReg ir, datalo; + TCGMemOp opc = l->opc; + + reloc_pc14 (l->label_ptr[0], (tcg_target_long) s->code_ptr); + + ir = TCG_REG_R4; + if (TARGET_LONG_BITS == 32) { + tcg_out_mov (s, TCG_TYPE_I32, ir++, l->addrlo_reg); + } else { #ifdef TCG_TARGET_CALL_ALIGN_ARGS - ir |= 1; -#endif - tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg); - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); + ir |= 1; #endif + tcg_out_mov (s, TCG_TYPE_I32, ir++, l->addrhi_reg); + tcg_out_mov (s, TCG_TYPE_I32, ir++, l->addrlo_reg); + } - switch (opc) { - case 0: - tcg_out32 (s, (RLWINM - | RA (ir) - | RS (data_reg) - | SH (0) - | MB (24) - | ME (31))); + datalo = l->datalo_reg; + switch (opc & MO_SIZE) { + case 
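The recurring "ir |= 1" under TCG_TARGET_CALL_ALIGN_ARGS rounds the argument-register number up to odd, so a 64-bit value lands in the register pair the 32-bit PPC ABI expects (r5:r6 rather than r4:r5). A sketch of the cursor arithmetic, detached from any emission:

    /* Allocate argument registers; 64-bit values take an aligned pair. */
    static int alloc_arg_reg(int *ir, int is_64bit, int align_pairs)
    {
        int start;
        if (is_64bit && align_pairs) {
            *ir |= 1;               /* e.g. 4 -> 5: value occupies r5:r6 */
        }
        start = *ir;
        *ir += is_64bit ? 2 : 1;
        return start;               /* first register holding the value */
    }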
MO_8: + tcg_out32(s, (RLWINM | RA (ir) | RS (datalo) + | SH (0) | MB (24) | ME (31))); break; - case 1: - tcg_out32 (s, (RLWINM - | RA (ir) - | RS (data_reg) - | SH (0) - | MB (16) - | ME (31))); + case MO_16: + tcg_out32(s, (RLWINM | RA (ir) | RS (datalo) + | SH (0) | MB (16) | ME (31))); break; - case 2: - tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg); + default: + tcg_out_mov(s, TCG_TYPE_I32, ir, datalo); break; - case 3: + case MO_64: #ifdef TCG_TARGET_CALL_ALIGN_ARGS ir |= 1; #endif - tcg_out_mov (s, TCG_TYPE_I32, ir++, data_reg2); - tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg); + tcg_out_mov(s, TCG_TYPE_I32, ir++, l->datahi_reg); + tcg_out_mov(s, TCG_TYPE_I32, ir, datalo); break; } ir++; - tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index); - tcg_out_call (s, (tcg_target_long) st_trampolines[opc], 1); - tcg_out32 (s, (tcg_target_long) raddr); - tcg_out_b (s, 0, (tcg_target_long) raddr); -} - -void tcg_out_tb_finalize(TCGContext *s) -{ - int i; - TCGLabelQemuLdst *label; - - /* qemu_ld/st slow paths */ - for (i = 0; i < s->nb_qemu_ldst_labels; i++) { - label = (TCGLabelQemuLdst *) &s->qemu_ldst_labels[i]; - if (label->is_ld) { - tcg_out_qemu_ld_slow_path (s, label); - } - else { - tcg_out_qemu_st_slow_path (s, label); - } - } + tcg_out_movi(s, TCG_TYPE_I32, ir++, l->mem_index); + tcg_out32(s, MFSPR | RT(ir++) | LR); + tcg_out_b(s, LK, (uintptr_t)st_trampolines[opc]); + tcg_out_b(s, 0, (uintptr_t)l->raddr); } #endif #ifdef CONFIG_SOFTMMU static void emit_ldst_trampoline (TCGContext *s, const void *ptr) { - tcg_out32 (s, MFSPR | RT (3) | LR); - tcg_out32 (s, ADDI | RT (3) | RA (3) | 4); - tcg_out32 (s, MTSPR | RS (3) | LR); tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0); - tcg_out_b (s, 0, (tcg_target_long) ptr); + tcg_out_call (s, (tcg_target_long) ptr, 1, 0); } #endif @@ -1052,24 +953,27 @@ static void tcg_target_qemu_prologue (TCGContext *s) tcg_out32 (s, BCLR | BO_ALWAYS); #ifdef CONFIG_SOFTMMU - for (i = 0; i < 4; ++i) { - ld_trampolines[i] = s->code_ptr; - emit_ldst_trampoline (s, qemu_ld_helpers[i]); - - st_trampolines[i] = s->code_ptr; - emit_ldst_trampoline (s, qemu_st_helpers[i]); + for (i = 0; i < 16; ++i) { + if (qemu_ld_helpers[i]) { + ld_trampolines[i] = s->code_ptr; + emit_ldst_trampoline(s, qemu_ld_helpers[i]); + } + if (qemu_st_helpers[i]) { + st_trampolines[i] = s->code_ptr; + emit_ldst_trampoline(s, qemu_st_helpers[i]); + } } #endif } -static void tcg_out_ld (TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, - tcg_target_long arg2) +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, + intptr_t arg2) { tcg_out_ldst (s, ret, arg1, arg2, LWZ, LWZX); } -static void tcg_out_st (TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, - tcg_target_long arg2) +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, + intptr_t arg2) { tcg_out_ldst (s, arg, arg1, arg2, STW, STWX); } @@ -1495,7 +1399,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, } break; case INDEX_op_call: - tcg_out_call (s, args[0], const_args[0]); + tcg_out_call (s, args[0], const_args[0], LK); break; case INDEX_op_movi_i32: tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); @@ -1802,36 +1706,18 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out32 (s, NOR | SAB (args[1], args[0], args[1])); break; - case INDEX_op_qemu_ld8u: + case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, args, 0); break; - case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, args, 0 | 4); - break; - case INDEX_op_qemu_ld16u: + case 
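With SH=0, RLWINM degenerates to a plain mask, so the MO_8/MO_16 store slow paths above are simply zero-extending the value before passing it to the helper:

    #include <stdint.h>

    static inline uint32_t zext8(uint32_t x)    /* RLWINM SH=0 MB=24 ME=31 */
    {
        return x & 0xff;
    }

    static inline uint32_t zext16(uint32_t x)   /* RLWINM SH=0 MB=16 ME=31 */
    {
        return x & 0xffff;
    }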
INDEX_op_qemu_ld_i64: tcg_out_qemu_ld(s, args, 1); break; - case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, args, 1 | 4); - break; - case INDEX_op_qemu_ld32: - tcg_out_qemu_ld(s, args, 2); - break; - case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, 3); - break; - case INDEX_op_qemu_st8: + case INDEX_op_qemu_st_i32: tcg_out_qemu_st(s, args, 0); break; - case INDEX_op_qemu_st16: + case INDEX_op_qemu_st_i64: tcg_out_qemu_st(s, args, 1); break; - case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, args, 2); - break; - case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, 3); - break; case INDEX_op_ext8s_i32: tcg_out32 (s, EXTSB | RS (args[1]) | RA (args[0])); @@ -2015,29 +1901,15 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_bswap32_i32, { "r", "r" } }, #if TARGET_LONG_BITS == 32 - { INDEX_op_qemu_ld8u, { "r", "L" } }, - { INDEX_op_qemu_ld8s, { "r", "L" } }, - { INDEX_op_qemu_ld16u, { "r", "L" } }, - { INDEX_op_qemu_ld16s, { "r", "L" } }, - { INDEX_op_qemu_ld32, { "r", "L" } }, - { INDEX_op_qemu_ld64, { "r", "r", "L" } }, - - { INDEX_op_qemu_st8, { "K", "K" } }, - { INDEX_op_qemu_st16, { "K", "K" } }, - { INDEX_op_qemu_st32, { "K", "K" } }, - { INDEX_op_qemu_st64, { "M", "M", "M" } }, + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "L" } }, + { INDEX_op_qemu_st_i32, { "K", "K" } }, + { INDEX_op_qemu_st_i64, { "M", "M", "M" } }, #else - { INDEX_op_qemu_ld8u, { "r", "L", "L" } }, - { INDEX_op_qemu_ld8s, { "r", "L", "L" } }, - { INDEX_op_qemu_ld16u, { "r", "L", "L" } }, - { INDEX_op_qemu_ld16s, { "r", "L", "L" } }, - { INDEX_op_qemu_ld32, { "r", "L", "L" } }, - { INDEX_op_qemu_ld64, { "r", "L", "L", "L" } }, - - { INDEX_op_qemu_st8, { "K", "K", "K" } }, - { INDEX_op_qemu_st16, { "K", "K", "K" } }, - { INDEX_op_qemu_st32, { "K", "K", "K" } }, - { INDEX_op_qemu_st64, { "M", "M", "M", "M" } }, + { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "L", "L" } }, + { INDEX_op_qemu_st_i32, { "K", "K", "K" } }, + { INDEX_op_qemu_st_i64, { "M", "M", "M", "M" } }, #endif { INDEX_op_ext8s_i32, { "r", "r" } }, diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index b42d97cc24..e3395e301c 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -96,11 +96,15 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 + +#define TCG_TARGET_HAS_new_ldst 1 #define TCG_AREG0 TCG_REG_R27 #define tcg_qemu_tb_exec(env, tb_ptr) \ - ((long __attribute__ ((longcall)) \ + ((uintptr_t __attribute__ ((longcall)) \ (*)(void *, void *))tcg_ctx.code_gen_prologue)(env, tb_ptr) #endif diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 0678de2045..6109d862db 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -22,6 +22,8 @@ * THE SOFTWARE. 
*/ +#include "tcg-be-ldst.h" + #define TCG_CT_CONST_S16 0x100 #define TCG_CT_CONST_U16 0x200 #define TCG_CT_CONST_S32 0x400 @@ -31,13 +33,11 @@ static uint8_t *tb_ret_addr; -#define FAST_PATH - #if TARGET_LONG_BITS == 32 -#define LD_ADDR LWZU +#define LD_ADDR LWZ #define CMP_L 0 #else -#define LD_ADDR LDU +#define LD_ADDR LD #define CMP_L (1<<21) #endif @@ -99,7 +99,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #endif static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R14, + TCG_REG_R14, /* call saved registers */ TCG_REG_R15, TCG_REG_R16, TCG_REG_R17, @@ -109,29 +109,24 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_R21, TCG_REG_R22, TCG_REG_R23, + TCG_REG_R24, + TCG_REG_R25, + TCG_REG_R26, + TCG_REG_R27, TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31, -#ifdef __APPLE__ - TCG_REG_R2, -#endif - TCG_REG_R3, - TCG_REG_R4, - TCG_REG_R5, - TCG_REG_R6, - TCG_REG_R7, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, -#ifndef __APPLE__ + TCG_REG_R12, /* call clobbered, non-arguments */ TCG_REG_R11, -#endif - TCG_REG_R12, - TCG_REG_R24, - TCG_REG_R25, - TCG_REG_R26, - TCG_REG_R27 + TCG_REG_R10, /* call clobbered, arguments */ + TCG_REG_R9, + TCG_REG_R8, + TCG_REG_R7, + TCG_REG_R6, + TCG_REG_R5, + TCG_REG_R4, + TCG_REG_R3, }; static const int tcg_target_call_iarg_regs[] = { @@ -173,58 +168,74 @@ static const int tcg_target_callee_save_regs[] = { TCG_REG_R31 }; -static uint32_t reloc_pc24_val (void *pc, tcg_target_long target) +static inline bool in_range_b(tcg_target_long target) +{ + return target == sextract64(target, 0, 26); +} + +static uint32_t reloc_pc24_val(void *pc, tcg_target_long target) { tcg_target_long disp; - disp = target - (tcg_target_long) pc; - if ((disp << 38) >> 38 != disp) - tcg_abort (); + disp = target - (tcg_target_long)pc; + assert(in_range_b(disp)); return disp & 0x3fffffc; } -static void reloc_pc24 (void *pc, tcg_target_long target) +static void reloc_pc24(void *pc, tcg_target_long target) { - *(uint32_t *) pc = (*(uint32_t *) pc & ~0x3fffffc) - | reloc_pc24_val (pc, target); + *(uint32_t *)pc = (*(uint32_t *)pc & ~0x3fffffc) + | reloc_pc24_val(pc, target); } -static uint16_t reloc_pc14_val (void *pc, tcg_target_long target) +static uint16_t reloc_pc14_val(void *pc, tcg_target_long target) { tcg_target_long disp; - disp = target - (tcg_target_long) pc; - if (disp != (int16_t) disp) - tcg_abort (); + disp = target - (tcg_target_long)pc; + if (disp != (int16_t) disp) { + tcg_abort(); + } return disp & 0xfffc; } -static void reloc_pc14 (void *pc, tcg_target_long target) +static void reloc_pc14(void *pc, tcg_target_long target) +{ + *(uint32_t *)pc = (*(uint32_t *)pc & ~0xfffc) | reloc_pc14_val(pc, target); +} + +static inline void tcg_out_b_noaddr(TCGContext *s, int insn) { - *(uint32_t *) pc = (*(uint32_t *) pc & ~0xfffc) - | reloc_pc14_val (pc, target); + unsigned retrans = *(uint32_t *)s->code_ptr & 0x3fffffc; + tcg_out32(s, insn | retrans); } -static void patch_reloc (uint8_t *code_ptr, int type, - tcg_target_long value, tcg_target_long addend) +static inline void tcg_out_bc_noaddr(TCGContext *s, int insn) +{ + unsigned retrans = *(uint32_t *)s->code_ptr & 0xfffc; + tcg_out32(s, insn | retrans); +} + +static void patch_reloc(uint8_t *code_ptr, int type, + intptr_t value, intptr_t addend) { value += addend; switch (type) { case R_PPC_REL14: - reloc_pc14 (code_ptr, value); + reloc_pc14(code_ptr, value); break; case R_PPC_REL24: - reloc_pc24 (code_ptr, value); + reloc_pc24(code_ptr, value); break; default: - tcg_abort (); + 
tcg_abort(); } } /* parse target specific constraints */ -static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str) +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { const char *ct_str; @@ -232,29 +243,29 @@ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str) switch (ct_str[0]) { case 'A': case 'B': case 'C': case 'D': ct->ct |= TCG_CT_REG; - tcg_regset_set_reg (ct->u.regs, 3 + ct_str[0] - 'A'); + tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A'); break; case 'r': ct->ct |= TCG_CT_REG; - tcg_regset_set32 (ct->u.regs, 0, 0xffffffff); + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); break; case 'L': /* qemu_ld constraint */ ct->ct |= TCG_CT_REG; - tcg_regset_set32 (ct->u.regs, 0, 0xffffffff); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3); + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); #ifdef CONFIG_SOFTMMU - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R5); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); #endif break; case 'S': /* qemu_st constraint */ ct->ct |= TCG_CT_REG; - tcg_regset_set32 (ct->u.regs, 0, 0xffffffff); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3); + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); #ifdef CONFIG_SOFTMMU - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R5); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R6); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6); #endif break; case 'I': @@ -284,8 +295,8 @@ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str) } /* test if a constant matches the constraint */ -static int tcg_target_const_match (tcg_target_long val, - const TCGArgConstraint *arg_ct) +static int tcg_target_const_match(tcg_target_long val, + const TCGArgConstraint *arg_ct) { int ct = arg_ct->ct; if (ct & TCG_CT_CONST) { @@ -425,7 +436,7 @@ static int tcg_target_const_match (tcg_target_long val, #define STHX XO31(407) #define STWX XO31(151) -#define SPR(a,b) ((((a)<<5)|(b))<<11) +#define SPR(a, b) ((((a)<<5)|(b))<<11) #define LR SPR(8, 0) #define CTR SPR(9, 0) @@ -439,7 +450,7 @@ static int tcg_target_const_match (tcg_target_long val, #define SRADI XO31(413<<1) #define TW XO31( 4) -#define TRAP (TW | TO (31)) +#define TRAP (TW | TO(31)) #define RT(r) ((r)<<21) #define RS(r) ((r)<<21) @@ -467,9 +478,9 @@ static int tcg_target_const_match (tcg_target_long val, #define BB(n, c) (((c)+((n)*4))<<11) #define BC_(n, c) (((c)+((n)*4))<<6) -#define BO_COND_TRUE BO (12) -#define BO_COND_FALSE BO ( 4) -#define BO_ALWAYS BO (20) +#define BO_COND_TRUE BO(12) +#define BO_COND_FALSE BO( 4) +#define BO_ALWAYS BO(20) enum { CR_LT, @@ -479,16 +490,16 @@ enum { }; static const uint32_t tcg_to_bc[] = { - [TCG_COND_EQ] = BC | BI (7, CR_EQ) | BO_COND_TRUE, - [TCG_COND_NE] = BC | BI (7, CR_EQ) | BO_COND_FALSE, - [TCG_COND_LT] = BC | BI (7, CR_LT) | BO_COND_TRUE, - [TCG_COND_GE] = BC | BI (7, CR_LT) | BO_COND_FALSE, - [TCG_COND_LE] = BC | BI (7, CR_GT) | BO_COND_FALSE, - [TCG_COND_GT] = BC | BI (7, CR_GT) | BO_COND_TRUE, - [TCG_COND_LTU] = BC | BI (7, CR_LT) | BO_COND_TRUE, - [TCG_COND_GEU] = BC | BI (7, CR_LT) | BO_COND_FALSE, - [TCG_COND_LEU] = BC | BI (7, CR_GT) | BO_COND_FALSE, - [TCG_COND_GTU] = BC | BI (7, CR_GT) | BO_COND_TRUE, + [TCG_COND_EQ] 
= BC | BI(7, CR_EQ) | BO_COND_TRUE, + [TCG_COND_NE] = BC | BI(7, CR_EQ) | BO_COND_FALSE, + [TCG_COND_LT] = BC | BI(7, CR_LT) | BO_COND_TRUE, + [TCG_COND_GE] = BC | BI(7, CR_LT) | BO_COND_FALSE, + [TCG_COND_LE] = BC | BI(7, CR_GT) | BO_COND_FALSE, + [TCG_COND_GT] = BC | BI(7, CR_GT) | BO_COND_TRUE, + [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE, + [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE, + [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE, + [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE, }; /* The low bit here is set if the RA and RB fields must be inverted. */ @@ -508,15 +519,17 @@ static const uint32_t tcg_to_isel[] = { static inline void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { - tcg_out32 (s, OR | SAB (arg, ret, arg)); + if (ret != arg) { + tcg_out32(s, OR | SAB(arg, ret, arg)); + } } static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, int sh, int mb) { - sh = SH (sh & 0x1f) | (((sh >> 5) & 1) << 1); - mb = MB64 ((mb >> 5) | ((mb << 1) & 0x3f)); - tcg_out32 (s, op | RA (ra) | RS (rs) | sh | mb); + sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1); + mb = MB64((mb >> 5) | ((mb << 1) & 0x3f)); + tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb); } static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, @@ -636,8 +649,8 @@ static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) } else if (mask_operand(c, &mb, &me)) { tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); } else { - tcg_out_movi(s, TCG_TYPE_I32, 0, c); - tcg_out32(s, AND | SAB(src, dst, 0)); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); + tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); } } @@ -658,8 +671,8 @@ static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) tcg_out_rld(s, RLDICL, dst, src, 0, mb); } } else { - tcg_out_movi(s, TCG_TYPE_I64, 0, c); - tcg_out32(s, AND | SAB(src, dst, 0)); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); + tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); } } @@ -686,408 +699,487 @@ static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) tcg_out_zori32(s, dst, src, c, XORI, XORIS); } -static void tcg_out_b (TCGContext *s, int mask, tcg_target_long target) +static void tcg_out_b(TCGContext *s, int mask, tcg_target_long target) { tcg_target_long disp; - disp = target - (tcg_target_long) s->code_ptr; - if ((disp << 38) >> 38 == disp) - tcg_out32 (s, B | (disp & 0x3fffffc) | mask); - else { - tcg_out_movi (s, TCG_TYPE_I64, 0, (tcg_target_long) target); - tcg_out32 (s, MTSPR | RS (0) | CTR); - tcg_out32 (s, BCCTR | BO_ALWAYS | mask); + disp = target - (tcg_target_long)s->code_ptr; + if (in_range_b(disp)) { + tcg_out32(s, B | (disp & 0x3fffffc) | mask); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, (tcg_target_long)target); + tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS | mask); } } -static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg) +static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg) { #ifdef __APPLE__ if (const_arg) { - tcg_out_b (s, LK, arg); - } - else { - tcg_out32 (s, MTSPR | RS (arg) | LR); - tcg_out32 (s, BCLR | BO_ALWAYS | LK); + tcg_out_b(s, LK, arg); + } else { + tcg_out32(s, MTSPR | RS(arg) | LR); + tcg_out32(s, BCLR | BO_ALWAYS | LK); } #else - int reg; + TCGReg reg = arg; + int ofs = 0; if (const_arg) { - reg = 2; - tcg_out_movi (s, TCG_TYPE_I64, reg, arg); + /* Look through the descriptor. 
If the branch is in range and the toc + is cheap to build, take the direct path here. */ + intptr_t tgt = ((intptr_t *)arg)[0]; + intptr_t toc = ((intptr_t *)arg)[1]; + intptr_t diff = tgt - (intptr_t)s->code_ptr; + + if (in_range_b(diff) && toc == (uint32_t)toc) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, toc); + tcg_out_b(s, LK, tgt); + return; + } + + /* Fold the low bits of the constant into the addresses below. */ + ofs = (int16_t)arg; + if (ofs + 8 < 0x8000) { + arg -= ofs; + } else { + ofs = 0; + } + reg = TCG_REG_R2; + tcg_out_movi(s, TCG_TYPE_I64, reg, arg); } - else reg = arg; - tcg_out32 (s, LD | RT (0) | RA (reg)); - tcg_out32 (s, MTSPR | RA (0) | CTR); - tcg_out32 (s, LD | RT (11) | RA (reg) | 16); - tcg_out32 (s, LD | RT (2) | RA (reg) | 8); - tcg_out32 (s, BCCTR | BO_ALWAYS | LK); + tcg_out32(s, LD | TAI(TCG_REG_R0, reg, ofs)); + tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); + tcg_out32(s, LD | TAI(TCG_REG_R2, reg, ofs + 8)); + tcg_out32(s, BCCTR | BO_ALWAYS | LK); #endif } -static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, - int offset, int op1, int op2) +static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, + TCGReg base, tcg_target_long offset) { - if (offset == (int16_t) offset) { - tcg_out32(s, op1 | TAI(ret, addr, offset)); - } else { - tcg_out_movi(s, TCG_TYPE_I64, 0, offset); - tcg_out32(s, op2 | TAB(ret, addr, 0)); + tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; + TCGReg rs = TCG_REG_R2; + + assert(rt != TCG_REG_R2 && base != TCG_REG_R2); + + switch (opi) { + case LD: case LWA: + align = 3; + /* FALLTHRU */ + default: + if (rt != TCG_REG_R0) { + rs = rt; + } + break; + case STD: + align = 3; + break; + case STB: case STH: case STW: + break; } -} -static void tcg_out_ldsta(TCGContext *s, TCGReg ret, TCGReg addr, - int offset, int op1, int op2) -{ - if (offset == (int16_t) (offset & ~3)) { - tcg_out32(s, op1 | TAI(ret, addr, offset)); - } else { - tcg_out_movi(s, TCG_TYPE_I64, 0, offset); - tcg_out32(s, op2 | TAB(ret, addr, 0)); + /* For unaligned or very large offsets, use the indexed form. 
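
For readers unfamiliar with the ABI the rewritten tcg_out_call targets: on ppc64 ELFv1 a function pointer is not a code address but a descriptor of three doublewords, which is what the indexing of `arg` above reads through. A sketch of the assumed layout (the struct is illustrative; the generated code just loads slots 0 and 1):

    #include <stdint.h>

    /* Assumed ELFv1 function-descriptor layout behind ((intptr_t *)arg)[i].
       Slot 2 exists in the ABI but this backend never loads it; the old
       code's extra load into R11 is gone. */
    typedef struct {
        uintptr_t entry; /* [0]: code address; loaded via R0 into CTR */
        uintptr_t toc;   /* [1]: callee's TOC pointer; loaded into R2  */
        uintptr_t env;   /* [2]: environment pointer; unused here      */
    } PPC64FuncDescSketch;

The fast path skips all of the loads when the entry point is branch-reachable and the TOC value fits in 32 bits, since materializing R2 with tcg_out_movi is then cheaper than the load/mtctr/bcctr chain.
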
*/ + if (offset & align || offset != (int32_t)offset) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, orig); + tcg_out32(s, opx | TAB(rt, base, TCG_REG_R2)); + return; + } + + l0 = (int16_t)offset; + offset = (offset - l0) >> 16; + l1 = (int16_t)offset; + + if (l1 < 0 && orig >= 0) { + extra = 0x4000; + l1 = (int16_t)(offset - 0x4000); + } + if (l1) { + tcg_out32(s, ADDIS | TAI(rs, base, l1)); + base = rs; + } + if (extra) { + tcg_out32(s, ADDIS | TAI(rs, base, extra)); + base = rs; + } + if (opi != ADDI || base != rt || l0 != 0) { + tcg_out32(s, opi | TAI(rt, base, l0)); } } -#if defined (CONFIG_SOFTMMU) +static const uint32_t qemu_ldx_opc[16] = { + [MO_UB] = LBZX, + [MO_UW] = LHZX, + [MO_UL] = LWZX, + [MO_Q] = LDX, + [MO_SW] = LHAX, + [MO_SL] = LWAX, + [MO_BSWAP | MO_UB] = LBZX, + [MO_BSWAP | MO_UW] = LHBRX, + [MO_BSWAP | MO_UL] = LWBRX, + [MO_BSWAP | MO_Q] = LDBRX, +}; -#include "exec/softmmu_defs.h" +static const uint32_t qemu_stx_opc[16] = { + [MO_UB] = STBX, + [MO_UW] = STHX, + [MO_UL] = STWX, + [MO_Q] = STDX, + [MO_BSWAP | MO_UB] = STBX, + [MO_BSWAP | MO_UW] = STHBRX, + [MO_BSWAP | MO_UL] = STWBRX, + [MO_BSWAP | MO_Q] = STDBRX, +}; +static const uint32_t qemu_exts_opc[4] = { + EXTSB, EXTSH, EXTSW, 0 +}; + +#if defined (CONFIG_SOFTMMU) /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ -static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, + * int mmu_idx, uintptr_t ra) + */ +static const void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, }; /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ -static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ +static const void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, }; -static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2, - TCGReg addr_reg, int s_bits, int offset) +/* Perform the TLB load and compare. Places the result of the comparison + in CR7, loads the addend of the TLB into R3, and returns the register + containing the guest address (zero-extended into R4). Clobbers R0 and R2. */ + +static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, TCGReg addr_reg, + int mem_index, bool is_read) { -#if TARGET_LONG_BITS == 32 - tcg_out_ext32u(s, addr_reg, addr_reg); - - tcg_out_rlw(s, RLWINM, r0, addr_reg, - 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), - 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS), - 31 - CPU_TLB_ENTRY_BITS); - tcg_out32(s, ADD | TAB(r0, r0, TCG_AREG0)); - tcg_out32(s, LWZU | TAI(r1, r0, offset)); - tcg_out_rlw(s, RLWINM, r2, addr_reg, 0, - (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); -#else - tcg_out_rld (s, RLDICL, r0, addr_reg, - 64 - TARGET_PAGE_BITS, - 64 - CPU_TLB_BITS); - tcg_out_shli64(s, r0, r0, CPU_TLB_ENTRY_BITS); + int cmp_off + = (is_read + ? 
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + TCGReg base = TCG_AREG0; + + /* Extract the page index, shifted into place for tlb index. */ + if (TARGET_LONG_BITS == 32) { + /* Zero-extend the address into a place helpful for further use. */ + tcg_out_ext32u(s, TCG_REG_R4, addr_reg); + addr_reg = TCG_REG_R4; + } else { + tcg_out_rld(s, RLDICL, TCG_REG_R3, addr_reg, + 64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS); + } - tcg_out32(s, ADD | TAB(r0, r0, TCG_AREG0)); - tcg_out32(s, LD_ADDR | TAI(r1, r0, offset)); + /* Compensate for very large offsets. */ + if (add_off >= 0x8000) { + /* Most target env are smaller than 32k; none are larger than 64k. + Simplify the logic here merely to offset by 0x7ff0, giving us a + range just shy of 64k. Check this assumption. */ + QEMU_BUILD_BUG_ON(offsetof(CPUArchState, + tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ff0 + 0x7fff); + tcg_out32(s, ADDI | TAI(TCG_REG_R2, base, 0x7ff0)); + base = TCG_REG_R2; + cmp_off -= 0x7ff0; + add_off -= 0x7ff0; + } - if (!s_bits) { - tcg_out_rld (s, RLDICR, r2, addr_reg, 0, 63 - TARGET_PAGE_BITS); + /* Extraction and shifting, part 2. */ + if (TARGET_LONG_BITS == 32) { + tcg_out_rlw(s, RLWINM, TCG_REG_R3, addr_reg, + 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), + 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS), + 31 - CPU_TLB_ENTRY_BITS); + } else { + tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS); } - else { - tcg_out_rld (s, RLDICL, r2, addr_reg, - 64 - TARGET_PAGE_BITS, - TARGET_PAGE_BITS - s_bits); - tcg_out_rld (s, RLDICL, r2, r2, TARGET_PAGE_BITS, 0); + + tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base)); + + /* Load the tlb comparator. */ + tcg_out32(s, LD_ADDR | TAI(TCG_REG_R2, TCG_REG_R3, cmp_off)); + + /* Load the TLB addend for use on the fast path. Do this asap + to minimize any load use delay. */ + tcg_out32(s, LD | TAI(TCG_REG_R3, TCG_REG_R3, add_off)); + + /* Clear the non-page, non-alignment bits from the address. */ + if (TARGET_LONG_BITS == 32) { + tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr_reg, 0, + (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); + } else if (!s_bits) { + tcg_out_rld(s, RLDICR, TCG_REG_R0, addr_reg, 0, 63 - TARGET_PAGE_BITS); + } else { + tcg_out_rld(s, RLDICL, TCG_REG_R0, addr_reg, + 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); + tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); } -#endif -} -#endif -static const uint32_t qemu_ldx_opc[8] = { -#ifdef TARGET_WORDS_BIGENDIAN - LBZX, LHZX, LWZX, LDX, - 0, LHAX, LWAX, LDX -#else - LBZX, LHBRX, LWBRX, LDBRX, - 0, 0, 0, LDBRX, -#endif -}; + tcg_out32(s, CMP | BF(7) | RA(TCG_REG_R0) | RB(TCG_REG_R2) | CMP_L); -static const uint32_t qemu_stx_opc[4] = { -#ifdef TARGET_WORDS_BIGENDIAN - STBX, STHX, STWX, STDX -#else - STBX, STHBRX, STWBRX, STDBRX, -#endif -}; + return addr_reg; +} -static const uint32_t qemu_exts_opc[4] = { - EXTSB, EXTSH, EXTSW, 0 -}; +/* Record the context of a call to the out of line helper code for the slow + path for a load or store, so that we can later generate the correct + helper code. 
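
The rotate-and-mask pairs in tcg_out_tlb_read are easier to audit as plain C. A standalone restatement of the index computation, with assumed example constants standing in for the real per-target values:

    #include <stdint.h>

    /* Example values only; the real ones come from the target config. */
    #define TARGET_PAGE_BITS   12   /* 4 KiB pages */
    #define CPU_TLB_BITS        8   /* 256-entry soft TLB */
    #define CPU_TLB_ENTRY_BITS  5   /* sizeof(CPUTLBEntry) == 32 */

    /* What RLDICL(64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS) followed by
       the shift left by CPU_TLB_ENTRY_BITS computes: the byte offset of
       the matching entry within env->tlb_table[mem_index]. */
    static inline uint64_t tlb_entry_byte_offset(uint64_t addr)
    {
        uint64_t index = (addr >> TARGET_PAGE_BITS) & ((1u << CPU_TLB_BITS) - 1);
        return index << CPU_TLB_ENTRY_BITS;
    }
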
*/ +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, + int data_reg, int addr_reg, int mem_index, + uint8_t *raddr, uint8_t *label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = data_reg; + label->addrlo_reg = addr_reg; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} -static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - TCGReg addr_reg, data_reg, r0, r1, rbase; - uint32_t insn, s_bits; -#ifdef CONFIG_SOFTMMU - TCGReg r2, ir; - int mem_index; - void *label1_ptr, *label2_ptr; -#endif + TCGMemOp opc = lb->opc; - data_reg = *args++; - addr_reg = *args++; - s_bits = opc & 3; + reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr); -#ifdef CONFIG_SOFTMMU - mem_index = *args; + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0); - r0 = 3; - r1 = 4; - r2 = 0; - rbase = 0; + /* If the address needed to be zero-extended, we'll have already + placed it in R4. The only remaining case is 64-bit guest. */ + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg); - tcg_out_tlb_read (s, r0, r1, r2, addr_reg, s_bits, - offsetof (CPUArchState, tlb_table[mem_index][0].addr_read)); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb->mem_index); + tcg_out32(s, MFSPR | RT(TCG_REG_R6) | LR); - tcg_out32 (s, CMP | BF (7) | RA (r2) | RB (r1) | CMP_L); + tcg_out_call(s, (tcg_target_long)qemu_ld_helpers[opc & ~MO_SIGN], 1); - label1_ptr = s->code_ptr; -#ifdef FAST_PATH - tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE); -#endif + if (opc & MO_SIGN) { + uint32_t insn = qemu_exts_opc[opc & MO_SIZE]; + tcg_out32(s, insn | RA(lb->datalo_reg) | RS(TCG_REG_R3)); + } else { + tcg_out_mov(s, TCG_TYPE_I64, lb->datalo_reg, TCG_REG_R3); + } - /* slow path */ - ir = 3; - tcg_out_mov (s, TCG_TYPE_I64, ir++, TCG_AREG0); - tcg_out_mov (s, TCG_TYPE_I64, ir++, addr_reg); - tcg_out_movi (s, TCG_TYPE_I64, ir++, mem_index); + tcg_out_b(s, 0, (uintptr_t)lb->raddr); +} - tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1); +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGMemOp opc = lb->opc; + TCGMemOp s_bits = opc & MO_SIZE; - if (opc & 4) { - insn = qemu_exts_opc[s_bits]; - tcg_out32(s, insn | RA(data_reg) | RS(3)); - } else if (data_reg != 3) { - tcg_out_mov(s, TCG_TYPE_I64, data_reg, 3); - } - label2_ptr = s->code_ptr; - tcg_out32 (s, B); + reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr); - /* label1: fast path */ -#ifdef FAST_PATH - reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr); + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, TCG_AREG0); + + /* If the address needed to be zero-extended, we'll have already + placed it in R4. The only remaining case is 64-bit guest. 
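
One subtlety in the load slow path above: the helpers return the loaded value zero-extended in R3, so a signed access must re-extend through qemu_exts_opc before the move into datalo_reg. A standalone sketch of that fixup (the function is illustrative, mirroring what EXTSB/EXTSH/EXTSW do):

    #include <stdint.h>

    /* extend_result(0xff, 0, 1) == -1; extend_result(0xff, 0, 0) == 255. */
    static inline int64_t extend_result(uint64_t ret, int size_log2, int is_signed)
    {
        int shift = 64 - (8 << size_log2);              /* MO_8..MO_64 -> 56..0 */
        if (is_signed) {
            return (int64_t)(ret << shift) >> shift;    /* EXTSB/EXTSH/EXTSW */
        }
        return shift ? (ret << shift) >> shift : ret;   /* plain masking */
    }
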
*/ + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg); + + tcg_out_rld(s, RLDICL, TCG_REG_R5, lb->datalo_reg, + 0, 64 - (1 << (3 + s_bits))); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R6, lb->mem_index); + tcg_out32(s, MFSPR | RT(TCG_REG_R7) | LR); + + tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1); + + tcg_out_b(s, 0, (uintptr_t)lb->raddr); +} +#endif /* SOFTMMU */ + +static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOp opc, int mem_index) +{ + TCGReg rbase; + uint32_t insn; + TCGMemOp s_bits = opc & MO_SIZE; +#ifdef CONFIG_SOFTMMU + void *label_ptr; #endif - /* r0 now contains &env->tlb_table[mem_index][index].addr_read */ - tcg_out32(s, LD | TAI(r0, r0, - offsetof(CPUTLBEntry, addend) - - offsetof(CPUTLBEntry, addr_read))); - /* r0 = env->tlb_table[mem_index][index].addend */ - tcg_out32(s, ADD | TAB(r0, r0, addr_reg)); - /* r0 = env->tlb_table[mem_index][index].addend + addr */ +#ifdef CONFIG_SOFTMMU + addr_reg = tcg_out_tlb_read(s, s_bits, addr_reg, mem_index, true); + + /* Load a pointer into the current opcode w/conditional branch-link. */ + label_ptr = s->code_ptr; + tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ -#if TARGET_LONG_BITS == 32 - tcg_out_ext32u(s, addr_reg, addr_reg); -#endif - r0 = addr_reg; - r1 = 3; rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; + if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_R2, addr_reg); + addr_reg = TCG_REG_R2; + } #endif insn = qemu_ldx_opc[opc]; if (!HAVE_ISA_2_06 && insn == LDBRX) { - tcg_out32(s, ADDI | TAI(r1, r0, 4)); - tcg_out32(s, LWBRX | TAB(data_reg, rbase, r0)); - tcg_out32(s, LWBRX | TAB( r1, rbase, r1)); - tcg_out_rld(s, RLDIMI, data_reg, r1, 32, 0); + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addr_reg, 4)); + tcg_out32(s, LWBRX | TAB(data_reg, rbase, addr_reg)); + tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0)); + tcg_out_rld(s, RLDIMI, data_reg, TCG_REG_R0, 32, 0); } else if (insn) { - tcg_out32(s, insn | TAB(data_reg, rbase, r0)); + tcg_out32(s, insn | TAB(data_reg, rbase, addr_reg)); } else { - insn = qemu_ldx_opc[s_bits]; - tcg_out32(s, insn | TAB(data_reg, rbase, r0)); + insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; + tcg_out32(s, insn | TAB(data_reg, rbase, addr_reg)); insn = qemu_exts_opc[s_bits]; - tcg_out32 (s, insn | RA(data_reg) | RS(data_reg)); + tcg_out32(s, insn | RA(data_reg) | RS(data_reg)); } #ifdef CONFIG_SOFTMMU - reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr); + add_qemu_ldst_label(s, true, opc, data_reg, addr_reg, mem_index, + s->code_ptr, label_ptr); #endif } -static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOp opc, int mem_index) { - TCGReg addr_reg, r0, r1, rbase, data_reg; + TCGReg rbase; uint32_t insn; #ifdef CONFIG_SOFTMMU - TCGReg r2, ir; - int mem_index; - void *label1_ptr, *label2_ptr; + void *label_ptr; #endif - data_reg = *args++; - addr_reg = *args++; - #ifdef CONFIG_SOFTMMU - mem_index = *args; - - r0 = 3; - r1 = 4; - r2 = 0; - rbase = 0; - - tcg_out_tlb_read (s, r0, r1, r2, addr_reg, opc, - offsetof (CPUArchState, tlb_table[mem_index][0].addr_write)); - - tcg_out32 (s, CMP | BF (7) | RA (r2) | RB (r1) | CMP_L); - - label1_ptr = s->code_ptr; -#ifdef FAST_PATH - tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE); -#endif + addr_reg = tcg_out_tlb_read(s, opc & MO_SIZE, addr_reg, mem_index, false); - /* slow path */ - ir = 3; - tcg_out_mov (s, TCG_TYPE_I64, 
ir++, TCG_AREG0); - tcg_out_mov (s, TCG_TYPE_I64, ir++, addr_reg); - tcg_out_rld (s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc))); - tcg_out_movi (s, TCG_TYPE_I64, ir++, mem_index); - - tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1); - - label2_ptr = s->code_ptr; - tcg_out32 (s, B); - - /* label1: fast path */ -#ifdef FAST_PATH - reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr); -#endif - - tcg_out32 (s, (LD - | RT (r0) - | RA (r0) - | (offsetof (CPUTLBEntry, addend) - - offsetof (CPUTLBEntry, addr_write)) - )); - /* r0 = env->tlb_table[mem_index][index].addend */ - tcg_out32(s, ADD | TAB(r0, r0, addr_reg)); - /* r0 = env->tlb_table[mem_index][index].addend + addr */ + /* Load a pointer into the current opcode w/conditional branch-link. */ + label_ptr = s->code_ptr; + tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ -#if TARGET_LONG_BITS == 32 - tcg_out_ext32u(s, addr_reg, addr_reg); -#endif - r1 = 3; - r0 = addr_reg; rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; + if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_R2, addr_reg); + addr_reg = TCG_REG_R2; + } #endif insn = qemu_stx_opc[opc]; if (!HAVE_ISA_2_06 && insn == STDBRX) { - tcg_out32(s, STWBRX | SAB(data_reg, rbase, r0)); - tcg_out32(s, ADDI | TAI(r1, r0, 4)); - tcg_out_shri64(s, 0, data_reg, 32); - tcg_out32(s, STWBRX | SAB(0, rbase, r1)); + tcg_out32(s, STWBRX | SAB(data_reg, rbase, addr_reg)); + tcg_out32(s, ADDI | TAI(TCG_REG_R2, addr_reg, 4)); + tcg_out_shri64(s, TCG_REG_R0, data_reg, 32); + tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_R2)); } else { - tcg_out32(s, insn | SAB(data_reg, rbase, r0)); + tcg_out32(s, insn | SAB(data_reg, rbase, addr_reg)); } #ifdef CONFIG_SOFTMMU - reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr); + add_qemu_ldst_label(s, false, opc, data_reg, addr_reg, mem_index, + s->code_ptr, label_ptr); #endif } -static void tcg_target_qemu_prologue (TCGContext *s) +#define FRAME_SIZE ((int) \ + ((8 /* back chain */ \ + + 8 /* CR */ \ + + 8 /* LR */ \ + + 8 /* compiler doubleword */ \ + + 8 /* link editor doubleword */ \ + + 8 /* TOC save area */ \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + + ARRAY_SIZE(tcg_target_callee_save_regs) * 8 \ + + 15) & ~15)) + +#define REG_SAVE_BOT (FRAME_SIZE - ARRAY_SIZE(tcg_target_callee_save_regs) * 8) + +static void tcg_target_qemu_prologue(TCGContext *s) { - int i, frame_size; -#ifndef __APPLE__ - uint64_t addr; -#endif + int i; - frame_size = 0 - + 8 /* back chain */ - + 8 /* CR */ - + 8 /* LR */ - + 8 /* compiler doubleword */ - + 8 /* link editor doubleword */ - + 8 /* TOC save area */ - + TCG_STATIC_CALL_ARGS_SIZE - + ARRAY_SIZE (tcg_target_callee_save_regs) * 8 - + CPU_TEMP_BUF_NLONGS * sizeof(long) - ; - frame_size = (frame_size + 15) & ~15; - - tcg_set_frame (s, TCG_REG_CALL_STACK, frame_size - - CPU_TEMP_BUF_NLONGS * sizeof (long), - CPU_TEMP_BUF_NLONGS * sizeof (long)); + tcg_set_frame(s, TCG_REG_CALL_STACK, + REG_SAVE_BOT - CPU_TEMP_BUF_NLONGS * sizeof(long), + CPU_TEMP_BUF_NLONGS * sizeof(long)); #ifndef __APPLE__ /* First emit adhoc function descriptor */ - addr = (uint64_t) s->code_ptr + 24; - tcg_out32 (s, addr >> 32); tcg_out32 (s, addr); /* entry point */ + tcg_out64(s, (uint64_t)s->code_ptr + 24); /* entry point */ s->code_ptr += 16; /* skip TOC and environment pointer */ #endif /* Prologue */ - tcg_out32 (s, MFSPR | RT (0) | LR); - tcg_out32 (s, STDU | RS (1) | RA (1) | (-frame_size & 0xffff)); - for (i = 0; i < 
ARRAY_SIZE (tcg_target_callee_save_regs); ++i) - tcg_out32 (s, (STD - | RS (tcg_target_callee_save_regs[i]) - | RA (1) - | (i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE) - ) - ); - tcg_out32 (s, STD | RS (0) | RA (1) | (frame_size + 16)); + tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); + tcg_out32(s, STDU | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { + tcg_out32(s, STD | SAI(tcg_target_callee_save_regs[i], 1, + REG_SAVE_BOT + i * 8)); + } + tcg_out32(s, STD | SAI(TCG_REG_R0, TCG_REG_R1, FRAME_SIZE + 16)); #ifdef CONFIG_USE_GUEST_BASE if (GUEST_BASE) { - tcg_out_movi (s, TCG_TYPE_I64, TCG_GUEST_BASE_REG, GUEST_BASE); - tcg_regset_set_reg (s->reserved_regs, TCG_GUEST_BASE_REG); + tcg_out_movi(s, TCG_TYPE_I64, TCG_GUEST_BASE_REG, GUEST_BASE); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } #endif - tcg_out_mov (s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tcg_out32 (s, MTSPR | RS (tcg_target_call_iarg_regs[1]) | CTR); - tcg_out32 (s, BCCTR | BO_ALWAYS); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); /* Epilogue */ tb_ret_addr = s->code_ptr; - for (i = 0; i < ARRAY_SIZE (tcg_target_callee_save_regs); ++i) - tcg_out32 (s, (LD - | RT (tcg_target_callee_save_regs[i]) - | RA (1) - | (i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE) - ) - ); - tcg_out32(s, LD | TAI(0, 1, frame_size + 16)); - tcg_out32(s, MTSPR | RS(0) | LR); - tcg_out32(s, ADDI | TAI(1, 1, frame_size)); + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { + tcg_out32(s, LD | TAI(tcg_target_callee_save_regs[i], TCG_REG_R1, + REG_SAVE_BOT + i * 8)); + } + tcg_out32(s, LD | TAI(TCG_REG_R0, TCG_REG_R1, FRAME_SIZE + 16)); + tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); + tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); tcg_out32(s, BCLR | BO_ALWAYS); } -static void tcg_out_ld (TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, - tcg_target_long arg2) +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, + TCGReg arg1, intptr_t arg2) { - if (type == TCG_TYPE_I32) - tcg_out_ldst (s, ret, arg1, arg2, LWZ, LWZX); - else - tcg_out_ldsta (s, ret, arg1, arg2, LD, LDX); + int opi, opx; + + if (type == TCG_TYPE_I32) { + opi = LWZ, opx = LWZX; + } else { + opi = LD, opx = LDX; + } + tcg_out_mem_long(s, opi, opx, ret, arg1, arg2); } -static void tcg_out_st (TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, - tcg_target_long arg2) +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) { - if (type == TCG_TYPE_I32) - tcg_out_ldst (s, arg, arg1, arg2, STW, STWX); - else - tcg_out_ldsta (s, arg, arg1, arg2, STD, STDX); + int opi, opx; + + if (type == TCG_TYPE_I32) { + opi = STW, opx = STWX; + } else { + opi = STD, opx = STDX; + } + tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); } static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, @@ -1109,8 +1201,7 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, op = CMPI; imm = 1; break; - } - else if ((uint16_t) arg2 == arg2) { + } else if ((uint16_t) arg2 == arg2) { op = CMPLI; imm = 1; break; @@ -1151,7 +1242,7 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, break; default: - tcg_abort (); + tcg_abort(); } op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); @@ -1159,8 +1250,8 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, 
TCGArg arg2, tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); } else { if (const_arg2) { - tcg_out_movi(s, type, 0, arg2); - arg2 = 0; + tcg_out_movi(s, type, TCG_REG_R0, arg2); + arg2 = TCG_REG_R0; } tcg_out32(s, op | RA(arg1) | RB(arg2)); } @@ -1181,8 +1272,8 @@ static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src) tcg_out32(s, ADDIC | TAI(dst, src, -1)); tcg_out32(s, SUBFE | TAB(dst, dst, src)); } else { - tcg_out32(s, ADDIC | TAI(0, src, -1)); - tcg_out32(s, SUBFE | TAB(dst, 0, src)); + tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); + tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); } } @@ -1295,13 +1386,13 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, case TCG_COND_GE: case TCG_COND_GEU: sh = 31; - crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_LT) | BB (7, CR_LT); + crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT); goto crtest; case TCG_COND_LE: case TCG_COND_LEU: sh = 31; - crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_GT) | BB (7, CR_GT); + crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT); crtest: tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); if (crop) { @@ -1312,22 +1403,19 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, break; default: - tcg_abort (); + tcg_abort(); } } -static void tcg_out_bc (TCGContext *s, int bc, int label_index) +static void tcg_out_bc(TCGContext *s, int bc, int label_index) { TCGLabel *l = &s->labels[label_index]; - if (l->has_value) - tcg_out32 (s, bc | reloc_pc14_val (s->code_ptr, l->u.value)); - else { - uint16_t val = *(uint16_t *) &s->code_ptr[2]; - - /* Thanks to Andrzej Zaborowski */ - tcg_out32 (s, bc | (val & 0xfffc)); - tcg_out_reloc (s, s->code_ptr - 4, R_PPC_REL14, label_index, 0); + if (l->has_value) { + tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value)); + } else { + tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, label_index, 0); + tcg_out_bc_noaddr(s, bc); } } @@ -1363,7 +1451,7 @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, } /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */ if (v2 == 0) { - tcg_out_movi(s, type, 0, 0); + tcg_out_movi(s, type, TCG_REG_R0, 0); } tcg_out32(s, isel | TAB(dest, v1, v2)); } else { @@ -1387,37 +1475,36 @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, } } -void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr) +void ppc_tb_set_jmp_target(unsigned long jmp_addr, unsigned long addr) { TCGContext s; unsigned long patch_size; s.code_ptr = (uint8_t *) jmp_addr; - tcg_out_b (&s, 0, addr); + tcg_out_b(&s, 0, addr); patch_size = s.code_ptr - (uint8_t *) jmp_addr; - flush_icache_range (jmp_addr, jmp_addr + patch_size); + flush_icache_range(jmp_addr, jmp_addr + patch_size); } -static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, - const int *const_args) +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, + const int *const_args) { TCGArg a0, a1, a2; int c; switch (opc) { case INDEX_op_exit_tb: - tcg_out_movi (s, TCG_TYPE_I64, TCG_REG_R3, args[0]); - tcg_out_b (s, 0, (tcg_target_long) tb_ret_addr); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R3, args[0]); + tcg_out_b(s, 0, (tcg_target_long)tb_ret_addr); break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { - /* direct jump method */ - + /* Direct jump method. */ s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; s->code_ptr += 28; - } - else { - tcg_abort (); + } else { + /* Indirect jump method. 
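
The direct-jump method in goto_tb above reserves 28 bytes at the patch site, to be overwritten later by ppc_tb_set_jmp_target (shown just before tcg_out_op). That budget presumably covers the worst case tcg_out_b can emit when the destination is out of direct-branch range; under that assumption the accounting can be checked at compile time:

    #include <assert.h>

    /* Assumed worst case for this backend: a full 64-bit tcg_out_movi is
       five instructions (addis, ori, rldicr, oris, ori), plus mtctr and
       bcctr for the indirect branch. */
    enum {
        MOVI64_INSNS    = 5,
        MTCTR_INSNS     = 1,
        BCCTR_INSNS     = 1,
        GOTO_TB_RESERVE = (MOVI64_INSNS + MTCTR_INSNS + BCCTR_INSNS) * 4
    };

    static_assert(GOTO_TB_RESERVE == 28, "matches s->code_ptr += 28");
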
*/ + tcg_abort(); } s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; break; @@ -1426,83 +1513,70 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, TCGLabel *l = &s->labels[args[0]]; if (l->has_value) { - tcg_out_b (s, 0, l->u.value); - } - else { - uint32_t val = *(uint32_t *) s->code_ptr; - - /* Thanks to Andrzej Zaborowski */ - tcg_out32 (s, B | (val & 0x3fffffc)); - tcg_out_reloc (s, s->code_ptr - 4, R_PPC_REL24, args[0], 0); + tcg_out_b(s, 0, l->u.value); + } else { + tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, args[0], 0); + tcg_out_b_noaddr(s, B); } } break; case INDEX_op_call: - tcg_out_call (s, args[0], const_args[0]); + tcg_out_call(s, args[0], const_args[0]); break; case INDEX_op_movi_i32: - tcg_out_movi (s, TCG_TYPE_I32, args[0], args[1]); + tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); break; case INDEX_op_movi_i64: - tcg_out_movi (s, TCG_TYPE_I64, args[0], args[1]); + tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); break; case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LBZ, LBZX); + tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); break; case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LBZ, LBZX); - tcg_out32 (s, EXTSB | RS (args[0]) | RA (args[0])); + tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); + tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0])); break; case INDEX_op_ld16u_i32: case INDEX_op_ld16u_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LHZ, LHZX); + tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); break; case INDEX_op_ld16s_i32: case INDEX_op_ld16s_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LHA, LHAX); + tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); break; case INDEX_op_ld_i32: case INDEX_op_ld32u_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LWZ, LWZX); + tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); break; case INDEX_op_ld32s_i64: - tcg_out_ldsta (s, args[0], args[1], args[2], LWA, LWAX); + tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); break; case INDEX_op_ld_i64: - tcg_out_ldsta (s, args[0], args[1], args[2], LD, LDX); + tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); break; case INDEX_op_st8_i32: case INDEX_op_st8_i64: - tcg_out_ldst (s, args[0], args[1], args[2], STB, STBX); + tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); break; case INDEX_op_st16_i32: case INDEX_op_st16_i64: - tcg_out_ldst (s, args[0], args[1], args[2], STH, STHX); + tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); break; case INDEX_op_st_i32: case INDEX_op_st32_i64: - tcg_out_ldst (s, args[0], args[1], args[2], STW, STWX); + tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); break; case INDEX_op_st_i64: - tcg_out_ldsta (s, args[0], args[1], args[2], STD, STDX); + tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); break; case INDEX_op_add_i32: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - int32_t l, h; do_addi_32: - l = (int16_t)a2; - h = a2 - l; - if (h) { - tcg_out32(s, ADDIS | TAI(a0, a1, h >> 16)); - a1 = a0; - } - if (l || a0 != a1) { - tcg_out32(s, ADDI | TAI(a0, a1, l)); - } + tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); } else { tcg_out32(s, ADD | TAB(a0, a1, a2)); } @@ -1610,32 +1684,33 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, break; case INDEX_op_div_i32: - tcg_out32 (s, DIVW | TAB (args[0], args[1], args[2])); + tcg_out32(s, DIVW | TAB(args[0], args[1], 
args[2])); break; case INDEX_op_divu_i32: - tcg_out32 (s, DIVWU | TAB (args[0], args[1], args[2])); + tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); break; case INDEX_op_shl_i32: if (const_args[2]) { tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31 - args[2]); } else { - tcg_out32 (s, SLW | SAB (args[1], args[0], args[2])); + tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); } break; case INDEX_op_shr_i32: if (const_args[2]) { tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], args[2], 31); } else { - tcg_out32 (s, SRW | SAB (args[1], args[0], args[2])); + tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); } break; case INDEX_op_sar_i32: - if (const_args[2]) - tcg_out32 (s, SRAWI | RS (args[1]) | RA (args[0]) | SH (args[2])); - else - tcg_out32 (s, SRAW | SAB (args[1], args[0], args[2])); + if (const_args[2]) { + tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2])); + } else { + tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); + } break; case INDEX_op_rotl_i32: if (const_args[2]) { @@ -1649,8 +1724,8 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, if (const_args[2]) { tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); } else { - tcg_out32(s, SUBFIC | TAI(0, args[2], 32)); - tcg_out32(s, RLWNM | SAB(args[1], args[0], 0) + tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); + tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) | MB(0) | ME(31)); } break; @@ -1667,43 +1742,19 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_neg_i32: case INDEX_op_neg_i64: - tcg_out32 (s, NEG | RT (args[0]) | RA (args[1])); + tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); break; case INDEX_op_not_i32: case INDEX_op_not_i64: - tcg_out32 (s, NOR | SAB (args[1], args[0], args[1])); + tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); break; case INDEX_op_add_i64: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - int32_t l0, h1, h2; do_addi_64: - /* We can always split any 32-bit signed constant into 3 pieces. - Note the positive 0x80000000 coming from the sub_i64 path, - handled with the same code we need for eg 0x7fff8000. 
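
The comment being deleted here walked through the three-piece constant split by hand; that logic now lives in tcg_out_mem_long (shown earlier in this patch), which do_addi_64 reaches through the ADDI/ADD case. A standalone restatement of the split, including the 0x4000 compensation for the carry out of the low halfword (the harness is illustrative; the variable names come from tcg_out_mem_long):

    #include <assert.h>
    #include <stdint.h>

    /* Each nonzero piece costs one instruction: ADDIS for l1 and extra,
       and the final ADDI or load/store displacement for l0.
       Example: 0x7fff8000 -> l0 = -0x8000, l1 = 0x4000, extra = 0x4000. */
    static void split_offset(int32_t c, int16_t *l0, int16_t *l1, int32_t *extra)
    {
        int64_t off = c;

        *extra = 0;
        *l0 = (int16_t)off;
        off = (off - *l0) >> 16;
        *l1 = (int16_t)off;
        if (*l1 < 0 && c >= 0) {
            *extra = 0x4000;
            *l1 = (int16_t)(off - 0x4000);
        }
        assert((((int64_t)*l1 + *extra) << 16) + *l0 == c);
    }
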
*/ - assert(a2 == (int32_t)a2 || a2 == 0x80000000); - l0 = (int16_t)a2; - h1 = a2 - l0; - h2 = 0; - if (h1 < 0 && (int64_t)a2 > 0) { - h2 = 0x40000000; - h1 = a2 - h2 - l0; - } - assert((TCGArg)h2 + h1 + l0 == a2); - - if (h2) { - tcg_out32(s, ADDIS | TAI(a0, a1, h2 >> 16)); - a1 = a0; - } - if (h1) { - tcg_out32(s, ADDIS | TAI(a0, a1, h1 >> 16)); - a1 = a0; - } - if (l0 || a0 != a1) { - tcg_out32(s, ADDI | TAI(a0, a1, l0)); - } + tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); } else { tcg_out32(s, ADD | TAB(a0, a1, a2)); } @@ -1725,24 +1776,26 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, break; case INDEX_op_shl_i64: - if (const_args[2]) + if (const_args[2]) { tcg_out_shli64(s, args[0], args[1], args[2]); - else - tcg_out32 (s, SLD | SAB (args[1], args[0], args[2])); + } else { + tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); + } break; case INDEX_op_shr_i64: - if (const_args[2]) + if (const_args[2]) { tcg_out_shri64(s, args[0], args[1], args[2]); - else - tcg_out32 (s, SRD | SAB (args[1], args[0], args[2])); + } else { + tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); + } break; case INDEX_op_sar_i64: if (const_args[2]) { - int sh = SH (args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1); - tcg_out32 (s, SRADI | RA (args[0]) | RS (args[1]) | sh); + int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1); + tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh); + } else { + tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); } - else - tcg_out32 (s, SRAD | SAB (args[1], args[0], args[2])); break; case INDEX_op_rotl_i64: if (const_args[2]) { @@ -1755,8 +1808,8 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, if (const_args[2]) { tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); } else { - tcg_out32(s, SUBFIC | TAI(0, args[2], 64)); - tcg_out32(s, RLDCL | SAB(args[1], args[0], 0) | MB64(0)); + tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); + tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); } break; @@ -1769,45 +1822,19 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, } break; case INDEX_op_div_i64: - tcg_out32 (s, DIVD | TAB (args[0], args[1], args[2])); + tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); break; case INDEX_op_divu_i64: - tcg_out32 (s, DIVDU | TAB (args[0], args[1], args[2])); + tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); break; - case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld (s, args, 0); - break; - case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld (s, args, 0 | 4); - break; - case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld (s, args, 1); - break; - case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld (s, args, 1 | 4); - break; - case INDEX_op_qemu_ld32: - case INDEX_op_qemu_ld32u: - tcg_out_qemu_ld (s, args, 2); - break; - case INDEX_op_qemu_ld32s: - tcg_out_qemu_ld (s, args, 2 | 4); + case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3]); break; - case INDEX_op_qemu_ld64: - tcg_out_qemu_ld (s, args, 3); - break; - case INDEX_op_qemu_st8: - tcg_out_qemu_st (s, args, 0); - break; - case INDEX_op_qemu_st16: - tcg_out_qemu_st (s, args, 1); - break; - case INDEX_op_qemu_st32: - tcg_out_qemu_st (s, args, 2); - break; - case INDEX_op_qemu_st64: - tcg_out_qemu_st (s, args, 3); + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args[0], args[1], args[2], args[3]); break; case INDEX_op_ext8s_i32: @@ -1822,16 +1849,16 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg 
*args, c = EXTSW; goto gen_ext; gen_ext: - tcg_out32 (s, c | RS (args[1]) | RA (args[0])); + tcg_out32(s, c | RS(args[1]) | RA(args[0])); break; case INDEX_op_setcond_i32: - tcg_out_setcond (s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], - const_args[2]); + tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], + const_args[2]); break; case INDEX_op_setcond_i64: - tcg_out_setcond (s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], - const_args[2]); + tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], + const_args[2]); break; case INDEX_op_bswap16_i32: @@ -1873,9 +1900,9 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, break; case INDEX_op_bswap64_i64: - a0 = args[0], a1 = args[1], a2 = 0; + a0 = args[0], a1 = args[1], a2 = TCG_REG_R0; if (a0 == a1) { - a0 = 0; + a0 = TCG_REG_R0; a2 = a1; } @@ -1975,34 +2002,16 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, } break; - case INDEX_op_mulu2_i64: - case INDEX_op_muls2_i64: - { - int oph = (opc == INDEX_op_mulu2_i64 ? MULHDU : MULHD); - TCGReg outl = args[0], outh = args[1]; - a0 = args[2], a1 = args[3]; - - if (outl == a0 || outl == a1) { - if (outh == a0 || outh == a1) { - outl = TCG_REG_R0; - } else { - tcg_out32(s, oph | TAB(outh, a0, a1)); - oph = 0; - } - } - tcg_out32(s, MULLD | TAB(outl, a0, a1)); - if (oph != 0) { - tcg_out32(s, oph | TAB(outh, a0, a1)); - } - if (outl != args[0]) { - tcg_out_mov(s, TCG_TYPE_I64, args[0], outl); - } - } + case INDEX_op_muluh_i64: + tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_mulsh_i64: + tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); break; default: - tcg_dump_ops (s); - tcg_abort (); + tcg_dump_ops(s); + tcg_abort(); } } @@ -2088,19 +2097,10 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_neg_i64, { "r", "r" } }, { INDEX_op_not_i64, { "r", "r" } }, - { INDEX_op_qemu_ld8u, { "r", "L" } }, - { INDEX_op_qemu_ld8s, { "r", "L" } }, - { INDEX_op_qemu_ld16u, { "r", "L" } }, - { INDEX_op_qemu_ld16s, { "r", "L" } }, - { INDEX_op_qemu_ld32, { "r", "L" } }, - { INDEX_op_qemu_ld32u, { "r", "L" } }, - { INDEX_op_qemu_ld32s, { "r", "L" } }, - { INDEX_op_qemu_ld64, { "r", "L" } }, - - { INDEX_op_qemu_st8, { "S", "S" } }, - { INDEX_op_qemu_st16, { "S", "S" } }, - { INDEX_op_qemu_st32, { "S", "S" } }, - { INDEX_op_qemu_st64, { "S", "S" } }, + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_ld_i64, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "S", "S" } }, + { INDEX_op_qemu_st_i64, { "S", "S" } }, { INDEX_op_ext8s_i32, { "r", "r" } }, { INDEX_op_ext16s_i32, { "r", "r" } }, @@ -2124,13 +2124,13 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } }, { INDEX_op_sub2_i64, { "r", "r", "rI", "r", "rZM", "r" } }, - { INDEX_op_muls2_i64, { "r", "r", "r", "r" } }, - { INDEX_op_mulu2_i64, { "r", "r", "r", "r" } }, + { INDEX_op_mulsh_i64, { "r", "r", "r" } }, + { INDEX_op_muluh_i64, { "r", "r", "r" } }, { -1 }, }; -static void tcg_target_init (TCGContext *s) +static void tcg_target_init(TCGContext *s) { #ifdef CONFIG_GETAUXVAL unsigned long hwcap = getauxval(AT_HWCAP); @@ -2139,13 +2139,11 @@ static void tcg_target_init (TCGContext *s) } #endif - tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); - tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); - tcg_regset_set32 (tcg_target_call_clobber_regs, 0, + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 
0xffffffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); + tcg_regset_set32(tcg_target_call_clobber_regs, 0, (1 << TCG_REG_R0) | -#ifdef __APPLE__ (1 << TCG_REG_R2) | -#endif (1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5) | @@ -2155,16 +2153,65 @@ static void tcg_target_init (TCGContext *s) (1 << TCG_REG_R9) | (1 << TCG_REG_R10) | (1 << TCG_REG_R11) | - (1 << TCG_REG_R12) - ); + (1 << TCG_REG_R12)); - tcg_regset_clear (s->reserved_regs); - tcg_regset_set_reg (s->reserved_regs, TCG_REG_R0); - tcg_regset_set_reg (s->reserved_regs, TCG_REG_R1); -#ifndef __APPLE__ - tcg_regset_set_reg (s->reserved_regs, TCG_REG_R2); + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* mem temp */ +#ifdef __APPLE__ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R11); /* ??? */ #endif - tcg_regset_set_reg (s->reserved_regs, TCG_REG_R13); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ + + tcg_add_target_add_op_defs(ppc_op_defs); +} + +typedef struct { + DebugFrameCIE cie; + DebugFrameFDEHeader fde; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3]; +} DebugFrame; + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +#define ELF_HOST_MACHINE EM_PPC64 + +static DebugFrame debug_frame = { + .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .cie.id = -1, + .cie.version = 1, + .cie.code_align = 1, + .cie.data_align = 0x78, /* sleb128 -8 */ + .cie.return_column = 65, + + /* Total FDE size does not include the "len" member. */ + .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + + .fde_def_cfa = { + 12, 1, /* DW_CFA_def_cfa r1, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... 
uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + 0x11, 65, 0x7e, /* DW_CFA_offset_extended_sf, lr, 16 */ + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + uint8_t *p = &debug_frame.fde_reg_ofs[3]; + int i; + + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) { + p[0] = 0x80 + tcg_target_callee_save_regs[i]; + p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * 8)) / 8; + } + + debug_frame.fde.func_start = (tcg_target_long) buf; + debug_frame.fde.func_len = buf_size; - tcg_add_target_add_op_defs (ppc_op_defs); + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h index 48fc6e2e54..7ee50b6c6c 100644 --- a/tcg/ppc64/tcg-target.h +++ b/tcg/ppc64/tcg-target.h @@ -95,6 +95,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 @@ -116,8 +118,12 @@ typedef enum { #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 1 -#define TCG_TARGET_HAS_muls2_i64 1 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 1 +#define TCG_TARGET_HAS_mulsh_i64 1 + +#define TCG_TARGET_HAS_new_ldst 1 #define TCG_AREG0 TCG_REG_R27 diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index f229f1c346..0a4f3be0e9 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -24,6 +24,8 @@ * THE SOFTWARE. */ +#include "tcg-be-null.h" + /* We only support generating code for 64-bit mode. */ #if TCG_TARGET_REG_BITS != 64 #error "unsupported code generation mode" @@ -315,9 +317,6 @@ static const uint8_t tcg_cond_to_ltr_cond[] = { }; #ifdef CONFIG_SOFTMMU - -#include "exec/softmmu_defs.h" - /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ static const void * const qemu_ld_helpers[4] = { @@ -351,10 +350,10 @@ static uint8_t *tb_ret_addr; static uint64_t facilities; static void patch_reloc(uint8_t *code_ptr, int type, - tcg_target_long value, tcg_target_long addend) + intptr_t value, intptr_t addend) { - tcg_target_long code_ptr_tl = (tcg_target_long)code_ptr; - tcg_target_long pcrel2; + intptr_t code_ptr_tl = (intptr_t)code_ptr; + intptr_t pcrel2; /* ??? Not the usual definition of "addend". 
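
The computation that follows this comment divides by two because s390 PC-relative immediates count halfwords, and it uses `addend` to adjust the PC base rather than the target value, which is what the ??? remark is flagging. Restated as plain C, under those assumptions:

    #include <stdint.h>

    /* Halfword-scaled PC-relative displacement as used by s390 RIL-type
       instructions; here addend shifts the base back to the instruction
       start (hence "not the usual definition of addend"). */
    static inline int64_t s390_pcrel2(intptr_t target, intptr_t code_ptr,
                                      intptr_t addend)
    {
        return ((int64_t)target - (int64_t)(code_ptr + addend)) >> 1;
    }
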
*/ pcrel2 = (value - (code_ptr_tl + addend)) >> 1; @@ -771,7 +770,7 @@ static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy, /* load data without address translation or endianness conversion */ static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data, - TCGReg base, tcg_target_long ofs) + TCGReg base, intptr_t ofs) { if (type == TCG_TYPE_I32) { tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs); @@ -781,7 +780,7 @@ static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data, } static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data, - TCGReg base, tcg_target_long ofs) + TCGReg base, intptr_t ofs) { if (type == TCG_TYPE_I32) { tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs); diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 42ca36c0e9..10adb778c7 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -69,6 +69,8 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 @@ -94,6 +96,10 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 + +#define TCG_TARGET_HAS_new_ldst 0 extern bool tcg_target_deposit_valid(int ofs, int len); #define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid @@ -110,8 +116,7 @@ enum { TCG_AREG0 = TCG_REG_R10, }; -static inline void flush_icache_range(tcg_target_ulong start, - tcg_target_ulong stop) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) { } diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index 5bfd29c3b4..cbd1c91779 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -22,6 +22,8 @@ * THE SOFTWARE. */ +#include "tcg-be-null.h" + #ifndef NDEBUG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "%g0", @@ -252,7 +254,7 @@ static inline int check_fit_i32(uint32_t val, unsigned int bits) } static void patch_reloc(uint8_t *code_ptr, int type, - tcg_target_long value, tcg_target_long addend) + intptr_t value, intptr_t addend) { uint32_t insn; value += addend; @@ -264,7 +266,7 @@ static void patch_reloc(uint8_t *code_ptr, int type, *(uint32_t *)code_ptr = value; break; case R_SPARC_WDISP16: - value -= (long)code_ptr; + value -= (intptr_t)code_ptr; if (!check_fit_tl(value >> 2, 16)) { tcg_abort(); } @@ -274,7 +276,7 @@ static void patch_reloc(uint8_t *code_ptr, int type, *(uint32_t *)code_ptr = insn; break; case R_SPARC_WDISP19: - value -= (long)code_ptr; + value -= (intptr_t)code_ptr; if (!check_fit_tl(value >> 2, 19)) { tcg_abort(); } @@ -436,13 +438,13 @@ static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, } static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, - TCGReg arg1, tcg_target_long arg2) + TCGReg arg1, intptr_t arg2) { tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX)); } static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, tcg_target_long arg2) + TCGReg arg1, intptr_t arg2) { tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? 
STW : STX)); } @@ -831,8 +833,6 @@ static void tcg_target_qemu_prologue(TCGContext *s) #if defined(CONFIG_SOFTMMU) -#include "exec/softmmu_defs.h" - /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ static const void * const qemu_ld_helpers[4] = { diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index dab52d7176..00f3a1848b 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -24,6 +24,14 @@ #ifndef TCG_TARGET_SPARC #define TCG_TARGET_SPARC 1 +#if UINTPTR_MAX == UINT32_MAX +# define TCG_TARGET_REG_BITS 32 +#elif UINTPTR_MAX == UINT64_MAX +# define TCG_TARGET_REG_BITS 64 +#else +# error Unknown pointer size for tcg target +#endif + #define TCG_TARGET_WORDS_BIGENDIAN #define TCG_TARGET_NB_REGS 32 @@ -107,6 +115,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div_i64 1 @@ -134,20 +144,20 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 #endif +#define TCG_TARGET_HAS_new_ldst 0 + #define TCG_AREG0 TCG_REG_I0 -static inline void flush_icache_range(tcg_target_ulong start, - tcg_target_ulong stop) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) { - unsigned long p; - - p = start & ~(8UL - 1UL); - stop = (stop + (8UL - 1UL)) & ~(8UL - 1UL); - - for (; p < stop; p += 8) + uintptr_t p; + for (p = start & -8; p < ((stop + 7) & -8); p += 8) { __asm__ __volatile__("flush\t%0" : : "r" (p)); + } } #endif diff --git a/tcg/tcg-be-ldst.h b/tcg/tcg-be-ldst.h new file mode 100644 index 0000000000..284db0c70d --- /dev/null +++ b/tcg/tcg-be-ldst.h @@ -0,0 +1,90 @@ +/* + * TCG Backend Data: load-store optimization only. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#ifdef CONFIG_SOFTMMU
+#define TCG_MAX_QEMU_LDST 640
+
+typedef struct TCGLabelQemuLdst {
+    int is_ld:1;            /* qemu_ld: 1, qemu_st: 0 */
+    TCGMemOp opc:4;
+    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
+    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
+    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
+    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
+    int mem_index;          /* soft MMU memory index */
+    uint8_t *raddr;         /* gen code addr of the next IR of qemu_ld/st IR */
+    uint8_t *label_ptr[2];  /* label pointers to be updated */
+} TCGLabelQemuLdst;
+
+typedef struct TCGBackendData {
+    int nb_ldst_labels;
+    TCGLabelQemuLdst ldst_labels[TCG_MAX_QEMU_LDST];
+} TCGBackendData;
+
+
+/*
+ * Initialize TB backend data at the beginning of the TB.
+ */
+
+static inline void tcg_out_tb_init(TCGContext *s)
+{
+    s->be->nb_ldst_labels = 0;
+}
+
+/*
+ * Generate TB finalization at the end of block
+ */
+
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
+
+static void tcg_out_tb_finalize(TCGContext *s)
+{
+    TCGLabelQemuLdst *lb = s->be->ldst_labels;
+    int i, n = s->be->nb_ldst_labels;
+
+    /* qemu_ld/st slow paths */
+    for (i = 0; i < n; i++) {
+        if (lb[i].is_ld) {
+            tcg_out_qemu_ld_slow_path(s, lb + i);
+        } else {
+            tcg_out_qemu_st_slow_path(s, lb + i);
+        }
+    }
+}
+
+/*
+ * Allocate a new TCGLabelQemuLdst entry.
+ */
+
+static inline TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
+{
+    TCGBackendData *be = s->be;
+    int n = be->nb_ldst_labels;
+
+    assert(n < TCG_MAX_QEMU_LDST);
+    be->nb_ldst_labels = n + 1;
+    return &be->ldst_labels[n];
+}
+#else
+#include "tcg-be-null.h"
+#endif /* CONFIG_SOFTMMU */
diff --git a/tcg/tcg-be-null.h b/tcg/tcg-be-null.h
new file mode 100644
index 0000000000..74c57d5a6c
--- /dev/null
+++ b/tcg/tcg-be-null.h
@@ -0,0 +1,43 @@
+/*
+ * TCG Backend Data: No backend data
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+typedef struct TCGBackendData {
+    /* Empty */
+    char dummy;
+} TCGBackendData;
+
+
+/*
+ * Initialize TB backend data at the beginning of the TB.
+ */
+
+static inline void tcg_out_tb_init(TCGContext *s)
+{
+}
+
+/*
+ * Generate TB finalization at the end of block
+ */
+
+static inline void tcg_out_tb_finalize(TCGContext *s)
+{
+}
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 364964d8d4..7eabf22f01 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -137,24 +137,6 @@ static inline void tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val,
    *tcg_ctx.gen_opparam_ptr++ = offset;
}
-static inline void tcg_gen_qemu_ldst_op_i64_i32(TCGOpcode opc, TCGv_i64 val,
-                                                TCGv_i32 addr, TCGArg mem_index)
-{
-    *tcg_ctx.gen_opc_ptr++ = opc;
-    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(val);
-    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(addr);
-    *tcg_ctx.gen_opparam_ptr++ = mem_index;
-}
-
-static inline void tcg_gen_qemu_ldst_op_i64_i64(TCGOpcode opc, TCGv_i64 val,
-                                                TCGv_i64 addr, TCGArg mem_index)
-{
-    *tcg_ctx.gen_opc_ptr++ = opc;
-    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(val);
-    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(addr);
-    *tcg_ctx.gen_opparam_ptr++ = mem_index;
-}
-
static inline void tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                   TCGv_i32 arg3, TCGv_i32 arg4)
{
@@ -361,6 +343,21 @@ static inline void tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 arg1,
    *tcg_ctx.gen_opparam_ptr++ = arg6;
}
+static inline void tcg_add_param_i32(TCGv_i32 val)
+{
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(val);
+}
+
+static inline void tcg_add_param_i64(TCGv_i64 val)
+{
+#if TCG_TARGET_REG_BITS == 32
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(TCGV_LOW(val));
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(TCGV_HIGH(val));
+#else
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(val);
+#endif
+}
+
static inline void gen_set_label(int n)
{
    tcg_gen_op1i(INDEX_op_set_label, n);
@@ -1039,10 +1036,18 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i32();
-    tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
-                    TCGV_LOW(arg1), TCGV_LOW(arg2));
-    /* Allow the optimizer room to replace mulu2 with two moves. */
-    tcg_gen_op0(INDEX_op_nop);
+    if (TCG_TARGET_HAS_mulu2_i32) {
+        tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
+                        TCGV_LOW(arg1), TCGV_LOW(arg2));
+        /* Allow the optimizer room to replace mulu2 with two moves. */
+        tcg_gen_op0(INDEX_op_nop);
+    } else {
+        tcg_debug_assert(TCG_TARGET_HAS_muluh_i32);
+        tcg_gen_op3_i32(INDEX_op_mul_i32, TCGV_LOW(t0),
+                        TCGV_LOW(arg1), TCGV_LOW(arg2));
+        tcg_gen_op3_i32(INDEX_op_muluh_i32, TCGV_HIGH(t0),
+                        TCGV_LOW(arg1), TCGV_LOW(arg2));
+    }
    tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
    tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
@@ -2401,6 +2406,12 @@ static inline void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh,
        tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
        /* Allow the optimizer room to replace mulu2 with two moves. */
        tcg_gen_op0(INDEX_op_nop);
+    } else if (TCG_TARGET_HAS_muluh_i32) {
+        TCGv_i32 t = tcg_temp_new_i32();
+        tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
+        tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
+        tcg_gen_mov_i32(rl, t);
+        tcg_temp_free_i32(t);
    } else {
        TCGv_i64 t0 = tcg_temp_new_i64();
        TCGv_i64 t1 = tcg_temp_new_i64();
@@ -2420,6 +2431,12 @@ static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh,
        tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
        /* Allow the optimizer room to replace muls2 with two moves. */
        tcg_gen_op0(INDEX_op_nop);
+    } else if (TCG_TARGET_HAS_mulsh_i32) {
+        TCGv_i32 t = tcg_temp_new_i32();
+        tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
+        tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
+        tcg_gen_mov_i32(rl, t);
+        tcg_temp_free_i32(t);
    } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_mulu2_i32) {
        TCGv_i32 t0 = tcg_temp_new_i32();
        TCGv_i32 t1 = tcg_temp_new_i32();
@@ -2499,6 +2516,12 @@ static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh,
        tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
        /* Allow the optimizer room to replace mulu2 with two moves. */
        tcg_gen_op0(INDEX_op_nop);
+    } else if (TCG_TARGET_HAS_muluh_i64) {
+        TCGv_i64 t = tcg_temp_new_i64();
+        tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
+        tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
+        tcg_gen_mov_i64(rl, t);
+        tcg_temp_free_i64(t);
    } else if (TCG_TARGET_HAS_mulu2_i64) {
        TCGv_i64 t0 = tcg_temp_new_i64();
        TCGv_i64 t1 = tcg_temp_new_i64();
@@ -2540,6 +2563,12 @@ static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh,
        tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
        /* Allow the optimizer room to replace muls2 with two moves. */
        tcg_gen_op0(INDEX_op_nop);
+    } else if (TCG_TARGET_HAS_mulsh_i64) {
+        TCGv_i64 t = tcg_temp_new_i64();
+        tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
+        tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
+        tcg_gen_mov_i64(rl, t);
+        tcg_temp_free_i64(t);
    } else {
        TCGv_i64 t0 = tcg_temp_new_i64();
        int sizemask = 0;
@@ -2568,11 +2597,12 @@ static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh,
#define tcg_global_mem_new tcg_global_mem_new_i32
#define tcg_temp_local_new() tcg_temp_local_new_i32()
#define tcg_temp_free tcg_temp_free_i32
-#define tcg_gen_qemu_ldst_op tcg_gen_op3i_i32
-#define tcg_gen_qemu_ldst_op_i64 tcg_gen_qemu_ldst_op_i64_i32
#define TCGV_UNUSED(x) TCGV_UNUSED_I32(x)
#define TCGV_IS_UNUSED(x) TCGV_IS_UNUSED_I32(x)
#define TCGV_EQUAL(a, b) TCGV_EQUAL_I32(a, b)
+#define tcg_add_param_tl tcg_add_param_i32
+#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32
+#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32
#else
#define TCGv TCGv_i64
#define tcg_temp_new() tcg_temp_new_i64()
@@ -2580,11 +2610,12 @@ static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh,
#define tcg_global_mem_new tcg_global_mem_new_i64
#define tcg_temp_local_new() tcg_temp_local_new_i64()
#define tcg_temp_free tcg_temp_free_i64
-#define tcg_gen_qemu_ldst_op tcg_gen_op3i_i64
-#define tcg_gen_qemu_ldst_op_i64 tcg_gen_qemu_ldst_op_i64_i64
#define TCGV_UNUSED(x) TCGV_UNUSED_I64(x)
#define TCGV_IS_UNUSED(x) TCGV_IS_UNUSED_I64(x)
#define TCGV_EQUAL(a, b) TCGV_EQUAL_I64(a, b)
+#define tcg_add_param_tl tcg_add_param_i64
+#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64
+#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64
#endif
/* debug info: write the PC of the corresponding QEMU CPU instruction */
@@ -2599,7 +2630,7 @@ static inline void tcg_gen_debug_insn_start(uint64_t pc)
#endif
}
-static inline void tcg_gen_exit_tb(tcg_target_long val)
+static inline void tcg_gen_exit_tb(uintptr_t val)
{
    tcg_gen_op1i(INDEX_op_exit_tb, val);
}
@@ -2616,197 +2647,67 @@ static inline void tcg_gen_goto_tb(unsigned idx)
    tcg_gen_op1i(INDEX_op_goto_tb, idx);
}
-#if TCG_TARGET_REG_BITS == 32
-static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
-{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_ld8u, ret, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld8u, TCGV_LOW(ret), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
-#endif
-}
-
-static inline void tcg_gen_qemu_ld8s(TCGv ret, TCGv addr, int mem_index)
-{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_ld8s, ret, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld8s, TCGV_LOW(ret), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
-#endif
-}
-
-static inline void tcg_gen_qemu_ld16u(TCGv ret, TCGv addr, int mem_index)
-{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_ld16u, ret, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld16u, TCGV_LOW(ret), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
-#endif
-}
-static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index)
-{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_ld16s, ret, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld16s, TCGV_LOW(ret), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
-#endif
-}
-
-static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index)
-{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_ld32, ret, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld32, TCGV_LOW(ret), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
-#endif
-}
-
-static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index)
-{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_ld32, ret, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld32, TCGV_LOW(ret), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
-#endif
-}
-
-static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index)
-{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld64, TCGV_LOW(ret), TCGV_HIGH(ret), addr, mem_index);
-#else
-    tcg_gen_op5i_i32(INDEX_op_qemu_ld64, TCGV_LOW(ret), TCGV_HIGH(ret),
-                     TCGV_LOW(addr), TCGV_HIGH(addr), mem_index);
-#endif
-}
-
-static inline void tcg_gen_qemu_st8(TCGv arg, TCGv addr, int mem_index)
-{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_st8, arg, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_st8, TCGV_LOW(arg), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-#endif
-}
-
-static inline void tcg_gen_qemu_st16(TCGv arg, TCGv addr, int mem_index)
-{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_st16, arg, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_st16, TCGV_LOW(arg), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-#endif
-}
-
-static inline void tcg_gen_qemu_st32(TCGv arg, TCGv addr, int mem_index)
-{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_st32, arg, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_st32, TCGV_LOW(arg), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-#endif
-}
-
-static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
-{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op4i_i32(INDEX_op_qemu_st64, TCGV_LOW(arg), TCGV_HIGH(arg), addr,
-                     mem_index);
-#else
-    tcg_gen_op5i_i32(INDEX_op_qemu_st64, TCGV_LOW(arg), TCGV_HIGH(arg),
-                     TCGV_LOW(addr), TCGV_HIGH(addr), mem_index);
-#endif
-}
-
-#define tcg_gen_ld_ptr(R, A, O) tcg_gen_ld_i32(TCGV_PTR_TO_NAT(R), (A), (O))
-#define tcg_gen_discard_ptr(A) tcg_gen_discard_i32(TCGV_PTR_TO_NAT(A))
-
-#else /* TCG_TARGET_REG_BITS == 32 */
+void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, TCGMemOp);
+void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, TCGMemOp);
+void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, TCGMemOp);
+void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, TCGMemOp);
static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
{
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld8u, ret, addr, mem_index);
+    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_UB);
}
static inline void tcg_gen_qemu_ld8s(TCGv ret, TCGv addr, int mem_index)
{
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld8s, ret, addr, mem_index);
+    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_SB);
}
static inline void tcg_gen_qemu_ld16u(TCGv ret, TCGv addr, int mem_index)
{
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld16u, ret, addr, mem_index);
+    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TEUW);
}
static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index)
{
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld16s, ret, addr, mem_index);
+    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TESW);
}
static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index)
{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32, ret, addr, mem_index);
-#else
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32u, ret, addr, mem_index);
-#endif
+    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TEUL);
}
static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index)
{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32, ret, addr, mem_index);
-#else
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32s, ret, addr, mem_index);
-#endif
+    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TESL);
}
static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index)
{
-    tcg_gen_qemu_ldst_op_i64(INDEX_op_qemu_ld64, ret, addr, mem_index);
+    tcg_gen_qemu_ld_i64(ret, addr, mem_index, MO_TEQ);
}
static inline void tcg_gen_qemu_st8(TCGv arg, TCGv addr, int mem_index)
{
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_st8, arg, addr, mem_index);
+    tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_UB);
}
static inline void tcg_gen_qemu_st16(TCGv arg, TCGv addr, int mem_index)
{
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_st16, arg, addr, mem_index);
+    tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_TEUW);
}
static inline void tcg_gen_qemu_st32(TCGv arg, TCGv addr, int mem_index)
{
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_st32, arg, addr, mem_index);
+    tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_TEUL);
}
static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
{
-    tcg_gen_qemu_ldst_op_i64(INDEX_op_qemu_st64, arg, addr, mem_index);
+    tcg_gen_qemu_st_i64(arg, addr, mem_index, MO_TEQ);
}
-#define tcg_gen_ld_ptr(R, A, O) tcg_gen_ld_i64(TCGV_PTR_TO_NAT(R), (A), (O))
-#define tcg_gen_discard_ptr(A) tcg_gen_discard_i64(TCGV_PTR_TO_NAT(A))
-
-#endif /* TCG_TARGET_REG_BITS != 32 */
-
#if TARGET_LONG_BITS == 64
#define tcg_gen_movi_tl tcg_gen_movi_i64
#define tcg_gen_mov_tl tcg_gen_mov_i64
@@ -2965,17 +2866,25 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#endif
#if TCG_TARGET_REG_BITS == 32
-#define tcg_gen_add_ptr(R, A, B) tcg_gen_add_i32(TCGV_PTR_TO_NAT(R), \
-                                                 TCGV_PTR_TO_NAT(A), \
-                                                 TCGV_PTR_TO_NAT(B))
-#define tcg_gen_addi_ptr(R, A, B) tcg_gen_addi_i32(TCGV_PTR_TO_NAT(R), \
-                                                   TCGV_PTR_TO_NAT(A), (B))
-#define tcg_gen_ext_i32_ptr(R, A) tcg_gen_mov_i32(TCGV_PTR_TO_NAT(R), (A))
-#else /* TCG_TARGET_REG_BITS == 32 */
-#define tcg_gen_add_ptr(R, A, B) tcg_gen_add_i64(TCGV_PTR_TO_NAT(R), \
-                                                 TCGV_PTR_TO_NAT(A), \
-                                                 TCGV_PTR_TO_NAT(B))
-#define tcg_gen_addi_ptr(R, A, B) tcg_gen_addi_i64(TCGV_PTR_TO_NAT(R), \
-                                                   TCGV_PTR_TO_NAT(A), (B))
-#define tcg_gen_ext_i32_ptr(R, A) tcg_gen_ext_i32_i64(TCGV_PTR_TO_NAT(R), (A))
-#endif /* TCG_TARGET_REG_BITS != 32 */
+# define tcg_gen_ld_ptr(R, A, O) \
+    tcg_gen_ld_i32(TCGV_PTR_TO_NAT(R), (A), (O))
+# define tcg_gen_discard_ptr(A) \
+    tcg_gen_discard_i32(TCGV_PTR_TO_NAT(A))
+# define tcg_gen_add_ptr(R, A, B) \
+    tcg_gen_add_i32(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), TCGV_PTR_TO_NAT(B))
+# define tcg_gen_addi_ptr(R, A, B) \
+    tcg_gen_addi_i32(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), (B))
+# define tcg_gen_ext_i32_ptr(R, A) \
+    tcg_gen_mov_i32(TCGV_PTR_TO_NAT(R), (A))
+#else
+# define tcg_gen_ld_ptr(R, A, O) \
+    tcg_gen_ld_i64(TCGV_PTR_TO_NAT(R), (A), (O))
+# define tcg_gen_discard_ptr(A) \
+    tcg_gen_discard_i64(TCGV_PTR_TO_NAT(A))
+# define tcg_gen_add_ptr(R, A, B) \
+    tcg_gen_add_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), TCGV_PTR_TO_NAT(B))
+# define tcg_gen_addi_ptr(R, A, B) \
+    tcg_gen_addi_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), (B))
+# define tcg_gen_ext_i32_ptr(R, A) \
+    tcg_gen_ext_i32_i64(TCGV_PTR_TO_NAT(R), (A))
+#endif /* TCG_TARGET_REG_BITS == 32 */
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index a8af5b96a4..d71707d9bb 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -91,6 +91,8 @@ DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
+DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32))
+DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32))
DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32))
DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
@@ -167,6 +169,8 @@ DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64))
DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
+DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64))
+DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64))
/* QEMU specific */
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -176,79 +180,107 @@ DEF(debug_insn_start, 0, 0, 1, TCG_OPF_NOT_PRESENT)
#endif
DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
-/* Note: even if TARGET_LONG_BITS is not defined, the INDEX_op
   constants must be defined */
+
+#define IMPL_NEW_LDST \
+    (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS \
+     | IMPL(TCG_TARGET_HAS_new_ldst))
+
+#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+DEF(qemu_ld_i32, 1, 1, 2, IMPL_NEW_LDST)
+DEF(qemu_st_i32, 0, 2, 2, IMPL_NEW_LDST)
+# if TCG_TARGET_REG_BITS == 64
+DEF(qemu_ld_i64, 1, 1, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
+DEF(qemu_st_i64, 0, 2, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
+# else
+DEF(qemu_ld_i64, 2, 1, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
+DEF(qemu_st_i64, 0, 3, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
+# endif
+#else
+DEF(qemu_ld_i32, 1, 2, 2, IMPL_NEW_LDST)
+DEF(qemu_st_i32, 0, 3, 2, IMPL_NEW_LDST)
+DEF(qemu_ld_i64, 2, 2, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
+DEF(qemu_st_i64, 0, 4, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
+#endif
+
+#undef IMPL_NEW_LDST
+
+#define IMPL_OLD_LDST \
+    (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS \
+     | IMPL(!TCG_TARGET_HAS_new_ldst))
+
#if TCG_TARGET_REG_BITS == 32
#if TARGET_LONG_BITS == 32
-DEF(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8u, 1, 1, 1, IMPL_OLD_LDST)
#else
-DEF(qemu_ld8u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8u, 1, 2, 1, IMPL_OLD_LDST)
#endif
#if TARGET_LONG_BITS == 32
-DEF(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8s, 1, 1, 1, IMPL_OLD_LDST)
#else
-DEF(qemu_ld8s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8s, 1, 2, 1, IMPL_OLD_LDST)
#endif
#if TARGET_LONG_BITS == 32
-DEF(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16u, 1, 1, 1, IMPL_OLD_LDST)
#else
-DEF(qemu_ld16u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16u, 1, 2, 1, IMPL_OLD_LDST)
#endif
#if TARGET_LONG_BITS == 32
-DEF(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16s, 1, 1, 1, IMPL_OLD_LDST)
#else
-DEF(qemu_ld16s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16s, 1, 2, 1, IMPL_OLD_LDST)
#endif
#if TARGET_LONG_BITS == 32
-DEF(qemu_ld32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld32, 1, 1, 1, IMPL_OLD_LDST)
#else
-DEF(qemu_ld32, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld32, 1, 2, 1, IMPL_OLD_LDST)
#endif
#if TARGET_LONG_BITS == 32
-DEF(qemu_ld64, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld64, 2, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
#else
-DEF(qemu_ld64, 2, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld64, 2, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
#endif
#if TARGET_LONG_BITS == 32
-DEF(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st8, 0, 2, 1, IMPL_OLD_LDST)
#else
-DEF(qemu_st8, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st8, 0, 3, 1, IMPL_OLD_LDST)
#endif
#if TARGET_LONG_BITS == 32
-DEF(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st16, 0, 2, 1, IMPL_OLD_LDST)
#else
-DEF(qemu_st16, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st16, 0, 3, 1, IMPL_OLD_LDST)
#endif
#if TARGET_LONG_BITS == 32
-DEF(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st32, 0, 2, 1, IMPL_OLD_LDST)
#else
-DEF(qemu_st32, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st32, 0, 3, 1, IMPL_OLD_LDST)
#endif
#if TARGET_LONG_BITS == 32
-DEF(qemu_st64, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st64, 0, 3, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
#else
-DEF(qemu_st64, 0, 4, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st64, 0, 4, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
#endif
#else /* TCG_TARGET_REG_BITS == 32 */
-DEF(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld32u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld32s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld64, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_ld8s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_ld16u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_ld16s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_ld32, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_ld32u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_ld32s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_ld64, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_st64, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st8, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_st16, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_st32, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_st64, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
#endif /* TCG_TARGET_REG_BITS != 32 */
+#undef IMPL_OLD_LDST
+
#undef IMPL
#undef IMPL64
#undef DEF
diff --git a/tcg/tcg.c b/tcg/tcg.c
@@ -49,10 +49,10 @@
#include "tcg-op.h"
-#if TCG_TARGET_REG_BITS == 64
-# define ELF_CLASS ELFCLASS64
-#else
+#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS ELFCLASS32
+#else
+# define ELF_CLASS ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA ELFDATA2MSB
@@ -65,8 +65,8 @@
/* Forward declarations for functions declared in tcg-target.c and used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
-static void patch_reloc(uint8_t *code_ptr, int type,
-                        tcg_target_long value, tcg_target_long addend);
+static void patch_reloc(uint8_t *code_ptr, int type,
+                        intptr_t value, intptr_t addend);
/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
@@ -82,8 +82,8 @@ typedef struct {
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
-    tcg_target_long func_start;
-    tcg_target_long func_len;
+    uintptr_t func_start;
+    uintptr_t func_len;
} DebugFrameFDEHeader;
static void tcg_register_jit_int(void *buf, size_t size,
@@ -93,16 +93,19 @@ static void tcg_register_jit_int(void *buf, size_t size,
/* Forward declarations for functions declared and used in tcg-target.c. */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
-                       tcg_target_long arg2);
+                       intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
-                       tcg_target_long arg2);
+                       intptr_t arg2);
static int tcg_target_const_match(tcg_target_long val,
                                  const TCGArgConstraint *arg_ct);
+static void tcg_out_tb_init(TCGContext *s);
+static void tcg_out_tb_finalize(TCGContext *s);
+
TCGOpDef tcg_op_defs[] = {
#define DEF(s, oargs, iargs, cargs, flags) \
    { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
@@ -121,20 +124,29 @@ static inline void tcg_out8(TCGContext *s, uint8_t v)
{
static inline void tcg_out16(TCGContext *s, uint16_t v)
{
-    *(uint16_t *)s->code_ptr = v;
-    s->code_ptr += 2;
+    uint8_t *p = s->code_ptr;
+    *(uint16_t *)p = v;
+    s->code_ptr = p + 2;
}
static inline void tcg_out32(TCGContext *s, uint32_t v)
{
-    *(uint32_t *)s->code_ptr = v;
-    s->code_ptr += 4;
+    uint8_t *p = s->code_ptr;
+    *(uint32_t *)p = v;
+    s->code_ptr = p + 4;
+}
+
+static inline void tcg_out64(TCGContext *s, uint64_t v)
+{
+    uint8_t *p = s->code_ptr;
+    *(uint64_t *)p = v;
+    s->code_ptr = p + 8;
}
/* label relocation processing */
static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type,
-                          int label_index, long addend)
+                          int label_index, intptr_t addend)
{
    TCGLabel *l;
    TCGRelocation *r;
@@ -160,11 +172,12 @@ static void tcg_out_label(TCGContext *s, int label_index, void *ptr)
{
    TCGLabel *l;
TCGRelocation *r; - tcg_target_long value = (tcg_target_long)ptr; + intptr_t value = (intptr_t)ptr; l = &s->labels[label_index]; - if (l->has_value) + if (l->has_value) { tcg_abort(); + } r = l->u.first_reloc; while (r != NULL) { patch_reloc(r->ptr, r->type, value, r->addend); @@ -244,12 +257,41 @@ void tcg_pool_reset(TCGContext *s) s->pool_current = NULL; } +#include "helper.h" + +typedef struct TCGHelperInfo { + void *func; + const char *name; +} TCGHelperInfo; + +static const TCGHelperInfo all_helpers[] = { +#define GEN_HELPER 2 +#include "helper.h" + + /* Include tcg-runtime.c functions. */ + { tcg_helper_div_i32, "div_i32" }, + { tcg_helper_rem_i32, "rem_i32" }, + { tcg_helper_divu_i32, "divu_i32" }, + { tcg_helper_remu_i32, "remu_i32" }, + + { tcg_helper_shl_i64, "shl_i64" }, + { tcg_helper_shr_i64, "shr_i64" }, + { tcg_helper_sar_i64, "sar_i64" }, + { tcg_helper_div_i64, "div_i64" }, + { tcg_helper_rem_i64, "rem_i64" }, + { tcg_helper_divu_i64, "divu_i64" }, + { tcg_helper_remu_i64, "remu_i64" }, + { tcg_helper_mulsh_i64, "mulsh_i64" }, + { tcg_helper_muluh_i64, "muluh_i64" }, +}; + void tcg_context_init(TCGContext *s) { - int op, total_args, n; + int op, total_args, n, i; TCGOpDef *def; TCGArgConstraint *args_ct; int *sorted_args; + GHashTable *helper_table; memset(s, 0, sizeof(*s)); s->nb_globals = 0; @@ -275,6 +317,15 @@ void tcg_context_init(TCGContext *s) args_ct += n; } + /* Register helpers. */ + /* Use g_direct_hash/equal for direct pointer comparisons on func. */ + s->helpers = helper_table = g_hash_table_new(NULL, NULL); + + for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { + g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, + (gpointer)all_helpers[i].name); + } + tcg_target_init(s); } @@ -284,8 +335,7 @@ void tcg_prologue_init(TCGContext *s) s->code_buf = s->code_gen_prologue; s->code_ptr = s->code_buf; tcg_target_qemu_prologue(s); - flush_icache_range((tcg_target_ulong)s->code_buf, - (tcg_target_ulong)s->code_ptr); + flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { @@ -298,8 +348,7 @@ void tcg_prologue_init(TCGContext *s) #endif } -void tcg_set_frame(TCGContext *s, int reg, - tcg_target_long start, tcg_target_long size) +void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size) { s->frame_start = start; s->frame_end = start + size; @@ -324,13 +373,7 @@ void tcg_func_start(TCGContext *s) s->gen_opc_ptr = s->gen_opc_buf; s->gen_opparam_ptr = s->gen_opparam_buf; -#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) - /* Initialize qemu_ld/st labels to assist code generation at the end of TB - for TLB miss cases at the end of TB */ - s->qemu_ldst_labels = tcg_malloc(sizeof(TCGLabelQemuLdst) * - TCG_MAX_QEMU_LDST); - s->nb_qemu_ldst_labels = 0; -#endif + s->be = tcg_malloc(sizeof(TCGBackendData)); } static inline void tcg_temp_alloc(TCGContext *s, int n) @@ -382,7 +425,7 @@ TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name) } static inline int tcg_global_mem_new_internal(TCGType type, int reg, - tcg_target_long offset, + intptr_t offset, const char *name) { TCGContext *s = &tcg_ctx; @@ -442,21 +485,15 @@ static inline int tcg_global_mem_new_internal(TCGType type, int reg, return idx; } -TCGv_i32 tcg_global_mem_new_i32(int reg, tcg_target_long offset, - const char *name) +TCGv_i32 tcg_global_mem_new_i32(int reg, intptr_t offset, const char *name) { - int idx; - - idx = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name); + int idx = 
tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name); return MAKE_TCGV_I32(idx); } -TCGv_i64 tcg_global_mem_new_i64(int reg, tcg_target_long offset, - const char *name) +TCGv_i64 tcg_global_mem_new_i64(int reg, intptr_t offset, const char *name) { - int idx; - - idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name); + int idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name); return MAKE_TCGV_I64(idx); } @@ -618,25 +655,6 @@ int tcg_check_temp_count(void) } #endif -void tcg_register_helper(void *func, const char *name) -{ - TCGContext *s = &tcg_ctx; - int n; - if ((s->nb_helpers + 1) > s->allocated_helpers) { - n = s->allocated_helpers; - if (n == 0) { - n = 4; - } else { - n *= 2; - } - s->helpers = realloc(s->helpers, n * sizeof(TCGHelperInfo)); - s->allocated_helpers = n; - } - s->helpers[s->nb_helpers].func = (tcg_target_ulong)func; - s->helpers[s->nb_helpers].name = name; - s->nb_helpers++; -} - /* Note: we convert the 64 bit args to 32 bit and do some alignment and endian swap. Maybe it would be better to do the alignment and endian swap in tcg_reg_alloc_call(). */ @@ -793,6 +811,188 @@ void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, } #endif +static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st) +{ + switch (op & MO_SIZE) { + case MO_8: + op &= ~MO_BSWAP; + break; + case MO_16: + break; + case MO_32: + if (!is64) { + op &= ~MO_SIGN; + } + break; + case MO_64: + if (!is64) { + tcg_abort(); + } + break; + } + if (st) { + op &= ~MO_SIGN; + } + return op; +} + +static const TCGOpcode old_ld_opc[8] = { + [MO_UB] = INDEX_op_qemu_ld8u, + [MO_SB] = INDEX_op_qemu_ld8s, + [MO_UW] = INDEX_op_qemu_ld16u, + [MO_SW] = INDEX_op_qemu_ld16s, +#if TCG_TARGET_REG_BITS == 32 + [MO_UL] = INDEX_op_qemu_ld32, + [MO_SL] = INDEX_op_qemu_ld32, +#else + [MO_UL] = INDEX_op_qemu_ld32u, + [MO_SL] = INDEX_op_qemu_ld32s, +#endif + [MO_Q] = INDEX_op_qemu_ld64, +}; + +static const TCGOpcode old_st_opc[4] = { + [MO_UB] = INDEX_op_qemu_st8, + [MO_UW] = INDEX_op_qemu_st16, + [MO_UL] = INDEX_op_qemu_st32, + [MO_Q] = INDEX_op_qemu_st64, +}; + +void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop) +{ + memop = tcg_canonicalize_memop(memop, 0, 0); + + if (TCG_TARGET_HAS_new_ldst) { + *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32; + tcg_add_param_i32(val); + tcg_add_param_tl(addr); + *tcg_ctx.gen_opparam_ptr++ = memop; + *tcg_ctx.gen_opparam_ptr++ = idx; + return; + } + + /* The old opcodes only support target-endian memory operations. */ + assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8); + assert(old_ld_opc[memop & MO_SSIZE] != 0); + + if (TCG_TARGET_REG_BITS == 32) { + *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE]; + tcg_add_param_i32(val); + tcg_add_param_tl(addr); + *tcg_ctx.gen_opparam_ptr++ = idx; + } else { + TCGv_i64 val64 = tcg_temp_new_i64(); + + *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE]; + tcg_add_param_i64(val64); + tcg_add_param_tl(addr); + *tcg_ctx.gen_opparam_ptr++ = idx; + + tcg_gen_trunc_i64_i32(val, val64); + tcg_temp_free_i64(val64); + } +} + +void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop) +{ + memop = tcg_canonicalize_memop(memop, 0, 1); + + if (TCG_TARGET_HAS_new_ldst) { + *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32; + tcg_add_param_i32(val); + tcg_add_param_tl(addr); + *tcg_ctx.gen_opparam_ptr++ = memop; + *tcg_ctx.gen_opparam_ptr++ = idx; + return; + } + + /* The old opcodes only support target-endian memory operations. 
*/ + assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8); + assert(old_st_opc[memop & MO_SIZE] != 0); + + if (TCG_TARGET_REG_BITS == 32) { + *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE]; + tcg_add_param_i32(val); + tcg_add_param_tl(addr); + *tcg_ctx.gen_opparam_ptr++ = idx; + } else { + TCGv_i64 val64 = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(val64, val); + + *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE]; + tcg_add_param_i64(val64); + tcg_add_param_tl(addr); + *tcg_ctx.gen_opparam_ptr++ = idx; + + tcg_temp_free_i64(val64); + } +} + +void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) +{ + memop = tcg_canonicalize_memop(memop, 1, 0); + +#if TCG_TARGET_REG_BITS == 32 + if ((memop & MO_SIZE) < MO_64) { + tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop); + if (memop & MO_SIGN) { + tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31); + } else { + tcg_gen_movi_i32(TCGV_HIGH(val), 0); + } + return; + } +#endif + + if (TCG_TARGET_HAS_new_ldst) { + *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64; + tcg_add_param_i64(val); + tcg_add_param_tl(addr); + *tcg_ctx.gen_opparam_ptr++ = memop; + *tcg_ctx.gen_opparam_ptr++ = idx; + return; + } + + /* The old opcodes only support target-endian memory operations. */ + assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8); + assert(old_ld_opc[memop & MO_SSIZE] != 0); + + *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE]; + tcg_add_param_i64(val); + tcg_add_param_tl(addr); + *tcg_ctx.gen_opparam_ptr++ = idx; +} + +void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) +{ + memop = tcg_canonicalize_memop(memop, 1, 1); + +#if TCG_TARGET_REG_BITS == 32 + if ((memop & MO_SIZE) < MO_64) { + tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop); + return; + } +#endif + + if (TCG_TARGET_HAS_new_ldst) { + *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64; + tcg_add_param_i64(val); + tcg_add_param_tl(addr); + *tcg_ctx.gen_opparam_ptr++ = memop; + *tcg_ctx.gen_opparam_ptr++ = idx; + return; + } + + /* The old opcodes only support target-endian memory operations. */ + assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8); + assert(old_st_opc[memop & MO_SIZE] != 0); + + *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE]; + tcg_add_param_i64(val); + tcg_add_param_tl(addr); + *tcg_ctx.gen_opparam_ptr++ = idx; +} static void tcg_reg_alloc_start(TCGContext *s) { @@ -849,47 +1049,14 @@ char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg) return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I64(arg)); } -static int helper_cmp(const void *p1, const void *p2) -{ - const TCGHelperInfo *th1 = p1; - const TCGHelperInfo *th2 = p2; - if (th1->func < th2->func) - return -1; - else if (th1->func == th2->func) - return 0; - else - return 1; -} - -/* find helper definition (Note: A hash table would be better) */ -static TCGHelperInfo *tcg_find_helper(TCGContext *s, tcg_target_ulong val) +/* Find helper name. 
*/ +static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) { - int m, m_min, m_max; - TCGHelperInfo *th; - tcg_target_ulong v; - - if (unlikely(!s->helpers_sorted)) { - qsort(s->helpers, s->nb_helpers, sizeof(TCGHelperInfo), - helper_cmp); - s->helpers_sorted = 1; + const char *ret = NULL; + if (s->helpers) { + ret = g_hash_table_lookup(s->helpers, (gpointer)val); } - - /* binary search */ - m_min = 0; - m_max = s->nb_helpers - 1; - while (m_min <= m_max) { - m = (m_min + m_max) >> 1; - th = &s->helpers[m]; - v = th->func; - if (v == val) - return th; - else if (val < v) { - m_max = m - 1; - } else { - m_min = m + 1; - } - } - return NULL; + return ret; } static const char * const cond_name[] = @@ -908,6 +1075,22 @@ static const char * const cond_name[] = [TCG_COND_GTU] = "gtu" }; +static const char * const ldst_name[] = +{ + [MO_UB] = "ub", + [MO_SB] = "sb", + [MO_LEUW] = "leuw", + [MO_LESW] = "lesw", + [MO_LEUL] = "leul", + [MO_LESL] = "lesl", + [MO_LEQ] = "leq", + [MO_BEUW] = "beuw", + [MO_BESW] = "besw", + [MO_BEUL] = "beul", + [MO_BESL] = "besl", + [MO_BEQ] = "beq", +}; + void tcg_dump_ops(TCGContext *s) { const uint16_t *opc_ptr; @@ -974,7 +1157,7 @@ void tcg_dump_ops(TCGContext *s) } } else if (c == INDEX_op_movi_i32 || c == INDEX_op_movi_i64) { tcg_target_ulong val; - TCGHelperInfo *th; + const char *name; nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; @@ -982,9 +1165,9 @@ void tcg_dump_ops(TCGContext *s) qemu_log(" %s %s,$", def->name, tcg_get_arg_str_idx(s, buf, sizeof(buf), args[0])); val = args[1]; - th = tcg_find_helper(s, val); - if (th) { - qemu_log("%s", th->name); + name = tcg_find_helper(s, val); + if (name) { + qemu_log("%s", name); } else { if (c == INDEX_op_movi_i32) { qemu_log("0x%x", (uint32_t)val); @@ -1036,6 +1219,17 @@ void tcg_dump_ops(TCGContext *s) } i = 1; break; + case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_ld_i64: + case INDEX_op_qemu_st_i64: + if (args[k] < ARRAY_SIZE(ldst_name) && ldst_name[args[k]]) { + qemu_log(",%s", ldst_name[args[k++]]); + } else { + qemu_log(",$0x%" TCG_PRIlx, args[k++]); + } + i = 1; + break; default: i = 0; break; @@ -1243,12 +1437,13 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, static void tcg_liveness_analysis(TCGContext *s) { int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops; - TCGOpcode op, op_new; + TCGOpcode op, op_new, op_new2; TCGArg *args; const TCGOpDef *def; uint8_t *dead_temps, *mem_temps; uint16_t dead_args; uint8_t sync_args; + bool have_op_new2; s->gen_opc_ptr++; /* skip end */ @@ -1385,29 +1580,52 @@ static void tcg_liveness_analysis(TCGContext *s) goto do_not_remove; case INDEX_op_mulu2_i32: + op_new = INDEX_op_mul_i32; + op_new2 = INDEX_op_muluh_i32; + have_op_new2 = TCG_TARGET_HAS_muluh_i32; + goto do_mul2; case INDEX_op_muls2_i32: op_new = INDEX_op_mul_i32; + op_new2 = INDEX_op_mulsh_i32; + have_op_new2 = TCG_TARGET_HAS_mulsh_i32; goto do_mul2; case INDEX_op_mulu2_i64: + op_new = INDEX_op_mul_i64; + op_new2 = INDEX_op_muluh_i64; + have_op_new2 = TCG_TARGET_HAS_muluh_i64; + goto do_mul2; case INDEX_op_muls2_i64: op_new = INDEX_op_mul_i64; + op_new2 = INDEX_op_mulsh_i64; + have_op_new2 = TCG_TARGET_HAS_mulsh_i64; + goto do_mul2; do_mul2: args -= 4; nb_iargs = 2; nb_oargs = 2; - /* Likewise, test for the high part of the operation dead. */ if (dead_temps[args[1]] && !mem_temps[args[1]]) { if (dead_temps[args[0]] && !mem_temps[args[0]]) { + /* Both parts of the operation are dead. 
*/ goto do_remove; } + /* The high part of the operation is dead; generate the low. */ s->gen_opc_buf[op_index] = op = op_new; args[1] = args[2]; args[2] = args[3]; - assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); - tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1); - /* Fall through and mark the single-word operation live. */ - nb_oargs = 1; + } else if (have_op_new2 && dead_temps[args[0]] + && !mem_temps[args[0]]) { + /* The low part of the operation is dead; generate the high. */ + s->gen_opc_buf[op_index] = op = op_new2; + args[0] = args[1]; + args[1] = args[2]; + args[2] = args[3]; + } else { + goto do_not_remove; } + assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); + tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1); + /* Mark the single-word operation live. */ + nb_oargs = 1; goto do_not_remove; default: @@ -1581,7 +1799,7 @@ static void temp_allocate_frame(TCGContext *s, int temp) ts->mem_offset = s->current_frame_offset; ts->mem_reg = s->frame_reg; ts->mem_allocated = 1; - s->current_frame_offset += (tcg_target_long)sizeof(tcg_target_long); + s->current_frame_offset += sizeof(tcg_target_long); } /* sync register 'reg' by saving it to the corresponding temporary */ @@ -2044,7 +2262,9 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params; TCGArg arg, func_arg; TCGTemp *ts; - tcg_target_long stack_offset, call_stack_size, func_addr; + intptr_t stack_offset; + size_t call_stack_size; + uintptr_t func_addr; int const_func_arg, allocate_args; TCGRegSet allocated_regs; const TCGArgConstraint *arg_ct; @@ -2283,6 +2503,8 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, s->code_buf = gen_code_buf; s->code_ptr = gen_code_buf; + tcg_out_tb_init(s); + args = s->gen_opparam_buf; op_index = 0; @@ -2356,10 +2578,8 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, #endif } the_end: -#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) /* Generate TB finalization at the end of block */ tcg_out_tb_finalize(s); -#endif return -1; } @@ -2382,8 +2602,7 @@ int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf) tcg_gen_code_common(s, gen_code_buf, -1); /* flush instruction cache */ - flush_icache_range((tcg_target_ulong)gen_code_buf, - (tcg_target_ulong)s->code_ptr); + flush_icache_range((uintptr_t)gen_code_buf, (uintptr_t)s->code_ptr); return s->code_ptr - gen_code_buf; } @@ -21,15 +21,23 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ + +#ifndef TCG_H +#define TCG_H + #include "qemu-common.h" -/* Target word size (must be identical to pointer size). */ -#if UINTPTR_MAX == UINT32_MAX -# define TCG_TARGET_REG_BITS 32 -#elif UINTPTR_MAX == UINT64_MAX -# define TCG_TARGET_REG_BITS 64 -#else -# error Unknown pointer size for tcg target +#include "tcg-target.h" + +/* Default target word size to pointer size. 
*/ +#ifndef TCG_TARGET_REG_BITS +# if UINTPTR_MAX == UINT32_MAX +# define TCG_TARGET_REG_BITS 32 +# elif UINTPTR_MAX == UINT64_MAX +# define TCG_TARGET_REG_BITS 64 +# else +# error Unknown pointer size for tcg target +# endif #endif #if TCG_TARGET_REG_BITS == 32 @@ -46,7 +54,6 @@ typedef uint64_t tcg_target_ulong; #error unsupported #endif -#include "tcg-target.h" #include "tcg-runtime.h" #if TCG_TARGET_NB_REGS <= 32 @@ -85,6 +92,8 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 /* Turn some undef macros into true macros. */ #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -134,13 +143,13 @@ typedef struct TCGRelocation { struct TCGRelocation *next; int type; uint8_t *ptr; - tcg_target_long addend; -} TCGRelocation; + intptr_t addend; +} TCGRelocation; typedef struct TCGLabel { int has_value; union { - tcg_target_ulong value; + uintptr_t value; TCGRelocation *first_reloc; } u; } TCGLabel; @@ -173,9 +182,12 @@ typedef enum TCGType { TCG_TYPE_REG = TCG_TYPE_I64, #endif - /* An alias for the size of the native pointer. We don't currently - support any hosts with 64-bit registers and 32-bit pointers. */ - TCG_TYPE_PTR = TCG_TYPE_REG, + /* An alias for the size of the native pointer. */ +#if UINTPTR_MAX == UINT32_MAX + TCG_TYPE_PTR = TCG_TYPE_I32, +#else + TCG_TYPE_PTR = TCG_TYPE_I64, +#endif /* An alias for the size of the target "long", aka register. */ #if TARGET_LONG_BITS == 64 @@ -185,6 +197,60 @@ typedef enum TCGType { #endif } TCGType; +/* Constants for qemu_ld and qemu_st for the Memory Operation field. */ +typedef enum TCGMemOp { + MO_8 = 0, + MO_16 = 1, + MO_32 = 2, + MO_64 = 3, + MO_SIZE = 3, /* Mask for the above. */ + + MO_SIGN = 4, /* Sign-extended, otherwise zero-extended. */ + + MO_BSWAP = 8, /* Host reverse endian. */ +#ifdef HOST_WORDS_BIGENDIAN + MO_LE = MO_BSWAP, + MO_BE = 0, +#else + MO_LE = 0, + MO_BE = MO_BSWAP, +#endif +#ifdef TARGET_WORDS_BIGENDIAN + MO_TE = MO_BE, +#else + MO_TE = MO_LE, +#endif + + /* Combinations of the above, for ease of use. */ + MO_UB = MO_8, + MO_UW = MO_16, + MO_UL = MO_32, + MO_SB = MO_SIGN | MO_8, + MO_SW = MO_SIGN | MO_16, + MO_SL = MO_SIGN | MO_32, + MO_Q = MO_64, + + MO_LEUW = MO_LE | MO_UW, + MO_LEUL = MO_LE | MO_UL, + MO_LESW = MO_LE | MO_SW, + MO_LESL = MO_LE | MO_SL, + MO_LEQ = MO_LE | MO_Q, + + MO_BEUW = MO_BE | MO_UW, + MO_BEUL = MO_BE | MO_UL, + MO_BESW = MO_BE | MO_SW, + MO_BESL = MO_BE | MO_SL, + MO_BEQ = MO_BE | MO_Q, + + MO_TEUW = MO_TE | MO_UW, + MO_TEUL = MO_TE | MO_UL, + MO_TESW = MO_TE | MO_SW, + MO_TESL = MO_TE | MO_SL, + MO_TEQ = MO_TE | MO_Q, + + MO_SSIZE = MO_SIZE | MO_SIGN, +} TCGMemOp; + typedef tcg_target_ulong TCGArg; /* Define a type and accessor macros for variables. Using a struct is @@ -199,24 +265,6 @@ typedef tcg_target_ulong TCGArg; are aliases for target_ulong and host pointer sized values respectively. */ -#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) -/* Macros/structures for qemu_ld/st IR code optimization: - TCG_MAX_HELPER_LABELS is defined as same as OPC_BUF_SIZE in exec-all.h. 
*/ -#define TCG_MAX_QEMU_LDST 640 - -typedef struct TCGLabelQemuLdst { - int is_ld:1; /* qemu_ld: 1, qemu_st: 0 */ - int opc:4; - int addrlo_reg; /* reg index for low word of guest virtual addr */ - int addrhi_reg; /* reg index for high word of guest virtual addr */ - int datalo_reg; /* reg index for low word to be loaded or stored */ - int datahi_reg; /* reg index for high word to be loaded or stored */ - int mem_index; /* soft MMU memory index */ - uint8_t *raddr; /* gen code addr of the next IR of qemu_ld/st IR */ - uint8_t *label_ptr[2]; /* label pointers to be updated */ -} TCGLabelQemuLdst; -#endif - #ifdef CONFIG_DEBUG_TCG #define DEBUG_TCGV 1 #endif @@ -380,7 +428,7 @@ typedef struct TCGTemp { int reg; tcg_target_long val; int mem_reg; - tcg_target_long mem_offset; + intptr_t mem_offset; unsigned int fixed_reg:1; unsigned int mem_coherent:1; unsigned int mem_allocated:1; @@ -393,11 +441,6 @@ typedef struct TCGTemp { const char *name; } TCGTemp; -typedef struct TCGHelperInfo { - tcg_target_ulong func; - const char *name; -} TCGHelperInfo; - typedef struct TCGContext TCGContext; struct TCGContext { @@ -427,18 +470,15 @@ struct TCGContext { into account fixed registers */ int reg_to_temp[TCG_TARGET_NB_REGS]; TCGRegSet reserved_regs; - tcg_target_long current_frame_offset; - tcg_target_long frame_start; - tcg_target_long frame_end; + intptr_t current_frame_offset; + intptr_t frame_start; + intptr_t frame_end; int frame_reg; uint8_t *code_ptr; TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ - TCGHelperInfo *helpers; - int nb_helpers; - int allocated_helpers; - int helpers_sorted; + GHashTable *helpers; #ifdef CONFIG_PROFILER /* profiling info */ @@ -484,12 +524,8 @@ struct TCGContext { TBContext tb_ctx; -#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) - /* labels info for qemu_ld/st IRs - The labels help to generate TLB miss case codes at the end of TB */ - TCGLabelQemuLdst *qemu_ldst_labels; - int nb_qemu_ldst_labels; -#endif + /* The TCGBackendData structure is private to tcg-target.c. 
*/ + struct TCGBackendData *be; }; extern TCGContext tcg_ctx; @@ -522,12 +558,10 @@ void tcg_func_start(TCGContext *s); int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf); int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, long offset); -void tcg_set_frame(TCGContext *s, int reg, - tcg_target_long start, tcg_target_long size); +void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size); TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name); -TCGv_i32 tcg_global_mem_new_i32(int reg, tcg_target_long offset, - const char *name); +TCGv_i32 tcg_global_mem_new_i32(int reg, intptr_t offset, const char *name); TCGv_i32 tcg_temp_new_internal_i32(int temp_local); static inline TCGv_i32 tcg_temp_new_i32(void) { @@ -541,8 +575,7 @@ void tcg_temp_free_i32(TCGv_i32 arg); char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg); TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name); -TCGv_i64 tcg_global_mem_new_i64(int reg, tcg_target_long offset, - const char *name); +TCGv_i64 tcg_global_mem_new_i64(int reg, intptr_t offset, const char *name); TCGv_i64 tcg_temp_new_internal_i64(int temp_local); static inline TCGv_i64 tcg_temp_new_i64(void) { @@ -637,11 +670,11 @@ do {\ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs); -#if TCG_TARGET_REG_BITS == 32 +#if UINTPTR_MAX == UINT32_MAX #define TCGV_NAT_TO_PTR(n) MAKE_TCGV_PTR(GET_TCGV_I32(n)) #define TCGV_PTR_TO_NAT(n) MAKE_TCGV_I32(GET_TCGV_PTR(n)) -#define tcg_const_ptr(V) TCGV_NAT_TO_PTR(tcg_const_i32((tcg_target_long)(V))) +#define tcg_const_ptr(V) TCGV_NAT_TO_PTR(tcg_const_i32((intptr_t)(V))) #define tcg_global_reg_new_ptr(R, N) \ TCGV_NAT_TO_PTR(tcg_global_reg_new_i32((R), (N))) #define tcg_global_mem_new_ptr(R, O, N) \ @@ -652,7 +685,7 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs); #define TCGV_NAT_TO_PTR(n) MAKE_TCGV_PTR(GET_TCGV_I64(n)) #define TCGV_PTR_TO_NAT(n) MAKE_TCGV_I64(GET_TCGV_PTR(n)) -#define tcg_const_ptr(V) TCGV_NAT_TO_PTR(tcg_const_i64((tcg_target_long)(V))) +#define tcg_const_ptr(V) TCGV_NAT_TO_PTR(tcg_const_i64((intptr_t)(V))) #define tcg_global_reg_new_ptr(R, N) \ TCGV_NAT_TO_PTR(tcg_global_reg_new_i64((R), (N))) #define tcg_global_mem_new_ptr(R, O, N) \ @@ -671,8 +704,6 @@ TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, TCGArg *args, TCGOpDef *tcg_op_def); /* only used for debugging purposes */ -void tcg_register_helper(void *func, const char *name); -const char *tcg_helper_get_name(TCGContext *s, void *func); void tcg_dump_ops(TCGContext *s); void dump_ops(const uint16_t *opc_buf, const TCGArg *opparam_buf); @@ -731,13 +762,92 @@ TCGv_i64 tcg_const_local_i64(int64_t val); #if !defined(tcg_qemu_tb_exec) # define tcg_qemu_tb_exec(env, tb_ptr) \ - ((tcg_target_ulong (*)(void *, void *))tcg_ctx.code_gen_prologue)(env, \ - tb_ptr) + ((uintptr_t (*)(void *, void *))tcg_ctx.code_gen_prologue)(env, tb_ptr) #endif void tcg_register_jit(void *buf, size_t buf_size); -#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) -/* Generate TB finalization at the end of block */ -void tcg_out_tb_finalize(TCGContext *s); +/* + * Memory helpers that will be used by TCG generated code. + */ +#ifdef CONFIG_SOFTMMU +/* Value zero-extended to tcg register size. 
*/ +tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, + int mmu_idx, uintptr_t retaddr); +tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, + int mmu_idx, uintptr_t retaddr); +tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, + int mmu_idx, uintptr_t retaddr); +uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, + int mmu_idx, uintptr_t retaddr); +tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, + int mmu_idx, uintptr_t retaddr); +tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, + int mmu_idx, uintptr_t retaddr); +uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, + int mmu_idx, uintptr_t retaddr); + +/* Value sign-extended to tcg register size. */ +tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, + int mmu_idx, uintptr_t retaddr); +tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, + int mmu_idx, uintptr_t retaddr); +tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, + int mmu_idx, uintptr_t retaddr); +tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, + int mmu_idx, uintptr_t retaddr); +tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, + int mmu_idx, uintptr_t retaddr); + +void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, + int mmu_idx, uintptr_t retaddr); +void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + int mmu_idx, uintptr_t retaddr); +void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + int mmu_idx, uintptr_t retaddr); +void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + int mmu_idx, uintptr_t retaddr); +void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + int mmu_idx, uintptr_t retaddr); + +/* Temporary aliases until backends are converted. 
+/* Temporary aliases until backends are converted.  */
+#ifdef TARGET_WORDS_BIGENDIAN
+# define helper_ret_ldsw_mmu  helper_be_ldsw_mmu
+# define helper_ret_lduw_mmu  helper_be_lduw_mmu
+# define helper_ret_ldsl_mmu  helper_be_ldsl_mmu
+# define helper_ret_ldul_mmu  helper_be_ldul_mmu
+# define helper_ret_ldq_mmu   helper_be_ldq_mmu
+# define helper_ret_stw_mmu   helper_be_stw_mmu
+# define helper_ret_stl_mmu   helper_be_stl_mmu
+# define helper_ret_stq_mmu   helper_be_stq_mmu
+#else
+# define helper_ret_ldsw_mmu  helper_le_ldsw_mmu
+# define helper_ret_lduw_mmu  helper_le_lduw_mmu
+# define helper_ret_ldsl_mmu  helper_le_ldsl_mmu
+# define helper_ret_ldul_mmu  helper_le_ldul_mmu
+# define helper_ret_ldq_mmu   helper_le_ldq_mmu
+# define helper_ret_stw_mmu   helper_le_stw_mmu
+# define helper_ret_stl_mmu   helper_le_stl_mmu
+# define helper_ret_stq_mmu   helper_le_stq_mmu
 #endif
+
+uint8_t helper_ldb_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint16_t helper_ldw_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint32_t helper_ldl_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+
+void helper_stb_mmu(CPUArchState *env, target_ulong addr,
+                    uint8_t val, int mmu_idx);
+void helper_stw_mmu(CPUArchState *env, target_ulong addr,
+                    uint16_t val, int mmu_idx);
+void helper_stl_mmu(CPUArchState *env, target_ulong addr,
+                    uint32_t val, int mmu_idx);
+void helper_stq_mmu(CPUArchState *env, target_ulong addr,
+                    uint64_t val, int mmu_idx);
+#endif /* CONFIG_SOFTMMU */
+
+#endif /* TCG_H */
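For clarity, the temporary helper_ret_* aliases above are plain
preprocessor indirection: an unconverted backend keeps a single spelling,
and the endian-specific helper is selected at compile time from
TARGET_WORDS_BIGENDIAN. A hedged sketch, with env, addr, mem_idx and ra
assumed to be in scope:

    /* With TARGET_WORDS_BIGENDIAN defined, this call ... */
    uint64_t v = helper_ret_ldq_mmu(env, addr, mem_idx, ra);
    /* ... compiles exactly as if it had been written: */
    uint64_t w = helper_be_ldq_mmu(env, addr, mem_idx, ra);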
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index e118bc7179..fc80704de8 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -22,6 +22,8 @@
  * THE SOFTWARE.
  */
 
+#include "tcg-be-null.h"
+
 /* TODO list:
  * - See TODO comments in code.
  */
@@ -370,7 +372,7 @@ static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 #endif
 
 static void patch_reloc(uint8_t *code_ptr, int type,
-                        tcg_target_long value, tcg_target_long addend)
+                        intptr_t value, intptr_t addend)
 {
     /* tcg_out_reloc always uses the same type, addend. */
     assert(type == sizeof(tcg_target_long));
@@ -415,13 +417,6 @@ static void tcg_out_i(TCGContext *s, tcg_target_ulong v)
     s->code_ptr += sizeof(tcg_target_ulong);
 }
 
-/* Write 64 bit value. */
-static void tcg_out64(TCGContext *s, uint64_t v)
-{
-    *(uint64_t *)s->code_ptr = v;
-    s->code_ptr += sizeof(v);
-}
-
 /* Write opcode. */
 static void tcg_out_op_t(TCGContext *s, TCGOpcode op)
 {
@@ -488,7 +483,7 @@ static void tci_out_label(TCGContext *s, TCGArg arg)
 }
 
 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
-                       tcg_target_long arg2)
+                       intptr_t arg2)
 {
     uint8_t *old_code_ptr = s->code_ptr;
     if (type == TCG_TYPE_I32) {
@@ -677,7 +672,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_shl_i64:
     case INDEX_op_shr_i64:
     case INDEX_op_sar_i64:
-        /* TODO: Implementation of rotl_i64, rotr_i64 missing in tci.c. */
     case INDEX_op_rotl_i64:     /* Optional (TCG_TARGET_HAS_rot_i64). */
     case INDEX_op_rotr_i64:     /* Optional (TCG_TARGET_HAS_rot_i64). */
         tcg_out_r(s, args[0]);
@@ -842,7 +836,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 }
 
 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
-                       tcg_target_long arg2)
+                       intptr_t arg2)
 {
     uint8_t *old_code_ptr = s->code_ptr;
     if (type == TCG_TYPE_I32) {
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index d7fc14eb17..6e1da8c007 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -44,6 +44,14 @@
 
 #define TCG_TARGET_INTERPRETER 1
 
+#if UINTPTR_MAX == UINT32_MAX
+# define TCG_TARGET_REG_BITS 32
+#elif UINTPTR_MAX == UINT64_MAX
+# define TCG_TARGET_REG_BITS 64
+#else
+# error Unknown pointer size for tci target
+#endif
+
 #ifdef CONFIG_DEBUG_TCG
 /* Enable debug output. */
 #define CONFIG_DEBUG_TCG_INTERPRETER
@@ -76,6 +84,8 @@
 #define TCG_TARGET_HAS_rot_i32          1
 #define TCG_TARGET_HAS_movcond_i32      0
 #define TCG_TARGET_HAS_muls2_i32        0
+#define TCG_TARGET_HAS_muluh_i32        0
+#define TCG_TARGET_HAS_mulsh_i32        0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_bswap16_i64      1
@@ -100,15 +110,18 @@
 #define TCG_TARGET_HAS_rot_i64          1
 #define TCG_TARGET_HAS_movcond_i64      0
 #define TCG_TARGET_HAS_muls2_i64        0
-
 #define TCG_TARGET_HAS_add2_i32         0
 #define TCG_TARGET_HAS_sub2_i32         0
 #define TCG_TARGET_HAS_mulu2_i32        0
 #define TCG_TARGET_HAS_add2_i64         0
 #define TCG_TARGET_HAS_sub2_i64         0
 #define TCG_TARGET_HAS_mulu2_i64        0
+#define TCG_TARGET_HAS_muluh_i64        0
+#define TCG_TARGET_HAS_mulsh_i64        0
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
+#define TCG_TARGET_HAS_new_ldst         0
+
 /* Number of registers available.
    For 32 bit hosts, we need more than 8 registers (call arguments). */
 /* #define TCG_TARGET_NB_REGS 8 */
@@ -166,11 +179,10 @@ typedef enum {
 
 void tci_disas(uint8_t opc);
 
-tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr);
+uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr);
 #define tcg_qemu_tb_exec tcg_qemu_tb_exec
 
-static inline void flush_icache_range(tcg_target_ulong start,
-                                      tcg_target_ulong stop)
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
 }
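The UINTPTR_MAX probe added to tci/tcg-target.h above is a standard C99
idiom and can be exercised in isolation. A minimal, self-contained sketch
(the macro name HOST_REG_BITS is an invention for this example, not part
of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Derive the host register width from the pointer type, exactly as
       the tci target now does for TCG_TARGET_REG_BITS. */
    #if UINTPTR_MAX == UINT32_MAX
    # define HOST_REG_BITS 32
    #elif UINTPTR_MAX == UINT64_MAX
    # define HOST_REG_BITS 64
    #else
    # error "unknown pointer size"
    #endif

    int main(void)
    {
        printf("host register width: %d bits\n", HOST_REG_BITS);
        return 0;
    }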