diff options
author | Chanho Park <chanho61.park@samsung.com> | 2014-09-05 20:35:53 +0900 |
---|---|---|
committer | Chanho Park <chanho61.park@samsung.com> | 2014-09-05 20:35:53 +0900 |
commit | 16b1353a36171ae06d63fd309f4772dbfb1da113 (patch) | |
tree | cf6c297ee81aba0d9b47f23d78a889667e7bce48 /tcg/ia64 | |
parent | a15119db2ff5c2fdfdeb913b297bf8aa3399132e (diff) | |
download | qemu-16b1353a36171ae06d63fd309f4772dbfb1da113.tar.gz qemu-16b1353a36171ae06d63fd309f4772dbfb1da113.tar.bz2 qemu-16b1353a36171ae06d63fd309f4772dbfb1da113.zip |
Imported Upstream version 2.1.0upstream/2.1.0
Diffstat (limited to 'tcg/ia64')
-rw-r--r-- | tcg/ia64/tcg-target.c | 721 | ||||
-rw-r--r-- | tcg/ia64/tcg-target.h | 9 |
2 files changed, 331 insertions, 399 deletions
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 2d8e00cd9..6bc992464 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -23,8 +23,6 @@ * THE SOFTWARE. */ -#include "tcg-be-null.h" - /* * Register definitions */ @@ -221,10 +219,12 @@ enum { OPC_ALLOC_M34 = 0x02c00000000ull, OPC_BR_DPTK_FEW_B1 = 0x08400000000ull, OPC_BR_SPTK_MANY_B1 = 0x08000001000ull, + OPC_BR_CALL_SPNT_FEW_B3 = 0x0a200000000ull, OPC_BR_SPTK_MANY_B4 = 0x00100001000ull, OPC_BR_CALL_SPTK_MANY_B5 = 0x02100001000ull, OPC_BR_RET_SPTK_MANY_B4 = 0x00108001100ull, OPC_BRL_SPTK_MANY_X3 = 0x18000001000ull, + OPC_BRL_CALL_SPNT_MANY_X4 = 0x1a200001000ull, OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull, OPC_CMP_LT_A6 = 0x18000000000ull, OPC_CMP_LTU_A6 = 0x1a000000000ull, @@ -356,6 +356,15 @@ static inline uint64_t tcg_opc_b1(int qp, uint64_t opc, uint64_t imm) | (qp & 0x3f); } +static inline uint64_t tcg_opc_b3(int qp, uint64_t opc, int b1, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* s */ + | ((imm & 0x0fffff) << 13) /* imm20b */ + | ((b1 & 0x7) << 6) + | (qp & 0x3f); +} + static inline uint64_t tcg_opc_b4(int qp, uint64_t opc, int b2) { return opc @@ -683,112 +692,32 @@ static inline uint64_t tcg_opc_x4(int qp, uint64_t opc, int b1, uint64_t imm) /* - * Relocations + * Relocations - Note that we never encode branches elsewhere than slot 2. */ -static inline void reloc_pcrel21b(void *pc, intptr_t target) +static void reloc_pcrel21b_slot2(tcg_insn_unit *pc, tcg_insn_unit *target) { - uint64_t imm; - int64_t disp; - int slot; - - slot = (intptr_t)pc & 3; - pc = (void *)((intptr_t)pc & ~3); - - disp = target - (intptr_t)pc; - imm = (uint64_t) disp >> 4; + uint64_t imm = target - pc; - switch(slot) { - case 0: - *(uint64_t *)(pc + 0) = (*(uint64_t *)(pc + 8) & 0xfffffdc00003ffffull) - | ((imm & 0x100000) << 21) /* s */ - | ((imm & 0x0fffff) << 18); /* imm20b */ - break; - case 1: - *(uint64_t *)(pc + 8) = (*(uint64_t *)(pc + 8) & 0xfffffffffffb8000ull) - | ((imm & 0x100000) >> 2) /* s */ - | ((imm & 0x0fffe0) >> 5); /* imm20b */ - *(uint64_t *)(pc + 0) = (*(uint64_t *)(pc + 0) & 0x07ffffffffffffffull) - | ((imm & 0x00001f) << 59); /* imm20b */ - break; - case 2: - *(uint64_t *)(pc + 8) = (*(uint64_t *)(pc + 8) & 0xf700000fffffffffull) - | ((imm & 0x100000) << 39) /* s */ - | ((imm & 0x0fffff) << 36); /* imm20b */ - break; - } + pc->hi = (pc->hi & 0xf700000fffffffffull) + | ((imm & 0x100000) << 39) /* s */ + | ((imm & 0x0fffff) << 36); /* imm20b */ } -static inline uint64_t get_reloc_pcrel21b (void *pc) +static uint64_t get_reloc_pcrel21b_slot2(tcg_insn_unit *pc) { - int64_t low, high; - int slot; - - slot = (tcg_target_long) pc & 3; - pc = (void *)((tcg_target_long) pc & ~3); - - low = (*(uint64_t *)(pc + 0)); - high = (*(uint64_t *)(pc + 8)); + int64_t high = pc->hi; - switch(slot) { - case 0: - return ((low >> 21) & 0x100000) + /* s */ - ((low >> 18) & 0x0fffff); /* imm20b */ - case 1: - return ((high << 2) & 0x100000) + /* s */ - ((high << 5) & 0x0fffe0) + /* imm20b */ - ((low >> 59) & 0x00001f); /* imm20b */ - case 2: - return ((high >> 39) & 0x100000) + /* s */ - ((high >> 36) & 0x0fffff); /* imm20b */ - default: - tcg_abort(); - } + return ((high >> 39) & 0x100000) + /* s */ + ((high >> 36) & 0x0fffff); /* imm20b */ } -static inline void reloc_pcrel60b(void *pc, intptr_t target) -{ - int64_t disp; - uint64_t imm; - - disp = target - (intptr_t)pc; - imm = (uint64_t) disp >> 4; - - *(uint64_t *)(pc + 8) = (*(uint64_t *)(pc + 8) & 0xf700000fff800000ull) - | (imm & 0x0800000000000000ull) /* s */ - | ((imm & 0x07fffff000000000ull) >> 36) /* imm39 */ - | ((imm & 0x00000000000fffffull) << 36); /* imm20b */ - *(uint64_t *)(pc + 0) = (*(uint64_t *)(pc + 0) & 0x00003fffffffffffull) - | ((imm & 0x0000000ffff00000ull) << 28); /* imm39 */ -} - -static inline uint64_t get_reloc_pcrel60b (void *pc) -{ - int64_t low, high; - - low = (*(uint64_t *)(pc + 0)); - high = (*(uint64_t *)(pc + 8)); - - return ((high) & 0x0800000000000000ull) + /* s */ - ((high >> 36) & 0x00000000000fffffull) + /* imm20b */ - ((high << 36) & 0x07fffff000000000ull) + /* imm39 */ - ((low >> 28) & 0x0000000ffff00000ull); /* imm39 */ -} - - -static void patch_reloc(uint8_t *code_ptr, int type, +static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { - value += addend; - switch (type) { - case R_IA64_PCREL21B: - reloc_pcrel21b(code_ptr, value); - break; - case R_IA64_PCREL60B: - reloc_pcrel60b(code_ptr, value); - default: - tcg_abort(); - } + assert(addend == 0); + assert(type == R_IA64_PCREL21B); + reloc_pcrel21b_slot2(code_ptr, (tcg_insn_unit *)value); } /* @@ -815,6 +744,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) #if defined(CONFIG_SOFTMMU) tcg_regset_reset_reg(ct->u.regs, TCG_REG_R56); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R57); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R58); #endif break; case 'Z': @@ -832,7 +762,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) } /* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct) { int ct; @@ -851,7 +781,7 @@ static inline int tcg_target_const_match(tcg_target_long val, * Code generation */ -static uint8_t *tb_ret_addr; +static tcg_insn_unit *tb_ret_addr; static inline void tcg_out_bundle(TCGContext *s, int template, uint64_t slot0, uint64_t slot1, @@ -862,9 +792,10 @@ static inline void tcg_out_bundle(TCGContext *s, int template, slot1 &= 0x1ffffffffffull; /* 41 bits */ slot2 &= 0x1ffffffffffull; /* 41 bits */ - *(uint64_t *)(s->code_ptr + 0) = (slot1 << 46) | (slot0 << 5) | template; - *(uint64_t *)(s->code_ptr + 8) = (slot2 << 23) | (slot1 >> 18); - s->code_ptr += 16; + *s->code_ptr++ = (tcg_insn_unit){ + (slot1 << 46) | (slot0 << 5) | template, + (slot2 << 23) | (slot1 >> 18) + }; } static inline uint64_t tcg_opc_mov_a(int qp, TCGReg dst, TCGReg src) @@ -899,33 +830,34 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, static void tcg_out_br(TCGContext *s, int label_index) { TCGLabel *l = &s->labels[label_index]; + uint64_t imm; /* We pay attention here to not modify the branch target by reading the existing value and using it again. This ensure that caches and memory are kept coherent during retranslation. */ - tcg_out_bundle(s, mmB, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_b1 (TCG_REG_P0, OPC_BR_SPTK_MANY_B1, - get_reloc_pcrel21b(s->code_ptr + 2))); - if (l->has_value) { - reloc_pcrel21b((s->code_ptr - 16) + 2, l->u.value); + imm = l->u.value_ptr - s->code_ptr; } else { - tcg_out_reloc(s, (s->code_ptr - 16) + 2, - R_IA64_PCREL21B, label_index, 0); + imm = get_reloc_pcrel21b_slot2(s->code_ptr); + tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, label_index, 0); } + + tcg_out_bundle(s, mmB, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_b1(TCG_REG_P0, OPC_BR_SPTK_MANY_B1, imm)); } -static inline void tcg_out_calli(TCGContext *s, uintptr_t addr) +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *desc) { + uintptr_t func = desc->lo, gp = desc->hi, disp; + /* Look through the function descriptor. */ - uintptr_t disp, *desc = (uintptr_t *)addr; tcg_out_bundle(s, mlx, INSN_NOP_M, - tcg_opc_l2 (desc[1]), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, desc[1])); - disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4; + tcg_opc_l2 (gp), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, gp)); + disp = (tcg_insn_unit *)func - s->code_ptr; tcg_out_bundle(s, mLX, INSN_NOP_M, tcg_opc_l4 (disp), @@ -933,32 +865,22 @@ static inline void tcg_out_calli(TCGContext *s, uintptr_t addr) TCG_REG_B0, disp)); } -static inline void tcg_out_callr(TCGContext *s, TCGReg addr) -{ - tcg_out_bundle(s, MmI, - tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, addr), - tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R3, 8, addr), - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, - TCG_REG_B6, TCG_REG_R2, 0)); - tcg_out_bundle(s, mmB, - tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R3), - INSN_NOP_M, - tcg_opc_b5 (TCG_REG_P0, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); -} - static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg) { - int64_t disp; - uint64_t imm; + uint64_t imm, opc1; - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg); + /* At least arg == 0 is a common operation. */ + if (arg == sextract64(arg, 0, 22)) { + opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R8, arg); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg); + opc1 = INSN_NOP_M; + } - disp = tb_ret_addr - s->code_ptr; - imm = (uint64_t)disp >> 4; + imm = tb_ret_addr - s->code_ptr; tcg_out_bundle(s, mLX, - INSN_NOP_M, + opc1, tcg_opc_l3 (imm), tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm)); } @@ -984,7 +906,7 @@ static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg) tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); } - s->tb_next_offset[arg] = s->code_ptr - s->code_buf; + s->tb_next_offset[arg] = tcg_current_code_size(s); } static inline void tcg_out_jmp(TCGContext *s, TCGArg addr) @@ -1505,19 +1427,22 @@ static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, TCGReg arg2, int label_index, int cmp4) { TCGLabel *l = &s->labels[label_index]; + uint64_t imm; - tcg_out_bundle(s, miB, - INSN_NOP_M, - tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), - tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1, - get_reloc_pcrel21b(s->code_ptr + 2))); - + /* We pay attention here to not modify the branch target by reading + the existing value and using it again. This ensure that caches and + memory are kept coherent during retranslation. */ if (l->has_value) { - reloc_pcrel21b((s->code_ptr - 16) + 2, l->u.value); + imm = l->u.value_ptr - s->code_ptr; } else { - tcg_out_reloc(s, (s->code_ptr - 16) + 2, - R_IA64_PCREL21B, label_index, 0); + imm = get_reloc_pcrel21b_slot2(s->code_ptr); + tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, label_index, 0); } + + tcg_out_bundle(s, miB, + INSN_NOP_M, + tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), + tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1, imm)); } static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret, @@ -1558,238 +1483,285 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret, } #if defined(CONFIG_SOFTMMU) +/* We're expecting to use an signed 22-bit immediate add. */ +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) + > 0x1fffff) + /* Load and compare a TLB entry, and return the result in (p6, p7). - R2 is loaded with the address of the addend TLB entry. - R57 is loaded with the address, zero extented on 32-bit targets. */ -static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg, - TCGMemOp s_bits, uint64_t offset_rw, - uint64_t offset_addend) -{ - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2, + R2 is loaded with the addend TLB entry. + R57 is loaded with the address, zero extented on 32-bit targets. + R1, R3 are clobbered, leaving R56 free for... + BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store. */ +static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, + TCGMemOp s_bits, int off_rw, int off_add, + uint64_t bswap1, uint64_t bswap2) +{ + /* + .mii + mov r2 = off_rw + extr.u r3 = addr_reg, ... # extract tlb page + zxt4 r57 = addr_reg # or mov for 64-bit guest + ;; + .mii + addl r2 = r2, areg0 + shl r3 = r3, cteb # via dep.z + dep r1 = 0, r57, ... # zero page ofs, keep align + ;; + .mmi + add r2 = r2, r3 + ;; + ld4 r3 = [r2], off_add-off_rw # or ld8 for 64-bit guest + nop + ;; + .mmi + nop + cmp.eq p6, p7 = r3, r58 + nop + ;; + */ + tcg_out_bundle(s, miI, + tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, off_rw), + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R3, addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2, - TCG_REG_R2, 63 - CPU_TLB_ENTRY_BITS, - 63 - CPU_TLB_ENTRY_BITS)); - tcg_out_bundle(s, mII, - tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2, - offset_rw, TCG_REG_R2), tcg_opc_ext_i(TCG_REG_P0, TARGET_LONG_BITS == 32 ? MO_UL : MO_Q, - TCG_REG_R57, addr_reg), + TCG_REG_R57, addr_reg)); + tcg_out_bundle(s, miI, tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, TCG_AREG0)); - tcg_out_bundle(s, mII, + TCG_REG_R2, TCG_AREG0), + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3, + TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS, + 63 - CPU_TLB_ENTRY_BITS), + tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0, + TCG_REG_R57, 63 - s_bits, + TARGET_PAGE_BITS - s_bits - 1)); + tcg_out_bundle(s, MmI, + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, + TCG_REG_R2, TCG_REG_R2, TCG_REG_R3), tcg_opc_m3 (TCG_REG_P0, (TARGET_LONG_BITS == 32 - ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R56, - TCG_REG_R2, offset_addend - offset_rw), - tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R3, 0, - TCG_REG_R57, 63 - s_bits, - TARGET_PAGE_BITS - s_bits - 1), + ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3, + TCG_REG_R2, off_add - off_rw), + bswap1); + tcg_out_bundle(s, mmI, + tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, TCG_REG_R2), tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6, - TCG_REG_P7, TCG_REG_R3, TCG_REG_R56)); + TCG_REG_P7, TCG_REG_R1, TCG_REG_R3), + bswap2); } -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ -static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, -}; +#define TCG_MAX_QEMU_LDST 640 + +typedef struct TCGLabelQemuLdst { + bool is_ld; + TCGMemOp size; + tcg_insn_unit *label_ptr; /* label pointers to be updated */ +} TCGLabelQemuLdst; + +typedef struct TCGBackendData { + int nb_ldst_labels; + TCGLabelQemuLdst ldst_labels[TCG_MAX_QEMU_LDST]; +} TCGBackendData; + +static inline void tcg_out_tb_init(TCGContext *s) +{ + s->be->nb_ldst_labels = 0; +} + +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, + tcg_insn_unit *label_ptr) +{ + TCGBackendData *be = s->be; + TCGLabelQemuLdst *l = &be->ldst_labels[be->nb_ldst_labels++]; -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, - TCGMemOp opc) + assert(be->nb_ldst_labels <= TCG_MAX_QEMU_LDST); + l->is_ld = is_ld; + l->size = opc & MO_SIZE; + l->label_ptr = label_ptr; +} + +static void tcg_out_tb_finalize(TCGContext *s) +{ + static const void * const helpers[8] = { + helper_ret_stb_mmu, + helper_le_stw_mmu, + helper_le_stl_mmu, + helper_le_stq_mmu, + helper_ret_ldub_mmu, + helper_le_lduw_mmu, + helper_le_ldul_mmu, + helper_le_ldq_mmu, + }; + tcg_insn_unit *thunks[8] = { }; + TCGBackendData *be = s->be; + size_t i, n = be->nb_ldst_labels; + + for (i = 0; i < n; i++) { + TCGLabelQemuLdst *l = &be->ldst_labels[i]; + long x = l->is_ld * 4 + l->size; + tcg_insn_unit *dest = thunks[x]; + + /* The out-of-line thunks are all the same; load the return address + from B0, load the GP, and branch to the code. Note that we are + always post-call, so the register window has rolled, so we're + using incomming parameter register numbers, not outgoing. */ + if (dest == NULL) { + uintptr_t *desc = (uintptr_t *)helpers[x]; + uintptr_t func = desc[0], gp = desc[1], disp; + + thunks[x] = dest = s->code_ptr; + + tcg_out_bundle(s, mlx, + INSN_NOP_M, + tcg_opc_l2 (gp), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, + TCG_REG_R1, gp)); + tcg_out_bundle(s, mii, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, + l->is_ld ? TCG_REG_R35 : TCG_REG_R36, + TCG_REG_B0)); + disp = (tcg_insn_unit *)func - s->code_ptr; + tcg_out_bundle(s, mLX, + INSN_NOP_M, + tcg_opc_l3 (disp), + tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, disp)); + } + + reloc_pcrel21b_slot2(l->label_ptr, dest); + } +} + +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) { static const uint64_t opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 }; int addr_reg, data_reg, mem_index; - TCGMemOp s_bits, bswap; - - data_reg = *args++; - addr_reg = *args++; - mem_index = *args; + TCGMemOp opc, s_bits; + uint64_t fin1, fin2; + tcg_insn_unit *label_ptr; + + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; + mem_index = args[3]; s_bits = opc & MO_SIZE; - bswap = opc & MO_BSWAP; /* Read the TLB entry */ tcg_out_qemu_tlb(s, addr_reg, s_bits, offsetof(CPUArchState, tlb_table[mem_index][0].addr_read), - offsetof(CPUArchState, tlb_table[mem_index][0].addend)); + offsetof(CPUArchState, tlb_table[mem_index][0].addend), + INSN_NOP_I, INSN_NOP_I); /* P6 is the fast path, and P7 the slow path */ - tcg_out_bundle(s, mLX, - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), - tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]), - tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2, - (tcg_target_long) qemu_ld_helpers[s_bits])); - tcg_out_bundle(s, MmI, - tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3, - TCG_REG_R2, 8), - tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3, - TCG_REG_R3, TCG_REG_R57), - tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, - TCG_REG_R3, 0)); - if (bswap && s_bits == MO_16) { - tcg_out_bundle(s, MmI, - tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R3), - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R8, TCG_REG_R8, 15, 15)); - } else if (bswap && s_bits == MO_32) { - tcg_out_bundle(s, MmI, - tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R3), - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R8, TCG_REG_R8, 31, 31)); - } else { - tcg_out_bundle(s, mmI, - tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R3), - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - INSN_NOP_I); - } - if (!bswap) { - tcg_out_bundle(s, miB, - tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index), - INSN_NOP_I, - tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); + + fin2 = 0; + if (opc & MO_BSWAP) { + fin1 = tcg_opc_bswap64_i(TCG_REG_P0, data_reg, TCG_REG_R8); + if (s_bits < MO_64) { + int shift = 64 - (8 << s_bits); + fin2 = (opc & MO_SIGN ? OPC_EXTR_I11 : OPC_EXTR_U_I11); + fin2 = tcg_opc_i11(TCG_REG_P0, fin2, + data_reg, data_reg, shift, 63 - shift); + } } else { - tcg_out_bundle(s, miB, - tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index), - tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8), - tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); + fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8); } - tcg_out_bundle(s, miI, - INSN_NOP_M, + tcg_out_bundle(s, mmI, + tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), + tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, + TCG_REG_R2, TCG_REG_R57), + tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index)); + label_ptr = s->code_ptr; + tcg_out_bundle(s, miB, + tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], + TCG_REG_R8, TCG_REG_R2), INSN_NOP_I, - tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8)); -} + tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, + get_reloc_pcrel21b_slot2(label_ptr))); -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ -static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, -}; + add_qemu_ldst_label(s, 1, opc, label_ptr); + + /* Note that we always use LE helper functions, so the bswap insns + here for the fast path also apply to the slow path. */ + tcg_out_bundle(s, (fin2 ? mII : miI), + INSN_NOP_M, + fin1, + fin2 ? fin2 : INSN_NOP_I); +} -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, - TCGMemOp opc) +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) { static const uint64_t opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 }; - int addr_reg, data_reg, mem_index; - TCGMemOp s_bits; - - data_reg = *args++; - addr_reg = *args++; - mem_index = *args; + TCGReg addr_reg, data_reg; + int mem_index; + uint64_t pre1, pre2; + TCGMemOp opc, s_bits; + tcg_insn_unit *label_ptr; + + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; + mem_index = args[3]; s_bits = opc & MO_SIZE; + /* Note that we always use LE helper functions, so the bswap insns + that are here for the fast path also apply to the slow path, + and move the data into the argument register. */ + pre2 = INSN_NOP_I; + if (opc & MO_BSWAP) { + pre1 = tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R58, data_reg); + if (s_bits < MO_64) { + int shift = 64 - (8 << s_bits); + pre2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, + TCG_REG_R58, TCG_REG_R58, shift, 63 - shift); + } + } else { + /* Just move the data into place for the slow path. */ + pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg); + } + tcg_out_qemu_tlb(s, addr_reg, s_bits, offsetof(CPUArchState, tlb_table[mem_index][0].addr_write), - offsetof(CPUArchState, tlb_table[mem_index][0].addend)); + offsetof(CPUArchState, tlb_table[mem_index][0].addend), + pre1, pre2); /* P6 is the fast path, and P7 the slow path */ - tcg_out_bundle(s, mLX, + tcg_out_bundle(s, mmI, tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), - tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[s_bits]), - tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2, - (tcg_target_long) qemu_st_helpers[s_bits])); - tcg_out_bundle(s, MmI, - tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3, - TCG_REG_R2, 8), - tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3, - TCG_REG_R3, TCG_REG_R57), - tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, - TCG_REG_R3, 0)); - - switch (opc) { - case MO_8: - case MO_16: - case MO_32: - case MO_64: - tcg_out_bundle(s, mii, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), - INSN_NOP_I); - break; - - case MO_16 | MO_BSWAP: - tcg_out_bundle(s, miI, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R2, data_reg, 15, 15)); - tcg_out_bundle(s, miI, - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2)); - data_reg = TCG_REG_R2; - break; - - case MO_32 | MO_BSWAP: - tcg_out_bundle(s, miI, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R2, data_reg, 31, 31)); - tcg_out_bundle(s, miI, - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2)); - data_reg = TCG_REG_R2; - break; - - case MO_64 | MO_BSWAP: - tcg_out_bundle(s, miI, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), - tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, data_reg)); - data_reg = TCG_REG_R2; - break; - - default: - tcg_abort(); - } - + tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, + TCG_REG_R2, TCG_REG_R57), + tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index)); + label_ptr = s->code_ptr; tcg_out_bundle(s, miB, tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits], - data_reg, TCG_REG_R3), - tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index), - tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); + TCG_REG_R58, TCG_REG_R2), + INSN_NOP_I, + tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, + get_reloc_pcrel21b_slot2(label_ptr))); + + add_qemu_ldst_label(s, 0, opc, label_ptr); } #else /* !CONFIG_SOFTMMU */ +# include "tcg-be-null.h" -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, - TCGMemOp opc) +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) { static uint64_t const opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 }; int addr_reg, data_reg; - TCGMemOp s_bits, bswap; + TCGMemOp opc, s_bits, bswap; - data_reg = *args++; - addr_reg = *args++; + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; s_bits = opc & MO_SIZE; bswap = opc & MO_BSWAP; @@ -1900,8 +1872,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, #endif } -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, - TCGMemOp opc) +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) { static uint64_t const opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 @@ -1910,10 +1881,11 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, #if TARGET_LONG_BITS == 64 uint64_t add_guest_base; #endif - TCGMemOp s_bits, bswap; + TCGMemOp opc, s_bits, bswap; - data_reg = *args++; - addr_reg = *args++; + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; s_bits = opc & MO_SIZE; bswap = opc & MO_BSWAP; @@ -2023,24 +1995,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_br: tcg_out_br(s, args[0]); break; - case INDEX_op_call: - if (likely(const_args[0])) { - tcg_out_calli(s, args[0]); - } else { - tcg_out_callr(s, args[0]); - } - break; case INDEX_op_goto_tb: tcg_out_goto_tb(s, args[0]); break; - case INDEX_op_movi_i32: - tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); - break; - case INDEX_op_movi_i64: - tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]); @@ -2237,42 +2195,24 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, args[3], const_args[3], args[4], const_args[4], 0); break; - case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld(s, args, MO_UB); - break; - case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, args, MO_SB); - break; - case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld(s, args, MO_TEUW); + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args); break; - case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, args, MO_TESW); + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args); break; - case INDEX_op_qemu_ld32: - case INDEX_op_qemu_ld32u: - tcg_out_qemu_ld(s, args, MO_TEUL); + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args); break; - case INDEX_op_qemu_ld32s: - tcg_out_qemu_ld(s, args, MO_TESL); - break; - case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, MO_TEQ); - break; - - case INDEX_op_qemu_st8: - tcg_out_qemu_st(s, args, MO_UB); - break; - case INDEX_op_qemu_st16: - tcg_out_qemu_st(s, args, MO_TEUW); - break; - case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, args, MO_TEUL); - break; - case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, MO_TEQ); + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args); break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ default: tcg_abort(); } @@ -2280,13 +2220,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_br, { } }, - { INDEX_op_call, { "ri" } }, { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, - { INDEX_op_mov_i32, { "r", "r" } }, - { INDEX_op_movi_i32, { "r" } }, - { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, { INDEX_op_ld16u_i32, { "r", "r" } }, @@ -2328,9 +2264,6 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } }, - { INDEX_op_mov_i64, { "r", "r" } }, - { INDEX_op_movi_i64, { "r" } }, - { INDEX_op_ld8u_i64, { "r", "r" } }, { INDEX_op_ld8s_i64, { "r", "r" } }, { INDEX_op_ld16u_i64, { "r", "r" } }, @@ -2381,19 +2314,10 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_deposit_i32, { "r", "rZ", "ri" } }, { INDEX_op_deposit_i64, { "r", "rZ", "ri" } }, - { INDEX_op_qemu_ld8u, { "r", "r" } }, - { INDEX_op_qemu_ld8s, { "r", "r" } }, - { INDEX_op_qemu_ld16u, { "r", "r" } }, - { INDEX_op_qemu_ld16s, { "r", "r" } }, - { INDEX_op_qemu_ld32, { "r", "r" } }, - { INDEX_op_qemu_ld32u, { "r", "r" } }, - { INDEX_op_qemu_ld32s, { "r", "r" } }, - { INDEX_op_qemu_ld64, { "r", "r" } }, - - { INDEX_op_qemu_st8, { "SZ", "r" } }, - { INDEX_op_qemu_st16, { "SZ", "r" } }, - { INDEX_op_qemu_st32, { "SZ", "r" } }, - { INDEX_op_qemu_st64, { "SZ", "r" } }, + { INDEX_op_qemu_ld_i32, { "r", "r" } }, + { INDEX_op_qemu_ld_i64, { "r", "r" } }, + { INDEX_op_qemu_st_i32, { "SZ", "r" } }, + { INDEX_op_qemu_st_i64, { "SZ", "r" } }, { -1 }, }; @@ -2412,8 +2336,11 @@ static void tcg_target_qemu_prologue(TCGContext *s) CPU_TEMP_BUF_NLONGS * sizeof(long)); /* First emit adhoc function descriptor */ - *(uint64_t *)(s->code_ptr) = (uint64_t)s->code_ptr + 16; /* entry point */ - s->code_ptr += 16; /* skip GP */ + *s->code_ptr = (tcg_insn_unit){ + (uint64_t)(s->code_ptr + 1), /* entry point */ + 0 /* skip gp */ + }; + s->code_ptr++; /* prologue */ tcg_out_bundle(s, miI, diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index 52a939c94..d67558988 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -25,6 +25,12 @@ #ifndef TCG_TARGET_IA64 #define TCG_TARGET_IA64 1 +#define TCG_TARGET_INSN_UNIT_SIZE 16 +typedef struct { + uint64_t lo __attribute__((aligned(16))); + uint64_t hi; +} tcg_insn_unit; + /* We only map the first 64 registers */ #define TCG_TARGET_NB_REGS 64 typedef enum { @@ -152,8 +158,7 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_mulsh_i64 0 - -#define TCG_TARGET_HAS_new_ldst 0 +#define TCG_TARGET_HAS_trunc_shr_i32 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) |