author     SeokYeon Hwang <syeon.hwang@samsung.com>   2016-09-12 15:41:37 +0900
committer  SeokYeon Hwang <syeon.hwang@samsung.com>   2016-09-12 15:41:37 +0900
commit     590861b31f5f1f7140d637173d8d9bac8d41ccc6 (patch)
tree       7e2e5afd3ac4d896b310de7a980c509e6dddfd2b /tcg
parent     64d5068524fc31f8941aeba31d6a34f935adf479 (diff)
parent     1dc33ed90bf1fe1c2014dffa0d9e863c520d953a (diff)
Merge tag 'v2.7.0' into develop_qemu_2.7
v2.7.0 release
Change-Id: Id5feb5a9404ab064f9ea3d0aa0d95eef17020fa3
Signed-off-by: SeokYeon Hwang <syeon.hwang@samsung.com>
Diffstat (limited to 'tcg')
-rw-r--r--   tcg/README                    |   2
-rw-r--r--   tcg/aarch64/tcg-target.h      |   6
-rw-r--r--   tcg/aarch64/tcg-target.inc.c  |  40
-rw-r--r--   tcg/arm/tcg-target.h          |   5
-rw-r--r--   tcg/arm/tcg-target.inc.c      |  32
-rw-r--r--   tcg/i386/tcg-target.h         |   5
-rw-r--r--   tcg/i386/tcg-target.inc.c     |  67
-rw-r--r--   tcg/ia64/tcg-target.h         |   5
-rw-r--r--   tcg/ia64/tcg-target.inc.c     |  16
-rw-r--r--   tcg/mips/tcg-target.h         |   5
-rw-r--r--   tcg/mips/tcg-target.inc.c     |  21
-rw-r--r--   tcg/optimize.c                |  40
-rw-r--r--   tcg/ppc/tcg-target.h          |   5
-rw-r--r--   tcg/ppc/tcg-target.inc.c      |  48
-rw-r--r--   tcg/s390/tcg-target.h         |   5
-rw-r--r--   tcg/s390/tcg-target.inc.c     |  34
-rw-r--r--   tcg/sparc/tcg-target.h        |   5
-rw-r--r--   tcg/sparc/tcg-target.inc.c    |  21
-rw-r--r--   tcg/tcg-common.c              |   2
-rw-r--r--   tcg/tcg-op.c                  |  18
-rw-r--r--   tcg/tcg-op.h                  |  13
-rw-r--r--   tcg/tcg.c                     | 768
-rw-r--r--   tcg/tcg.h                     | 199
-rw-r--r--   tcg/tci/tcg-target.h          |   3
-rw-r--r--   tcg/tci/tcg-target.inc.c      |  18
25 files changed, 935 insertions(+), 448 deletions(-)
diff --git a/tcg/README b/tcg/README index f4a8ac170b..ce8bebab37 100644 --- a/tcg/README +++ b/tcg/README @@ -473,7 +473,7 @@ On a 32 bit target, all 64 bit operations are converted to 32 bits. A few specific operations must be implemented to allow it (see add2_i32, sub2_i32, brcond2_i32). -On a 64 bit target, the values are transfered between 32 and 64-bit +On a 64 bit target, the values are transferred between 32 and 64-bit registers using the following ops: - trunc_shr_i64_i32 - ext_i32_i64 diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 19a04a6e75..a1d101f891 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -10,8 +10,8 @@ * See the COPYING file in the top-level directory for details. */ -#ifndef TCG_TARGET_AARCH64 -#define TCG_TARGET_AARCH64 1 +#ifndef AARCH64_TCG_TARGET_H +#define AARCH64_TCG_TARGET_H #define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 24 @@ -106,4 +106,4 @@ static inline void flush_icache_range(uintptr_t start, uintptr_t stop) __builtin___clear_cache((char *)start, (char *)stop); } -#endif /* TCG_TARGET_AARCH64 */ +#endif /* AARCH64_TCG_TARGET_H */ diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index a8fb4420de..08b2d031aa 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -73,6 +73,18 @@ static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) *code_ptr = deposit32(*code_ptr, 0, 26, offset); } +static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr, + tcg_insn_unit *target) +{ + ptrdiff_t offset = target - code_ptr; + tcg_insn_unit insn; + tcg_debug_assert(offset == sextract64(offset, 0, 26)); + /* read instruction, mask away previous PC_REL26 parameter contents, + set the proper offset, then write back the instruction. */ + insn = atomic_read(code_ptr); + atomic_set(code_ptr, deposit32(insn, 0, 26, offset)); +} + static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) { ptrdiff_t offset = target - code_ptr; @@ -704,6 +716,16 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, arg, arg1, arg2); } +static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + if (val == 0) { + tcg_out_st(s, type, TCG_REG_XZR, base, ofs); + return true; + } + return false; +} + static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn, unsigned int a, unsigned int b) { @@ -835,7 +857,7 @@ void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr; tcg_insn_unit *target = (tcg_insn_unit *)addr; - reloc_pc26(code_ptr, target); + reloc_pc26_atomic(code_ptr, target); flush_icache_range(jmp_addr, jmp_addr + 4); } @@ -1059,19 +1081,20 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc, int tlb_offset = is_read ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); - int s_mask = (1 << (opc & MO_SIZE)) - 1; + int a_bits = get_alignment_bits(opc); TCGReg base = TCG_AREG0, x3; uint64_t tlb_mask; /* For aligned accesses, we check the first byte and include the alignment bits within the address. For unaligned access, we check that we don't cross pages using the address of the last byte of the access. 
*/ - if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { - tlb_mask = TARGET_PAGE_MASK | s_mask; + if (a_bits >= 0) { + /* A byte access or an alignment check required */ + tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1); x3 = addr_reg; } else { tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, - TCG_REG_X3, addr_reg, s_mask); + TCG_REG_X3, addr_reg, (1 << (opc & MO_SIZE)) - 1); tlb_mask = TARGET_PAGE_MASK; x3 = TCG_REG_X3; } @@ -1294,12 +1317,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, #ifndef USE_DIRECT_JUMP #error "USE_DIRECT_JUMP required for aarch64" #endif - tcg_debug_assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */ - s->tb_jmp_offset[a0] = tcg_current_code_size(s); + /* consistency for USE_DIRECT_JUMP */ + tcg_debug_assert(s->tb_jmp_insn_offset != NULL); + s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); /* actual branch destination will be patched by aarch64_tb_set_jmp_target later, beware retranslation. */ tcg_out_goto_noaddr(s); - s->tb_next_offset[a0] = tcg_current_code_size(s); + s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); break; case INDEX_op_br: diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 6559f80b71..a0e1acfa77 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -22,8 +22,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#ifndef TCG_TARGET_ARM -#define TCG_TARGET_ARM 1 + +#ifndef ARM_TCG_TARGET_H +#define ARM_TCG_TARGET_H #undef TCG_TARGET_STACK_GROWSUP #define TCG_TARGET_INSN_UNIT_SIZE 4 diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index 2b7fbddbf0..172febafdd 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -121,6 +121,14 @@ static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); } +static inline void reloc_pc24_atomic(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2; + tcg_insn_unit insn = atomic_read(code_ptr); + tcg_debug_assert(offset == sextract32(offset, 0, 24)); + atomic_set(code_ptr, deposit32(insn, 0, 24, offset)); +} + static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { @@ -1038,6 +1046,16 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr) } } +void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) +{ + tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr; + tcg_insn_unit *target = (tcg_insn_unit *)addr; + + /* we could use a ldr pc, [pc, #-4] kind of branch and avoid the flush */ + reloc_pc24_atomic(code_ptr, target); + flush_icache_range(jmp_addr, jmp_addr + 4); +} + static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l) { if (l->has_value) { @@ -1647,17 +1665,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_goto(s, COND_AL, tb_ret_addr); break; case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { + if (s->tb_jmp_insn_offset) { /* Direct jump method */ - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); tcg_out_b_noaddr(s, COND_AL); } else { /* Indirect jump method */ - intptr_t ptr = (intptr_t)(s->tb_next + args[0]); + intptr_t ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]); tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff); tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff); } - s->tb_next_offset[args[0]] = tcg_current_code_size(s); + 
s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); break; case INDEX_op_br: tcg_out_goto_label(s, COND_AL, arg_label(args[0])); @@ -2028,6 +2046,12 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, tcg_out_st32(s, COND_AL, arg, arg1, arg2); } +static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + return false; +} + static inline void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 92be341713..524cfc61fd 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -21,8 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#ifndef TCG_TARGET_I386 -#define TCG_TARGET_I386 1 + +#ifndef I386_TCG_TARGET_H +#define I386_TCG_TARGET_H #define TCG_TARGET_INSN_UNIT_SIZE 1 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 31 diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 007407c3fc..6f8cdca756 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -710,12 +710,19 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, tcg_out_modrm_offset(s, opc, arg, arg1, arg2); } -static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base, - tcg_target_long ofs, tcg_target_long val) +static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) { - int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0); - tcg_out_modrm_offset(s, opc, 0, base, ofs); + int rexw = 0; + if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) { + if (val != (int32_t)val) { + return false; + } + rexw = P_REXW; + } + tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs); tcg_out32(s, val); + return true; } static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count) @@ -1123,6 +1130,21 @@ static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest) tcg_out_branch(s, 0, dest); } +static void tcg_out_nopn(TCGContext *s, int n) +{ + int i; + /* Emit 1 or 2 operand size prefixes for the standard one byte nop, + * "xchg %eax,%eax", forming "xchg %ax,%ax". All cores accept the + * duplicate prefix, and all of the interesting recent cores can + * decode and discard the duplicates in a single cycle. + */ + tcg_debug_assert(n >= 1); + for (i = 1; i < n; ++i) { + tcg_out8(s, 0x66); + } + tcg_out8(s, 0x90); +} + #if defined(CONFIG_SOFTMMU) /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * int mmu_idx, uintptr_t ra) @@ -1180,8 +1202,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, TCGType ttype = TCG_TYPE_I32; TCGType tlbtype = TCG_TYPE_I32; int trexw = 0, hrexw = 0, tlbrexw = 0; - int s_mask = (1 << (opc & MO_SIZE)) - 1; - bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0; + int a_bits = get_alignment_bits(opc); + target_ulong tlb_mask; if (TCG_TARGET_REG_BITS == 64) { if (TARGET_LONG_BITS == 64) { @@ -1198,19 +1220,22 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, } tcg_out_mov(s, tlbtype, r0, addrlo); - if (aligned) { + if (a_bits >= 0) { + /* A byte access or an alignment check required */ tcg_out_mov(s, ttype, r1, addrlo); + tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1); } else { /* For unaligned access check that we don't cross pages using the page address of the last byte. 
*/ - tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask); + tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, + (1 << (opc & MO_SIZE)) - 1); + tlb_mask = TARGET_PAGE_MASK; } tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tgen_arithi(s, ARITH_AND + trexw, r1, - TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0); + tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0); tgen_arithi(s, ARITH_AND + tlbrexw, r0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); @@ -1306,10 +1331,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) ofs += 4; } - tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi); + tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs); ofs += 4; - tcg_out_sti(s, TCG_TYPE_PTR, TCG_REG_ESP, ofs, (uintptr_t)l->raddr); + tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs); } else { tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); /* The second argument is already loaded with addrlo. */ @@ -1398,7 +1423,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) ofs += 4; } - tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi); + tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs); ofs += 4; retaddr = TCG_REG_EAX; @@ -1775,17 +1800,25 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_jmp(s, tb_ret_addr); break; case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { + if (s->tb_jmp_insn_offset) { /* direct jump method */ + int gap; + /* jump displacement must be aligned for atomic patching; + * see if we need to add extra nops before jump + */ + gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4)); + if (gap != 1) { + tcg_out_nopn(s, gap - 1); + } tcg_out8(s, OPC_JMP_long); /* jmp im */ - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); tcg_out32(s, 0); } else { /* indirect jump method */ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1, - (intptr_t)(s->tb_next + args[0])); + (intptr_t)(s->tb_jmp_target_addr + args[0])); } - s->tb_next_offset[args[0]] = tcg_current_code_size(s); + s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); break; case INDEX_op_br: tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0); diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index ae9b79f02f..6dddb7f772 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -22,8 +22,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ -#ifndef TCG_TARGET_IA64 -#define TCG_TARGET_IA64 1 + +#ifndef IA64_TCG_TARGET_H +#define IA64_TCG_TARGET_H #define TCG_TARGET_INSN_UNIT_SIZE 16 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 21 diff --git a/tcg/ia64/tcg-target.inc.c b/tcg/ia64/tcg-target.inc.c index 7557e6a9d4..c91f39281b 100644 --- a/tcg/ia64/tcg-target.inc.c +++ b/tcg/ia64/tcg-target.inc.c @@ -881,13 +881,13 @@ static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg) static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg) { - if (s->tb_jmp_offset) { + if (s->tb_jmp_insn_offset) { /* direct jump method */ tcg_abort(); } else { /* indirect jump method */ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, - (tcg_target_long)(s->tb_next + arg)); + (tcg_target_long)(s->tb_jmp_target_addr + arg)); tcg_out_bundle(s, MmI, tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, TCG_REG_R2), @@ -900,7 +900,7 @@ static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg) tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); } - s->tb_next_offset[arg] = tcg_current_code_size(s); + s->tb_jmp_reset_offset[arg] = tcg_current_code_size(s); } static inline void tcg_out_jmp(TCGContext *s, TCGArg addr) @@ -973,6 +973,16 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, } } +static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + if (val == 0) { + tcg_out_st(s, type, TCG_REG_R0, base, ofs); + return true; + } + return false; +} + static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3, TCGReg ret, TCGArg arg1, int const_arg1, TCGArg arg2, int const_arg2) diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index b1cda37b66..3aeac87614 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -23,8 +23,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#ifndef TCG_TARGET_MIPS -#define TCG_TARGET_MIPS 1 + +#ifndef MIPS_TCG_TARGET_H +#define MIPS_TCG_TARGET_H #define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c index aaf881cfd0..2f9be48139 100644 --- a/tcg/mips/tcg-target.inc.c +++ b/tcg/mips/tcg-target.inc.c @@ -576,6 +576,16 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, tcg_out_ldst(s, OPC_SW, arg, arg1, arg2); } +static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + if (val == 0) { + tcg_out_st(s, type, TCG_REG_ZERO, base, ofs); + return true; + } + return false; +} + static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val) { if (val == (int16_t)val) { @@ -1397,19 +1407,19 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { + if (s->tb_jmp_insn_offset) { /* direct jump method */ - s->tb_jmp_offset[a0] = tcg_current_code_size(s); + s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); /* Avoid clobbering the address during retranslation. 
*/ tcg_out32(s, OPC_J | (*(uint32_t *)s->code_ptr & 0x3ffffff)); } else { /* indirect jump method */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, - (uintptr_t)(s->tb_next + a0)); + (uintptr_t)(s->tb_jmp_target_addr + a0)); tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); } tcg_out_nop(s); - s->tb_next_offset[a0] = tcg_current_code_size(s); + s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); break; case INDEX_op_br: tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, @@ -1885,7 +1895,6 @@ static void tcg_target_init(TCGContext *s) void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) { - uint32_t *ptr = (uint32_t *)jmp_addr; - *ptr = deposit32(*ptr, 0, 26, addr >> 2); + atomic_set((uint32_t *)jmp_addr, deposit32(OPC_J, 0, 26, addr >> 2)); flush_icache_range(jmp_addr, jmp_addr + 4); } diff --git a/tcg/optimize.c b/tcg/optimize.c index f01160815c..cffe89b525 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -24,9 +24,8 @@ */ #include "qemu/osdep.h" - - #include "qemu-common.h" +#include "exec/cpu-common.h" #include "tcg-op.h" #define CASE_OP_32_64(x) \ @@ -83,37 +82,6 @@ static void init_temp_info(TCGArg temp) } } -static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op, - TCGOpcode opc, int nargs) -{ - int oi = s->gen_next_op_idx; - int pi = s->gen_next_parm_idx; - int prev = old_op->prev; - int next = old_op - s->gen_op_buf; - TCGOp *new_op; - - tcg_debug_assert(oi < OPC_BUF_SIZE); - tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); - s->gen_next_op_idx = oi + 1; - s->gen_next_parm_idx = pi + nargs; - - new_op = &s->gen_op_buf[oi]; - *new_op = (TCGOp){ - .opc = opc, - .args = pi, - .prev = prev, - .next = next - }; - if (prev >= 0) { - s->gen_op_buf[prev].next = oi; - } else { - s->gen_first_op_idx = oi; - } - old_op->prev = oi; - - return new_op; -} - static int op_bits(TCGOpcode op) { const TCGOpDef *def = &tcg_op_defs[op]; @@ -584,7 +552,7 @@ void tcg_optimize(TCGContext *s) nb_globals = s->nb_globals; reset_all_temps(nb_temps); - for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) { + for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { tcg_target_ulong mask, partmask, affected; int nb_oargs, nb_iargs, i; TCGArg tmp; @@ -1121,7 +1089,7 @@ void tcg_optimize(TCGContext *s) uint64_t a = ((uint64_t)ah << 32) | al; uint64_t b = ((uint64_t)bh << 32) | bl; TCGArg rl, rh; - TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2); + TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2); TCGArg *args2 = &s->gen_opparam_buf[op2->args]; if (opc == INDEX_op_add2_i32) { @@ -1147,7 +1115,7 @@ void tcg_optimize(TCGContext *s) uint32_t b = temps[args[3]].val; uint64_t r = (uint64_t)a * b; TCGArg rl, rh; - TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2); + TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2); TCGArg *args2 = &s->gen_opparam_buf[op2->args]; rl = args[0]; diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index b4f0818762..dd032f286b 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -21,8 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ -#ifndef TCG_TARGET_PPC64 -#define TCG_TARGET_PPC64 1 + +#ifndef PPC_TCG_TARGET_H +#define PPC_TCG_TARGET_H #ifdef _ARCH_PPC64 # define TCG_TARGET_REG_BITS 64 diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c index 00bb90fc25..eaf1bd9bfd 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -857,6 +857,12 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); } +static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + return false; +} + static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, int const_arg2, int cr, TCGType type) { @@ -1237,6 +1243,7 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5])); } +#ifdef __powerpc64__ void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) { tcg_insn_unit i1, i2; @@ -1265,11 +1272,18 @@ void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) pair = (uint64_t)i2 << 32 | i1; #endif - /* ??? __atomic_store_8, presuming there's some way to do that - for 32-bit, otherwise this is good enough for 64-bit. */ - *(uint64_t *)jmp_addr = pair; + atomic_set((uint64_t *)jmp_addr, pair); flush_icache_range(jmp_addr, jmp_addr + 8); } +#else +void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) +{ + intptr_t diff = addr - jmp_addr; + tcg_debug_assert(in_range_b(diff)); + atomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc)); + flush_icache_range(jmp_addr, jmp_addr + 4); +} +#endif static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) { @@ -1391,6 +1405,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc, int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); TCGReg base = TCG_AREG0; TCGMemOp s_bits = opc & MO_SIZE; + int a_bits = get_alignment_bits(opc); /* Extract the page index, shifted into place for tlb index. */ if (TCG_TARGET_REG_BITS == 64) { @@ -1448,14 +1463,17 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc, * the bottom bits and thus trigger a comparison failure on * unaligned accesses */ + if (a_bits < 0) { + a_bits = s_bits; + } tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, - (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); - } else if (s_bits) { - /* > byte access, we need to handle alignment */ - if ((opc & MO_AMASK) == MO_ALIGN) { + (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); + } else if (a_bits) { + /* More than byte access, we need to handle alignment */ + if (a_bits > 0) { /* Alignment required by the front-end, same as 32-bits */ tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo, - 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); + 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits); tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); } else { /* We support unaligned accesses, we need to make sure we fail @@ -1894,17 +1912,23 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out_b(s, 0, tb_ret_addr); break; case INDEX_op_goto_tb: - tcg_debug_assert(s->tb_jmp_offset); - /* Direct jump. Ensure the next insns are 8-byte aligned. */ + tcg_debug_assert(s->tb_jmp_insn_offset); + /* Direct jump. */ +#ifdef __powerpc64__ + /* Ensure the next insns are 8-byte aligned. 
*/ if ((uintptr_t)s->code_ptr & 7) { tcg_out32(s, NOP); } - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); /* To be replaced by either a branch+nop or a load into TMP1. */ s->code_ptr += 2; tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); tcg_out32(s, BCCTR | BO_ALWAYS); - s->tb_next_offset[args[0]] = tcg_current_code_size(s); +#else + /* To be replaced by a branch. */ + s->code_ptr++; +#endif + s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); break; case INDEX_op_br: { diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index d9dc038733..0c1af244f3 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -21,8 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#ifndef TCG_TARGET_S390 -#define TCG_TARGET_S390 1 + +#ifndef S390_TCG_TARGET_H +#define S390_TCG_TARGET_H #define TCG_TARGET_INSN_UNIT_SIZE 2 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 19 diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c index 5805532398..5a7495b063 100644 --- a/tcg/s390/tcg-target.inc.c +++ b/tcg/s390/tcg-target.inc.c @@ -219,6 +219,8 @@ typedef enum S390Opcode { RX_ST = 0x50, RX_STC = 0x42, RX_STH = 0x40, + + NOP = 0x0707, } S390Opcode; #ifdef CONFIG_DEBUG_TCG @@ -796,6 +798,12 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data, } } +static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + return false; +} + /* load data from an absolute host address */ static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs) { @@ -1497,18 +1505,19 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc, int mem_index, bool is_ld) { - int s_mask = (1 << (opc & MO_SIZE)) - 1; + int a_bits = get_alignment_bits(opc); int ofs, a_off; uint64_t tlb_mask; /* For aligned accesses, we check the first byte and include the alignment bits within the address. For unaligned access, we check that we don't cross pages using the address of the last byte of the access. 
*/ - if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { + if (a_bits >= 0) { + /* A byte access or an alignment check required */ a_off = 0; - tlb_mask = TARGET_PAGE_MASK | s_mask; + tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1); } else { - a_off = s_mask; + a_off = (1 << (opc & MO_SIZE)) - 1; tlb_mask = TARGET_PAGE_MASK; } @@ -1715,17 +1724,24 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { + if (s->tb_jmp_insn_offset) { + /* branch displacement must be aligned for atomic patching; + * see if we need to add extra nop before branch + */ + if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) { + tcg_out16(s, NOP); + } tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); s->code_ptr += 2; } else { - /* load address stored at s->tb_next + args[0] */ - tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_TMP0, s->tb_next + args[0]); + /* load address stored at s->tb_jmp_target_addr + args[0] */ + tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_TMP0, + s->tb_jmp_target_addr + args[0]); /* and go there */ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_TMP0); } - s->tb_next_offset[args[0]] = tcg_current_code_size(s); + s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); break; OP_32_64(ld8u): diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index 2cd72d2d41..88f9c90f5f 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -21,8 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#ifndef TCG_TARGET_SPARC -#define TCG_TARGET_SPARC 1 + +#ifndef SPARC_TCG_TARGET_H +#define SPARC_TCG_TARGET_H #define TCG_TARGET_REG_BITS 64 diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c index d641cfd8c5..8e98172ca0 100644 --- a/tcg/sparc/tcg-target.inc.c +++ b/tcg/sparc/tcg-target.inc.c @@ -504,6 +504,16 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX)); } +static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + if (val == 0) { + tcg_out_st(s, type, TCG_REG_G0, base, ofs); + return true; + } + return false; +} + static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg) { tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); @@ -1229,18 +1239,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { + if (s->tb_jmp_insn_offset) { /* direct jump method */ - s->tb_jmp_offset[a0] = tcg_current_code_size(s); + s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); /* Make sure to preserve links during retranslation. */ tcg_out32(s, CALL | (*s->code_ptr & ~INSN_OP(-1))); } else { /* indirect jump method */ - tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + a0)); + tcg_out_ld_ptr(s, TCG_REG_T1, + (uintptr_t)(s->tb_jmp_target_addr + a0)); tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, 0, JMPL); } tcg_out_nop(s); - s->tb_next_offset[a0] = tcg_current_code_size(s); + s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); break; case INDEX_op_br: tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); @@ -1647,6 +1658,6 @@ void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) the code_gen_buffer can't be larger than 2GB. 
*/ tcg_debug_assert(disp == (int32_t)disp); - *ptr = CALL | (uint32_t)disp >> 2; + atomic_set(ptr, deposit32(CALL, 0, 30, disp >> 2)); flush_icache_range(jmp_addr, jmp_addr + 4); } diff --git a/tcg/tcg-common.c b/tcg/tcg-common.c index 97305a3efc..2f139de802 100644 --- a/tcg/tcg-common.c +++ b/tcg/tcg-common.c @@ -23,6 +23,8 @@ */ #include "qemu/osdep.h" +#include "qemu-common.h" +#include "exec/cpu-common.h" #include "tcg/tcg.h" #if defined(CONFIG_TCG_INTERPRETER) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index f554b86d40..0243c99094 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -23,8 +23,13 @@ */ #include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "exec/exec-all.h" #include "tcg.h" #include "tcg-op.h" +#include "trace-tcg.h" +#include "trace/mem.h" /* Reduce the number of ifdefs below. This assumes that all uses of TCGV_HIGH and TCGV_LOW are properly protected by a conditional that @@ -47,7 +52,7 @@ static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args) int pi = oi - 1; tcg_debug_assert(oi < OPC_BUF_SIZE); - ctx->gen_last_op_idx = oi; + ctx->gen_op_buf[0].prev = oi; ctx->gen_next_op_idx = ni; ctx->gen_op_buf[oi] = (TCGOp){ @@ -1846,6 +1851,9 @@ void tcg_gen_goto_tb(unsigned idx) static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st) { + /* Trigger the asserts within as early as possible. */ + (void)get_alignment_bits(op); + switch (op & MO_SIZE) { case MO_8: op &= ~MO_BSWAP; @@ -1907,12 +1915,16 @@ static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr, void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop) { memop = tcg_canonicalize_memop(memop, 0, 0); + trace_guest_mem_before_tcg(tcg_ctx.cpu, tcg_ctx.tcg_env, + addr, trace_mem_get_info(memop, 0)); gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx); } void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop) { memop = tcg_canonicalize_memop(memop, 0, 1); + trace_guest_mem_before_tcg(tcg_ctx.cpu, tcg_ctx.tcg_env, + addr, trace_mem_get_info(memop, 1)); gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx); } @@ -1929,6 +1941,8 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) } memop = tcg_canonicalize_memop(memop, 1, 0); + trace_guest_mem_before_tcg(tcg_ctx.cpu, tcg_ctx.tcg_env, + addr, trace_mem_get_info(memop, 0)); gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx); } @@ -1940,5 +1954,7 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) } memop = tcg_canonicalize_memop(memop, 1, 1); + trace_guest_mem_before_tcg(tcg_ctx.cpu, tcg_ctx.tcg_env, + addr, trace_mem_get_info(memop, 1)); gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx); } diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index c446d3dc72..f217e80747 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -753,6 +753,19 @@ static inline void tcg_gen_exit_tb(uintptr_t val) tcg_gen_op1i(INDEX_op_exit_tb, val); } +/** + * tcg_gen_goto_tb() - output goto_tb TCG operation + * @idx: Direct jump slot index (0 or 1) + * + * See tcg/README for more info about this TCG operation. + * + * NOTE: In softmmu emulation, direct jumps with goto_tb are only safe within + * the pages this TB resides in because we don't take care of direct jumps when + * address mapping changes, e.g. in tlb_flush(). In user mode, there's only a + * static address translation, so the destination address is always valid, TBs + * are always invalidated properly, and direct jumps are reset when mapping + * changes. 
+ */ void tcg_gen_goto_tb(unsigned idx); #if TARGET_LONG_BITS == 32 @@ -23,7 +23,6 @@ */ /* define it to use liveness analysis (better code) */ -#define USE_LIVENESS_ANALYSIS #define USE_TCG_OPTIMIZATIONS #include "qemu/osdep.h" @@ -41,6 +40,11 @@ #define NO_CPU_IO_DEFS #include "cpu.h" +#include "qemu/host-utils.h" +#include "qemu/timer.h" +#include "exec/cpu-common.h" +#include "exec/exec-all.h" + #include "tcg-op.h" #if UINTPTR_MAX == UINT32_MAX @@ -103,6 +107,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args); static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, intptr_t arg2); +static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs); static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); static int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct); @@ -431,9 +437,9 @@ void tcg_func_start(TCGContext *s) s->goto_tb_issue_mask = 0; #endif - s->gen_first_op_idx = 0; - s->gen_last_op_idx = -1; - s->gen_next_op_idx = 0; + s->gen_op_buf[0].next = 1; + s->gen_op_buf[0].prev = 0; + s->gen_next_op_idx = 1; s->gen_next_parm_idx = 0; s->be = tcg_malloc(sizeof(TCGBackendData)); @@ -525,8 +531,12 @@ int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, #endif if (!base_ts->fixed_reg) { - indirect_reg = 1; + /* We do not support double-indirect registers. */ + tcg_debug_assert(!base_ts->indirect_reg); base_ts->indirect_base = 1; + s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 + ? 2 : 1); + indirect_reg = 1; } if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { @@ -552,7 +562,7 @@ int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, ts2->mem_offset = offset + (1 - bigendian) * 4; pstrcpy(buf, sizeof(buf), name); pstrcat(buf, sizeof(buf), "_1"); - ts->name = strdup(buf); + ts2->name = strdup(buf); } else { ts->base_type = type; ts->type = type; @@ -818,16 +828,16 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, real_args++; } #endif - /* If stack grows up, then we will be placing successive - arguments at lower addresses, which means we need to - reverse the order compared to how we would normally - treat either big or little-endian. For those arguments - that will wind up in registers, this still works for - HPPA (the only current STACK_GROWSUP target) since the - argument registers are *also* allocated in decreasing - order. If another such target is added, this logic may - have to get more complicated to differentiate between - stack arguments and register arguments. */ + /* If stack grows up, then we will be placing successive + arguments at lower addresses, which means we need to + reverse the order compared to how we would normally + treat either big or little-endian. For those arguments + that will wind up in registers, this still works for + HPPA (the only current STACK_GROWSUP target) since the + argument registers are *also* allocated in decreasing + order. If another such target is added, this logic may + have to get more complicated to differentiate between + stack arguments and register arguments. */ #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) s->gen_opparam_buf[pi++] = args[i] + 1; s->gen_opparam_buf[pi++] = args[i]; @@ -862,7 +872,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, /* Make sure the calli field didn't overflow. 
*/ tcg_debug_assert(s->gen_op_buf[i].calli == real_args); - s->gen_last_op_idx = i; + s->gen_op_buf[0].prev = i; s->gen_next_op_idx = i + 1; s->gen_next_parm_idx = pi; @@ -992,17 +1002,34 @@ static const char * const ldst_name[] = [MO_BEQ] = "beq", }; +static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { +#ifdef ALIGNED_ONLY + [MO_UNALN >> MO_ASHIFT] = "un+", + [MO_ALIGN >> MO_ASHIFT] = "", +#else + [MO_UNALN >> MO_ASHIFT] = "", + [MO_ALIGN >> MO_ASHIFT] = "al+", +#endif + [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", + [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", + [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", + [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", + [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", + [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", +}; + void tcg_dump_ops(TCGContext *s) { char buf[128]; TCGOp *op; int oi; - for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) { + for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) { int i, k, nb_oargs, nb_iargs, nb_cargs; const TCGOpDef *def; const TCGArg *args; TCGOpcode c; + int col = 0; op = &s->gen_op_buf[oi]; c = op->opc; @@ -1010,7 +1037,7 @@ void tcg_dump_ops(TCGContext *s) args = &s->gen_opparam_buf[op->args]; if (c == INDEX_op_insn_start) { - qemu_log("%s ----", oi != s->gen_first_op_idx ? "\n" : ""); + col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : ""); for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { target_ulong a; @@ -1019,7 +1046,7 @@ void tcg_dump_ops(TCGContext *s) #else a = args[i]; #endif - qemu_log(" " TARGET_FMT_lx, a); + col += qemu_log(" " TARGET_FMT_lx, a); } } else if (c == INDEX_op_call) { /* variable number of arguments */ @@ -1028,12 +1055,12 @@ void tcg_dump_ops(TCGContext *s) nb_cargs = def->nb_cargs; /* function name, flags, out args */ - qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, - tcg_find_helper(s, args[nb_oargs + nb_iargs]), - args[nb_oargs + nb_iargs + 1], nb_oargs); + col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, + tcg_find_helper(s, args[nb_oargs + nb_iargs]), + args[nb_oargs + nb_iargs + 1], nb_oargs); for (i = 0; i < nb_oargs; i++) { - qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), - args[i])); + col += qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), + args[i])); } for (i = 0; i < nb_iargs; i++) { TCGArg arg = args[nb_oargs + i]; @@ -1041,10 +1068,10 @@ void tcg_dump_ops(TCGContext *s) if (arg != TCG_CALL_DUMMY_ARG) { t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg); } - qemu_log(",%s", t); + col += qemu_log(",%s", t); } } else { - qemu_log(" %s ", def->name); + col += qemu_log(" %s ", def->name); nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; @@ -1053,17 +1080,17 @@ void tcg_dump_ops(TCGContext *s) k = 0; for (i = 0; i < nb_oargs; i++) { if (k != 0) { - qemu_log(","); + col += qemu_log(","); } - qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), - args[k++])); + col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), + args[k++])); } for (i = 0; i < nb_iargs; i++) { if (k != 0) { - qemu_log(","); + col += qemu_log(","); } - qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), - args[k++])); + col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), + args[k++])); } switch (c) { case INDEX_op_brcond_i32: @@ -1075,9 +1102,9 @@ void tcg_dump_ops(TCGContext *s) case INDEX_op_setcond_i64: case INDEX_op_movcond_i64: if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) { - qemu_log(",%s", cond_name[args[k++]]); + col += qemu_log(",%s", cond_name[args[k++]]); } else { - qemu_log(",$0x%" TCG_PRIlx, args[k++]); + col += qemu_log(",$0x%" 
TCG_PRIlx, args[k++]); } i = 1; break; @@ -1091,18 +1118,12 @@ void tcg_dump_ops(TCGContext *s) unsigned ix = get_mmuidx(oi); if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { - qemu_log(",$0x%x,%u", op, ix); + col += qemu_log(",$0x%x,%u", op, ix); } else { - const char *s_al = "", *s_op; - if (op & MO_AMASK) { - if ((op & MO_AMASK) == MO_ALIGN) { - s_al = "al+"; - } else { - s_al = "un+"; - } - } + const char *s_al, *s_op; + s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; - qemu_log(",%s%s,%u", s_al, s_op, ix); + col += qemu_log(",%s%s,%u", s_al, s_op, ix); } i = 1; } @@ -1117,14 +1138,39 @@ void tcg_dump_ops(TCGContext *s) case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: case INDEX_op_brcond2_i32: - qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id); + col += qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id); i++, k++; break; default: break; } for (; i < nb_cargs; i++, k++) { - qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]); + col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]); + } + } + if (op->life) { + unsigned life = op->life; + + for (; col < 48; ++col) { + putc(' ', qemu_logfile); + } + + if (life & (SYNC_ARG * 3)) { + qemu_log(" sync:"); + for (i = 0; i < 2; ++i) { + if (life & (SYNC_ARG << i)) { + qemu_log(" %d", i); + } + } + } + life /= DEAD_ARG; + if (life) { + qemu_log(" dead:"); + for (i = 0; life; ++i, life >>= 1) { + if (life & 1) { + qemu_log(" %d", i); + } + } } } qemu_log("\n"); @@ -1281,71 +1327,116 @@ void tcg_op_remove(TCGContext *s, TCGOp *op) int next = op->next; int prev = op->prev; - if (next >= 0) { - s->gen_op_buf[next].prev = prev; - } else { - s->gen_last_op_idx = prev; - } - if (prev >= 0) { - s->gen_op_buf[prev].next = next; - } else { - s->gen_first_op_idx = next; - } + /* We should never attempt to remove the list terminator. 
*/ + tcg_debug_assert(op != &s->gen_op_buf[0]); + + s->gen_op_buf[next].prev = prev; + s->gen_op_buf[prev].next = next; - memset(op, -1, sizeof(*op)); + memset(op, 0, sizeof(*op)); #ifdef CONFIG_PROFILER s->del_op_count++; #endif } -#ifdef USE_LIVENESS_ANALYSIS +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, + TCGOpcode opc, int nargs) +{ + int oi = s->gen_next_op_idx; + int pi = s->gen_next_parm_idx; + int prev = old_op->prev; + int next = old_op - s->gen_op_buf; + TCGOp *new_op; + + tcg_debug_assert(oi < OPC_BUF_SIZE); + tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); + s->gen_next_op_idx = oi + 1; + s->gen_next_parm_idx = pi + nargs; + + new_op = &s->gen_op_buf[oi]; + *new_op = (TCGOp){ + .opc = opc, + .args = pi, + .prev = prev, + .next = next + }; + s->gen_op_buf[prev].next = oi; + old_op->prev = oi; + + return new_op; +} + +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, + TCGOpcode opc, int nargs) +{ + int oi = s->gen_next_op_idx; + int pi = s->gen_next_parm_idx; + int prev = old_op - s->gen_op_buf; + int next = old_op->next; + TCGOp *new_op; + + tcg_debug_assert(oi < OPC_BUF_SIZE); + tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); + s->gen_next_op_idx = oi + 1; + s->gen_next_parm_idx = pi + nargs; + + new_op = &s->gen_op_buf[oi]; + *new_op = (TCGOp){ + .opc = opc, + .args = pi, + .prev = prev, + .next = next + }; + s->gen_op_buf[next].prev = oi; + old_op->next = oi; + + return new_op; +} + +#define TS_DEAD 1 +#define TS_MEM 2 + +#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) +#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) + /* liveness analysis: end of function: all temps are dead, and globals should be in memory. */ -static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps, - uint8_t *mem_temps) +static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state) { - memset(dead_temps, 1, s->nb_temps); - memset(mem_temps, 1, s->nb_globals); - memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals); + memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals); + memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals); } /* liveness analysis: end of basic block: all temps are dead, globals and local temps should be in memory. */ -static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, - uint8_t *mem_temps) +static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state) { - int i; + int i, n; - memset(dead_temps, 1, s->nb_temps); - memset(mem_temps, 1, s->nb_globals); - for(i = s->nb_globals; i < s->nb_temps; i++) { - mem_temps[i] = s->temps[i].temp_local; + tcg_la_func_end(s, temp_state); + for (i = s->nb_globals, n = s->nb_temps; i < n; i++) { + if (s->temps[i].temp_local) { + temp_state[i] |= TS_MEM; + } } } -/* Liveness analysis : update the opc_dead_args array to tell if a +/* Liveness analysis : update the opc_arg_life array to tell if a given input arguments is dead. Instructions updating dead temporaries are removed. 
*/ -static void tcg_liveness_analysis(TCGContext *s) +static void liveness_pass_1(TCGContext *s, uint8_t *temp_state) { - uint8_t *dead_temps, *mem_temps; - int oi, oi_prev, nb_ops; + int nb_globals = s->nb_globals; + int oi, oi_prev; - nb_ops = s->gen_next_op_idx; - s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t)); - s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t)); - - dead_temps = tcg_malloc(s->nb_temps); - mem_temps = tcg_malloc(s->nb_temps); - tcg_la_func_end(s, dead_temps, mem_temps); + tcg_la_func_end(s, temp_state); - for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) { + for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) { int i, nb_iargs, nb_oargs; TCGOpcode opc_new, opc_new2; bool have_opc_new2; - uint16_t dead_args; - uint8_t sync_args; + TCGLifeData arg_life = 0; TCGArg arg; TCGOp * const op = &s->gen_op_buf[oi]; @@ -1368,7 +1459,7 @@ static void tcg_liveness_analysis(TCGContext *s) if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { for (i = 0; i < nb_oargs; i++) { arg = args[i]; - if (!dead_temps[arg] || mem_temps[arg]) { + if (temp_state[arg] != TS_DEAD) { goto do_not_remove_call; } } @@ -1377,46 +1468,44 @@ static void tcg_liveness_analysis(TCGContext *s) do_not_remove_call: /* output args are dead */ - dead_args = 0; - sync_args = 0; for (i = 0; i < nb_oargs; i++) { arg = args[i]; - if (dead_temps[arg]) { - dead_args |= (1 << i); + if (temp_state[arg] & TS_DEAD) { + arg_life |= DEAD_ARG << i; } - if (mem_temps[arg]) { - sync_args |= (1 << i); + if (temp_state[arg] & TS_MEM) { + arg_life |= SYNC_ARG << i; } - dead_temps[arg] = 1; - mem_temps[arg] = 0; + temp_state[arg] = TS_DEAD; } - if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { - /* globals should be synced to memory */ - memset(mem_temps, 1, s->nb_globals); - } if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | TCG_CALL_NO_READ_GLOBALS))) { /* globals should go back to memory */ - memset(dead_temps, 1, s->nb_globals); + memset(temp_state, TS_DEAD | TS_MEM, nb_globals); + } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { + /* globals should be synced to memory */ + for (i = 0; i < nb_globals; i++) { + temp_state[i] |= TS_MEM; + } } /* record arguments that die in this helper */ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { arg = args[i]; if (arg != TCG_CALL_DUMMY_ARG) { - if (dead_temps[arg]) { - dead_args |= (1 << i); + if (temp_state[arg] & TS_DEAD) { + arg_life |= DEAD_ARG << i; } } } /* input arguments are live for preceding opcodes */ - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { arg = args[i]; - dead_temps[arg] = 0; + if (arg != TCG_CALL_DUMMY_ARG) { + temp_state[arg] &= ~TS_DEAD; + } } - s->op_dead_args[oi] = dead_args; - s->op_sync_args[oi] = sync_args; } } break; @@ -1424,8 +1513,7 @@ static void tcg_liveness_analysis(TCGContext *s) break; case INDEX_op_discard: /* mark the temporary as dead */ - dead_temps[args[0]] = 1; - mem_temps[args[0]] = 0; + temp_state[args[0]] = TS_DEAD; break; case INDEX_op_add2_i32: @@ -1446,8 +1534,8 @@ static void tcg_liveness_analysis(TCGContext *s) the low part. The result can be optimized to a simple add or sub. This happens often for x86_64 guest when the cpu mode is set to 32 bit. 
*/ - if (dead_temps[args[1]] && !mem_temps[args[1]]) { - if (dead_temps[args[0]] && !mem_temps[args[0]]) { + if (temp_state[args[1]] == TS_DEAD) { + if (temp_state[args[0]] == TS_DEAD) { goto do_remove; } /* Replace the opcode and adjust the args in place, @@ -1484,8 +1572,8 @@ static void tcg_liveness_analysis(TCGContext *s) do_mul2: nb_iargs = 2; nb_oargs = 2; - if (dead_temps[args[1]] && !mem_temps[args[1]]) { - if (dead_temps[args[0]] && !mem_temps[args[0]]) { + if (temp_state[args[1]] == TS_DEAD) { + if (temp_state[args[0]] == TS_DEAD) { /* Both parts of the operation are dead. */ goto do_remove; } @@ -1493,8 +1581,7 @@ static void tcg_liveness_analysis(TCGContext *s) op->opc = opc = opc_new; args[1] = args[2]; args[2] = args[3]; - } else if (have_opc_new2 && dead_temps[args[0]] - && !mem_temps[args[0]]) { + } else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) { /* The low part of the operation is dead; generate the high. */ op->opc = opc = opc_new2; args[0] = args[1]; @@ -1517,8 +1604,7 @@ static void tcg_liveness_analysis(TCGContext *s) implies side effects */ if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { for (i = 0; i < nb_oargs; i++) { - arg = args[i]; - if (!dead_temps[arg] || mem_temps[arg]) { + if (temp_state[args[i]] != TS_DEAD) { goto do_not_remove; } } @@ -1527,59 +1613,203 @@ static void tcg_liveness_analysis(TCGContext *s) } else { do_not_remove: /* output args are dead */ - dead_args = 0; - sync_args = 0; for (i = 0; i < nb_oargs; i++) { arg = args[i]; - if (dead_temps[arg]) { - dead_args |= (1 << i); + if (temp_state[arg] & TS_DEAD) { + arg_life |= DEAD_ARG << i; } - if (mem_temps[arg]) { - sync_args |= (1 << i); + if (temp_state[arg] & TS_MEM) { + arg_life |= SYNC_ARG << i; } - dead_temps[arg] = 1; - mem_temps[arg] = 0; + temp_state[arg] = TS_DEAD; } /* if end of basic block, update */ if (def->flags & TCG_OPF_BB_END) { - tcg_la_bb_end(s, dead_temps, mem_temps); + tcg_la_bb_end(s, temp_state); } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { /* globals should be synced to memory */ - memset(mem_temps, 1, s->nb_globals); + for (i = 0; i < nb_globals; i++) { + temp_state[i] |= TS_MEM; + } } /* record arguments that die in this opcode */ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { arg = args[i]; - if (dead_temps[arg]) { - dead_args |= (1 << i); + if (temp_state[arg] & TS_DEAD) { + arg_life |= DEAD_ARG << i; } } /* input arguments are live for preceding opcodes */ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { - arg = args[i]; - dead_temps[arg] = 0; + temp_state[args[i]] &= ~TS_DEAD; } - s->op_dead_args[oi] = dead_args; - s->op_sync_args[oi] = sync_args; } break; } + op->life = arg_life; } } -#else -/* dummy liveness analysis */ -static void tcg_liveness_analysis(TCGContext *s) + +/* Liveness analysis: Convert indirect regs to direct temporaries. */ +static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state) { - int nb_ops = s->gen_next_op_idx; + int nb_globals = s->nb_globals; + int16_t *dir_temps; + int i, oi, oi_next; + bool changes = false; + + dir_temps = tcg_malloc(nb_globals * sizeof(int16_t)); + memset(dir_temps, 0, nb_globals * sizeof(int16_t)); + + /* Create a temporary for each indirect global. 
*/ + for (i = 0; i < nb_globals; ++i) { + TCGTemp *its = &s->temps[i]; + if (its->indirect_reg) { + TCGTemp *dts = tcg_temp_alloc(s); + dts->type = its->type; + dts->base_type = its->base_type; + dir_temps[i] = temp_idx(s, dts); + } + } + + memset(temp_state, TS_DEAD, nb_globals); + + for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { + TCGOp *op = &s->gen_op_buf[oi]; + TCGArg *args = &s->gen_opparam_buf[op->args]; + TCGOpcode opc = op->opc; + const TCGOpDef *def = &tcg_op_defs[opc]; + TCGLifeData arg_life = op->life; + int nb_iargs, nb_oargs, call_flags; + TCGArg arg, dir; + + oi_next = op->next; + + if (opc == INDEX_op_call) { + nb_oargs = op->callo; + nb_iargs = op->calli; + call_flags = args[nb_oargs + nb_iargs + 1]; + } else { + nb_iargs = def->nb_iargs; + nb_oargs = def->nb_oargs; + + /* Set flags similar to how calls require. */ + if (def->flags & TCG_OPF_BB_END) { + /* Like writing globals: save_globals */ + call_flags = 0; + } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { + /* Like reading globals: sync_globals */ + call_flags = TCG_CALL_NO_WRITE_GLOBALS; + } else { + /* No effect on globals. */ + call_flags = (TCG_CALL_NO_READ_GLOBALS | + TCG_CALL_NO_WRITE_GLOBALS); + } + } + + /* Make sure that input arguments are available. */ + for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { + arg = args[i]; + /* Note this unsigned test catches TCG_CALL_ARG_DUMMY too. */ + if (arg < nb_globals) { + dir = dir_temps[arg]; + if (dir != 0 && temp_state[arg] == TS_DEAD) { + TCGTemp *its = &s->temps[arg]; + TCGOpcode lopc = (its->type == TCG_TYPE_I32 + ? INDEX_op_ld_i32 + : INDEX_op_ld_i64); + TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); + TCGArg *largs = &s->gen_opparam_buf[lop->args]; + + largs[0] = dir; + largs[1] = temp_idx(s, its->mem_base); + largs[2] = its->mem_offset; + + /* Loaded, but synced with memory. */ + temp_state[arg] = TS_MEM; + } + } + } + + /* Perform input replacement, and mark inputs that became dead. + No action is required except keeping temp_state up to date + so that we reload when needed. */ + for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { + arg = args[i]; + if (arg < nb_globals) { + dir = dir_temps[arg]; + if (dir != 0) { + args[i] = dir; + changes = true; + if (IS_DEAD_ARG(i)) { + temp_state[arg] = TS_DEAD; + } + } + } + } + + /* Liveness analysis should ensure that the following are + all correct, for call sites and basic block end points. */ + if (call_flags & TCG_CALL_NO_READ_GLOBALS) { + /* Nothing to do */ + } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { + for (i = 0; i < nb_globals; ++i) { + /* Liveness should see that globals are synced back, + that is, either TS_DEAD or TS_MEM. */ + tcg_debug_assert(dir_temps[i] == 0 + || temp_state[i] != 0); + } + } else { + for (i = 0; i < nb_globals; ++i) { + /* Liveness should see that globals are saved back, + that is, TS_DEAD, waiting to be reloaded. */ + tcg_debug_assert(dir_temps[i] == 0 + || temp_state[i] == TS_DEAD); + } + } + + /* Outputs become available. */ + for (i = 0; i < nb_oargs; i++) { + arg = args[i]; + if (arg >= nb_globals) { + continue; + } + dir = dir_temps[arg]; + if (dir == 0) { + continue; + } + args[i] = dir; + changes = true; + + /* The output is now live and modified. */ + temp_state[arg] = 0; + + /* Sync outputs upon their last write. */ + if (NEED_SYNC_ARG(i)) { + TCGTemp *its = &s->temps[arg]; + TCGOpcode sopc = (its->type == TCG_TYPE_I32 + ? 
INDEX_op_st_i32 + : INDEX_op_st_i64); + TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); + TCGArg *sargs = &s->gen_opparam_buf[sop->args]; + + sargs[0] = dir; + sargs[1] = temp_idx(s, its->mem_base); + sargs[2] = its->mem_offset; + + temp_state[arg] = TS_MEM; + } + /* Drop outputs that are dead. */ + if (IS_DEAD_ARG(i)) { + temp_state[arg] = TS_DEAD; + } + } + } - s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t)); - memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t)); - s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t)); - memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t)); + return changes; } -#endif #ifdef CONFIG_DEBUG_TCG static void dump_regs(TCGContext *s) @@ -1675,35 +1905,81 @@ static void temp_allocate_frame(TCGContext *s, int temp) static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet); -/* sync register 'reg' by saving it to the corresponding temporary */ -static void tcg_reg_sync(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) +/* Mark a temporary as free or dead. If 'free_or_dead' is negative, + mark it free; otherwise mark it dead. */ +static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) { - TCGTemp *ts = s->reg_to_temp[reg]; + if (ts->fixed_reg) { + return; + } + if (ts->val_type == TEMP_VAL_REG) { + s->reg_to_temp[ts->reg] = NULL; + } + ts->val_type = (free_or_dead < 0 + || ts->temp_local + || temp_idx(s, ts) < s->nb_globals + ? TEMP_VAL_MEM : TEMP_VAL_DEAD); +} - tcg_debug_assert(ts->val_type == TEMP_VAL_REG); - if (!ts->mem_coherent && !ts->fixed_reg) { +/* Mark a temporary as dead. */ +static inline void temp_dead(TCGContext *s, TCGTemp *ts) +{ + temp_free_or_dead(s, ts, 1); +} + +/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary + registers needs to be allocated to store a constant. If 'free_or_dead' + is non-zero, subsequently release the temporary; if it is positive, the + temp is dead; if it is negative, the temp is free. */ +static void temp_sync(TCGContext *s, TCGTemp *ts, + TCGRegSet allocated_regs, int free_or_dead) +{ + if (ts->fixed_reg) { + return; + } + if (!ts->mem_coherent) { if (!ts->mem_allocated) { temp_allocate_frame(s, temp_idx(s, ts)); - } else if (ts->indirect_reg) { - tcg_regset_set_reg(allocated_regs, ts->reg); - temp_load(s, ts->mem_base, - tcg_target_available_regs[TCG_TYPE_PTR], + } + switch (ts->val_type) { + case TEMP_VAL_CONST: + /* If we're going to free the temp immediately, then we won't + require it later in a register, so attempt to store the + constant to memory directly. 
*/ + if (free_or_dead + && tcg_out_sti(s, ts->type, ts->val, + ts->mem_base->reg, ts->mem_offset)) { + break; + } + temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs); + /* fallthrough */ + + case TEMP_VAL_REG: + tcg_out_st(s, ts->type, ts->reg, + ts->mem_base->reg, ts->mem_offset); + break; + + case TEMP_VAL_MEM: + break; + + case TEMP_VAL_DEAD: + default: + tcg_abort(); } - tcg_out_st(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); + ts->mem_coherent = 1; + } + if (free_or_dead) { + temp_free_or_dead(s, ts, free_or_dead); } - ts->mem_coherent = 1; } /* free register 'reg' by spilling the corresponding temporary if necessary */ static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) { TCGTemp *ts = s->reg_to_temp[reg]; - if (ts != NULL) { - tcg_reg_sync(s, reg, allocated_regs); - ts->val_type = TEMP_VAL_MEM; - s->reg_to_temp[reg] = NULL; + temp_sync(s, ts, allocated_regs, -1); } } @@ -1755,12 +2031,6 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, break; case TEMP_VAL_MEM: reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); - if (ts->indirect_reg) { - tcg_regset_set_reg(allocated_regs, reg); - temp_load(s, ts->mem_base, - tcg_target_available_regs[TCG_TYPE_PTR], - allocated_regs); - } tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); ts->mem_coherent = 1; break; @@ -1773,57 +2043,13 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, s->reg_to_temp[reg] = ts; } -/* mark a temporary as dead. */ -static inline void temp_dead(TCGContext *s, TCGTemp *ts) -{ - if (ts->fixed_reg) { - return; - } - if (ts->val_type == TEMP_VAL_REG) { - s->reg_to_temp[ts->reg] = NULL; - } - ts->val_type = (temp_idx(s, ts) < s->nb_globals || ts->temp_local - ? TEMP_VAL_MEM : TEMP_VAL_DEAD); -} - -/* sync a temporary to memory. 'allocated_regs' is used in case a - temporary registers needs to be allocated to store a constant. */ -static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) +/* Save a temporary to memory. 'allocated_regs' is used in case a + temporary registers needs to be allocated to store a constant. */ +static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) { - if (ts->fixed_reg) { - return; - } - switch (ts->val_type) { - case TEMP_VAL_CONST: - temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs); - /* fallthrough */ - case TEMP_VAL_REG: - tcg_reg_sync(s, ts->reg, allocated_regs); - break; - case TEMP_VAL_DEAD: - case TEMP_VAL_MEM: - break; - default: - tcg_abort(); - } -} - -/* save a temporary to memory. 'allocated_regs' is used in case a - temporary registers needs to be allocated to store a constant. */ -static inline void temp_save(TCGContext *s, TCGTemp *ts, - TCGRegSet allocated_regs) -{ -#ifdef USE_LIVENESS_ANALYSIS - /* ??? Liveness does not yet incorporate indirect bases. */ - if (!ts->indirect_base) { - /* The liveness analysis already ensures that globals are back - in memory. Keep an tcg_debug_assert for safety. */ - tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg); - return; - } -#endif - temp_sync(s, ts, allocated_regs); - temp_dead(s, ts); + /* The liveness analysis already ensures that globals are back + in memory. Keep an tcg_debug_assert for safety. 
*/ + tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg); } /* save globals to their canonical location and assume they can be @@ -1847,16 +2073,9 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) for (i = 0; i < s->nb_globals; i++) { TCGTemp *ts = &s->temps[i]; -#ifdef USE_LIVENESS_ANALYSIS - /* ??? Liveness does not yet incorporate indirect bases. */ - if (!ts->indirect_base) { - tcg_debug_assert(ts->val_type != TEMP_VAL_REG - || ts->fixed_reg - || ts->mem_coherent); - continue; - } -#endif - temp_sync(s, ts, allocated_regs); + tcg_debug_assert(ts->val_type != TEMP_VAL_REG + || ts->fixed_reg + || ts->mem_coherent); } } @@ -1871,27 +2090,17 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) if (ts->temp_local) { temp_save(s, ts, allocated_regs); } else { -#ifdef USE_LIVENESS_ANALYSIS - /* ??? Liveness does not yet incorporate indirect bases. */ - if (!ts->indirect_base) { - /* The liveness analysis already ensures that temps are dead. - Keep an tcg_debug_assert for safety. */ - tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); - continue; - } -#endif - temp_dead(s, ts); + /* The liveness analysis already ensures that temps are dead. + Keep an tcg_debug_assert for safety. */ + tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); } } save_globals(s, allocated_regs); } -#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1) -#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1) - static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args, - uint16_t dead_args, uint8_t sync_args) + TCGLifeData arg_life) { TCGTemp *ots; tcg_target_ulong val; @@ -1900,28 +2109,27 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args, val = args[1]; if (ots->fixed_reg) { - /* for fixed registers, we do not do any constant - propagation */ + /* For fixed registers, we do not do any constant propagation. */ tcg_out_movi(s, ots->type, ots->reg, val); - } else { - /* The movi is not explicitly generated here */ - if (ots->val_type == TEMP_VAL_REG) { - s->reg_to_temp[ots->reg] = NULL; - } - ots->val_type = TEMP_VAL_CONST; - ots->val = val; + return; } - if (NEED_SYNC_ARG(0)) { - temp_sync(s, ots, s->reserved_regs); + + /* The movi is not explicitly generated here. 
*/ + if (ots->val_type == TEMP_VAL_REG) { + s->reg_to_temp[ots->reg] = NULL; } - if (IS_DEAD_ARG(0)) { + ots->val_type = TEMP_VAL_CONST; + ots->val = val; + ots->mem_coherent = 0; + if (NEED_SYNC_ARG(0)) { + temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0)); + } else if (IS_DEAD_ARG(0)) { temp_dead(s, ots); } } static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, - const TCGArg *args, uint16_t dead_args, - uint8_t sync_args) + const TCGArg *args, TCGLifeData arg_life) { TCGRegSet allocated_regs; TCGTemp *ts, *ots; @@ -1953,12 +2161,6 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, if (!ots->mem_allocated) { temp_allocate_frame(s, args[0]); } - if (ots->indirect_reg) { - tcg_regset_set_reg(allocated_regs, ts->reg); - temp_load(s, ots->mem_base, - tcg_target_available_regs[TCG_TYPE_PTR], - allocated_regs); - } tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); if (IS_DEAD_ARG(1)) { temp_dead(s, ts); @@ -1999,15 +2201,14 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, ots->mem_coherent = 0; s->reg_to_temp[ots->reg] = ots; if (NEED_SYNC_ARG(0)) { - tcg_reg_sync(s, ots->reg, allocated_regs); + temp_sync(s, ots, allocated_regs, 0); } } } static void tcg_reg_alloc_op(TCGContext *s, const TCGOpDef *def, TCGOpcode opc, - const TCGArg *args, uint16_t dead_args, - uint8_t sync_args) + const TCGArg *args, TCGLifeData arg_life) { TCGRegSet allocated_regs; int i, k, nb_iargs, nb_oargs; @@ -2158,9 +2359,8 @@ static void tcg_reg_alloc_op(TCGContext *s, tcg_out_mov(s, ts->type, ts->reg, reg); } if (NEED_SYNC_ARG(i)) { - tcg_reg_sync(s, reg, allocated_regs); - } - if (IS_DEAD_ARG(i)) { + temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i)); + } else if (IS_DEAD_ARG(i)) { temp_dead(s, ts); } } @@ -2173,8 +2373,7 @@ static void tcg_reg_alloc_op(TCGContext *s, #endif static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs, - const TCGArg * const args, uint16_t dead_args, - uint8_t sync_args) + const TCGArg * const args, TCGLifeData arg_life) { int flags, nb_regs, i; TCGReg reg; @@ -2293,9 +2492,8 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs, ts->mem_coherent = 0; s->reg_to_temp[reg] = ts; if (NEED_SYNC_ARG(i)) { - tcg_reg_sync(s, reg, allocated_regs); - } - if (IS_DEAD_ARG(i)) { + temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i)); + } else if (IS_DEAD_ARG(i)) { temp_dead(s, ts); } } @@ -2331,7 +2529,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) { int n; - n = s->gen_last_op_idx + 1; + n = s->gen_op_buf[0].prev + 1; s->op_count += n; if (n > s->op_count_max) { s->op_count_max = n; @@ -2367,7 +2565,27 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) s->la_time -= profile_getclock(); #endif - tcg_liveness_analysis(s); + { + uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects); + + liveness_pass_1(s, temp_state); + + if (s->nb_indirects > 0) { +#ifdef DEBUG_DISAS + if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) + && qemu_log_in_addr_range(tb->pc))) { + qemu_log("OP before indirect lowering:\n"); + tcg_dump_ops(s); + qemu_log("\n"); + } +#endif + /* Replace indirect temps with direct temps. */ + if (liveness_pass_2(s, temp_state)) { + /* If changes were made, re-run liveness. 
*/ + liveness_pass_1(s, temp_state); + } + } + } #ifdef CONFIG_PROFILER s->la_time += profile_getclock(); @@ -2390,13 +2608,12 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) tcg_out_tb_init(s); num_insns = -1; - for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) { + for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { TCGOp * const op = &s->gen_op_buf[oi]; TCGArg * const args = &s->gen_opparam_buf[op->args]; TCGOpcode opc = op->opc; const TCGOpDef *def = &tcg_op_defs[opc]; - uint16_t dead_args = s->op_dead_args[oi]; - uint8_t sync_args = s->op_sync_args[oi]; + TCGLifeData arg_life = op->life; oi_next = op->next; #ifdef CONFIG_PROFILER @@ -2406,11 +2623,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) switch (opc) { case INDEX_op_mov_i32: case INDEX_op_mov_i64: - tcg_reg_alloc_mov(s, def, args, dead_args, sync_args); + tcg_reg_alloc_mov(s, def, args, arg_life); break; case INDEX_op_movi_i32: case INDEX_op_movi_i64: - tcg_reg_alloc_movi(s, args, dead_args, sync_args); + tcg_reg_alloc_movi(s, args, arg_life); break; case INDEX_op_insn_start: if (num_insns >= 0) { @@ -2435,8 +2652,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) tcg_out_label(s, arg_label(args[0]), s->code_ptr); break; case INDEX_op_call: - tcg_reg_alloc_call(s, op->callo, op->calli, args, - dead_args, sync_args); + tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life); break; default: /* Sanity check that we've not introduced any unhandled opcodes. */ @@ -2446,7 +2662,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) /* Note: in order to speed up the code, it would be much faster to have specialized register allocator functions for some common argument patterns */ - tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args); + tcg_reg_alloc_op(s, def, opc, args, arg_life); break; } #ifdef CONFIG_DEBUG_TCG @@ -26,9 +26,32 @@ #define TCG_H #include "qemu-common.h" +#include "cpu.h" +#include "exec/tb-context.h" #include "qemu/bitops.h" #include "tcg-target.h" +/* XXX: make safe guess about sizes */ +#define MAX_OP_PER_INSTR 266 + +#if HOST_LONG_BITS == 32 +#define MAX_OPC_PARAM_PER_ARG 2 +#else +#define MAX_OPC_PARAM_PER_ARG 1 +#endif +#define MAX_OPC_PARAM_IARGS 5 +#define MAX_OPC_PARAM_OARGS 1 +#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS) + +/* A Call op needs up to 4 + 2N parameters on 32-bit archs, + * and up to 4 + N parameters on 64-bit archs + * (N = number of input arguments + output arguments). */ +#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS)) +#define OPC_BUF_SIZE 640 +#define OPC_MAX_SIZE (OPC_BUF_SIZE - MAX_OP_PER_INSTR) + +#define OPPARAM_BUF_SIZE (OPC_BUF_SIZE * MAX_OPC_PARAM) + #define CPU_TEMP_BUF_NLONGS 128 /* Default target word size to pointer size. */ @@ -168,6 +191,15 @@ typedef uint64_t tcg_insn_unit; #endif +#if defined CONFIG_DEBUG_TCG || defined QEMU_STATIC_ANALYSIS +# define tcg_debug_assert(X) do { assert(X); } while (0) +#elif QEMU_GNUC_PREREQ(4, 5) +# define tcg_debug_assert(X) \ + do { if (!(X)) { __builtin_unreachable(); } } while (0) +#else +# define tcg_debug_assert(X) do { (void)(X); } while (0) +#endif + typedef struct TCGRelocation { struct TCGRelocation *next; int type; @@ -252,10 +284,26 @@ typedef enum TCGMemOp { #endif /* MO_UNALN accesses are never checked for alignment. - MO_ALIGN accesses will result in a call to the CPU's - do_unaligned_access hook if the guest address is not aligned. - The default depends on whether the target CPU defines ALIGNED_ONLY. 
*/ - MO_AMASK = 16, + * MO_ALIGN accesses will result in a call to the CPU's + * do_unaligned_access hook if the guest address is not aligned. + * The default depends on whether the target CPU defines ALIGNED_ONLY. + * Some architectures (e.g. ARMv8) need the address which is aligned + * to a size more than the size of the memory access. + * To support such check it's enough the current costless alignment + * check implementation in QEMU, but we need to support + * an alignment size specifying. + * MO_ALIGN supposes a natural alignment + * (i.e. the alignment size is the size of a memory access). + * Note that an alignment size must be equal or greater + * than an access size. + * There are three options: + * - an alignment to the size of an access (MO_ALIGN); + * - an alignment to the specified size that is equal or greater than + * an access size (MO_ALIGN_x where 'x' is a size in bytes); + * - unaligned access permitted (MO_UNALN). + */ + MO_ASHIFT = 4, + MO_AMASK = 7 << MO_ASHIFT, #ifdef ALIGNED_ONLY MO_ALIGN = 0, MO_UNALN = MO_AMASK, @@ -263,6 +311,12 @@ typedef enum TCGMemOp { MO_ALIGN = MO_AMASK, MO_UNALN = 0, #endif + MO_ALIGN_2 = 1 << MO_ASHIFT, + MO_ALIGN_4 = 2 << MO_ASHIFT, + MO_ALIGN_8 = 3 << MO_ASHIFT, + MO_ALIGN_16 = 4 << MO_ASHIFT, + MO_ALIGN_32 = 5 << MO_ASHIFT, + MO_ALIGN_64 = 6 << MO_ASHIFT, /* Combinations of the above, for ease of use. */ MO_UB = MO_8, @@ -294,6 +348,45 @@ typedef enum TCGMemOp { MO_SSIZE = MO_SIZE | MO_SIGN, } TCGMemOp; +/** + * get_alignment_bits + * @memop: TCGMemOp value + * + * Extract the alignment size from the memop. + * + * Returns: 0 in case of byte access (which is always aligned); + * positive value - number of alignment bits; + * negative value if unaligned access enabled + * and this is not a byte access. + */ +static inline int get_alignment_bits(TCGMemOp memop) +{ + int a = memop & MO_AMASK; + int s = memop & MO_SIZE; + int r; + + if (a == MO_UNALN) { + /* Negative value if unaligned access enabled, + * or zero value in case of byte access. + */ + return -s; + } else if (a == MO_ALIGN) { + /* A natural alignment: return a number of access size bits */ + r = s; + } else { + /* Specific alignment size. It must be equal or greater + * than the access size. + */ + r = a >> MO_ASHIFT; + tcg_debug_assert(r >= s); + } +#if defined(CONFIG_SOFTMMU) + /* The requested alignment cannot overlap the TLB flags. */ + tcg_debug_assert((TLB_FLAGS_MASK & ((1 << r) - 1)) == 0); +#endif + return r; +} + typedef tcg_target_ulong TCGArg; /* Define a type and accessor macros for variables. Using pointer types @@ -482,24 +575,41 @@ typedef struct TCGTempSet { unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)]; } TCGTempSet; +/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding, + this imples a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands. + There are never more than 2 outputs, which means that we can store all + dead + sync data within 16 bits. */ +#define DEAD_ARG 4 +#define SYNC_ARG 1 +typedef uint16_t TCGLifeData; + +/* The layout here is designed to avoid crossing of a 32-bit boundary. + If we do so, gcc adds padding, expanding the size to 12. */ typedef struct TCGOp { - TCGOpcode opc : 8; + TCGOpcode opc : 8; /* 8 */ + + /* Index of the prev/next op, or 0 for the end of the list. */ + unsigned prev : 10; /* 18 */ + unsigned next : 10; /* 28 */ /* The number of out and in parameter for a call. 
*/ - unsigned callo : 2; - unsigned calli : 6; + unsigned calli : 4; /* 32 */ + unsigned callo : 2; /* 34 */ - /* Index of the arguments for this op, or -1 for zero-operand ops. */ - signed args : 16; + /* Index of the arguments for this op, or 0 for zero-operand ops. */ + unsigned args : 14; /* 48 */ - /* Index of the prex/next op, or -1 for the end of the list. */ - signed prev : 16; - signed next : 16; + /* Lifetime data of the operands. */ + unsigned life : 16; /* 64 */ } TCGOp; -QEMU_BUILD_BUG_ON(NB_OPS > 0xff); -QEMU_BUILD_BUG_ON(OPC_BUF_SIZE >= 0x7fff); -QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE >= 0x7fff); +/* Make sure operands fit in the bitfields above. */ +QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8)); +QEMU_BUILD_BUG_ON(OPC_BUF_SIZE > (1 << 10)); +QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE > (1 << 14)); + +/* Make sure that we don't overflow 64 bits without noticing. */ +QEMU_BUILD_BUG_ON(sizeof(TCGOp) > 8); struct TCGContext { uint8_t *pool_cur, *pool_end; @@ -507,20 +617,14 @@ struct TCGContext { int nb_labels; int nb_globals; int nb_temps; + int nb_indirects; /* goto_tb support */ tcg_insn_unit *code_buf; - uintptr_t *tb_next; - uint16_t *tb_next_offset; - uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */ - - /* liveness analysis */ - uint16_t *op_dead_args; /* for each operation, each bit tells if the - corresponding argument is dead */ - uint8_t *op_sync_args; /* for each operation, each bit tells if the - corresponding output argument needs to be - sync to memory. */ - + uint16_t *tb_jmp_reset_offset; /* tb->jmp_reset_offset */ + uint16_t *tb_jmp_insn_offset; /* tb->jmp_insn_offset if USE_DIRECT_JUMP */ + uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_addr if !USE_DIRECT_JUMP */ + TCGRegSet reserved_regs; intptr_t current_frame_offset; intptr_t frame_start; @@ -556,8 +660,6 @@ struct TCGContext { int goto_tb_issue_mask; #endif - int gen_first_op_idx; - int gen_last_op_idx; int gen_next_op_idx; int gen_next_parm_idx; @@ -576,6 +678,10 @@ struct TCGContext { TBContext tb_ctx; + /* Track which vCPU triggers events */ + CPUState *cpu; /* *_trans */ + TCGv_env tcg_env; /* *_exec */ + /* The TCGBackendData structure is private to tcg-target.inc.c. */ struct TCGBackendData *be; @@ -595,6 +701,12 @@ struct TCGContext { extern TCGContext tcg_ctx; +static inline void tcg_set_insn_param(int op_idx, int arg, TCGArg v) +{ + int op_argi = tcg_ctx.gen_op_buf[op_idx].args; + tcg_ctx.gen_opparam_buf[op_argi + arg] = v; +} + /* The number of opcodes emitted so far. 
*/ static inline int tcg_op_buf_count(void) { @@ -757,15 +869,6 @@ do {\ abort();\ } while (0) -#ifdef CONFIG_DEBUG_TCG -# define tcg_debug_assert(X) do { assert(X); } while (0) -#elif QEMU_GNUC_PREREQ(4, 5) -# define tcg_debug_assert(X) \ - do { if (!(X)) { __builtin_unreachable(); } } while (0) -#else -# define tcg_debug_assert(X) do { (void)(X); } while (0) -#endif - void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs); #if UINTPTR_MAX == UINT32_MAX @@ -796,6 +899,9 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, int nargs, TCGArg *args); void tcg_op_remove(TCGContext *s, TCGOp *op); +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg); +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg); + void tcg_optimize(TCGContext *s); /* only used for debugging purposes */ @@ -919,7 +1025,7 @@ static inline unsigned get_mmuidx(TCGMemOpIdx oi) /** * tcg_qemu_tb_exec: - * @env: CPUArchState * for the CPU + * @env: pointer to CPUArchState for the CPU * @tb_ptr: address of generated code for the TB to execute * * Start executing code from a given translation block. @@ -930,30 +1036,31 @@ static inline unsigned get_mmuidx(TCGMemOpIdx oi) * which has not yet been directly linked, or an asynchronous * event such as an interrupt needs handling. * - * The return value is a pointer to the next TB to execute - * (if known; otherwise zero). This pointer is assumed to be - * 4-aligned, and the bottom two bits are used to return further - * information: + * Return: The return value is the value passed to the corresponding + * tcg_gen_exit_tb() at translation time of the last TB attempted to execute. + * The value is either zero or a 4-byte aligned pointer to that TB combined + * with additional information in its two least significant bits. The + * additional information is encoded as follows: * 0, 1: the link between this TB and the next is via the specified * TB index (0 or 1). That is, we left the TB via (the equivalent * of) "goto_tb <index>". The main loop uses this to determine * how to link the TB just executed to the next. * 2: we are using instruction counting code generation, and we * did not start executing this TB because the instruction counter - * would hit zero midway through it. In this case the next-TB pointer + * would hit zero midway through it. In this case the pointer * returned is the TB we were about to execute, and the caller must * arrange to execute the remaining count of instructions. * 3: we stopped because the CPU's exit_request flag was set * (usually meaning that there is an interrupt that needs to be - * handled). The next-TB pointer returned is the TB we were - * about to execute when we noticed the pending exit request. + * handled). The pointer returned is the TB we were about to execute + * when we noticed the pending exit request. * * If the bottom two bits indicate an exit-via-index then the CPU * state is correctly synchronised and ready for execution of the next * TB (and in particular the guest PC is the address to execute next). * Otherwise, we gave up on execution of this TB before it started, and * the caller must fix up the CPU state by calling the CPU's - * synchronize_from_tb() method with the next-TB pointer we return (falling + * synchronize_from_tb() method with the TB pointer we return (falling * back to calling the CPU's set_pc method with tb->pb if no * synchronize_from_tb() method exists). 
* diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 3942f9cccf..868228b2e7 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -37,10 +37,9 @@ * Therefore, we need both 32 and 64 bit virtual machines (interpreter). */ -#if !defined(TCG_TARGET_H) +#ifndef TCG_TARGET_H #define TCG_TARGET_H - #define TCG_TARGET_INTERPRETER 1 #define TCG_TARGET_INSN_UNIT_SIZE 1 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 diff --git a/tcg/tci/tcg-target.inc.c b/tcg/tci/tcg-target.inc.c index e2fc52a167..3c47ea7a9e 100644 --- a/tcg/tci/tcg-target.inc.c +++ b/tcg/tci/tcg-target.inc.c @@ -553,17 +553,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out64(s, args[0]); break; case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { + if (s->tb_jmp_insn_offset) { /* Direct jump method. */ - tcg_debug_assert(args[0] < ARRAY_SIZE(s->tb_jmp_offset)); - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + tcg_debug_assert(args[0] < ARRAY_SIZE(s->tb_jmp_insn_offset)); + /* Align for atomic patching and thread safety */ + s->code_ptr = QEMU_ALIGN_PTR_UP(s->code_ptr, 4); + s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); tcg_out32(s, 0); } else { /* Indirect jump method. */ TODO(); } - tcg_debug_assert(args[0] < ARRAY_SIZE(s->tb_next_offset)); - s->tb_next_offset[args[0]] = tcg_current_code_size(s); + tcg_debug_assert(args[0] < ARRAY_SIZE(s->tb_jmp_reset_offset)); + s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); break; case INDEX_op_br: tci_out_label(s, arg_label(args[0])); @@ -832,6 +834,12 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, old_code_ptr[1] = s->code_ptr - old_code_ptr; } +static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + return false; +} + /* Test if a constant matches the constraint. */ static int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct) |
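As a standalone illustration (not part of this patch), the sketch below copies the MO_ASHIFT/MO_AMASK constants from the tcg.h hunk above, for the usual case where ALIGNED_ONLY is not defined (so MO_ALIGN == MO_AMASK and MO_UNALN == 0), and reimplements the decode rules of get_alignment_bits() so the three encodings — natural alignment, explicit MO_ALIGN_x alignment, and permitted unaligned access — can be exercised in isolation.

#include <assert.h>
#include <stdio.h>

enum {
    MO_8 = 0, MO_16 = 1, MO_32 = 2, MO_64 = 3,
    MO_SIZE   = 3,
    MO_ASHIFT = 4,
    MO_AMASK  = 7 << MO_ASHIFT,
    /* Values for the ALIGNED_ONLY-not-defined case: */
    MO_ALIGN  = MO_AMASK,
    MO_UNALN  = 0,
    MO_ALIGN_2  = 1 << MO_ASHIFT,
    MO_ALIGN_4  = 2 << MO_ASHIFT,
    MO_ALIGN_8  = 3 << MO_ASHIFT,
    MO_ALIGN_16 = 4 << MO_ASHIFT,
};

/* Same decode rules as get_alignment_bits() in the hunk above. */
static int alignment_bits(int memop)
{
    int a = memop & MO_AMASK;
    int s = memop & MO_SIZE;

    if (a == MO_UNALN) {
        return -s;          /* unaligned access allowed; 0 for a byte access */
    } else if (a == MO_ALIGN) {
        return s;           /* natural alignment: align to the access size */
    }
    a >>= MO_ASHIFT;
    assert(a >= s);         /* explicit alignment must cover the access size */
    return a;
}

int main(void)
{
    printf("%d\n", alignment_bits(MO_32 | MO_ALIGN));   /* 2: 32-bit access, 4-byte aligned */
    printf("%d\n", alignment_bits(MO_16 | MO_ALIGN_8)); /* 3: 16-bit access aligned to 8 bytes */
    printf("%d\n", alignment_bits(MO_64 | MO_UNALN));   /* -3: unaligned 64-bit access permitted */
    return 0;
}

The real helper additionally asserts, on CONFIG_SOFTMMU builds, that the decoded alignment mask does not overlap TLB_FLAGS_MASK; that check is omitted here because it depends on target headers.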