author     Don Dugger <n0ano@n0ano.com>                2016-06-03 03:33:22 +0000
committer  Gerrit Code Review <gerrit@172.30.200.206>  2016-06-03 03:33:23 +0000
commit     da27230f80795d0028333713f036d44c53cb0e68 (patch)
tree       b3d379eaf000adf72b36cb01cdf4d79c3e3f064c /qemu/tcg
parent     0e68cb048bb8aadb14675f5d4286d8ab2fc35449 (diff)
parent     437fd90c0250dee670290f9b714253671a990160 (diff)

Merge "These changes are the raw update to qemu-2.6."
Diffstat (limited to 'qemu/tcg')
-rw-r--r--  qemu/tcg/README                                                                   37
-rw-r--r--  qemu/tcg/aarch64/tcg-target.h                                                      3
-rw-r--r--  qemu/tcg/aarch64/tcg-target.inc.c (renamed from qemu/tcg/aarch64/tcg-target.c)   108
-rw-r--r--  qemu/tcg/arm/tcg-target.inc.c (renamed from qemu/tcg/arm/tcg-target.c)            22
-rw-r--r--  qemu/tcg/i386/tcg-target.h                                                         3
-rw-r--r--  qemu/tcg/i386/tcg-target.inc.c (renamed from qemu/tcg/i386/tcg-target.c)          74
-rw-r--r--  qemu/tcg/ia64/tcg-target.h                                                         3
-rw-r--r--  qemu/tcg/ia64/tcg-target.inc.c (renamed from qemu/tcg/ia64/tcg-target.c)          50
-rw-r--r--  qemu/tcg/mips/tcg-target.h                                                        11
-rw-r--r--  qemu/tcg/mips/tcg-target.inc.c (renamed from qemu/tcg/mips/tcg-target.c)         273
-rw-r--r--  qemu/tcg/optimize.c                                                              261
-rw-r--r--  qemu/tcg/ppc/tcg-target.h                                                          3
-rw-r--r--  qemu/tcg/ppc/tcg-target.inc.c (renamed from qemu/tcg/ppc/tcg-target.c)           171
-rw-r--r--  qemu/tcg/s390/tcg-target.h                                                         3
-rw-r--r--  qemu/tcg/s390/tcg-target.inc.c (renamed from qemu/tcg/s390/tcg-target.c)          58
-rw-r--r--  qemu/tcg/sparc/tcg-target.h                                                        3
-rw-r--r--  qemu/tcg/sparc/tcg-target.inc.c (renamed from qemu/tcg/sparc/tcg-target.c)        46
-rw-r--r--  qemu/tcg/tcg-be-ldst.h                                                            11
-rw-r--r--  qemu/tcg/tcg-be-null.h                                                             3
-rw-r--r--  qemu/tcg/tcg-common.c                                                             38
-rw-r--r--  qemu/tcg/tcg-op.c                                                                 49
-rw-r--r--  qemu/tcg/tcg-op.h                                                                 66
-rw-r--r--  qemu/tcg/tcg-opc.h                                                                23
-rw-r--r--  qemu/tcg/tcg.c                                                                   891
-rw-r--r--  qemu/tcg/tcg.h                                                                   113
-rw-r--r--  qemu/tcg/tci/README                                                                4
-rw-r--r--  qemu/tcg/tci/tcg-target.h                                                          4
-rw-r--r--  qemu/tcg/tci/tcg-target.inc.c (renamed from qemu/tcg/tci/tcg-target.c)            48

28 files changed, 1382 insertions(+), 997 deletions(-)
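The recurring change across the backend files below is the replacement of the old trunc_shr_i32 opcode by an explicit pair of size-changing ops, extrl_i64_i32 and extrh_i64_i32. As a rough sketch of the semantics each backend now has to provide (plain C for illustration only; these helper names are invented here and are not part of the patch):

#include <stdint.h>

/* extrl_i64_i32: take the low 32 bits of a 64-bit value; on most hosts
   this is a simple move, possibly with re-canonicalization. */
static inline uint32_t extrl_i64_i32(uint64_t t1)
{
    return (uint32_t)t1;
}

/* extrh_i64_i32: take the high 32 bits; typically a 32-place shift. */
static inline uint32_t extrh_i64_i32(uint64_t t1)
{
    return (uint32_t)(t1 >> 32);
}

Together with ext_i32_i64 (sign extend) and extu_i32_i64 (zero extend), these make every 32<->64 bit register transfer explicit in the opcode stream.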
diff --git a/qemu/tcg/README b/qemu/tcg/README
index a550ff176..f4a8ac170 100644
--- a/qemu/tcg/README
+++ b/qemu/tcg/README
@@ -314,11 +314,17 @@ This operation would be equivalent to
 
   dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00)
 
-* trunc_shr_i32 t0, t1, pos
+* extrl_i64_i32 t0, t1
 
-For 64-bit hosts only, right shift the 64-bit input T1 by POS and
-truncate to 32-bit output T0. Depending on the host, this may be
-a simple mov/shift, or may require additional canonicalization.
+For 64-bit hosts only, extract the low 32-bits of input T1 and place it
+into 32-bit output T0. Depending on the host, this may be a simple move,
+or may require additional canonicalization.
+
+* extrh_i64_i32 t0, t1
+
+For 64-bit hosts only, extract the high 32-bits of input T1 and place it
+into 32-bit output T0. Depending on the host, this may be a simple shift,
+or may require additional canonicalization.
 
 *********
 Conditional moves
@@ -454,8 +460,9 @@ function tcg_gen_xxx(args).
 
 4) Backend
 
-tcg-target.h contains the target specific definitions. tcg-target.c
-contains the target specific code.
+tcg-target.h contains the target specific definitions. tcg-target.inc.c
+contains the target specific code; it is #included by tcg/tcg.c, rather
+than being a standalone C file.
 
 4.1) Assumptions
 
@@ -466,13 +473,25 @@ On a 32 bit target, all 64 bit operations are converted to 32 bits. A
 few specific operations must be implemented to allow it (see
 add2_i32, sub2_i32, brcond2_i32).
 
+On a 64 bit target, the values are transferred between 32 and 64-bit
+registers using the following ops:
+- extrl_i64_i32
+- extrh_i64_i32
+- ext_i32_i64
+- extu_i32_i64
+
+They ensure that the values are correctly truncated or extended when
+moved from a 32-bit to a 64-bit register or vice-versa. Note that the
+extrl_i64_i32 and extrh_i64_i32 are optional ops. It is not necessary
+to implement them if all the following conditions are met:
+- 64-bit registers can hold 32-bit values
+- 32-bit values in a 64-bit register do not need to stay zero or
+  sign extended
+- all 32-bit TCG ops ignore the high part of 64-bit registers
+
 Floating point operations are not supported in this version. A
 previous incarnation of the code generator had full support of them,
 but it is better to concentrate on integer operations first.
 
-On a 64 bit target, no assumption is made in TCG about the storage of
-the 32 bit values in 64 bit registers.
-
 4.2) Constraints
 
 GCC like constraints are used to define the constraints of every
diff --git a/qemu/tcg/aarch64/tcg-target.h b/qemu/tcg/aarch64/tcg-target.h
index 8aec04d2b..19a04a6e7 100644
--- a/qemu/tcg/aarch64/tcg-target.h
+++ b/qemu/tcg/aarch64/tcg-target.h
@@ -70,7 +70,8 @@ typedef enum {
 #define TCG_TARGET_HAS_muls2_i32        0
 #define TCG_TARGET_HAS_muluh_i32        0
 #define TCG_TARGET_HAS_mulsh_i32        0
-#define TCG_TARGET_HAS_trunc_shr_i32    0
+#define TCG_TARGET_HAS_extrl_i64_i32    0
+#define TCG_TARGET_HAS_extrh_i64_i32    0
 
 #define TCG_TARGET_HAS_div_i64          1
 #define TCG_TARGET_HAS_rem_i64          1
diff --git a/qemu/tcg/aarch64/tcg-target.c b/qemu/tcg/aarch64/tcg-target.inc.c
index b7ec4f5ac..a8fb4420d 100644
--- a/qemu/tcg/aarch64/tcg-target.c
+++ b/qemu/tcg/aarch64/tcg-target.inc.c
@@ -18,19 +18,19 @@
    makes things much cleaner. */
 
 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
 
-#ifndef NDEBUG
+#ifdef CONFIG_DEBUG_TCG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
     "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
     "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
     "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
 };
-#endif /* NDEBUG */
+#endif /* CONFIG_DEBUG_TCG */
 
 static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
     TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
-    TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
+    TCG_REG_X28, /* we will reserve this for guest_base if configured */
 
     TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
     TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
@@ -56,17 +56,18 @@ static const int tcg_target_call_oarg_regs[1] = {
 #define TCG_REG_TMP TCG_REG_X30
 
 #ifndef CONFIG_SOFTMMU
-# ifdef CONFIG_USE_GUEST_BASE
-# define TCG_REG_GUEST_BASE TCG_REG_X28
-# else
-# define TCG_REG_GUEST_BASE TCG_REG_XZR
-# endif
+/* Note that XZR cannot be encoded in the address base register slot,
+   as that actually encodes SP.  So if we need to zero-extend the guest
+   address, via the address index register slot, we need to load even
+   a zero guest base into a register. */
+#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
+#define TCG_REG_GUEST_BASE TCG_REG_X28
 #endif
 
 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
 {
     ptrdiff_t offset = target - code_ptr;
-    assert(offset == sextract64(offset, 0, 26));
+    tcg_debug_assert(offset == sextract64(offset, 0, 26));
     /* read instruction, mask away previous PC_REL26 parameter contents,
        set the proper offset, then write back the instruction. */
     *code_ptr = deposit32(*code_ptr, 0, 26, offset);
 }
 
 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
 {
     ptrdiff_t offset = target - code_ptr;
-    assert(offset == sextract64(offset, 0, 19));
+    tcg_debug_assert(offset == sextract64(offset, 0, 19));
     *code_ptr = deposit32(*code_ptr, 5, 19, offset);
 }
 
 static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
                                intptr_t value, intptr_t addend)
 {
-    assert(addend == 0);
+    tcg_debug_assert(addend == 0);
     switch (type) {
     case R_AARCH64_JUMP26:
     case R_AARCH64_CALL26:
@@ -401,7 +402,7 @@ static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
     insn |= pre << 24;
     insn |= w << 23;
 
-    assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
+    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
     insn |= (ofs & (0x7f << 3)) << (15 - 3);
 
     tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
@@ -411,9 +412,9 @@ static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                               TCGReg rd, TCGReg rn, uint64_t aimm)
 {
     if (aimm > 0xfff) {
-        assert((aimm & 0xfff) == 0);
+        tcg_debug_assert((aimm & 0xfff) == 0);
         aimm >>= 12;
-        assert(aimm <= 0xfff);
+        tcg_debug_assert(aimm <= 0xfff);
         aimm |= 1 << 12;  /* apply LSL 12 */
     }
     tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
@@ -443,7 +444,7 @@ static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                               TCGReg rd, uint16_t half, unsigned shift)
 {
-    assert((shift & ~0x30) == 0);
+    tcg_debug_assert((shift & ~0x30) == 0);
     tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
 }
 
@@ -537,7 +538,7 @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
 {
     unsigned h, l, r, c;
 
-    assert(is_limm(limm));
+    tcg_debug_assert(is_limm(limm));
 
     h = clz64(limm);
     l = ctz64(limm);
@@ -792,7 +793,7 @@ static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
 {
     ptrdiff_t offset = target - s->code_ptr;
-    assert(offset == sextract64(offset, 0, 26));
+    tcg_debug_assert(offset == sextract64(offset, 0, 26));
     tcg_out_insn(s, 3206, B, offset);
 }
 
@@ -866,7 +867,7 @@ static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
         offset = tcg_in32(s) >> 5;
     } else {
         offset = l->u.value_ptr - s->code_ptr;
-        assert(offset == sextract64(offset, 0, 19));
+        tcg_debug_assert(offset == sextract64(offset, 0, 19));
     }
 
     if (need_cmp) {
@@ -989,7 +990,7 @@ static void * const qemu_st_helpers[16] = {
 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
 {
     ptrdiff_t offset = tcg_pcrel_diff(s, target);
-    assert(offset == sextract64(offset, 0, 21));
+    tcg_debug_assert(offset == sextract64(offset, 0, 21));
     tcg_out_insn(s, 3406, ADR, rd, offset);
 }
 
@@ -1051,14 +1052,29 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
    slow path for the failure case, which will be patched later when finalizing
    the slow path. Generated code returns the host addend in X1,
    clobbers X0,X2,X3,TMP. */
-static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits,
+static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
                              tcg_insn_unit **label_ptr, int mem_index,
                              bool is_read)
 {
-    TCGReg base = TCG_AREG0;
     int tlb_offset = is_read ?
         offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
+    int s_mask = (1 << (opc & MO_SIZE)) - 1;
+    TCGReg base = TCG_AREG0, x3;
+    uint64_t tlb_mask;
+
+    /* For aligned accesses, we check the first byte and include the alignment
+       bits within the address.  For unaligned access, we check that we don't
+       cross pages using the address of the last byte of the access. */
+    if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) {
+        tlb_mask = TARGET_PAGE_MASK | s_mask;
+        x3 = addr_reg;
+    } else {
+        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
+                     TCG_REG_X3, addr_reg, s_mask);
+        tlb_mask = TARGET_PAGE_MASK;
+        x3 = TCG_REG_X3;
+    }
 
     /* Extract the TLB index from the address into X0.
        X0<CPU_TLB_BITS:0> =
@@ -1066,11 +1082,9 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits,
     tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
                  TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
 
-    /* Store the page mask part of the address and the low s_bits into X3.
-       Later this allows checking for equality and alignment at the same time.
-       X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
-    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
-                     addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+    /* Store the page mask part of the address into X3. */
+    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
+                     TCG_REG_X3, x3, tlb_mask);
 
     /* Add any "high bits" from the tlb offset to the env address into X2,
        to take advantage of the LSL12 form of the ADDI instruction.
@@ -1207,18 +1221,21 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
 #ifdef CONFIG_SOFTMMU
     unsigned mem_index = get_mmuidx(oi);
-    TCGMemOp s_bits = memop & MO_SIZE;
     tcg_insn_unit *label_ptr;
 
-    tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
+    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
     tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                            TCG_REG_X1, otype, addr_reg);
     add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                         s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
-    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
-                           GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR,
-                           otype, addr_reg);
+    if (USE_GUEST_BASE) {
+        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
+                               TCG_REG_GUEST_BASE, otype, addr_reg);
+    } else {
+        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
+                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
+    }
 #endif /* CONFIG_SOFTMMU */
 }
 
@@ -1229,18 +1246,21 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
 #ifdef CONFIG_SOFTMMU
     unsigned mem_index = get_mmuidx(oi);
-    TCGMemOp s_bits = memop & MO_SIZE;
     tcg_insn_unit *label_ptr;
 
-    tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
+    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
     tcg_out_qemu_st_direct(s, memop, data_reg,
                            TCG_REG_X1, otype, addr_reg);
-    add_qemu_ldst_label(s, false, oi, s_bits == MO_64, data_reg, addr_reg,
-                        s->code_ptr, label_ptr);
+    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
+                        data_reg, addr_reg, s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
-    tcg_out_qemu_st_direct(s, memop, data_reg,
-                           GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR,
-                           otype, addr_reg);
+    if (USE_GUEST_BASE) {
+        tcg_out_qemu_st_direct(s, memop, data_reg,
+                               TCG_REG_GUEST_BASE, otype, addr_reg);
+    } else {
+        tcg_out_qemu_st_direct(s, memop, data_reg,
+                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
+    }
 #endif /* CONFIG_SOFTMMU */
 }
 
@@ -1274,7 +1294,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 #ifndef USE_DIRECT_JUMP
 #error "USE_DIRECT_JUMP required for aarch64"
 #endif
-        assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
+        tcg_debug_assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
         s->tb_jmp_offset[a0] = tcg_current_code_size(s);
         /* actual branch destination will be patched by
            aarch64_tb_set_jmp_target later, beware retranslation. */
@@ -1556,6 +1576,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_ext16s_i32:
         tcg_out_sxt(s, ext, MO_16, a0, a1);
         break;
+    case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
         break;
@@ -1567,6 +1588,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_ext16u_i32:
         tcg_out_uxt(s, MO_16, a0, a1);
         break;
+    case INDEX_op_extu_i32_i64:
     case INDEX_op_ext32u_i64:
         tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
         break;
@@ -1712,6 +1734,8 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
     { INDEX_op_ext8u_i64, { "r", "r" } },
     { INDEX_op_ext16u_i64, { "r", "r" } },
     { INDEX_op_ext32u_i64, { "r", "r" } },
+    { INDEX_op_ext_i32_i64, { "r", "r" } },
+    { INDEX_op_extu_i32_i64, { "r", "r" } },
 
     { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
     { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
@@ -1794,9 +1818,9 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 
-#if defined(CONFIG_USE_GUEST_BASE)
-    if (GUEST_BASE) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
+#if !defined(CONFIG_SOFTMMU)
+    if (USE_GUEST_BASE) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
         tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
     }
 #endif
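The tcg_out_tlb_read() rewrite above folds the alignment check into the TLB comparison. In rough C, the comparator value it builds is as follows (a sketch only, assuming 4 KiB target pages; tlb_compare_value is an invented name, not a function in the patch):

#include <stdbool.h>
#include <stdint.h>

#define TARGET_PAGE_BITS 12   /* assumed for this sketch */
#define TARGET_PAGE_MASK (~((1ULL << TARGET_PAGE_BITS) - 1))

/* For an aligned access, including the low size bits in the mask lets one
   compare check the page number and the alignment at the same time.  For an
   unaligned access, using the address of the last byte makes any access that
   crosses a page boundary miss the TLB and fall into the slow path. */
static uint64_t tlb_compare_value(uint64_t addr, unsigned size, bool aligned)
{
    uint64_t s_mask = size - 1;

    if (aligned || s_mask == 0) {
        return addr & (TARGET_PAGE_MASK | s_mask);
    }
    return (addr + s_mask) & TARGET_PAGE_MASK;
}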
diff --git a/qemu/tcg/arm/tcg-target.c b/qemu/tcg/arm/tcg-target.inc.c
index ae2ec7a92..2b7fbddbf 100644
--- a/qemu/tcg/arm/tcg-target.c
+++ b/qemu/tcg/arm/tcg-target.inc.c
@@ -67,7 +67,7 @@ bool use_idiv_instructions;
 # define USING_SOFTMMU 0
 #endif
 
-#ifndef NDEBUG
+#ifdef CONFIG_DEBUG_TCG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "%r0",
     "%r1",
@@ -124,8 +124,8 @@ static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
-    assert(type == R_ARM_PC24);
-    assert(addend == 0);
+    tcg_debug_assert(type == R_ARM_PC24);
+    tcg_debug_assert(addend == 0);
     reloc_pc24(code_ptr, (tcg_insn_unit *)value);
 }
 
@@ -492,7 +492,7 @@ static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
      */
     if (rhs_is_const) {
         int rot = encode_imm(rhs);
-        assert(rot >= 0);
+        tcg_debug_assert(rot >= 0);
         tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
     } else {
         tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
@@ -511,7 +511,7 @@ static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
         if (rot < 0) {
             rhs = ~rhs;
             rot = encode_imm(rhs);
-            assert(rot >= 0);
+            tcg_debug_assert(rot >= 0);
             opc = opinv;
         }
         tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
@@ -532,7 +532,7 @@ static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
         if (rot < 0) {
             rhs = -rhs;
             rot = encode_imm(rhs);
-            assert(rot >= 0);
+            tcg_debug_assert(rot >= 0);
             opc = opneg;
         }
         tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
@@ -1100,7 +1100,7 @@ static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)  \
     } else {                                                    \
         int ofs = (argreg - 4) * 4;                             \
         EXT_ARG;                                                \
-        assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);           \
+        tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \
         tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
     }                                                           \
     return argreg + 1;                                          \
@@ -1493,8 +1493,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
     add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                         s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
-    if (GUEST_BASE) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
+    if (guest_base) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
         tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
     } else {
         tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
@@ -1623,8 +1623,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
     add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                         s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
-    if (GUEST_BASE) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
+    if (guest_base) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
         tcg_out_qemu_st_index(s, COND_AL, opc, datalo, datahi,
                               addrlo, TCG_REG_TMP);
     } else {
diff --git a/qemu/tcg/i386/tcg-target.h b/qemu/tcg/i386/tcg-target.h
index 25b513354..92be34171 100644
--- a/qemu/tcg/i386/tcg-target.h
+++ b/qemu/tcg/i386/tcg-target.h
@@ -102,7 +102,8 @@ extern bool have_bmi1;
 #define TCG_TARGET_HAS_mulsh_i32        0
 
 #if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_trunc_shr_i32    0
+#define TCG_TARGET_HAS_extrl_i64_i32    0
+#define TCG_TARGET_HAS_extrh_i64_i32    0
 #define TCG_TARGET_HAS_div2_i64         1
 #define TCG_TARGET_HAS_rot_i64          1
 #define TCG_TARGET_HAS_ext8s_i64        1
diff --git a/qemu/tcg/i386/tcg-target.c b/qemu/tcg/i386/tcg-target.inc.c
index 887f22f67..007407c3f 100644
--- a/qemu/tcg/i386/tcg-target.c
+++ b/qemu/tcg/i386/tcg-target.inc.c
@@ -24,7 +24,7 @@
 
 #include "tcg-be-ldst.h"
 
-#ifndef NDEBUG
+#ifdef CONFIG_DEBUG_TCG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 #if TCG_TARGET_REG_BITS == 64
     "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
@@ -425,7 +425,7 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
     }
     if (opc & P_DATA16) {
         /* We should never be asking for both 16 and 64-bit operation.
*/ - assert((opc & P_REXW) == 0); + tcg_debug_assert((opc & P_REXW) == 0); tcg_out8(s, 0x66); } if (opc & P_ADDR32) { @@ -599,7 +599,7 @@ static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm, if (index < 0) { index = 4; } else { - assert(index != TCG_REG_ESP); + tcg_debug_assert(index != TCG_REG_ESP); } tcg_out_opc(s, opc, r, rm, index); @@ -745,14 +745,14 @@ static inline void tcg_out_rolw_8(TCGContext *s, int reg) static inline void tcg_out_ext8u(TCGContext *s, int dest, int src) { /* movzbl */ - assert(src < 4 || TCG_TARGET_REG_BITS == 64); + tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64); tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src); } static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw) { /* movsbl */ - assert(src < 4 || TCG_TARGET_REG_BITS == 64); + tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64); tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src); } @@ -1172,14 +1172,16 @@ static void * const qemu_st_helpers[16] = { First argument register is clobbered. */ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - int mem_index, TCGMemOp s_bits, + int mem_index, TCGMemOp opc, tcg_insn_unit **label_ptr, int which) { const TCGReg r0 = TCG_REG_L0; const TCGReg r1 = TCG_REG_L1; TCGType ttype = TCG_TYPE_I32; - TCGType htype = TCG_TYPE_I32; - int trexw = 0, hrexw = 0; + TCGType tlbtype = TCG_TYPE_I32; + int trexw = 0, hrexw = 0, tlbrexw = 0; + int s_mask = (1 << (opc & MO_SIZE)) - 1; + bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0; if (TCG_TARGET_REG_BITS == 64) { if (TARGET_LONG_BITS == 64) { @@ -1187,20 +1189,29 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, trexw = P_REXW; } if (TCG_TYPE_PTR == TCG_TYPE_I64) { - htype = TCG_TYPE_I64; hrexw = P_REXW; + if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) { + tlbtype = TCG_TYPE_I64; + tlbrexw = P_REXW; + } } } - tcg_out_mov(s, htype, r0, addrlo); - tcg_out_mov(s, ttype, r1, addrlo); + tcg_out_mov(s, tlbtype, r0, addrlo); + if (aligned) { + tcg_out_mov(s, ttype, r1, addrlo); + } else { + /* For unaligned access check that we don't cross pages using + the page address of the last byte. */ + tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask); + } - tcg_out_shifti(s, SHIFT_SHR + hrexw, r0, + tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); tgen_arithi(s, ARITH_AND + trexw, r1, - TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0); - tgen_arithi(s, ARITH_AND + hrexw, r0, + TARGET_PAGE_MASK | (aligned ? 
s_mask : 0), 0); + tgen_arithi(s, ARITH_AND + tlbrexw, r0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0, @@ -1424,7 +1435,7 @@ int arch_prctl(int code, unsigned long addr); static int guest_base_flags; static inline void setup_guest_base_seg(void) { - if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) { + if (arch_prctl(ARCH_SET_GS, guest_base) == 0) { guest_base_flags = P_GS; } } @@ -1545,7 +1556,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) TCGMemOp opc; #if defined(CONFIG_SOFTMMU) int mem_index; - TCGMemOp s_bits; tcg_insn_unit *label_ptr[2]; #endif @@ -1558,9 +1568,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) #if defined(CONFIG_SOFTMMU) mem_index = get_mmuidx(oi); - s_bits = opc & MO_SIZE; - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc, label_ptr, offsetof(CPUTLBEntry, addr_read)); /* TLB Hit. */ @@ -1571,7 +1580,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) s->code_ptr, label_ptr); #else { - int32_t offset = GUEST_BASE; + int32_t offset = guest_base; TCGReg base = addrlo; int index = -1; int seg = 0; @@ -1580,7 +1589,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) We can do this with the ADDR32 prefix if we're not using a guest base, or when using segmentation. Otherwise we need to zero-extend manually. */ - if (GUEST_BASE == 0 || guest_base_flags) { + if (guest_base == 0 || guest_base_flags) { seg = guest_base_flags; offset = 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { @@ -1591,8 +1600,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) tcg_out_ext32u(s, TCG_REG_L0, base); base = TCG_REG_L0; } - if (offset != GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE); + if (offset != guest_base) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); index = TCG_REG_L1; offset = 0; } @@ -1687,7 +1696,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) TCGMemOp opc; #if defined(CONFIG_SOFTMMU) int mem_index; - TCGMemOp s_bits; tcg_insn_unit *label_ptr[2]; #endif @@ -1700,9 +1708,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #if defined(CONFIG_SOFTMMU) mem_index = get_mmuidx(oi); - s_bits = opc & MO_SIZE; - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc, label_ptr, offsetof(CPUTLBEntry, addr_write)); /* TLB Hit. */ @@ -1713,12 +1720,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) s->code_ptr, label_ptr); #else { - int32_t offset = GUEST_BASE; + int32_t offset = guest_base; TCGReg base = addrlo; int seg = 0; /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */ - if (GUEST_BASE == 0 || guest_base_flags) { + if (guest_base == 0 || guest_base_flags) { seg = guest_base_flags; offset = 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { @@ -1727,12 +1734,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) } else if (TCG_TARGET_REG_BITS == 64) { /* ??? Note that we can't use the same SIB addressing scheme as for loads, since we require L0 free for bswap. 
*/ - if (offset != GUEST_BASE) { + if (offset != guest_base) { if (TARGET_LONG_BITS == 32) { tcg_out_ext32u(s, TCG_REG_L0, base); base = TCG_REG_L0; } - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); base = TCG_REG_L1; offset = 0; @@ -2064,9 +2071,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_bswap64_i64: tcg_out_bswap64(s, args[0]); break; + case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: tcg_out_ext32u(s, args[0], args[1]); break; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tcg_out_ext32s(s, args[0], args[1]); break; @@ -2201,6 +2210,9 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_ext16u_i64, { "r", "r" } }, { INDEX_op_ext32u_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, + { INDEX_op_deposit_i64, { "Q", "0", "Q" } }, { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } }, @@ -2306,8 +2318,8 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_opc(s, OPC_RET, 0, 0, 0); #if !defined(CONFIG_SOFTMMU) - /* Try to set up a segment register to point to GUEST_BASE. */ - if (GUEST_BASE) { + /* Try to set up a segment register to point to guest_base. */ + if (guest_base) { setup_guest_base_seg(); } #endif diff --git a/qemu/tcg/ia64/tcg-target.h b/qemu/tcg/ia64/tcg-target.h index a04ed8126..ae9b79f02 100644 --- a/qemu/tcg/ia64/tcg-target.h +++ b/qemu/tcg/ia64/tcg-target.h @@ -160,7 +160,8 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_mulsh_i64 0 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) diff --git a/qemu/tcg/ia64/tcg-target.c b/qemu/tcg/ia64/tcg-target.inc.c index 81cb9f79f..7557e6a9d 100644 --- a/qemu/tcg/ia64/tcg-target.c +++ b/qemu/tcg/ia64/tcg-target.inc.c @@ -27,7 +27,7 @@ * Register definitions */ -#ifndef NDEBUG +#ifdef CONFIG_DEBUG_TCG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", @@ -40,13 +40,8 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { }; #endif -#ifdef CONFIG_USE_GUEST_BASE +#ifndef CONFIG_SOFTMMU #define TCG_GUEST_BASE_REG TCG_REG_R55 -#else -#define TCG_GUEST_BASE_REG TCG_REG_R0 -#endif -#ifndef GUEST_BASE -#define GUEST_BASE 0 #endif /* Branch registers */ @@ -715,8 +710,8 @@ static uint64_t get_reloc_pcrel21b_slot2(tcg_insn_unit *pc) static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { - assert(addend == 0); - assert(type == R_IA64_PCREL21B); + tcg_debug_assert(addend == 0); + tcg_debug_assert(type == R_IA64_PCREL21B); reloc_pcrel21b_slot2(code_ptr, (tcg_insn_unit *)value); } @@ -814,7 +809,7 @@ static inline void tcg_out_mov(TCGContext *s, TCGType type, static inline uint64_t tcg_opc_movi_a(int qp, TCGReg dst, int64_t src) { - assert(src == sextract64(src, 0, 22)); + tcg_debug_assert(src == sextract64(src, 0, 22)); return tcg_opc_a5(qp, OPC_ADDL_A5, dst, src, TCG_REG_R0); } @@ -1577,7 +1572,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, be->labels = l; } -static void tcg_out_tb_finalize(TCGContext *s) +static bool tcg_out_tb_finalize(TCGContext 
*s) { static const void * const helpers[8] = { helper_ret_stb_mmu, @@ -1599,7 +1594,7 @@ static void tcg_out_tb_finalize(TCGContext *s) /* The out-of-line thunks are all the same; load the return address from B0, load the GP, and branch to the code. Note that we are always post-call, so the register window has rolled, so we're - using incomming parameter register numbers, not outgoing. */ + using incoming parameter register numbers, not outgoing. */ if (dest == NULL) { uintptr_t *desc = (uintptr_t *)helpers[x]; uintptr_t func = desc[0], gp = desc[1], disp; @@ -1625,7 +1620,16 @@ static void tcg_out_tb_finalize(TCGContext *s) } reloc_pcrel21b_slot2(l->label_ptr, dest); + + /* Test for (pending) buffer overflow. The assumption is that any + one operation beginning below the high water mark cannot overrun + the buffer completely. Thus we can test for overflow after + generating code without having to check during generation. */ + if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { + return false; + } } + return true; } static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) @@ -1765,7 +1769,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) bswap = opc & MO_BSWAP; #if TARGET_LONG_BITS == 32 - if (GUEST_BASE != 0) { + if (guest_base != 0) { tcg_out_bundle(s, mII, INSN_NOP_M, tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, @@ -1829,7 +1833,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) } } #else - if (GUEST_BASE != 0) { + if (guest_base != 0) { tcg_out_bundle(s, MmI, tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, TCG_GUEST_BASE_REG, addr_reg), @@ -1889,7 +1893,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) bswap = opc & MO_BSWAP; #if TARGET_LONG_BITS == 32 - if (GUEST_BASE != 0) { + if (guest_base != 0) { tcg_out_bundle(s, mII, INSN_NOP_M, tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, @@ -1935,7 +1939,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) INSN_NOP_M, INSN_NOP_I); #else - if (GUEST_BASE != 0) { + if (guest_base != 0) { add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, TCG_GUEST_BASE_REG, addr_reg); addr_reg = TCG_REG_R2; @@ -1944,7 +1948,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) } if (!bswap) { - tcg_out_bundle(s, (GUEST_BASE ? MmI : mmI), + tcg_out_bundle(s, (guest_base ? MmI : mmI), add_guest_base, tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], data_reg, addr_reg), @@ -2148,9 +2152,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext16u_i64: tcg_out_ext(s, OPC_ZXT2_I29, args[0], args[1]); break; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tcg_out_ext(s, OPC_SXT4_I29, args[0], args[1]); break; + case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: tcg_out_ext(s, OPC_ZXT4_I29, args[0], args[1]); break; @@ -2301,6 +2307,8 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_ext16u_i64, { "r", "rZ"} }, { INDEX_op_ext32s_i64, { "r", "rZ"} }, { INDEX_op_ext32u_i64, { "r", "rZ"} }, + { INDEX_op_ext_i32_i64, { "r", "rZ" } }, + { INDEX_op_extu_i32_i64, { "r", "rZ" } }, { INDEX_op_bswap16_i64, { "r", "rZ" } }, { INDEX_op_bswap32_i64, { "r", "rZ" } }, @@ -2349,14 +2357,14 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R33, 0)); - /* ??? If GUEST_BASE < 0x200000, we could load the register via + /* ??? If guest_base < 0x200000, we could load the register via an ADDL in the M slot of the next bundle. 
*/ - if (GUEST_BASE != 0) { + if (guest_base != 0) { tcg_out_bundle(s, mlx, INSN_NOP_M, - tcg_opc_l2 (GUEST_BASE), + tcg_opc_l2(guest_base), tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, - TCG_GUEST_BASE_REG, GUEST_BASE)); + TCG_GUEST_BASE_REG, guest_base)); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } diff --git a/qemu/tcg/mips/tcg-target.h b/qemu/tcg/mips/tcg-target.h index f5ba52cac..b1cda37b6 100644 --- a/qemu/tcg/mips/tcg-target.h +++ b/qemu/tcg/mips/tcg-target.h @@ -96,6 +96,13 @@ extern bool use_mips32_instructions; extern bool use_mips32r2_instructions; #endif +/* MIPS32R6 instruction set detection */ +#if defined(__mips_isa_rev) && (__mips_isa_rev >= 6) +#define use_mips32r6_instructions 1 +#else +#define use_mips32r6_instructions 0 +#endif + /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 @@ -105,8 +112,8 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_mulu2_i32 1 -#define TCG_TARGET_HAS_muls2_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) +#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 diff --git a/qemu/tcg/mips/tcg-target.c b/qemu/tcg/mips/tcg-target.inc.c index e97980df0..aaf881cfd 100644 --- a/qemu/tcg/mips/tcg-target.c +++ b/qemu/tcg/mips/tcg-target.inc.c @@ -35,7 +35,7 @@ #define LO_OFF (MIPS_BE * 4) #define HI_OFF (4 - LO_OFF) -#ifndef NDEBUG +#ifdef CONFIG_DEBUG_TCG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "zero", "at", @@ -76,7 +76,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #define TCG_TMP1 TCG_REG_T9 /* check if we really need so many registers :P */ -static const TCGReg tcg_target_reg_alloc_order[] = { +static const int tcg_target_reg_alloc_order[] = { /* Call saved registers. */ TCG_REG_S0, TCG_REG_S1, @@ -127,7 +127,7 @@ static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target) { /* Let the compiler perform the right-shift as part of the arithmetic. 
*/ ptrdiff_t disp = target - (pc + 1); - assert(disp == (int16_t)disp); + tcg_debug_assert(disp == (int16_t)disp); return disp & 0xffff; } @@ -138,7 +138,7 @@ static inline void reloc_pc16(tcg_insn_unit *pc, tcg_insn_unit *target) static inline uint32_t reloc_26_val(tcg_insn_unit *pc, tcg_insn_unit *target) { - assert((((uintptr_t)pc ^ (uintptr_t)target) & 0xf0000000) == 0); + tcg_debug_assert((((uintptr_t)pc ^ (uintptr_t)target) & 0xf0000000) == 0); return ((uintptr_t)target >> 2) & 0x3ffffff; } @@ -150,8 +150,8 @@ static inline void reloc_26(tcg_insn_unit *pc, tcg_insn_unit *target) static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { - assert(type == R_MIPS_PC16); - assert(addend == 0); + tcg_debug_assert(type == R_MIPS_PC16); + tcg_debug_assert(addend == 0); reloc_pc16(code_ptr, (tcg_insn_unit *)value); } @@ -288,16 +288,24 @@ typedef enum { OPC_SRLV = OPC_SPECIAL | 0x06, OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06, OPC_SRAV = OPC_SPECIAL | 0x07, - OPC_JR = OPC_SPECIAL | 0x08, + OPC_JR_R5 = OPC_SPECIAL | 0x08, OPC_JALR = OPC_SPECIAL | 0x09, OPC_MOVZ = OPC_SPECIAL | 0x0A, OPC_MOVN = OPC_SPECIAL | 0x0B, OPC_MFHI = OPC_SPECIAL | 0x10, OPC_MFLO = OPC_SPECIAL | 0x12, OPC_MULT = OPC_SPECIAL | 0x18, + OPC_MUL_R6 = OPC_SPECIAL | (0x02 << 6) | 0x18, + OPC_MUH = OPC_SPECIAL | (0x03 << 6) | 0x18, OPC_MULTU = OPC_SPECIAL | 0x19, + OPC_MULU = OPC_SPECIAL | (0x02 << 6) | 0x19, + OPC_MUHU = OPC_SPECIAL | (0x03 << 6) | 0x19, OPC_DIV = OPC_SPECIAL | 0x1A, + OPC_DIV_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1A, + OPC_MOD = OPC_SPECIAL | (0x03 << 6) | 0x1A, OPC_DIVU = OPC_SPECIAL | 0x1B, + OPC_DIVU_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1B, + OPC_MODU = OPC_SPECIAL | (0x03 << 6) | 0x1B, OPC_ADDU = OPC_SPECIAL | 0x21, OPC_SUBU = OPC_SPECIAL | 0x23, OPC_AND = OPC_SPECIAL | 0x24, @@ -306,13 +314,15 @@ typedef enum { OPC_NOR = OPC_SPECIAL | 0x27, OPC_SLT = OPC_SPECIAL | 0x2A, OPC_SLTU = OPC_SPECIAL | 0x2B, + OPC_SELEQZ = OPC_SPECIAL | 0x35, + OPC_SELNEZ = OPC_SPECIAL | 0x37, OPC_REGIMM = 0x01 << 26, OPC_BLTZ = OPC_REGIMM | (0x00 << 16), OPC_BGEZ = OPC_REGIMM | (0x01 << 16), OPC_SPECIAL2 = 0x1c << 26, - OPC_MUL = OPC_SPECIAL2 | 0x002, + OPC_MUL_R5 = OPC_SPECIAL2 | 0x002, OPC_SPECIAL3 = 0x1f << 26, OPC_EXT = OPC_SPECIAL3 | 0x000, @@ -320,6 +330,15 @@ typedef enum { OPC_WSBH = OPC_SPECIAL3 | 0x0a0, OPC_SEB = OPC_SPECIAL3 | 0x420, OPC_SEH = OPC_SPECIAL3 | 0x620, + + /* MIPS r6 doesn't have JR, JALR should be used instead */ + OPC_JR = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5, + + /* + * MIPS r6 replaces MUL with an alternative encoding which is + * backwards-compatible at the assembly level. + */ + OPC_MUL = use_mips32r6_instructions ? OPC_MUL_R6 : OPC_MUL_R5, } MIPSInsn; /* @@ -413,7 +432,7 @@ static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, void *target) if ((from ^ dest) & -(1 << 28)) { return false; } - assert((dest & 3) == 0); + tcg_debug_assert((dest & 3) == 0); inst = opc; inst |= (dest >> 2) & 0x3ffffff; @@ -567,6 +586,55 @@ static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val) } } +static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, + TCGReg ah, TCGArg bl, TCGArg bh, bool cbl, + bool cbh, bool is_sub) +{ + TCGReg th = TCG_TMP1; + + /* If we have a negative constant such that negating it would + make the high part zero, we can (usually) eliminate one insn. 
*/ + if (cbl && cbh && bh == -1 && bl != 0) { + bl = -bl; + bh = 0; + is_sub = !is_sub; + } + + /* By operating on the high part first, we get to use the final + carry operation to move back from the temporary. */ + if (!cbh) { + tcg_out_opc_reg(s, (is_sub ? OPC_SUBU : OPC_ADDU), th, ah, bh); + } else if (bh != 0 || ah == rl) { + tcg_out_opc_imm(s, OPC_ADDIU, th, ah, (is_sub ? -bh : bh)); + } else { + th = ah; + } + + /* Note that tcg optimization should eliminate the bl == 0 case. */ + if (is_sub) { + if (cbl) { + tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, al, bl); + tcg_out_opc_imm(s, OPC_ADDIU, rl, al, -bl); + } else { + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, al, bl); + tcg_out_opc_reg(s, OPC_SUBU, rl, al, bl); + } + tcg_out_opc_reg(s, OPC_SUBU, rh, th, TCG_TMP0); + } else { + if (cbl) { + tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl); + tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl); + } else if (rl == al && rl == bl) { + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, 31); + tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); + } else { + tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, rl, (rl == bl ? al : bl)); + } + tcg_out_opc_reg(s, OPC_ADDU, rh, th, TCG_TMP0); + } +} + /* Bit 0 set if inversion required; bit 1 set if swapping required. */ #define MIPS_CMP_INV 1 #define MIPS_CMP_SWAP 2 @@ -739,9 +807,9 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg tmp0 = TCG_TMP0; TCGReg tmp1 = ret; - assert(ret != TCG_TMP0); + tcg_debug_assert(ret != TCG_TMP0); if (ret == ah || ret == bh) { - assert(ret != TCG_TMP1); + tcg_debug_assert(ret != TCG_TMP1); tmp1 = TCG_TMP1; } @@ -792,13 +860,20 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, } static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, TCGReg c2, TCGReg v) + TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2) { - MIPSInsn m_opc = OPC_MOVN; + bool eqz = false; + + /* If one of the values is zero, put it last to match SEL*Z instructions */ + if (use_mips32r6_instructions && v1 == 0) { + v1 = v2; + v2 = 0; + cond = tcg_invert_cond(cond); + } switch (cond) { case TCG_COND_EQ: - m_opc = OPC_MOVZ; + eqz = true; /* FALLTHRU */ case TCG_COND_NE: if (c2 != 0) { @@ -811,14 +886,32 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, /* Minimize code size by preferring a compare not requiring INV. */ if (mips_cmp_map[cond] & MIPS_CMP_INV) { cond = tcg_invert_cond(cond); - m_opc = OPC_MOVZ; + eqz = true; } tcg_out_setcond(s, cond, TCG_TMP0, c1, c2); c1 = TCG_TMP0; break; } - tcg_out_opc_reg(s, m_opc, ret, v, c1); + if (use_mips32r6_instructions) { + MIPSInsn m_opc_t = eqz ? OPC_SELEQZ : OPC_SELNEZ; + MIPSInsn m_opc_f = eqz ? OPC_SELNEZ : OPC_SELEQZ; + + if (v2 != 0) { + tcg_out_opc_reg(s, m_opc_f, TCG_TMP1, v2, c1); + } + tcg_out_opc_reg(s, m_opc_t, ret, v1, c1); + if (v2 != 0) { + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1); + } + } else { + MIPSInsn m_opc = eqz ? OPC_MOVZ : OPC_MOVN; + + tcg_out_opc_reg(s, m_opc, ret, v1, c1); + + /* This should be guaranteed via constraints */ + tcg_debug_assert(v2 == ret); + } } static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) @@ -934,9 +1027,11 @@ static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah) /* Perform the tlb comparison operation. The complete host address is placed in BASE. Clobbers AT, T0, A0. 
*/ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, - TCGReg addrh, int mem_index, TCGMemOp s_bits, + TCGReg addrh, TCGMemOpIdx oi, tcg_insn_unit *label_ptr[2], bool is_load) { + TCGMemOp s_bits = get_memop(oi) & MO_SIZE; + int mem_index = get_mmuidx(oi); int cmp_off = (is_load ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) @@ -962,30 +1057,34 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, add_off -= 0x7ff0; } - /* Load the tlb comparator. */ - if (TARGET_LONG_BITS == 64) { - tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + LO_OFF); - tcg_out_opc_imm(s, OPC_LW, base, TCG_REG_A0, cmp_off + HI_OFF); - } else { - tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off); - } + /* Load the (low half) tlb comparator. */ + tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, + cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0)); /* Mask the page bits, keeping the alignment bits to compare against. - In between, load the tlb addend for the fast path. */ + In between on 32-bit targets, load the tlb addend for the fast path. */ tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); + if (TARGET_LONG_BITS == 32) { + tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); + } tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); label_ptr[0] = s->code_ptr; tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); + /* Load and test the high half tlb comparator. */ if (TARGET_LONG_BITS == 64) { /* delay slot */ - tcg_out_nop(s); + tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF); + + /* Load the tlb addend for the fast path. We can't do it earlier with + 64-bit targets or we'll clobber a0 before reading the high half tlb + comparator. */ + tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); label_ptr[1] = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, addrh, base); + tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0); } /* delay slot */ @@ -1156,8 +1255,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) TCGMemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[2]; - int mem_index; - TCGMemOp s_bits; #endif /* Note that we've eliminated V0 from the output registers, so we won't overwrite the base register during loading. 
*/ @@ -1171,21 +1268,17 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) opc = get_memop(oi); #if defined(CONFIG_SOFTMMU) - mem_index = get_mmuidx(oi); - s_bits = opc & MO_SIZE; - - tcg_out_tlb_load(s, base, addr_regl, addr_regh, mem_index, - s_bits, label_ptr, 1); + tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 1); tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); add_qemu_ldst_label(s, 1, oi, data_regl, data_regh, addr_regl, addr_regh, s->code_ptr, label_ptr); #else - if (GUEST_BASE == 0 && data_regl != addr_regl) { + if (guest_base == 0 && data_regl != addr_regl) { base = addr_regl; - } else if (GUEST_BASE == (int16_t)GUEST_BASE) { - tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE); + } else if (guest_base == (int16_t)guest_base) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base); } else { - tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE); + tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); } tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); @@ -1233,55 +1326,6 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, - TCGReg ah, TCGArg bl, TCGArg bh, bool cbl, - bool cbh, bool is_sub) -{ - TCGReg th = TCG_TMP1; - - /* If we have a negative constant such that negating it would - make the high part zero, we can (usually) eliminate one insn. */ - if (cbl && cbh && bh == -1 && bl != 0) { - bl = -bl; - bh = 0; - is_sub = !is_sub; - } - - /* By operating on the high part first, we get to use the final - carry operation to move back from the temporary. */ - if (!cbh) { - tcg_out_opc_reg(s, (is_sub ? OPC_SUBU : OPC_ADDU), th, ah, bh); - } else if (bh != 0 || ah == rl) { - tcg_out_opc_imm(s, OPC_ADDIU, th, ah, (is_sub ? -bh : bh)); - } else { - th = ah; - } - - /* Note that tcg optimization should eliminate the bl == 0 case. */ - if (is_sub) { - if (cbl) { - tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, al, bl); - tcg_out_opc_imm(s, OPC_ADDIU, rl, al, -bl); - } else { - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, al, bl); - tcg_out_opc_reg(s, OPC_SUBU, rl, al, bl); - } - tcg_out_opc_reg(s, OPC_SUBU, rh, th, TCG_TMP0); - } else { - if (cbl) { - tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl); - tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl); - } else if (rl == al && rl == bl) { - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, 31); - tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); - } else { - tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, rl, (rl == bl ? al : bl)); - } - tcg_out_opc_reg(s, OPC_ADDU, rh, th, TCG_TMP0); - } -} - static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) { TCGReg addr_regl, addr_regh __attribute__((unused)); @@ -1290,8 +1334,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) TCGMemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[2]; - int mem_index; - TCGMemOp s_bits; #endif data_regl = *args++; @@ -1302,26 +1344,22 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) opc = get_memop(oi); #if defined(CONFIG_SOFTMMU) - mem_index = get_mmuidx(oi); - s_bits = opc & 3; - /* Note that we eliminated the helper's address argument, so we can reuse that for the base. */ base = (TARGET_LONG_BITS == 32 ? 
TCG_REG_A1 : TCG_REG_A2); - tcg_out_tlb_load(s, base, addr_regl, addr_regh, mem_index, - s_bits, label_ptr, 0); + tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 0); tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); add_qemu_ldst_label(s, 0, oi, data_regl, data_regh, addr_regl, addr_regh, s->code_ptr, label_ptr); #else - if (GUEST_BASE == 0) { + if (guest_base == 0) { base = addr_regl; } else { base = TCG_REG_A0; - if (GUEST_BASE == (int16_t)GUEST_BASE) { - tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE); + if (guest_base == (int16_t)guest_base) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base); } else { - tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE); + tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); } } @@ -1432,8 +1470,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_and_i32: if (c2 && a2 != (uint16_t)a2) { int msb = ctz32(~a2) - 1; - assert(use_mips32r2_instructions); - assert(is_p2m1(a2)); + tcg_debug_assert(use_mips32r2_instructions); + tcg_debug_assert(is_p2m1(a2)); tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0); break; } @@ -1451,21 +1489,45 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, i1 = OPC_MULT, i2 = OPC_MFLO; goto do_hilo1; case INDEX_op_mulsh_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MUH, a0, a1, a2); + break; + } i1 = OPC_MULT, i2 = OPC_MFHI; goto do_hilo1; case INDEX_op_muluh_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MUHU, a0, a1, a2); + break; + } i1 = OPC_MULTU, i2 = OPC_MFHI; goto do_hilo1; case INDEX_op_div_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2); + break; + } i1 = OPC_DIV, i2 = OPC_MFLO; goto do_hilo1; case INDEX_op_divu_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2); + break; + } i1 = OPC_DIVU, i2 = OPC_MFLO; goto do_hilo1; case INDEX_op_rem_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2); + break; + } i1 = OPC_DIV, i2 = OPC_MFHI; goto do_hilo1; case INDEX_op_remu_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2); + break; + } i1 = OPC_DIVU, i2 = OPC_MFHI; do_hilo1: tcg_out_opc_reg(s, i1, 0, a1, a2); @@ -1542,7 +1604,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_movcond_i32: - tcg_out_movcond(s, args[5], a0, a1, a2, args[3]); + tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]); break; case INDEX_op_setcond_i32: @@ -1598,8 +1660,10 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, +#if !use_mips32r6_instructions { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } }, { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } }, +#endif { INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } }, { INDEX_op_muluh_i32, { "r", "rZ", "rZ" } }, { INDEX_op_div_i32, { "r", "rZ", "rZ" } }, @@ -1629,7 +1693,11 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, { INDEX_op_brcond_i32, { "rZ", "rZ" } }, +#if use_mips32r6_instructions + { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, +#else { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } }, +#endif { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, @@ -1667,7 +1735,6 @@ static int tcg_target_callee_save_regs[] = { /* The Linux kernel doesn't provide any information about the 
available instruction set. Probe it using a signal handler. */ -#include <signal.h> #ifndef use_movnz_instructions bool use_movnz_instructions = false; diff --git a/qemu/tcg/optimize.c b/qemu/tcg/optimize.c index 18283cfd7..f01160815 100644 --- a/qemu/tcg/optimize.c +++ b/qemu/tcg/optimize.c @@ -23,10 +23,8 @@ * THE SOFTWARE. */ -#include "config.h" +#include "qemu/osdep.h" -#include <stdlib.h> -#include <stdio.h> #include "qemu-common.h" #include "tcg-op.h" @@ -35,14 +33,8 @@ glue(glue(case INDEX_op_, x), _i32): \ glue(glue(case INDEX_op_, x), _i64) -typedef enum { - TCG_TEMP_UNDEF = 0, - TCG_TEMP_CONST, - TCG_TEMP_COPY, -} tcg_temp_state; - struct tcg_temp_info { - tcg_temp_state state; + bool is_const; uint16_t prev_copy; uint16_t next_copy; tcg_target_ulong val; @@ -50,23 +42,47 @@ struct tcg_temp_info { }; static struct tcg_temp_info temps[TCG_MAX_TEMPS]; +static TCGTempSet temps_used; + +static inline bool temp_is_const(TCGArg arg) +{ + return temps[arg].is_const; +} + +static inline bool temp_is_copy(TCGArg arg) +{ + return temps[arg].next_copy != arg; +} -/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove - the copy flag from the left temp. */ +/* Reset TEMP's state, possibly removing the temp for the list of copies. */ static void reset_temp(TCGArg temp) { - if (temps[temp].state == TCG_TEMP_COPY) { - if (temps[temp].prev_copy == temps[temp].next_copy) { - temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF; - } else { - temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; - temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; - } - } - temps[temp].state = TCG_TEMP_UNDEF; + temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; + temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; + temps[temp].next_copy = temp; + temps[temp].prev_copy = temp; + temps[temp].is_const = false; temps[temp].mask = -1; } +/* Reset all temporaries, given that there are NB_TEMPS of them. */ +static void reset_all_temps(int nb_temps) +{ + bitmap_zero(temps_used.l, nb_temps); +} + +/* Initialize and activate a temporary. */ +static void init_temp_info(TCGArg temp) +{ + if (!test_bit(temp, temps_used.l)) { + temps[temp].next_copy = temp; + temps[temp].prev_copy = temp; + temps[temp].is_const = false; + temps[temp].mask = -1; + set_bit(temp, temps_used.l); + } +} + static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc, int nargs) { @@ -98,16 +114,6 @@ static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op, return new_op; } -/* Reset all temporaries, given that there are NB_TEMPS of them. 
*/ -static void reset_all_temps(int nb_temps) -{ - int i; - for (i = 0; i < nb_temps; i++) { - temps[i].state = TCG_TEMP_UNDEF; - temps[i].mask = -1; - } -} - static int op_bits(TCGOpcode op) { const TCGOpDef *def = &tcg_op_defs[op]; @@ -179,8 +185,7 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2) return true; } - if (temps[arg1].state != TCG_TEMP_COPY - || temps[arg2].state != TCG_TEMP_COPY) { + if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) { return false; } @@ -202,7 +207,7 @@ static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args, op->opc = new_op; reset_temp(dst); - temps[dst].state = TCG_TEMP_CONST; + temps[dst].is_const = true; temps[dst].val = val; mask = val; if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) { @@ -223,11 +228,6 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, return; } - if (temps[src].state == TCG_TEMP_CONST) { - tcg_opt_gen_movi(s, op, args, dst, temps[src].val); - return; - } - TCGOpcode new_op = op_to_mov(op->opc); tcg_target_ulong mask; @@ -241,19 +241,13 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, } temps[dst].mask = mask; - assert(temps[src].state != TCG_TEMP_CONST); - if (s->temps[src].type == s->temps[dst].type) { - if (temps[src].state != TCG_TEMP_COPY) { - temps[src].state = TCG_TEMP_COPY; - temps[src].next_copy = src; - temps[src].prev_copy = src; - } - temps[dst].state = TCG_TEMP_COPY; temps[dst].next_copy = temps[src].next_copy; temps[dst].prev_copy = src; temps[temps[dst].next_copy].prev_copy = dst; temps[src].next_copy = dst; + temps[dst].is_const = temps[src].is_const; + temps[dst].val = temps[src].val; } args[0] = dst; @@ -292,7 +286,6 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) case INDEX_op_shr_i32: return (uint32_t)x >> (y & 31); - case INDEX_op_trunc_shr_i32: case INDEX_op_shr_i64: return (uint64_t)x >> (y & 63); @@ -347,12 +340,18 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) CASE_OP_32_64(ext16u): return (uint16_t)x; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: return (int32_t)x; + case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: case INDEX_op_ext32u_i64: return (uint32_t)x; + case INDEX_op_extrh_i64_i32: + return (uint64_t)x >> 32; + case INDEX_op_muluh_i32: return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; case INDEX_op_mulsh_i32: @@ -395,7 +394,7 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) { TCGArg res = do_constant_folding_2(op, x, y); if (op_bits(op) == 32) { - res &= 0xffffffff; + res = (int32_t)res; } return res; } @@ -481,7 +480,7 @@ static bool do_constant_folding_cond_eq(TCGCond c) static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, TCGArg y, TCGCond c) { - if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) { + if (temp_is_const(x) && temp_is_const(y)) { switch (op_bits(op)) { case 32: return do_constant_folding_cond_32(temps[x].val, temps[y].val, c); @@ -492,7 +491,7 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, } } else if (temps_are_copies(x, y)) { return do_constant_folding_cond_eq(c); - } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) { + } else if (temp_is_const(y) && temps[y].val == 0) { switch (c) { case TCG_COND_LTU: return 0; @@ -513,12 +512,10 @@ static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) TCGArg al = p1[0], ah = p1[1]; TCGArg bl = p2[0], bh = p2[1]; - if (temps[bl].state == TCG_TEMP_CONST - && temps[bh].state == TCG_TEMP_CONST) { + 
if (temp_is_const(bl) && temp_is_const(bh)) { uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val; - if (temps[al].state == TCG_TEMP_CONST - && temps[ah].state == TCG_TEMP_CONST) { + if (temp_is_const(al) && temp_is_const(ah)) { uint64_t a; a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val; return do_constant_folding_cond_64(a, b, c); @@ -544,8 +541,8 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) { TCGArg a1 = *p1, a2 = *p2; int sum = 0; - sum += temps[a1].state == TCG_TEMP_CONST; - sum -= temps[a2].state == TCG_TEMP_CONST; + sum += temp_is_const(a1); + sum -= temp_is_const(a2); /* Prefer the constant in second argument, and then the form op a, a, b, which is better handled on non-RISC hosts. */ @@ -560,10 +557,10 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) static bool swap_commutative2(TCGArg *p1, TCGArg *p2) { int sum = 0; - sum += temps[p1[0]].state == TCG_TEMP_CONST; - sum += temps[p1[1]].state == TCG_TEMP_CONST; - sum -= temps[p2[0]].state == TCG_TEMP_CONST; - sum -= temps[p2[1]].state == TCG_TEMP_CONST; + sum += temp_is_const(p1[0]); + sum += temp_is_const(p1[1]); + sum -= temp_is_const(p2[0]); + sum -= temp_is_const(p2[1]); if (sum > 0) { TCGArg t; t = p1[0], p1[0] = p2[0], p2[0] = t; @@ -598,17 +595,29 @@ void tcg_optimize(TCGContext *s) const TCGOpDef *def = &tcg_op_defs[opc]; oi_next = op->next; + + /* Count the arguments, and initialize the temps that are + going to be used */ if (opc == INDEX_op_call) { nb_oargs = op->callo; nb_iargs = op->calli; + for (i = 0; i < nb_oargs + nb_iargs; i++) { + tmp = args[i]; + if (tmp != TCG_CALL_DUMMY_ARG) { + init_temp_info(tmp); + } + } } else { nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; + for (i = 0; i < nb_oargs + nb_iargs; i++) { + init_temp_info(args[i]); + } } /* Do copy propagation */ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { - if (temps[args[i]].state == TCG_TEMP_COPY) { + if (temp_is_copy(args[i])) { args[i] = find_better_copy(s, args[i]); } } @@ -678,8 +687,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(sar): CASE_OP_32_64(rotl): CASE_OP_32_64(rotr): - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[1]].val == 0) { + if (temp_is_const(args[1]) && temps[args[1]].val == 0) { tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } @@ -689,7 +697,7 @@ void tcg_optimize(TCGContext *s) TCGOpcode neg_op; bool have_neg; - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { /* Proceed with possible constant folding. 
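(If args[1] is constant too, the generic fold for sub further down replaces the whole op with a movi of the difference.)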
*/ break; } @@ -703,8 +711,7 @@ void tcg_optimize(TCGContext *s) if (!have_neg) { break; } - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[1]].val == 0) { + if (temp_is_const(args[1]) && temps[args[1]].val == 0) { op->opc = neg_op; reset_temp(args[0]); args[1] = args[2]; @@ -714,34 +721,30 @@ void tcg_optimize(TCGContext *s) break; CASE_OP_32_64(xor): CASE_OP_32_64(nand): - if (temps[args[1]].state != TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST - && temps[args[2]].val == -1) { + if (!temp_is_const(args[1]) + && temp_is_const(args[2]) && temps[args[2]].val == -1) { i = 1; goto try_not; } break; CASE_OP_32_64(nor): - if (temps[args[1]].state != TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST - && temps[args[2]].val == 0) { + if (!temp_is_const(args[1]) + && temp_is_const(args[2]) && temps[args[2]].val == 0) { i = 1; goto try_not; } break; CASE_OP_32_64(andc): - if (temps[args[2]].state != TCG_TEMP_CONST - && temps[args[1]].state == TCG_TEMP_CONST - && temps[args[1]].val == -1) { + if (!temp_is_const(args[2]) + && temp_is_const(args[1]) && temps[args[1]].val == -1) { i = 2; goto try_not; } break; CASE_OP_32_64(orc): CASE_OP_32_64(eqv): - if (temps[args[2]].state != TCG_TEMP_CONST - && temps[args[1]].state == TCG_TEMP_CONST - && temps[args[1]].val == 0) { + if (!temp_is_const(args[2]) + && temp_is_const(args[1]) && temps[args[1]].val == 0) { i = 2; goto try_not; } @@ -782,9 +785,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(or): CASE_OP_32_64(xor): CASE_OP_32_64(andc): - if (temps[args[1]].state != TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST - && temps[args[2]].val == 0) { + if (!temp_is_const(args[1]) + && temp_is_const(args[2]) && temps[args[2]].val == 0) { tcg_opt_gen_mov(s, op, args, args[0], args[1]); continue; } @@ -792,9 +794,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(and): CASE_OP_32_64(orc): CASE_OP_32_64(eqv): - if (temps[args[1]].state != TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST - && temps[args[2]].val == -1) { + if (!temp_is_const(args[1]) + && temp_is_const(args[2]) && temps[args[2]].val == -1) { tcg_opt_gen_mov(s, op, args, args[0], args[1]); continue; } @@ -832,17 +833,26 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(and): mask = temps[args[2]].mask; - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { and_const: affected = temps[args[1]].mask & ~mask; } mask = temps[args[1]].mask & mask; break; + case INDEX_op_ext_i32_i64: + if ((temps[args[1]].mask & 0x80000000) != 0) { + break; + } + case INDEX_op_extu_i32_i64: + /* We do not compute affected as it is a size changing op. */ + mask = (uint32_t)temps[args[1]].mask; + break; + CASE_OP_32_64(andc): /* Known-zeros does not imply known-ones. Therefore unless args[2] is constant, we can't infer anything from it. 
*/ - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { mask = ~temps[args[2]].mask; goto and_const; } @@ -851,37 +861,40 @@ void tcg_optimize(TCGContext *s) break; case INDEX_op_sar_i32: - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & 31; mask = (int32_t)temps[args[1]].mask >> tmp; } break; case INDEX_op_sar_i64: - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & 63; mask = (int64_t)temps[args[1]].mask >> tmp; } break; case INDEX_op_shr_i32: - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & 31; mask = (uint32_t)temps[args[1]].mask >> tmp; } break; case INDEX_op_shr_i64: - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & 63; mask = (uint64_t)temps[args[1]].mask >> tmp; } break; - case INDEX_op_trunc_shr_i32: - mask = (uint64_t)temps[args[1]].mask >> args[2]; + case INDEX_op_extrl_i64_i32: + mask = (uint32_t)temps[args[1]].mask; + break; + case INDEX_op_extrh_i64_i32: + mask = (uint64_t)temps[args[1]].mask >> 32; break; CASE_OP_32_64(shl): - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1); mask = temps[args[1]].mask << tmp; } @@ -946,12 +959,12 @@ void tcg_optimize(TCGContext *s) } if (partmask == 0) { - assert(nb_oargs == 1); + tcg_debug_assert(nb_oargs == 1); tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } if (affected == 0) { - assert(nb_oargs == 1); + tcg_debug_assert(nb_oargs == 1); tcg_opt_gen_mov(s, op, args, args[0], args[1]); continue; } @@ -962,8 +975,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(mul): CASE_OP_32_64(muluh): CASE_OP_32_64(mulsh): - if ((temps[args[2]].state == TCG_TEMP_CONST - && temps[args[2]].val == 0)) { + if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) { tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } @@ -1018,21 +1030,17 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(ext16u): case INDEX_op_ext32s_i64: case INDEX_op_ext32u_i64: - if (temps[args[1]].state == TCG_TEMP_CONST) { + case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: + case INDEX_op_extrh_i64_i32: + if (temp_is_const(args[1])) { tmp = do_constant_folding(opc, temps[args[1]].val, 0); tcg_opt_gen_movi(s, op, args, args[0], tmp); break; } goto do_default; - case INDEX_op_trunc_shr_i32: - if (temps[args[1]].state == TCG_TEMP_CONST) { - tmp = do_constant_folding(opc, temps[args[1]].val, args[2]); - tcg_opt_gen_movi(s, op, args, args[0], tmp); - break; - } - goto do_default; - CASE_OP_32_64(add): CASE_OP_32_64(sub): CASE_OP_32_64(mul): @@ -1055,8 +1063,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(divu): CASE_OP_32_64(rem): CASE_OP_32_64(remu): - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[1]) && temp_is_const(args[2])) { tmp = do_constant_folding(opc, temps[args[1]].val, temps[args[2]].val); tcg_opt_gen_movi(s, op, args, args[0], tmp); @@ -1065,8 +1072,7 @@ void tcg_optimize(TCGContext *s) goto do_default; CASE_OP_32_64(deposit): - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[1]) && temp_is_const(args[2])) { tmp = deposit64(temps[args[1]].val, args[3], args[4], temps[args[2]].val); tcg_opt_gen_movi(s, op, args, args[0], tmp); @@ -1106,10 +1112,8 @@ void 
tcg_optimize(TCGContext *s) case INDEX_op_add2_i32: case INDEX_op_sub2_i32: - if (temps[args[2]].state == TCG_TEMP_CONST - && temps[args[3]].state == TCG_TEMP_CONST - && temps[args[4]].state == TCG_TEMP_CONST - && temps[args[5]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2]) && temp_is_const(args[3]) + && temp_is_const(args[4]) && temp_is_const(args[5])) { uint32_t al = temps[args[2]].val; uint32_t ah = temps[args[3]].val; uint32_t bl = temps[args[4]].val; @@ -1128,8 +1132,8 @@ void tcg_optimize(TCGContext *s) rl = args[0]; rh = args[1]; - tcg_opt_gen_movi(s, op, args, rl, (uint32_t)a); - tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(a >> 32)); + tcg_opt_gen_movi(s, op, args, rl, (int32_t)a); + tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32)); /* We've done all we need to do with the movi. Skip it. */ oi_next = op2->next; @@ -1138,8 +1142,7 @@ void tcg_optimize(TCGContext *s) goto do_default; case INDEX_op_mulu2_i32: - if (temps[args[2]].state == TCG_TEMP_CONST - && temps[args[3]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2]) && temp_is_const(args[3])) { uint32_t a = temps[args[2]].val; uint32_t b = temps[args[3]].val; uint64_t r = (uint64_t)a * b; @@ -1149,8 +1152,8 @@ void tcg_optimize(TCGContext *s) rl = args[0]; rh = args[1]; - tcg_opt_gen_movi(s, op, args, rl, (uint32_t)r); - tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(r >> 32)); + tcg_opt_gen_movi(s, op, args, rl, (int32_t)r); + tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32)); /* We've done all we need to do with the movi. Skip it. */ oi_next = op2->next; @@ -1171,10 +1174,8 @@ void tcg_optimize(TCGContext *s) tcg_op_remove(s, op); } } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) - && temps[args[2]].state == TCG_TEMP_CONST - && temps[args[3]].state == TCG_TEMP_CONST - && temps[args[2]].val == 0 - && temps[args[3]].val == 0) { + && temp_is_const(args[2]) && temps[args[2]].val == 0 + && temp_is_const(args[3]) && temps[args[3]].val == 0) { /* Simplify LT/GE comparisons vs zero to a single compare vs the high word of the input. */ do_brcond_high: @@ -1236,10 +1237,8 @@ void tcg_optimize(TCGContext *s) do_setcond_const: tcg_opt_gen_movi(s, op, args, args[0], tmp); } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) - && temps[args[3]].state == TCG_TEMP_CONST - && temps[args[4]].state == TCG_TEMP_CONST - && temps[args[3]].val == 0 - && temps[args[4]].val == 0) { + && temp_is_const(args[3]) && temps[args[3]].val == 0 + && temp_is_const(args[4]) && temps[args[4]].val == 0) { /* Simplify LT/GE comparisons vs zero to a single compare vs the high word of the input. 
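For a 64-bit value split into (lo, hi) halves, x < 0 holds exactly when (int32_t)hi < 0, and x >= 0 exactly when (int32_t)hi >= 0, so e.g. setcond2_i32 dest, lo, hi, 0, 0, lt reduces to setcond_i32 dest, hi, 0, lt.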
*/ do_setcond_high: @@ -1299,7 +1298,9 @@ void tcg_optimize(TCGContext *s) if (!(args[nb_oargs + nb_iargs + 1] & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { for (i = 0; i < nb_globals; i++) { - reset_temp(i); + if (test_bit(i, temps_used.l)) { + reset_temp(i); + } } } goto do_reset_output; diff --git a/qemu/tcg/ppc/tcg-target.h b/qemu/tcg/ppc/tcg-target.h index 7ce704882..b4f081876 100644 --- a/qemu/tcg/ppc/tcg-target.h +++ b/qemu/tcg/ppc/tcg-target.h @@ -77,7 +77,8 @@ typedef enum { #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/qemu/tcg/ppc/tcg-target.c b/qemu/tcg/ppc/tcg-target.inc.c index 2b6eafa03..00bb90fc2 100644 --- a/qemu/tcg/ppc/tcg-target.c +++ b/qemu/tcg/ppc/tcg-target.inc.c @@ -80,22 +80,16 @@ static tcg_insn_unit *tb_ret_addr; -#ifndef GUEST_BASE -#define GUEST_BASE 0 -#endif - #include "elf.h" static bool have_isa_2_06; #define HAVE_ISA_2_06 have_isa_2_06 #define HAVE_ISEL have_isa_2_06 -#ifdef CONFIG_USE_GUEST_BASE +#ifndef CONFIG_SOFTMMU #define TCG_GUEST_BASE_REG 30 -#else -#define TCG_GUEST_BASE_REG 0 #endif -#ifndef NDEBUG +#ifdef CONFIG_DEBUG_TCG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "r0", "r1", @@ -213,7 +207,7 @@ static inline bool in_range_b(tcg_target_long target) static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target) { ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); - assert(in_range_b(disp)); + tcg_debug_assert(in_range_b(disp)); return disp & 0x3fffffc; } @@ -225,7 +219,7 @@ static void reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target) static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target) { ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); - assert(disp == (int16_t) disp); + tcg_debug_assert(disp == (int16_t) disp); return disp & 0xfffc; } @@ -251,7 +245,7 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, { tcg_insn_unit *target = (tcg_insn_unit *)value; - assert(addend == 0); + tcg_debug_assert(addend == 0); switch (type) { case R_PPC_REL14: reloc_pc14(code_ptr, target); @@ -571,7 +565,7 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, int sh, int mb) { - assert(TCG_TARGET_REG_BITS == 64); + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1); mb = MB64((mb >> 5) | ((mb << 1) & 0x3f)); tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb); @@ -706,14 +700,14 @@ static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) { int mb, me; - if ((c & 0xffff) == c) { + if (mask_operand(c, &mb, &me)) { + tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); + } else if ((c & 0xffff) == c) { tcg_out32(s, ANDI | SAI(src, dst, c)); return; } else if ((c & 0xffff0000) == c) { tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); return; - } else if (mask_operand(c, &mb, &me)) { - tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); } else { tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); @@ -724,19 +718,19 @@ static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) { int mb, me; - assert(TCG_TARGET_REG_BITS == 64); - if ((c & 0xffff) == c) { - tcg_out32(s, ANDI | SAI(src, dst, c)); - return; - } else if ((c & 
0xffff0000) == c) { - tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); - return; - } else if (mask64_operand(c, &mb, &me)) { + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); + if (mask64_operand(c, &mb, &me)) { if (mb == 0) { tcg_out_rld(s, RLDICR, dst, src, 0, me); } else { tcg_out_rld(s, RLDICL, dst, src, 0, mb); } + } else if ((c & 0xffff) == c) { + tcg_out32(s, ANDI | SAI(src, dst, c)); + return; + } else if ((c & 0xffff0000) == c) { + tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); + return; } else { tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); @@ -840,7 +834,7 @@ static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, { int opi, opx; - assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); if (type == TCG_TYPE_I32) { opi = LWZ, opx = LWZX; } else { @@ -854,7 +848,7 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, { int opi, opx; - assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); if (type == TCG_TYPE_I32) { opi = STW, opx = STWX; } else { @@ -987,7 +981,7 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, { int crop, sh; - assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); /* Ignore high bits of a potential constant arg2. */ if (type == TCG_TYPE_I32) { @@ -1245,11 +1239,36 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) { - TCGContext s; + tcg_insn_unit i1, i2; + uint64_t pair; + intptr_t diff = addr - jmp_addr; - s.code_buf = s.code_ptr = (tcg_insn_unit *)jmp_addr; - tcg_out_b(&s, 0, (tcg_insn_unit *)addr); - flush_icache_range(jmp_addr, jmp_addr + tcg_current_code_size(&s)); + if (in_range_b(diff)) { + i1 = B | (diff & 0x3fffffc); + i2 = NOP; + } else if (USE_REG_RA) { + intptr_t lo, hi; + diff = addr - (uintptr_t)tb_ret_addr; + lo = (int16_t)diff; + hi = (int32_t)(diff - lo); + tcg_debug_assert(diff == hi + lo); + i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16); + i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo); + } else { + tcg_debug_assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr); + i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16); + i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr); + } +#ifdef HOST_WORDS_BIGENDIAN + pair = (uint64_t)i1 << 32 | i2; +#else + pair = (uint64_t)i2 << 32 | i1; +#endif + + /* ??? __atomic_store_8, presuming there's some way to do that + for 32-bit, otherwise this is good enough for 64-bit. */ + *(uint64_t *)jmp_addr = pair; + flush_icache_range(jmp_addr, jmp_addr + 8); } static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) @@ -1361,7 +1380,7 @@ static void * const qemu_st_helpers[16] = { in CR7, loads the addend of the TLB into R3, and returns the register containing the guest address (zero-extended into R4). Clobbers R0 and R2. 
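A rough C model of the fast path being emitted (simplified: names as in CPUArchState/CPUTLBEntry, victim TLB and the alignment handling below omitted):

    unsigned index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    CPUTLBEntry *e = &env->tlb_table[mem_index][index];
    target_ulong cmp = is_read ? e->addr_read : e->addr_write;
    if ((addr & TARGET_PAGE_MASK) == cmp) {
        uintptr_t host = addr + e->addend;   /* hit: access guest RAM directly */
    } else {
        /* miss: fall through to the qemu_ld/st helper call */
    }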
*/ -static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, +static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc, TCGReg addrlo, TCGReg addrhi, int mem_index, bool is_read) { @@ -1371,6 +1390,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); TCGReg base = TCG_AREG0; + TCGMemOp s_bits = opc & MO_SIZE; /* Extract the page index, shifted into place for tlb index. */ if (TCG_TARGET_REG_BITS == 64) { @@ -1422,17 +1442,37 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, to minimize any load use delay. */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off); - /* Clear the non-page, non-alignment bits from the address. */ + /* Clear the non-page, non-alignment bits from the address */ if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { + /* We don't support unaligned accesses on 32-bits, preserve + * the bottom bits and thus trigger a comparison failure on + * unaligned accesses + */ tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); - } else if (!s_bits) { - tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, - 0, 63 - TARGET_PAGE_BITS); + } else if (s_bits) { + /* > byte access, we need to handle alignment */ + if ((opc & MO_AMASK) == MO_ALIGN) { + /* Alignment required by the front-end, same as 32-bits */ + tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo, + 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); + tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); + } else { + /* We support unaligned accesses, we need to make sure we fail + * if we cross a page boundary. The trick is to add the + * access_size-1 to the address before masking the low bits. + * That will make the address overflow to the next page if we + * cross a page boundary which will then force a mismatch of + * the TLB compare since the next page cannot possibly be in + * the same TLB index. + */ + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, (1 << s_bits) - 1)); + tcg_out_rld(s, RLDICR, TCG_REG_R0, TCG_REG_R0, + 0, 63 - TARGET_PAGE_BITS); + } } else { - tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo, - 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); - tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); + /* Byte access, just chop off the bits below the page index */ + tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, 0, 63 - TARGET_PAGE_BITS); } if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { @@ -1592,7 +1632,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) #ifdef CONFIG_SOFTMMU mem_index = get_mmuidx(oi); - addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, true); + addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true); /* Load a pointer into the current opcode w/conditional branch-link. */ label_ptr = s->code_ptr; @@ -1600,7 +1640,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ - rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; + rbase = guest_base ? 
TCG_GUEST_BASE_REG : 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); addrlo = TCG_REG_TMP1; @@ -1667,7 +1707,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #ifdef CONFIG_SOFTMMU mem_index = get_mmuidx(oi); - addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, false); + addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false); /* Load a pointer into the current opcode w/conditional branch-link. */ label_ptr = s->code_ptr; @@ -1675,7 +1715,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ - rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; + rbase = guest_base ? TCG_GUEST_BASE_REG : 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); addrlo = TCG_REG_TMP1; @@ -1779,9 +1819,9 @@ static void tcg_target_qemu_prologue(TCGContext *s) } tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); -#ifdef CONFIG_USE_GUEST_BASE - if (GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); +#ifndef CONFIG_SOFTMMU + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } #endif @@ -1817,7 +1857,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) } /* Epilogue */ - assert(tb_ret_addr == s->code_ptr); + tcg_debug_assert(tb_ret_addr == s->code_ptr); tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { @@ -1840,12 +1880,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, if (USE_REG_RA) { ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr); - /* If we can use a direct branch, otherwise use the value in RA. - Note that the direct branch is always forward. If it's in - range now, it'll still be in range after the movi. Don't - bother about the 20 bytes where the test here fails but it - would succeed below. */ - if (!in_range_b(disp)) { + /* Use a direct branch if we can, otherwise use the value in RA. + Note that the direct branch is always backward, thus we need + to account for the possibility of 5 insns from the movi. */ + if (!in_range_b(disp - 20)) { tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR); tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); tcg_out32(s, BCCTR | BO_ALWAYS); @@ -1856,14 +1894,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out_b(s, 0, tb_ret_addr); break; case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* Direct jump method. */ - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); - s->code_ptr += 7; - } else { - /* Indirect jump method. */ - tcg_abort(); + tcg_debug_assert(s->tb_jmp_offset); + /* Direct jump. Ensure the next insns are 8-byte aligned. */ + if ((uintptr_t)s->code_ptr & 7) { + tcg_out32(s, NOP); } + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + /* To be replaced by either a branch+nop or a load into TMP1. 
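ppc_tb_set_jmp_target() above patches exactly these two slots with a single aligned 8-byte store (B plus NOP when the target is in branch range, otherwise an addis/addi or addis/ori pair feeding the mtctr/bcctr that follows); the alignment NOP emitted just before is what lets that store be treated as atomic.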
*/ s->code_ptr += 2; + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); s->tb_next_offset[args[0]] = tcg_current_code_size(s); break; case INDEX_op_br: @@ -2200,12 +2240,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_ext16s_i64: c = EXTSH; goto gen_ext; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: c = EXTSW; goto gen_ext; gen_ext: tcg_out32(s, c | RS(args[1]) | RA(args[0])); break; + case INDEX_op_extu_i32_i64: + tcg_out_ext32u(s, args[0], args[1]); + break; case INDEX_op_setcond_i32: tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], @@ -2482,6 +2526,8 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_ext8s_i64, { "r", "r" } }, { INDEX_op_ext16s_i64, { "r", "r" } }, { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, { INDEX_op_bswap16_i64, { "r", "r" } }, { INDEX_op_bswap32_i64, { "r", "r" } }, { INDEX_op_bswap64_i64, { "r", "r" } }, @@ -2679,8 +2725,6 @@ static void __attribute__((constructor)) tcg_cache_init(void) } #elif defined __APPLE__ -#include <stdio.h> -#include <sys/types.h> #include <sys/sysctl.h> static void __attribute__((constructor)) tcg_cache_init(void) @@ -2699,11 +2743,6 @@ static void __attribute__((constructor)) tcg_cache_init(void) } #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> #include <sys/sysctl.h> static void __attribute__((constructor)) tcg_cache_init(void) diff --git a/qemu/tcg/s390/tcg-target.h b/qemu/tcg/s390/tcg-target.h index 91576d594..d9dc03873 100644 --- a/qemu/tcg/s390/tcg-target.h +++ b/qemu/tcg/s390/tcg-target.h @@ -72,7 +72,8 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/qemu/tcg/s390/tcg-target.c b/qemu/tcg/s390/tcg-target.inc.c index aa718eca0..580553239 100644 --- a/qemu/tcg/s390/tcg-target.c +++ b/qemu/tcg/s390/tcg-target.inc.c @@ -51,17 +51,10 @@ /* A scratch register that may be used throughout the backend. */ #define TCG_TMP0 TCG_REG_R14 -#ifdef CONFIG_USE_GUEST_BASE +#ifndef CONFIG_SOFTMMU #define TCG_GUEST_BASE_REG TCG_REG_R13 -#else -#define TCG_GUEST_BASE_REG TCG_REG_R0 -#endif - -#ifndef GUEST_BASE -#define GUEST_BASE 0 -#endif - /* All of the following instructions are prefixed with their instruction format, and are defined as 8- or 16-bit quantities, even when the two halves of the 16-bit quantity may appear 32 bits apart in the insn.
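(For example, RISBG, used below in tcg_out_tlb_read() via tcg_out_risbg(), is defined as the 16-bit value 0xec55: the 0xec half occupies the first byte of the 6-byte insn and the 0x55 half the last byte.)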
@@ -228,7 +221,7 @@ typedef enum S390Opcode { RX_STH = 0x40, } S390Opcode; -#ifndef NDEBUG +#ifdef CONFIG_DEBUG_TCG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" @@ -355,15 +348,15 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { intptr_t pcrel2 = (tcg_insn_unit *)value - (code_ptr - 1); - assert(addend == -2); + tcg_debug_assert(addend == -2); switch (type) { case R_390_PC16DBL: - assert(pcrel2 == (int16_t)pcrel2); + tcg_debug_assert(pcrel2 == (int16_t)pcrel2); tcg_patch16(code_ptr, pcrel2); break; case R_390_PC32DBL: - assert(pcrel2 == (int32_t)pcrel2); + tcg_debug_assert(pcrel2 == (int32_t)pcrel2); tcg_patch32(code_ptr, pcrel2); break; default: @@ -1504,20 +1497,36 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc, int mem_index, bool is_ld) { - TCGMemOp s_bits = opc & MO_SIZE; - uint64_t tlb_mask = TARGET_PAGE_MASK | ((1 << s_bits) - 1); - int ofs; + int s_mask = (1 << (opc & MO_SIZE)) - 1; + int ofs, a_off; + uint64_t tlb_mask; + + /* For aligned accesses, we check the first byte and include the alignment + bits within the address. For unaligned access, we check that we don't + cross pages using the address of the last byte of the access. */ + if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { + a_off = 0; + tlb_mask = TARGET_PAGE_MASK | s_mask; + } else { + a_off = s_mask; + tlb_mask = TARGET_PAGE_MASK; + } if (facilities & FACILITY_GEN_INST_EXT) { tcg_out_risbg(s, TCG_REG_R2, addr_reg, 64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS, 63 - CPU_TLB_ENTRY_BITS, 64 + CPU_TLB_ENTRY_BITS - TARGET_PAGE_BITS, 1); - tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); + if (a_off) { + tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); + tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); + } else { + tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); + } } else { tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_R3, addr_reg); + tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); @@ -1622,9 +1631,9 @@ static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, tgen_ext32u(s, TCG_TMP0, *addr_reg); *addr_reg = TCG_TMP0; } - if (GUEST_BASE < 0x80000) { + if (guest_base < 0x80000) { *index_reg = TCG_REG_NONE; - *disp = GUEST_BASE; + *disp = guest_base; } else { *index_reg = TCG_GUEST_BASE_REG; *disp = 0; } @@ -2090,6 +2099,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext16s_i64: tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]); break; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tgen_ext32s(s, args[0], args[1]); break; @@ -2099,6 +2109,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext16u_i64: tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]); break; + case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: tgen_ext32u(s, args[0], args[1]); break; @@ -2251,6 +2262,9 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_ext32s_i64, { "r", "r" } }, { INDEX_op_ext32u_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, + { INDEX_op_bswap16_i64, {
"r", "r" } }, { INDEX_op_bswap32_i64, { "r", "r" } }, { INDEX_op_bswap64_i64, { "r", "r" } }, @@ -2328,10 +2342,12 @@ static void tcg_target_qemu_prologue(TCGContext *s) TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET, CPU_TEMP_BUF_NLONGS * sizeof(long)); - if (GUEST_BASE >= 0x80000) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); +#ifndef CONFIG_SOFTMMU + if (guest_base >= 0x80000) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } +#endif tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); /* br %r3 (go to TB) */ diff --git a/qemu/tcg/sparc/tcg-target.h b/qemu/tcg/sparc/tcg-target.h index f584de476..2cd72d2d4 100644 --- a/qemu/tcg/sparc/tcg-target.h +++ b/qemu/tcg/sparc/tcg-target.h @@ -118,7 +118,8 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i32 1 +#define TCG_TARGET_HAS_extrl_i64_i32 1 +#define TCG_TARGET_HAS_extrh_i64_i32 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 0 diff --git a/qemu/tcg/sparc/tcg-target.c b/qemu/tcg/sparc/tcg-target.inc.c index 1a870a81d..d641cfd8c 100644 --- a/qemu/tcg/sparc/tcg-target.c +++ b/qemu/tcg/sparc/tcg-target.inc.c @@ -24,7 +24,7 @@ #include "tcg-be-null.h" -#ifndef NDEBUG +#ifdef CONFIG_DEBUG_TCG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "%g0", "%g1", @@ -83,10 +83,8 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #define TCG_REG_T1 TCG_REG_G1 #define TCG_REG_T2 TCG_REG_O7 -#ifdef CONFIG_USE_GUEST_BASE +#ifndef CONFIG_SOFTMMU # define TCG_GUEST_BASE_REG TCG_REG_I5 -#else -# define TCG_GUEST_BASE_REG TCG_REG_G0 #endif static const int tcg_target_reg_alloc_order[] = { @@ -291,7 +289,7 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, { uint32_t insn; - assert(addend == 0); + tcg_debug_assert(addend == 0); value = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr); switch (type) { @@ -955,9 +953,9 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) | INSN_IMM13(-frame_size)); -#ifdef CONFIG_USE_GUEST_BASE - if (GUEST_BASE != 0) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); +#ifndef CONFIG_SOFTMMU + if (guest_base != 0) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } #endif @@ -1110,7 +1108,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, } else { func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)]; } - assert(func != NULL); + tcg_debug_assert(func != NULL); tcg_out_call_nodelay(s, func); /* delay slot */ tcg_out_movi(s, TCG_TYPE_I32, param, oi); @@ -1146,7 +1144,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, addr = TCG_REG_T1; } tcg_out_ldst_rr(s, data, addr, - (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), + (guest_base ? 
TCG_GUEST_BASE_REG : TCG_REG_G0), qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); #endif /* CONFIG_SOFTMMU */ } @@ -1189,7 +1187,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, tcg_out_mov(s, TCG_TYPE_REG, param++, data); func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)]; - assert(func != NULL); + tcg_debug_assert(func != NULL); tcg_out_call_nodelay(s, func); /* delay slot */ tcg_out_movi(s, TCG_TYPE_I32, param, oi); @@ -1201,7 +1199,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, addr = TCG_REG_T1; } tcg_out_ldst_rr(s, data, addr, - (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), + (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); #endif /* CONFIG_SOFTMMU */ } @@ -1407,18 +1405,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_divu_i64: c = ARITH_UDIVX; goto gen_arith; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA); break; + case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); break; - case INDEX_op_trunc_shr_i32: - if (a2 == 0) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - } else { - tcg_out_arithi(s, a0, a1, a2, SHIFT_SRLX); - } + case INDEX_op_extrl_i64_i32: + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + break; + case INDEX_op_extrh_i64_i32: + tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX); break; case INDEX_op_brcond_i64: @@ -1531,9 +1530,12 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_neg_i64, { "R", "RJ" } }, { INDEX_op_not_i64, { "R", "RJ" } }, - { INDEX_op_ext32s_i64, { "R", "r" } }, - { INDEX_op_ext32u_i64, { "R", "r" } }, - { INDEX_op_trunc_shr_i32, { "r", "R" } }, + { INDEX_op_ext32s_i64, { "R", "R" } }, + { INDEX_op_ext32u_i64, { "R", "R" } }, + { INDEX_op_ext_i32_i64, { "R", "r" } }, + { INDEX_op_extu_i32_i64, { "R", "r" } }, + { INDEX_op_extrl_i64_i32, { "r", "R" } }, + { INDEX_op_extrh_i64_i32, { "r", "R" } }, { INDEX_op_brcond_i64, { "RZ", "RJ" } }, { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } }, @@ -1643,7 +1645,7 @@ void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) /* We can reach the entire address space for 32-bit. For 64-bit the code_gen_buffer can't be larger than 2GB. */ - assert(disp == (int32_t)disp); + tcg_debug_assert(disp == (int32_t)disp); *ptr = CALL | (uint32_t)disp >> 2; flush_icache_range(jmp_addr, jmp_addr + 4); diff --git a/qemu/tcg/tcg-be-ldst.h b/qemu/tcg/tcg-be-ldst.h index 40a2369b7..17777aec5 100644 --- a/qemu/tcg/tcg-be-ldst.h +++ b/qemu/tcg/tcg-be-ldst.h @@ -56,7 +56,7 @@ static inline void tcg_out_tb_init(TCGContext *s) static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l); static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l); -static void tcg_out_tb_finalize(TCGContext *s) +static bool tcg_out_tb_finalize(TCGContext *s) { TCGLabelQemuLdst *lb; @@ -67,7 +67,16 @@ static void tcg_out_tb_finalize(TCGContext *s) } else { tcg_out_qemu_st_slow_path(s, lb); } + + /* Test for (pending) buffer overflow. The assumption is that any + one operation beginning below the high water mark cannot overrun + the buffer completely. Thus we can test for overflow after + generating code without having to check during generation. 
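The mark itself sits 1024 bytes below the true end of code_gen_buffer (see tcg_prologue_init() in tcg.c further down), which is what bounds how far a single operation can overrun.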
*/ + if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { + return false; + } } + return true; } /* diff --git a/qemu/tcg/tcg-be-null.h b/qemu/tcg/tcg-be-null.h index 74c57d5a6..5222fe29e 100644 --- a/qemu/tcg/tcg-be-null.h +++ b/qemu/tcg/tcg-be-null.h @@ -38,6 +38,7 @@ static inline void tcg_out_tb_init(TCGContext *s) * Generate TB finalization at the end of block */ -static inline void tcg_out_tb_finalize(TCGContext *s) +static inline bool tcg_out_tb_finalize(TCGContext *s) { + return true; } diff --git a/qemu/tcg/tcg-common.c b/qemu/tcg/tcg-common.c new file mode 100644 index 000000000..97305a3ef --- /dev/null +++ b/qemu/tcg/tcg-common.c @@ -0,0 +1,38 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "tcg/tcg.h" + +#if defined(CONFIG_TCG_INTERPRETER) +uintptr_t tci_tb_ptr; +#endif + +TCGOpDef tcg_op_defs[] = { +#define DEF(s, oargs, iargs, cargs, flags) \ + { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags }, +#include "tcg-opc.h" +#undef DEF +}; +const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs); diff --git a/qemu/tcg/tcg-op.c b/qemu/tcg/tcg-op.c index 45098c310..f554b86d4 100644 --- a/qemu/tcg/tcg-op.c +++ b/qemu/tcg/tcg-op.c @@ -22,6 +22,7 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "tcg.h" #include "tcg-op.h" @@ -1737,28 +1738,28 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) /* Size changing operations. 
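extrl_i64_i32 and extrh_i64_i32 below replace the old trunc_shr_i64_i32: extrl yields the low 32 bits of the 64-bit input, extrh the high 32 bits (a right shift by 32 in the generic fallback).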
*/ -void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned count) +void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { - tcg_debug_assert(count < 64); if (TCG_TARGET_REG_BITS == 32) { - if (count >= 32) { - tcg_gen_shri_i32(ret, TCGV_HIGH(arg), count - 32); - } else if (count == 0) { - tcg_gen_mov_i32(ret, TCGV_LOW(arg)); - } else { - TCGv_i64 t = tcg_temp_new_i64(); - tcg_gen_shri_i64(t, arg, count); - tcg_gen_mov_i32(ret, TCGV_LOW(t)); - tcg_temp_free_i64(t); - } - } else if (TCG_TARGET_HAS_trunc_shr_i32) { - tcg_gen_op3i_i32(INDEX_op_trunc_shr_i32, ret, - MAKE_TCGV_I32(GET_TCGV_I64(arg)), count); - } else if (count == 0) { + tcg_gen_mov_i32(ret, TCGV_LOW(arg)); + } else if (TCG_TARGET_HAS_extrl_i64_i32) { + tcg_gen_op2(&tcg_ctx, INDEX_op_extrl_i64_i32, + GET_TCGV_I32(ret), GET_TCGV_I64(arg)); + } else { tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg))); + } +} + +void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +{ + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_mov_i32(ret, TCGV_HIGH(arg)); + } else if (TCG_TARGET_HAS_extrh_i64_i32) { + tcg_gen_op2(&tcg_ctx, INDEX_op_extrh_i64_i32, + GET_TCGV_I32(ret), GET_TCGV_I64(arg)); } else { TCGv_i64 t = tcg_temp_new_i64(); - tcg_gen_shri_i64(t, arg, count); + tcg_gen_shri_i64(t, arg, 32); tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(t))); tcg_temp_free_i64(t); } @@ -1770,9 +1771,8 @@ void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg) tcg_gen_mov_i32(TCGV_LOW(ret), arg); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } else { - /* Note: we assume the target supports move between - 32 and 64 bit registers. */ - tcg_gen_ext32u_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg))); + tcg_gen_op2(&tcg_ctx, INDEX_op_extu_i32_i64, + GET_TCGV_I64(ret), GET_TCGV_I32(arg)); } } @@ -1782,9 +1782,8 @@ void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg) tcg_gen_mov_i32(TCGV_LOW(ret), arg); tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); } else { - /* Note: we assume the target supports move between - 32 and 64 bit registers. */ - tcg_gen_ext32s_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg))); + tcg_gen_op2(&tcg_ctx, INDEX_op_ext_i32_i64, + GET_TCGV_I64(ret), GET_TCGV_I32(arg)); } } @@ -1820,8 +1819,8 @@ void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg) tcg_gen_mov_i32(lo, TCGV_LOW(arg)); tcg_gen_mov_i32(hi, TCGV_HIGH(arg)); } else { - tcg_gen_trunc_shr_i64_i32(lo, arg, 0); - tcg_gen_trunc_shr_i64_i32(hi, arg, 32); + tcg_gen_extrl_i64_i32(lo, arg); + tcg_gen_extrh_i64_i32(hi, arg); } } diff --git a/qemu/tcg/tcg-op.h b/qemu/tcg/tcg-op.h index d1d763f6f..c446d3dc7 100644 --- a/qemu/tcg/tcg-op.h +++ b/qemu/tcg/tcg-op.h @@ -684,7 +684,8 @@ static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg); void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg); void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high); -void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned int c); +void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg); +void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg); void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg); void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg); @@ -693,28 +694,59 @@ static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi) tcg_gen_deposit_i64(ret, lo, hi, 32, 32); } -static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) -{ - tcg_gen_trunc_shr_i64_i32(ret, arg, 0); -} - /* QEMU specific operations. 
*/ #ifndef TARGET_LONG_BITS #error must include QEMU headers #endif -/* debug info: write the PC of the corresponding QEMU CPU instruction */ -static inline void tcg_gen_debug_insn_start(uint64_t pc) +#if TARGET_INSN_START_WORDS == 1 +# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS +static inline void tcg_gen_insn_start(target_ulong pc) +{ + tcg_gen_op1(&tcg_ctx, INDEX_op_insn_start, pc); +} +# else +static inline void tcg_gen_insn_start(target_ulong pc) +{ + tcg_gen_op2(&tcg_ctx, INDEX_op_insn_start, + (uint32_t)pc, (uint32_t)(pc >> 32)); +} +# endif +#elif TARGET_INSN_START_WORDS == 2 +# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS +static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1) { - /* XXX: must really use a 32 bit size for TCGArg in all cases */ -#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS - tcg_gen_op2ii(INDEX_op_debug_insn_start, - (uint32_t)(pc), (uint32_t)(pc >> 32)); + tcg_gen_op2(&tcg_ctx, INDEX_op_insn_start, pc, a1); +} +# else +static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1) +{ + tcg_gen_op4(&tcg_ctx, INDEX_op_insn_start, + (uint32_t)pc, (uint32_t)(pc >> 32), + (uint32_t)a1, (uint32_t)(a1 >> 32)); +} +# endif +#elif TARGET_INSN_START_WORDS == 3 +# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS +static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1, + target_ulong a2) +{ + tcg_gen_op3(&tcg_ctx, INDEX_op_insn_start, pc, a1, a2); +} +# else +static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1, + target_ulong a2) +{ + tcg_gen_op6(&tcg_ctx, INDEX_op_insn_start, + (uint32_t)pc, (uint32_t)(pc >> 32), + (uint32_t)a1, (uint32_t)(a1 >> 32), + (uint32_t)a2, (uint32_t)(a2 >> 32)); +} +# endif #else - tcg_gen_op1i(INDEX_op_debug_insn_start, pc); +# error "Unhandled number of operands to insn_start" #endif -} static inline void tcg_gen_exit_tb(uintptr_t val) { @@ -724,7 +756,6 @@ static inline void tcg_gen_exit_tb(uintptr_t val) void tcg_gen_goto_tb(unsigned idx); #if TARGET_LONG_BITS == 32 -#define TCGv TCGv_i32 #define tcg_temp_new() tcg_temp_new_i32() #define tcg_global_reg_new tcg_global_reg_new_i32 #define tcg_global_mem_new tcg_global_mem_new_i32 @@ -736,7 +767,6 @@ void tcg_gen_goto_tb(unsigned idx); #define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32 #define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32 #else -#define TCGv TCGv_i64 #define tcg_temp_new() tcg_temp_new_i64() #define tcg_global_reg_new tcg_global_reg_new_i64 #define tcg_global_mem_new tcg_global_mem_new_i64 @@ -853,7 +883,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_divu_tl tcg_gen_divu_i64 #define tcg_gen_remu_tl tcg_gen_remu_i64 #define tcg_gen_discard_tl tcg_gen_discard_i64 -#define tcg_gen_trunc_tl_i32 tcg_gen_trunc_i64_i32 +#define tcg_gen_trunc_tl_i32 tcg_gen_extrl_i64_i32 #define tcg_gen_trunc_i64_tl tcg_gen_mov_i64 #define tcg_gen_extu_i32_tl tcg_gen_extu_i32_i64 #define tcg_gen_ext_i32_tl tcg_gen_ext_i32_i64 @@ -932,7 +962,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_remu_tl tcg_gen_remu_i32 #define tcg_gen_discard_tl tcg_gen_discard_i32 #define tcg_gen_trunc_tl_i32 tcg_gen_mov_i32 -#define tcg_gen_trunc_i64_tl tcg_gen_trunc_i64_i32 +#define tcg_gen_trunc_i64_tl tcg_gen_extrl_i64_i32 #define tcg_gen_extu_i32_tl tcg_gen_mov_i32 #define tcg_gen_ext_i32_tl tcg_gen_mov_i32 #define tcg_gen_extu_tl_i64 tcg_gen_extu_i32_i64 diff --git a/qemu/tcg/tcg-opc.h b/qemu/tcg/tcg-opc.h index 13ccb60a5..6d0410c4b 100644 --- a/qemu/tcg/tcg-opc.h +++ 
b/qemu/tcg/tcg-opc.h @@ -138,8 +138,14 @@ DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) -DEF(trunc_shr_i32, 1, 1, 1, - IMPL(TCG_TARGET_HAS_trunc_shr_i32) +/* size changing ops */ +DEF(ext_i32_i64, 1, 1, 0, IMPL64) +DEF(extu_i32_i64, 1, 1, 0, IMPL64) +DEF(extrl_i64_i32, 1, 1, 0, + IMPL(TCG_TARGET_HAS_extrl_i64_i32) + | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) +DEF(extrh_i64_i32, 1, 1, 0, + IMPL(TCG_TARGET_HAS_extrh_i64_i32) | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64) @@ -167,18 +173,15 @@ DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64)) DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64)) DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64)) +#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2) +#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2) + /* QEMU specific */ -#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS -DEF(debug_insn_start, 0, 0, 2, TCG_OPF_NOT_PRESENT) -#else -DEF(debug_insn_start, 0, 0, 1, TCG_OPF_NOT_PRESENT) -#endif +DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS, + TCG_OPF_NOT_PRESENT) DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END) DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END) -#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2) -#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2) - DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1, diff --git a/qemu/tcg/tcg.c b/qemu/tcg/tcg.c index 0892a9bbf..796addd1f 100644 --- a/qemu/tcg/tcg.c +++ b/qemu/tcg/tcg.c @@ -26,17 +26,12 @@ #define USE_LIVENESS_ANALYSIS #define USE_TCG_OPTIMIZATIONS -#include "config.h" +#include "qemu/osdep.h" /* Define to jump the ELF file used to communicate with GDB. */ #undef DEBUG_JIT -#if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG) -/* define it to suppress various consistency checks (faster) */ -#define NDEBUG -#endif - -#include "qemu-common.h" +#include "qemu/cutils.h" #include "qemu/host-utils.h" #include "qemu/timer.h" @@ -60,8 +55,10 @@ #endif #include "elf.h" +#include "exec/log.h" -/* Forward declarations for functions declared in tcg-target.c and used here. */ +/* Forward declarations for functions declared in tcg-target.inc.c and + used here. */ static void tcg_target_init(TCGContext *s); static void tcg_target_qemu_prologue(TCGContext *s); static void patch_reloc(tcg_insn_unit *code_ptr, int type, @@ -95,7 +92,7 @@ static void tcg_register_jit_int(void *buf, size_t size, size_t debug_frame_size) __attribute__((unused)); -/* Forward declarations for functions declared and used in tcg-target.c. */ +/* Forward declarations for functions declared and used in tcg-target.inc.c. 
*/ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str); static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, intptr_t arg2); @@ -110,15 +107,9 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); static int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct); static void tcg_out_tb_init(TCGContext *s); -static void tcg_out_tb_finalize(TCGContext *s); +static bool tcg_out_tb_finalize(TCGContext *s); -TCGOpDef tcg_op_defs[] = { -#define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags }, -#include "tcg-opc.h" -#undef DEF -}; -const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs); static TCGRegSet tcg_target_available_regs[2]; static TCGRegSet tcg_target_call_clobber_regs; @@ -233,7 +224,7 @@ static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) intptr_t value = (intptr_t)ptr; TCGRelocation *r; - assert(!l->has_value); + tcg_debug_assert(!l->has_value); for (r = l->u.first_reloc; r != NULL; r = r->next) { patch_reloc(r->ptr, r->type, value, r->addend); @@ -255,7 +246,7 @@ TCGLabel *gen_new_label(void) return l; } -#include "tcg-target.c" +#include "tcg-target.inc.c" /* pool based memory allocation */ void *tcg_malloc_internal(TCGContext *s, int size) @@ -323,6 +314,8 @@ static const TCGHelperInfo all_helpers[] = { #include "exec/helper-tcg.h" }; +static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; + void tcg_context_init(TCGContext *s) { int op, total_args, n, i; @@ -365,34 +358,64 @@ void tcg_context_init(TCGContext *s) } tcg_target_init(s); + + /* Reverse the order of the saved registers, assuming they're all at + the start of tcg_target_reg_alloc_order. */ + for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { + int r = tcg_target_reg_alloc_order[n]; + if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { + break; + } + } + for (i = 0; i < n; ++i) { + indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; + } + for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { + indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; + } } void tcg_prologue_init(TCGContext *s) { - /* init global prologue and epilogue */ - s->code_buf = s->code_gen_prologue; - s->code_ptr = s->code_buf; + size_t prologue_size, total_size; + void *buf0, *buf1; + + /* Put the prologue at the beginning of code_gen_buffer. */ + buf0 = s->code_gen_buffer; + s->code_ptr = buf0; + s->code_buf = buf0; + s->code_gen_prologue = buf0; + + /* Generate the prologue. */ tcg_target_qemu_prologue(s); - flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); + buf1 = s->code_ptr; + flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1); + + /* Deduct the prologue from the buffer. */ + prologue_size = tcg_current_code_size(s); + s->code_gen_ptr = buf1; + s->code_gen_buffer = buf1; + s->code_buf = buf1; + total_size = s->code_gen_buffer_size - prologue_size; + s->code_gen_buffer_size = total_size; + + /* Compute a high-water mark, at which we voluntarily flush the buffer + and start over. The size here is arbitrary, significantly larger + than we expect the code generation for any one opcode to require. 
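Crossing the mark is caught in tcg_out_tb_finalize() (tcg-be-ldst.h above), which then returns false so the caller can flush the whole buffer and retranslate.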
*/ + s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024); + + tcg_register_jit(s->code_gen_buffer, total_size); #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { - size_t size = tcg_current_code_size(s); - qemu_log("PROLOGUE: [size=%zu]\n", size); - log_disas(s->code_buf, size); + qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); + log_disas(buf0, prologue_size); qemu_log("\n"); qemu_log_flush(); } #endif } -void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size) -{ - s->frame_start = start; - s->frame_end = start + size; - s->frame_reg = reg; -} - void tcg_func_start(TCGContext *s) { tcg_pool_reset(s); @@ -416,128 +439,133 @@ void tcg_func_start(TCGContext *s) s->be = tcg_malloc(sizeof(TCGBackendData)); } -static inline void tcg_temp_alloc(TCGContext *s, int n) +static inline int temp_idx(TCGContext *s, TCGTemp *ts) { - if (n > TCG_MAX_TEMPS) - tcg_abort(); + ptrdiff_t n = ts - s->temps; + tcg_debug_assert(n >= 0 && n < s->nb_temps); + return n; } -static inline int tcg_global_reg_new_internal(TCGType type, int reg, - const char *name) +static inline TCGTemp *tcg_temp_alloc(TCGContext *s) +{ + int n = s->nb_temps++; + tcg_debug_assert(n < TCG_MAX_TEMPS); + return memset(&s->temps[n], 0, sizeof(TCGTemp)); +} + +static inline TCGTemp *tcg_global_alloc(TCGContext *s) +{ + tcg_debug_assert(s->nb_globals == s->nb_temps); + s->nb_globals++; + return tcg_temp_alloc(s); +} + +static int tcg_global_reg_new_internal(TCGContext *s, TCGType type, + TCGReg reg, const char *name) { - TCGContext *s = &tcg_ctx; TCGTemp *ts; - int idx; -#if TCG_TARGET_REG_BITS == 32 - if (type != TCG_TYPE_I32) - tcg_abort(); -#endif - if (tcg_regset_test_reg(s->reserved_regs, reg)) + if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { tcg_abort(); - idx = s->nb_globals; - tcg_temp_alloc(s, s->nb_globals + 1); - ts = &s->temps[s->nb_globals]; + } + + ts = tcg_global_alloc(s); ts->base_type = type; ts->type = type; ts->fixed_reg = 1; ts->reg = reg; ts->name = name; - s->nb_globals++; tcg_regset_set_reg(s->reserved_regs, reg); - return idx; + + return temp_idx(s, ts); } -TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name) +void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) { int idx; + s->frame_start = start; + s->frame_end = start + size; + idx = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); + s->frame_temp = &s->temps[idx]; +} - idx = tcg_global_reg_new_internal(TCG_TYPE_I32, reg, name); +TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name) +{ + TCGContext *s = &tcg_ctx; + int idx; + + if (tcg_regset_test_reg(s->reserved_regs, reg)) { + tcg_abort(); + } + idx = tcg_global_reg_new_internal(s, TCG_TYPE_I32, reg, name); return MAKE_TCGV_I32(idx); } -TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name) +TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name) { + TCGContext *s = &tcg_ctx; int idx; - idx = tcg_global_reg_new_internal(TCG_TYPE_I64, reg, name); + if (tcg_regset_test_reg(s->reserved_regs, reg)) { + tcg_abort(); + } + idx = tcg_global_reg_new_internal(s, TCG_TYPE_I64, reg, name); return MAKE_TCGV_I64(idx); } -static inline int tcg_global_mem_new_internal(TCGType type, int reg, - intptr_t offset, - const char *name) +int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, + intptr_t offset, const char *name) { TCGContext *s = &tcg_ctx; - TCGTemp *ts; - int idx; + TCGTemp *base_ts = &s->temps[GET_TCGV_PTR(base)]; + TCGTemp *ts = tcg_global_alloc(s); + int indirect_reg = 0, bigendian = 0; 
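(On a 32-bit host a 64-bit global is registered below as two consecutive 32-bit halves; the bigendian flag computed next puts the low half at offset + 4 on a big-endian host and at offset on a little-endian one.)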
+#ifdef HOST_WORDS_BIGENDIAN + bigendian = 1; +#endif - idx = s->nb_globals; -#if TCG_TARGET_REG_BITS == 32 - if (type == TCG_TYPE_I64) { + if (!base_ts->fixed_reg) { + indirect_reg = 1; + base_ts->indirect_base = 1; + } + + if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { + TCGTemp *ts2 = tcg_global_alloc(s); char buf[64]; - tcg_temp_alloc(s, s->nb_globals + 2); - ts = &s->temps[s->nb_globals]; - ts->base_type = type; + + ts->base_type = TCG_TYPE_I64; ts->type = TCG_TYPE_I32; - ts->fixed_reg = 0; + ts->indirect_reg = indirect_reg; ts->mem_allocated = 1; - ts->mem_reg = reg; -#ifdef HOST_WORDS_BIGENDIAN - ts->mem_offset = offset + 4; -#else - ts->mem_offset = offset; -#endif + ts->mem_base = base_ts; + ts->mem_offset = offset + bigendian * 4; pstrcpy(buf, sizeof(buf), name); pstrcat(buf, sizeof(buf), "_0"); ts->name = strdup(buf); - ts++; - ts->base_type = type; - ts->type = TCG_TYPE_I32; - ts->fixed_reg = 0; - ts->mem_allocated = 1; - ts->mem_reg = reg; -#ifdef HOST_WORDS_BIGENDIAN - ts->mem_offset = offset; -#else - ts->mem_offset = offset + 4; -#endif + tcg_debug_assert(ts2 == ts + 1); + ts2->base_type = TCG_TYPE_I64; + ts2->type = TCG_TYPE_I32; + ts2->indirect_reg = indirect_reg; + ts2->mem_allocated = 1; + ts2->mem_base = base_ts; + ts2->mem_offset = offset + (1 - bigendian) * 4; pstrcpy(buf, sizeof(buf), name); pstrcat(buf, sizeof(buf), "_1"); ts2->name = strdup(buf); - - s->nb_globals += 2; - } else -#endif - { - tcg_temp_alloc(s, s->nb_globals + 1); - ts = &s->temps[s->nb_globals]; + } else { ts->base_type = type; ts->type = type; - ts->fixed_reg = 0; + ts->indirect_reg = indirect_reg; ts->mem_allocated = 1; - ts->mem_reg = reg; + ts->mem_base = base_ts; ts->mem_offset = offset; ts->name = name; - s->nb_globals++; } - return idx; -} - -TCGv_i32 tcg_global_mem_new_i32(int reg, intptr_t offset, const char *name) -{ - int idx = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name); - return MAKE_TCGV_I32(idx); + return temp_idx(s, ts); } -TCGv_i64 tcg_global_mem_new_i64(int reg, intptr_t offset, const char *name) -{ - int idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name); - return MAKE_TCGV_I64(idx); -} - -static inline int tcg_temp_new_internal(TCGType type, int temp_local) +static int tcg_temp_new_internal(TCGType type, int temp_local) { TCGContext *s = &tcg_ctx; TCGTemp *ts; @@ -551,38 +579,30 @@ static inline int tcg_temp_new_internal(TCGType type, int temp_local) ts = &s->temps[idx]; ts->temp_allocated = 1; - assert(ts->base_type == type); - assert(ts->temp_local == temp_local); + tcg_debug_assert(ts->base_type == type); + tcg_debug_assert(ts->temp_local == temp_local); } else { - idx = s->nb_temps; -#if TCG_TARGET_REG_BITS == 32 - if (type == TCG_TYPE_I64) { - tcg_temp_alloc(s, s->nb_temps + 2); - ts = &s->temps[s->nb_temps]; - ts->base_type = type; - ts->type = TCG_TYPE_I32; - ts->temp_allocated = 1; - ts->temp_local = temp_local; - ts->name = NULL; - ts++; + ts = tcg_temp_alloc(s); + if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { + TCGTemp *ts2 = tcg_temp_alloc(s); + ts->base_type = type; ts->type = TCG_TYPE_I32; ts->temp_allocated = 1; ts->temp_local = temp_local; - ts->name = NULL; - s->nb_temps += 2; - } else -#endif - { - tcg_temp_alloc(s, s->nb_temps + 1); - ts = &s->temps[s->nb_temps]; + + tcg_debug_assert(ts2 == ts + 1); + ts2->base_type = TCG_TYPE_I64; + ts2->type = TCG_TYPE_I32; + ts2->temp_allocated = 1; + ts2->temp_local = temp_local; + } else { ts->base_type = type; ts->type = type; ts->temp_allocated = 1; ts->temp_local =
-static inline int tcg_temp_new_internal(TCGType type, int temp_local)
+static int tcg_temp_new_internal(TCGType type, int temp_local)
 {
     TCGContext *s = &tcg_ctx;
     TCGTemp *ts;
@@ -551,38 +579,30 @@ static inline int tcg_temp_new_internal(TCGType type, int temp_local)
 
         ts = &s->temps[idx];
         ts->temp_allocated = 1;
-        assert(ts->base_type == type);
-        assert(ts->temp_local == temp_local);
+        tcg_debug_assert(ts->base_type == type);
+        tcg_debug_assert(ts->temp_local == temp_local);
     } else {
-        idx = s->nb_temps;
-#if TCG_TARGET_REG_BITS == 32
-        if (type == TCG_TYPE_I64) {
-            tcg_temp_alloc(s, s->nb_temps + 2);
-            ts = &s->temps[s->nb_temps];
-            ts->base_type = type;
-            ts->type = TCG_TYPE_I32;
-            ts->temp_allocated = 1;
-            ts->temp_local = temp_local;
-            ts->name = NULL;
-            ts++;
+        ts = tcg_temp_alloc(s);
+        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
+            TCGTemp *ts2 = tcg_temp_alloc(s);
+
             ts->base_type = type;
             ts->type = TCG_TYPE_I32;
             ts->temp_allocated = 1;
             ts->temp_local = temp_local;
-            ts->name = NULL;
-            s->nb_temps += 2;
-        } else
-#endif
-        {
-            tcg_temp_alloc(s, s->nb_temps + 1);
-            ts = &s->temps[s->nb_temps];
+
+            tcg_debug_assert(ts2 == ts + 1);
+            ts2->base_type = TCG_TYPE_I64;
+            ts2->type = TCG_TYPE_I32;
+            ts2->temp_allocated = 1;
+            ts2->temp_local = temp_local;
+        } else {
             ts->base_type = type;
             ts->type = type;
             ts->temp_allocated = 1;
             ts->temp_local = temp_local;
-            ts->name = NULL;
-            s->nb_temps++;
         }
+        idx = temp_idx(s, ts);
     }
 
 #if defined(CONFIG_DEBUG_TCG)
@@ -620,9 +640,9 @@ static void tcg_temp_free_internal(int idx)
     }
 #endif
 
-    assert(idx >= s->nb_globals && idx < s->nb_temps);
+    tcg_debug_assert(idx >= s->nb_globals && idx < s->nb_temps);
     ts = &s->temps[idx];
-    assert(ts->temp_allocated != 0);
+    tcg_debug_assert(ts->temp_allocated != 0);
     ts->temp_allocated = 0;
 
     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
@@ -901,37 +921,30 @@ static void tcg_reg_alloc_start(TCGContext *s)
         ts->mem_allocated = 0;
         ts->fixed_reg = 0;
     }
-    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
-        s->reg_to_temp[i] = -1;
-    }
+
+    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
 }
 
-static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, int buf_size,
-                                 int idx)
+static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
+                                 TCGTemp *ts)
 {
-    TCGTemp *ts;
+    int idx = temp_idx(s, ts);
 
-    assert(idx >= 0 && idx < s->nb_temps);
-    ts = &s->temps[idx];
     if (idx < s->nb_globals) {
         pstrcpy(buf, buf_size, ts->name);
+    } else if (ts->temp_local) {
+        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
     } else {
-        if (ts->temp_local)
-            snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
-        else
-            snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
+        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
     }
     return buf;
 }
 
-char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg)
-{
-    return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I32(arg));
-}
-
-char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg)
+static char *tcg_get_arg_str_idx(TCGContext *s, char *buf,
+                                 int buf_size, int idx)
 {
-    return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I64(arg));
+    tcg_debug_assert(idx >= 0 && idx < s->nb_temps);
+    return tcg_get_arg_str_ptr(s, buf, buf_size, &s->temps[idx]);
 }
 
 /* Find helper name.  */
@@ -996,17 +1009,18 @@ void tcg_dump_ops(TCGContext *s)
             def = &tcg_op_defs[c];
             args = &s->gen_opparam_buf[op->args];
 
-            if (c == INDEX_op_debug_insn_start) {
-                uint64_t pc;
+            if (c == INDEX_op_insn_start) {
+                qemu_log("%s ----", oi != s->gen_first_op_idx ? "\n" : "");
+
+                for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
+                    target_ulong a;
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-                pc = ((uint64_t)args[1] << 32) | args[0];
+                    a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
 #else
-                pc = args[0];
+                    a = args[i];
 #endif
-                if (oi != s->gen_first_op_idx) {
-                    qemu_log("\n");
+                    qemu_log(" " TARGET_FMT_lx, a);
                 }
-                qemu_log(" ---- 0x%" PRIx64, pc);
             } else if (c == INDEX_op_call) {
                 /* variable number of arguments */
                 nb_oargs = op->callo;
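The dump loop above shows how an insn_start payload is laid out: each of the TARGET_INSN_START_WORDS guest words occupies one opparam slot, or two slots (low word first) when the guest word is wider than a host register. A hedged sketch of the split for the 64-bit-guest-on-32-bit-host case, with an illustrative helper name:

    #include <stdint.h>

    /* Store one 64-bit guest word a into two consecutive 32-bit
       argument slots, low half first, as the #if branch above reads. */
    static void pack_insn_word(uint32_t *args, int i, uint64_t a)
    {
        args[i * 2]     = (uint32_t)a;          /* low half  */
        args[i * 2 + 1] = (uint32_t)(a >> 32);  /* high half */
    }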
@@ -1172,25 +1186,25 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
         if (tdefs->op == (TCGOpcode)-1)
             break;
         op = tdefs->op;
-        assert((unsigned)op < NB_OPS);
+        tcg_debug_assert((unsigned)op < NB_OPS);
         def = &tcg_op_defs[op];
 #if defined(CONFIG_DEBUG_TCG)
         /* Duplicate entry in op definitions? */
-        assert(!def->used);
+        tcg_debug_assert(!def->used);
         def->used = 1;
 #endif
         nb_args = def->nb_iargs + def->nb_oargs;
         for(i = 0; i < nb_args; i++) {
             ct_str = tdefs->args_ct_str[i];
             /* Incomplete TCGTargetOpDef entry? */
-            assert(ct_str != NULL);
+            tcg_debug_assert(ct_str != NULL);
 
             tcg_regset_clear(def->args_ct[i].u.regs);
             def->args_ct[i].ct = 0;
             if (ct_str[0] >= '0' && ct_str[0] <= '9') {
                 int oarg;
                 oarg = ct_str[0] - '0';
-                assert(oarg < def->nb_oargs);
-                assert(def->args_ct[oarg].ct & TCG_CT_REG);
+                tcg_debug_assert(oarg < def->nb_oargs);
+                tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
                 /* TCG_CT_ALIAS is for the output arguments. The input
                    argument is tagged with TCG_CT_IALIAS. */
                 def->args_ct[i] = def->args_ct[oarg];
@@ -1219,7 +1233,7 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
         }
 
         /* TCGTargetOpDef entry with too much information? */
-        assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
+        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
 
         /* sort the constraints (XXX: this is just an heuristic) */
         sort_constraints(def, 0, def->nb_oargs);
@@ -1240,7 +1254,7 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
 
 #if defined(CONFIG_DEBUG_TCG)
     i = 0;
-    for (op = 0; op < ARRAY_SIZE(tcg_op_defs); op++) {
+    for (op = 0; op < tcg_op_defs_max; op++) {
         const TCGOpDef *def = &tcg_op_defs[op];
         if (def->flags & TCG_OPF_NOT_PRESENT) {
             /* Wrong entry in op definitions? */
@@ -1396,7 +1410,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                     }
                 }
             }
-            /* input arguments are live for preceeding opcodes */
+            /* input arguments are live for preceding opcodes */
             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                 arg = args[i];
                 dead_temps[arg] = 0;
@@ -1406,7 +1420,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                 }
             }
             break;
-        case INDEX_op_debug_insn_start:
+        case INDEX_op_insn_start:
             break;
         case INDEX_op_discard:
             /* mark the temporary as dead */
@@ -1542,7 +1556,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                         dead_args |= (1 << i);
                     }
                 }
-                /* input arguments are live for preceeding opcodes */
+                /* input arguments are live for preceding opcodes */
                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                     arg = args[i];
                     dead_temps[arg] = 0;
@@ -1558,8 +1572,7 @@ static void tcg_liveness_analysis(TCGContext *s)
 /* dummy liveness analysis */
 static void tcg_liveness_analysis(TCGContext *s)
 {
-    int nb_ops;
-    nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
+    int nb_ops = s->gen_next_op_idx;
 
     s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
     memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
@@ -1568,7 +1581,7 @@ static void tcg_liveness_analysis(TCGContext *s)
 }
 #endif
 
-#ifndef NDEBUG
+#ifdef CONFIG_DEBUG_TCG
 static void dump_regs(TCGContext *s)
 {
     TCGTemp *ts;
@@ -1583,7 +1596,8 @@ static void dump_regs(TCGContext *s)
             printf("%s", tcg_target_reg_names[ts->reg]);
             break;
         case TEMP_VAL_MEM:
-            printf("%d(%s)", (int)ts->mem_offset, tcg_target_reg_names[ts->mem_reg]);
+            printf("%d(%s)", (int)ts->mem_offset,
+                   tcg_target_reg_names[ts->mem_base->reg]);
             break;
         case TEMP_VAL_CONST:
             printf("$0x%" TCG_PRIlx, ts->val);
@@ -1599,43 +1613,41 @@ static void dump_regs(TCGContext *s)
     }
 
     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
-        if (s->reg_to_temp[i] >= 0) {
+        if (s->reg_to_temp[i] != NULL) {
             printf("%s: %s\n",
                    tcg_target_reg_names[i],
-                   tcg_get_arg_str_idx(s, buf, sizeof(buf), s->reg_to_temp[i]));
+                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
         }
     }
 }
 
 static void check_regs(TCGContext *s)
 {
-    int reg, k;
+    int reg;
+    int k;
     TCGTemp *ts;
     char buf[64];
 
-    for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
-        k = s->reg_to_temp[reg];
-        if (k >= 0) {
-            ts = &s->temps[k];
-            if (ts->val_type != TEMP_VAL_REG ||
-                ts->reg != reg) {
+    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
+        ts = s->reg_to_temp[reg];
+        if (ts != NULL) {
+            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
                 printf("Inconsistency for register %s:\n",
                        tcg_target_reg_names[reg]);
                 goto fail;
             }
         }
     }
-    for(k = 0; k < s->nb_temps; k++) {
+    for (k = 0; k < s->nb_temps; k++) {
         ts = &s->temps[k];
-        if (ts->val_type == TEMP_VAL_REG &&
-            !ts->fixed_reg &&
-            s->reg_to_temp[ts->reg] != k) {
-                printf("Inconsistency for temp %s:\n",
-                       tcg_get_arg_str_idx(s, buf, sizeof(buf), k));
+        if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
+            && s->reg_to_temp[ts->reg] != ts) {
+            printf("Inconsistency for temp %s:\n",
+                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
         fail:
-                printf("reg state:\n");
-                dump_regs(s);
-                tcg_abort();
+            printf("reg state:\n");
+            dump_regs(s);
+            tcg_abort();
         }
     }
 }
@@ -1656,62 +1668,69 @@ static void temp_allocate_frame(TCGContext *s, int temp)
         tcg_abort();
     }
     ts->mem_offset = s->current_frame_offset;
-    ts->mem_reg = s->frame_reg;
+    ts->mem_base = s->frame_temp;
     ts->mem_allocated = 1;
     s->current_frame_offset += sizeof(tcg_target_long);
 }
 
+static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);
+
 /* sync register 'reg' by saving it to the corresponding temporary */
-static inline void tcg_reg_sync(TCGContext *s, int reg)
+static void tcg_reg_sync(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
 {
-    TCGTemp *ts;
-    int temp;
+    TCGTemp *ts = s->reg_to_temp[reg];
 
-    temp = s->reg_to_temp[reg];
-    ts = &s->temps[temp];
-    assert(ts->val_type == TEMP_VAL_REG);
+    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
     if (!ts->mem_coherent && !ts->fixed_reg) {
         if (!ts->mem_allocated) {
-            temp_allocate_frame(s, temp);
+            temp_allocate_frame(s, temp_idx(s, ts));
+        } else if (ts->indirect_reg) {
+            tcg_regset_set_reg(allocated_regs, ts->reg);
+            temp_load(s, ts->mem_base,
+                      tcg_target_available_regs[TCG_TYPE_PTR],
+                      allocated_regs);
        }
-        tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+        tcg_out_st(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
     }
     ts->mem_coherent = 1;
 }
 
 /* free register 'reg' by spilling the corresponding temporary if necessary */
-static void tcg_reg_free(TCGContext *s, int reg)
+static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
 {
-    int temp;
+    TCGTemp *ts = s->reg_to_temp[reg];
 
-    temp = s->reg_to_temp[reg];
-    if (temp != -1) {
-        tcg_reg_sync(s, reg);
-        s->temps[temp].val_type = TEMP_VAL_MEM;
-        s->reg_to_temp[reg] = -1;
+    if (ts != NULL) {
        tcg_reg_sync(s, reg, allocated_regs);
+        ts->val_type = TEMP_VAL_MEM;
+        s->reg_to_temp[reg] = NULL;
     }
 }
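For orientation, the two helpers above implement distinct transitions on the temp's value state: tcg_reg_sync() writes a dirty register back but leaves the temp resident in the register, while tcg_reg_free() additionally demotes it so the register can be reused. A sketch that mirrors TCGTempVal from tcg.h (renamed here to avoid clashing with the real enum):

    /* The four states the allocator tracks per temp. */
    typedef enum {
        SK_VAL_DEAD,   /* no live value                                  */
        SK_VAL_REG,    /* value lives in ts->reg                         */
        SK_VAL_MEM,    /* value lives at ts->mem_base->reg + mem_offset  */
        SK_VAL_CONST,  /* value is the constant ts->val                  */
    } TempValSketch;   /* sync: REG stays REG; free: REG becomes MEM     */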
 /* Allocate a register belonging to reg1 & ~reg2 */
-static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2)
+static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
+                            TCGRegSet allocated_regs, bool rev)
 {
-    int i, reg;
+    int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
+    const int *order;
+    TCGReg reg;
     TCGRegSet reg_ct;
 
-    tcg_regset_andnot(reg_ct, reg1, reg2);
+    tcg_regset_andnot(reg_ct, desired_regs, allocated_regs);
+    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
 
     /* first try free registers */
-    for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
-        reg = tcg_target_reg_alloc_order[i];
-        if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == -1)
+    for(i = 0; i < n; i++) {
+        reg = order[i];
+        if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
             return reg;
     }
 
     /* XXX: do better spill choice */
-    for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
-        reg = tcg_target_reg_alloc_order[i];
+    for(i = 0; i < n; i++) {
+        reg = order[i];
         if (tcg_regset_test_reg(reg_ct, reg)) {
-            tcg_reg_free(s, reg);
+            tcg_reg_free(s, reg, allocated_regs);
             return reg;
         }
     }
@@ -1719,65 +1738,92 @@ static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2)
     tcg_abort();
 }
 
-/* mark a temporary as dead. */
-static inline void temp_dead(TCGContext *s, int temp)
+/* Make sure the temporary is in a register.  If needed, allocate the register
+   from DESIRED while avoiding ALLOCATED.  */
+static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
+                      TCGRegSet allocated_regs)
 {
-    TCGTemp *ts;
+    TCGReg reg;
 
-    ts = &s->temps[temp];
-    if (!ts->fixed_reg) {
-        if (ts->val_type == TEMP_VAL_REG) {
-            s->reg_to_temp[ts->reg] = -1;
-        }
-        if (temp < s->nb_globals || ts->temp_local) {
-            ts->val_type = TEMP_VAL_MEM;
-        } else {
-            ts->val_type = TEMP_VAL_DEAD;
+    switch (ts->val_type) {
+    case TEMP_VAL_REG:
+        return;
+    case TEMP_VAL_CONST:
+        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
+        tcg_out_movi(s, ts->type, reg, ts->val);
+        ts->mem_coherent = 0;
+        break;
+    case TEMP_VAL_MEM:
+        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
+        if (ts->indirect_reg) {
+            tcg_regset_set_reg(allocated_regs, reg);
+            temp_load(s, ts->mem_base,
+                      tcg_target_available_regs[TCG_TYPE_PTR],
+                      allocated_regs);
         }
+        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
+        ts->mem_coherent = 1;
+        break;
+    case TEMP_VAL_DEAD:
+    default:
+        tcg_abort();
     }
+    ts->reg = reg;
+    ts->val_type = TEMP_VAL_REG;
+    s->reg_to_temp[reg] = ts;
+}
+
+/* mark a temporary as dead. */
+static inline void temp_dead(TCGContext *s, TCGTemp *ts)
+{
+    if (ts->fixed_reg) {
+        return;
+    }
+    if (ts->val_type == TEMP_VAL_REG) {
+        s->reg_to_temp[ts->reg] = NULL;
+    }
+    ts->val_type = (temp_idx(s, ts) < s->nb_globals || ts->temp_local
                    ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
 }
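The interesting case in temp_load() above is an indirect global: its base pointer may itself be spilled, so the function recurses, first reserving the value's register in the allocated set so the recursive allocation cannot steal it. A standalone miniature of that collision-avoidance idea, using bitmask register sets; none of these names are QEMU API:

    #include <stdint.h>

    typedef uint32_t regset;

    /* Pick the lowest register that is desired and not yet allocated. */
    static int pick(regset desired, regset allocated)
    {
        int r;
        for (r = 0; r < 32; r++) {
            if ((desired & ~allocated) & (1u << r)) {
                return r;
            }
        }
        return -1;  /* caller would have to spill */
    }

    /* Load a value through a base pointer: reserve the value register
       before choosing one for the base, so the two never collide. */
    static int load_indirect(regset desired, regset allocated, int *base_reg)
    {
        int val_reg = pick(desired, allocated);
        allocated |= 1u << val_reg;       /* reserve before recursing */
        *base_reg = pick(~0u, allocated); /* base lands elsewhere     */
        return val_reg;
    }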
 /* sync a temporary to memory. 'allocated_regs' is used in case a
    temporary registers needs to be allocated to store a constant. */
-static inline void temp_sync(TCGContext *s, int temp, TCGRegSet allocated_regs)
+static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
 {
-    TCGTemp *ts;
-
-    ts = &s->temps[temp];
-    if (!ts->fixed_reg) {
-        switch(ts->val_type) {
-        case TEMP_VAL_CONST:
-            ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
-                                    allocated_regs);
-            ts->val_type = TEMP_VAL_REG;
-            s->reg_to_temp[ts->reg] = temp;
-            ts->mem_coherent = 0;
-            tcg_out_movi(s, ts->type, ts->reg, ts->val);
-            /* fallthrough*/
-        case TEMP_VAL_REG:
-            tcg_reg_sync(s, ts->reg);
-            break;
-        case TEMP_VAL_DEAD:
-        case TEMP_VAL_MEM:
-            break;
-        default:
-            tcg_abort();
-        }
+    if (ts->fixed_reg) {
+        return;
+    }
+    switch (ts->val_type) {
+    case TEMP_VAL_CONST:
+        temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs);
+        /* fallthrough */
+    case TEMP_VAL_REG:
+        tcg_reg_sync(s, ts->reg, allocated_regs);
+        break;
+    case TEMP_VAL_DEAD:
+    case TEMP_VAL_MEM:
+        break;
+    default:
+        tcg_abort();
     }
 }
 
 /* save a temporary to memory. 'allocated_regs' is used in case a
    temporary registers needs to be allocated to store a constant. */
-static inline void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
+static inline void temp_save(TCGContext *s, TCGTemp *ts,
+                             TCGRegSet allocated_regs)
 {
 #ifdef USE_LIVENESS_ANALYSIS
-    /* The liveness analysis already ensures that globals are back
-       in memory. Keep an assert for safety. */
-    assert(s->temps[temp].val_type == TEMP_VAL_MEM || s->temps[temp].fixed_reg);
-#else
-    temp_sync(s, temp, allocated_regs);
-    temp_dead(s, temp);
+    /* ??? Liveness does not yet incorporate indirect bases.  */
+    if (!ts->indirect_base) {
+        /* The liveness analysis already ensures that globals are back
+           in memory. Keep a tcg_debug_assert for safety. */
+        tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
+        return;
+    }
 #endif
+    temp_sync(s, ts, allocated_regs);
+    temp_dead(s, ts);
 }
 
 /* save globals to their canonical location and assume they can be
@@ -1787,8 +1833,8 @@ static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
 {
     int i;
 
-    for(i = 0; i < s->nb_globals; i++) {
-        temp_save(s, i, allocated_regs);
+    for (i = 0; i < s->nb_globals; i++) {
+        temp_save(s, &s->temps[i], allocated_regs);
     }
 }
 
@@ -1800,12 +1846,17 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
     int i;
 
     for (i = 0; i < s->nb_globals; i++) {
+        TCGTemp *ts = &s->temps[i];
 #ifdef USE_LIVENESS_ANALYSIS
-        assert(s->temps[i].val_type != TEMP_VAL_REG || s->temps[i].fixed_reg ||
-               s->temps[i].mem_coherent);
-#else
-        temp_sync(s, i, allocated_regs);
+        /* ??? Liveness does not yet incorporate indirect bases.  */
+        if (!ts->indirect_base) {
+            tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                             || ts->fixed_reg
                             || ts->mem_coherent);
+            continue;
+        }
 #endif
+        temp_sync(s, ts, allocated_regs);
     }
 }
@@ -1813,21 +1864,23 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
    all globals are stored at their canonical location. */
 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
 {
-    TCGTemp *ts;
     int i;
 
-    for(i = s->nb_globals; i < s->nb_temps; i++) {
-        ts = &s->temps[i];
+    for (i = s->nb_globals; i < s->nb_temps; i++) {
+        TCGTemp *ts = &s->temps[i];
         if (ts->temp_local) {
-            temp_save(s, i, allocated_regs);
+            temp_save(s, ts, allocated_regs);
         } else {
 #ifdef USE_LIVENESS_ANALYSIS
-            /* The liveness analysis already ensures that temps are dead.
-               Keep an assert for safety. */
-            assert(ts->val_type == TEMP_VAL_DEAD);
-#else
-            temp_dead(s, i);
+            /* ??? Liveness does not yet incorporate indirect bases.  */
+            if (!ts->indirect_base) {
+                /* The liveness analysis already ensures that temps are dead.
+                   Keep a tcg_debug_assert for safety. */
+                tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
+                continue;
+            }
 #endif
+            temp_dead(s, ts);
         }
     }
 
@@ -1852,16 +1905,17 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
         tcg_out_movi(s, ots->type, ots->reg, val);
     } else {
         /* The movi is not explicitly generated here */
-        if (ots->val_type == TEMP_VAL_REG)
-            s->reg_to_temp[ots->reg] = -1;
+        if (ots->val_type == TEMP_VAL_REG) {
+            s->reg_to_temp[ots->reg] = NULL;
+        }
         ots->val_type = TEMP_VAL_CONST;
         ots->val = val;
     }
     if (NEED_SYNC_ARG(0)) {
-        temp_sync(s, args[0], s->reserved_regs);
+        temp_sync(s, ots, s->reserved_regs);
     }
     if (IS_DEAD_ARG(0)) {
-        temp_dead(s, args[0]);
+        temp_dead(s, ots);
     }
 }
 
@@ -1887,69 +1941,65 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
        we don't have to reload SOURCE the next time it is used. */
     if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG)
         || ts->val_type == TEMP_VAL_MEM) {
-        ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[itype],
-                                allocated_regs);
-        if (ts->val_type == TEMP_VAL_MEM) {
-            tcg_out_ld(s, itype, ts->reg, ts->mem_reg, ts->mem_offset);
-            ts->mem_coherent = 1;
-        } else if (ts->val_type == TEMP_VAL_CONST) {
-            tcg_out_movi(s, itype, ts->reg, ts->val);
-            ts->mem_coherent = 0;
-        }
-        s->reg_to_temp[ts->reg] = args[1];
-        ts->val_type = TEMP_VAL_REG;
+        temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
     }
 
     if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
         /* mov to a non-saved dead register makes no sense (even with
            liveness analysis disabled). */
-        assert(NEED_SYNC_ARG(0));
+        tcg_debug_assert(NEED_SYNC_ARG(0));
         /* The code above should have moved the temp to a register. */
-        assert(ts->val_type == TEMP_VAL_REG);
+        tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
         if (!ots->mem_allocated) {
             temp_allocate_frame(s, args[0]);
         }
-        tcg_out_st(s, otype, ts->reg, ots->mem_reg, ots->mem_offset);
+        if (ots->indirect_reg) {
+            tcg_regset_set_reg(allocated_regs, ts->reg);
+            temp_load(s, ots->mem_base,
+                      tcg_target_available_regs[TCG_TYPE_PTR],
+                      allocated_regs);
+        }
+        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
         if (IS_DEAD_ARG(1)) {
-            temp_dead(s, args[1]);
+            temp_dead(s, ts);
        }
-        temp_dead(s, args[0]);
+        temp_dead(s, ots);
     } else if (ts->val_type == TEMP_VAL_CONST) {
         /* propagate constant */
         if (ots->val_type == TEMP_VAL_REG) {
-            s->reg_to_temp[ots->reg] = -1;
+            s->reg_to_temp[ots->reg] = NULL;
         }
         ots->val_type = TEMP_VAL_CONST;
         ots->val = ts->val;
         if (IS_DEAD_ARG(1)) {
-            temp_dead(s, args[1]);
+            temp_dead(s, ts);
        }
     } else {
         /* The code in the first if block should have moved the
            temp to a register. */
-        assert(ts->val_type == TEMP_VAL_REG);
+        tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
         if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
             /* the mov can be suppressed */
             if (ots->val_type == TEMP_VAL_REG) {
-                s->reg_to_temp[ots->reg] = -1;
+                s->reg_to_temp[ots->reg] = NULL;
             }
             ots->reg = ts->reg;
-            temp_dead(s, args[1]);
+            temp_dead(s, ts);
         } else {
             if (ots->val_type != TEMP_VAL_REG) {
                 /* When allocating a new register, make sure to not spill the
                   input one. */
                 tcg_regset_set_reg(allocated_regs, ts->reg);
                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
-                                         allocated_regs);
+                                         allocated_regs, ots->indirect_base);
             }
             tcg_out_mov(s, otype, ots->reg, ts->reg);
         }
         ots->val_type = TEMP_VAL_REG;
         ots->mem_coherent = 0;
-        s->reg_to_temp[ots->reg] = args[0];
+        s->reg_to_temp[ots->reg] = ots;
         if (NEED_SYNC_ARG(0)) {
-            tcg_reg_sync(s, ots->reg);
+            tcg_reg_sync(s, ots->reg, allocated_regs);
         }
     }
 }
 
@@ -1960,7 +2010,8 @@ static void tcg_reg_alloc_op(TCGContext *s,
                              uint8_t sync_args)
 {
     TCGRegSet allocated_regs;
-    int i, k, nb_iargs, nb_oargs, reg;
+    int i, k, nb_iargs, nb_oargs;
+    TCGReg reg;
     TCGArg arg;
     const TCGArgConstraint *arg_ct;
     TCGTemp *ts;
@@ -1982,30 +2033,17 @@ static void tcg_reg_alloc_op(TCGContext *s,
         arg = args[i];
         arg_ct = &def->args_ct[i];
         ts = &s->temps[arg];
-        if (ts->val_type == TEMP_VAL_MEM) {
-            reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
-            tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
-            ts->val_type = TEMP_VAL_REG;
-            ts->reg = reg;
-            ts->mem_coherent = 1;
-            s->reg_to_temp[reg] = arg;
-        } else if (ts->val_type == TEMP_VAL_CONST) {
-            if (tcg_target_const_match(ts->val, ts->type, arg_ct)) {
-                /* constant is OK for instruction */
-                const_args[i] = 1;
-                new_args[i] = ts->val;
-                goto iarg_end;
-            } else {
-                /* need to move to a register */
-                reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
-                tcg_out_movi(s, ts->type, reg, ts->val);
-                ts->val_type = TEMP_VAL_REG;
-                ts->reg = reg;
-                ts->mem_coherent = 0;
-                s->reg_to_temp[reg] = arg;
-            }
+
+        if (ts->val_type == TEMP_VAL_CONST
+            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
+            /* constant is OK for instruction */
+            const_args[i] = 1;
+            new_args[i] = ts->val;
+            goto iarg_end;
         }
-        assert(ts->val_type == TEMP_VAL_REG);
+
+        temp_load(s, ts, arg_ct->u.regs, allocated_regs);
+
         if (arg_ct->ct & TCG_CT_IALIAS) {
             if (ts->fixed_reg) {
                 /* if fixed register, we must allocate a new register
@@ -2038,7 +2076,8 @@ static void tcg_reg_alloc_op(TCGContext *s,
         allocate_in_reg:
             /* allocate a new register matching the constraint
               and move the temporary register into it */
-            reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+            reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs,
+                                ts->indirect_base);
             tcg_out_mov(s, ts->type, reg, ts->reg);
         }
         new_args[i] = reg;
@@ -2050,7 +2089,7 @@ static void tcg_reg_alloc_op(TCGContext *s,
     /* mark dead temporaries and free the associated registers */
     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
         if (IS_DEAD_ARG(i)) {
-            temp_dead(s, args[i]);
+            temp_dead(s, &s->temps[args[i]]);
         }
     }
 
@@ -2059,9 +2098,9 @@ static void tcg_reg_alloc_op(TCGContext *s,
     } else {
         if (def->flags & TCG_OPF_CALL_CLOBBER) {
             /* XXX: permit generic clobber register list ? */
-            for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
-                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
-                    tcg_reg_free(s, reg);
+            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
+                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
+                    tcg_reg_free(s, i, allocated_regs);
                 }
             }
         }
@@ -2087,20 +2126,21 @@ static void tcg_reg_alloc_op(TCGContext *s,
                 tcg_regset_test_reg(arg_ct->u.regs, reg)) {
                 goto oarg_end;
             }
-            reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+            reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs,
+                                ts->indirect_base);
         }
         tcg_regset_set_reg(allocated_regs, reg);
         /* if a fixed register is used, then a move will be done afterwards */
         if (!ts->fixed_reg) {
             if (ts->val_type == TEMP_VAL_REG) {
-                s->reg_to_temp[ts->reg] = -1;
+                s->reg_to_temp[ts->reg] = NULL;
             }
             ts->val_type = TEMP_VAL_REG;
             ts->reg = reg;
             /* temp value is modified, so the value kept in memory is
                potentially not the same */
             ts->mem_coherent = 0;
-            s->reg_to_temp[reg] = arg;
+            s->reg_to_temp[reg] = ts;
         }
     oarg_end:
         new_args[i] = reg;
     }
@@ -2118,10 +2158,10 @@ static void tcg_reg_alloc_op(TCGContext *s,
             tcg_out_mov(s, ts->type, ts->reg, reg);
         }
         if (NEED_SYNC_ARG(i)) {
-            tcg_reg_sync(s, reg);
+            tcg_reg_sync(s, reg, allocated_regs);
         }
         if (IS_DEAD_ARG(i)) {
-            temp_dead(s, args[i]);
+            temp_dead(s, ts);
         }
     }
 }
@@ -2136,7 +2176,8 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
                                const TCGArg * const args, uint16_t dead_args,
                                uint8_t sync_args)
 {
-    int flags, nb_regs, i, reg;
+    int flags, nb_regs, i;
+    TCGReg reg;
     TCGArg arg;
     TCGTemp *ts;
     intptr_t stack_offset;
@@ -2172,23 +2213,9 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
 #endif
         if (arg != TCG_CALL_DUMMY_ARG) {
             ts = &s->temps[arg];
-            if (ts->val_type == TEMP_VAL_REG) {
-                tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
-            } else if (ts->val_type == TEMP_VAL_MEM) {
-                reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
-                                    s->reserved_regs);
-                /* XXX: not correct if reading values from the stack */
-                tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
-                tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
-            } else if (ts->val_type == TEMP_VAL_CONST) {
-                reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
-                                    s->reserved_regs);
-                /* XXX: sign extend may be needed on some targets */
-                tcg_out_movi(s, ts->type, reg, ts->val);
-                tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
-            } else {
-                tcg_abort();
-            }
+            temp_load(s, ts, tcg_target_available_regs[ts->type],
+                      s->reserved_regs);
+            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
         }
 #ifndef TCG_TARGET_STACK_GROWSUP
         stack_offset += sizeof(tcg_target_long);
 #endif
     }
@@ -2202,19 +2229,20 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
         if (arg != TCG_CALL_DUMMY_ARG) {
             ts = &s->temps[arg];
             reg = tcg_target_call_iarg_regs[i];
-            tcg_reg_free(s, reg);
+            tcg_reg_free(s, reg, allocated_regs);
+
             if (ts->val_type == TEMP_VAL_REG) {
                 if (ts->reg != reg) {
                     tcg_out_mov(s, ts->type, reg, ts->reg);
                 }
-            } else if (ts->val_type == TEMP_VAL_MEM) {
-                tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
-            } else if (ts->val_type == TEMP_VAL_CONST) {
-                /* XXX: sign extend ? */
-                tcg_out_movi(s, ts->type, reg, ts->val);
             } else {
-                tcg_abort();
+                TCGRegSet arg_set;
+
+                tcg_regset_clear(arg_set);
+                tcg_regset_set_reg(arg_set, reg);
+                temp_load(s, ts, arg_set, allocated_regs);
             }
+
+            tcg_regset_set_reg(allocated_regs, reg);
         }
     }
@@ -2222,14 +2250,14 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
     /* mark dead temporaries and free the associated registers */
     for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
         if (IS_DEAD_ARG(i)) {
-            temp_dead(s, args[i]);
+            temp_dead(s, &s->temps[args[i]]);
         }
     }
 
     /* clobber call registers */
-    for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
-        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
-            tcg_reg_free(s, reg);
+    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
+        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
+            tcg_reg_free(s, i, allocated_regs);
         }
     }
 
@@ -2250,7 +2278,7 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
         arg = args[i];
         ts = &s->temps[arg];
         reg = tcg_target_call_oarg_regs[i];
-        assert(s->reg_to_temp[reg] == -1);
+        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
 
         if (ts->fixed_reg) {
             if (ts->reg != reg) {
@@ -2258,17 +2286,17 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
             }
         } else {
             if (ts->val_type == TEMP_VAL_REG) {
-                s->reg_to_temp[ts->reg] = -1;
+                s->reg_to_temp[ts->reg] = NULL;
             }
             ts->val_type = TEMP_VAL_REG;
             ts->reg = reg;
             ts->mem_coherent = 0;
-            s->reg_to_temp[reg] = arg;
+            s->reg_to_temp[reg] = ts;
             if (NEED_SYNC_ARG(i)) {
-                tcg_reg_sync(s, reg);
+                tcg_reg_sync(s, reg, allocated_regs);
             }
             if (IS_DEAD_ARG(i)) {
-                temp_dead(s, args[i]);
+                temp_dead(s, ts);
             }
         }
     }
@@ -2295,14 +2323,31 @@ void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
 #endif
 
-static inline int tcg_gen_code_common(TCGContext *s,
-                                      tcg_insn_unit *gen_code_buf,
-                                      long search_pc)
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 {
-    int oi, oi_next;
+    int i, oi, oi_next, num_insns;
+
+#ifdef CONFIG_PROFILER
+    {
+        int n;
+
+        n = s->gen_last_op_idx + 1;
+        s->op_count += n;
+        if (n > s->op_count_max) {
+            s->op_count_max = n;
+        }
+
+        n = s->nb_temps;
+        s->temp_count += n;
+        if (n > s->temp_count_max) {
+            s->temp_count_max = n;
+        }
+    }
+#endif
 
 #ifdef DEBUG_DISAS
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
+                 && qemu_log_in_addr_range(tb->pc))) {
         qemu_log("OP:\n");
         tcg_dump_ops(s);
         qemu_log("\n");
@@ -2329,7 +2374,8 @@ static inline int tcg_gen_code_common(TCGContext *s,
 #endif
 
 #ifdef DEBUG_DISAS
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) {
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
+                 && qemu_log_in_addr_range(tb->pc))) {
         qemu_log("OP after optimization and liveness analysis:\n");
         tcg_dump_ops(s);
         qemu_log("\n");
@@ -2338,11 +2384,12 @@ static inline int tcg_gen_code_common(TCGContext *s,
 
     tcg_reg_alloc_start(s);
 
-    s->code_buf = gen_code_buf;
-    s->code_ptr = gen_code_buf;
+    s->code_buf = tb->tc_ptr;
+    s->code_ptr = tb->tc_ptr;
 
     tcg_out_tb_init(s);
 
+    num_insns = -1;
     for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
         TCGOp * const op = &s->gen_op_buf[oi];
         TCGArg * const args = &s->gen_opparam_buf[op->args];
@@ -2365,10 +2412,23 @@ static inline int tcg_gen_code_common(TCGContext *s,
         case INDEX_op_movi_i64:
             tcg_reg_alloc_movi(s, args, dead_args, sync_args);
             break;
-        case INDEX_op_debug_insn_start:
+        case INDEX_op_insn_start:
+            if (num_insns >= 0) {
+                s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
+            }
+            num_insns++;
+            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
+                target_ulong a;
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+                a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
+#else
+                a = args[i];
+#endif
+                s->gen_insn_data[num_insns][i] = a;
+            }
             break;
         case INDEX_op_discard:
-            temp_dead(s, args[0]);
+            temp_dead(s, &s->temps[args[0]]);
             break;
         case INDEX_op_set_label:
             tcg_reg_alloc_bb_end(s, s->reserved_regs);
@@ -2389,40 +2449,24 @@ static inline int tcg_gen_code_common(TCGContext *s,
             tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
             break;
         }
-        if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) {
-            return oi;
-        }
-#ifndef NDEBUG
+#ifdef CONFIG_DEBUG_TCG
         check_regs(s);
 #endif
+        /* Test for (pending) buffer overflow.  The assumption is that any
+           one operation beginning below the high water mark cannot overrun
+           the buffer completely.  Thus we can test for overflow after
+           generating code without having to check during generation.  */
+        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
+            return -1;
+        }
     }
+    tcg_debug_assert(num_insns >= 0);
+    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
 
     /* Generate TB finalization at the end of block */
-    tcg_out_tb_finalize(s);
-    return -1;
-}
-
-int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
-{
-#ifdef CONFIG_PROFILER
-    {
-        int n;
-
-        n = s->gen_last_op_idx + 1;
-        s->op_count += n;
-        if (n > s->op_count_max) {
-            s->op_count_max = n;
-        }
-
-        n = s->nb_temps;
-        s->temp_count += n;
-        if (n > s->temp_count_max) {
-            s->temp_count_max = n;
-        }
+    if (!tcg_out_tb_finalize(s)) {
+        return -1;
     }
-#endif
-
-    tcg_gen_code_common(s, gen_code_buf, -1);
 
     /* flush instruction cache */
     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
@@ -2430,38 +2474,30 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
 
     return tcg_current_code_size(s);
 }
 
-/* Return the index of the micro operation such as the pc after is <
-   offset bytes from the start of the TB.  The contents of gen_code_buf must
-   not be changed, though writing the same values is ok.
-   Return -1 if not found. */
-int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
-                           long offset)
-{
-    return tcg_gen_code_common(s, gen_code_buf, offset);
-}
-
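The high-water-mark test above is the crux of this rewrite: code is emitted with no per-byte bounds checks, and overflow is detected only after each op, relying on the slack reserved past the mark (1024 bytes in tcg_prologue_init). A minimal standalone sketch of the pattern, with illustrative names and sizes:

    #include <stdint.h>
    #include <stddef.h>

    #define SLACK 1024  /* must exceed the largest single-op emission */

    /* Returns 0 on success, -1 if the buffer must be flushed and the
       whole translation retried from scratch. */
    static int emit_all(uint8_t *buf, size_t total,
                        size_t (*emit_one)(uint8_t *), int nops)
    {
        uint8_t *ptr = buf;
        uint8_t *highwater = buf + total - SLACK;
        int i;

        for (i = 0; i < nops; i++) {
            ptr += emit_one(ptr);   /* emit without bounds checks */
            if (ptr > highwater) {
                return -1;          /* detected after the fact, safely */
            }
        }
        return 0;
    }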
 #ifdef CONFIG_PROFILER
 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
 {
     TCGContext *s = &tcg_ctx;
-    int64_t tot;
+    int64_t tb_count = s->tb_count;
+    int64_t tb_div_count = tb_count ? tb_count : 1;
+    int64_t tot = s->interm_time + s->code_time;
 
-    tot = s->interm_time + s->code_time;
     cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                 tot, tot / 2.4e9);
     cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
-                s->tb_count,
-                s->tb_count1 - s->tb_count,
-                s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0);
+                tb_count, s->tb_count1 - tb_count,
+                (double)(s->tb_count1 - s->tb_count)
+                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
     cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
-                s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max);
+                (double)s->op_count / tb_div_count, s->op_count_max);
     cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
-                s->tb_count ?
-                (double)s->del_op_count / s->tb_count : 0);
+                (double)s->del_op_count / tb_div_count);
     cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
-                s->tb_count ?
-                (double)s->temp_count / s->tb_count : 0,
-                s->temp_count_max);
+                (double)s->temp_count / tb_div_count, s->temp_count_max);
+    cpu_fprintf(f, "avg host code/TB    %0.1f\n",
+                (double)s->code_out_len / tb_div_count);
+    cpu_fprintf(f, "avg search data/TB  %0.1f\n",
+                (double)s->search_out_len / tb_div_count);
 
     cpu_fprintf(f, "cycles/op           %0.1f\n",
                 s->op_count ? (double)tot / s->op_count : 0);
@@ -2469,8 +2505,11 @@ void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
                 s->code_in_len ? (double)tot / s->code_in_len : 0);
     cpu_fprintf(f, "cycles/out byte     %0.1f\n",
                 s->code_out_len ? (double)tot / s->code_out_len : 0);
-    if (tot == 0)
+    cpu_fprintf(f, "cycles/search byte  %0.1f\n",
+                s->search_out_len ? (double)tot / s->search_out_len : 0);
+    if (tot == 0) {
         tot = 1;
+    }
     cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
                 (double)s->interm_time / tot * 100.0);
     cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
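The removal of tcg_gen_code_search_pc above, together with the gen_insn_end_off and gen_insn_data tables declared in the tcg.h hunk below, replaces re-translation-based PC search with stored per-insn metadata. A hedged sketch of how such a table can be searched (the actual restore logic lives in cpu_restore_state_from_tb in translate-all.c, not shown in this diff): gen_insn_end_off[i] holds the host-code offset just past guest insn i, so the first entry reaching a fault offset identifies the guest instruction.

    /* Illustrative lookup only; error handling and the surrounding
       TranslationBlock bookkeeping are omitted. */
    static int find_guest_insn(TCGContext *s, int num_insns, size_t host_off)
    {
        int i;
        for (i = 0; i < num_insns; i++) {
            if (s->gen_insn_end_off[i] >= host_off) {
                return i;  /* gen_insn_data[i][0] is this insn's PC */
            }
        }
        return -1;
    }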
diff --git a/qemu/tcg/tcg.h b/qemu/tcg/tcg.h
index 231a78152..40c8fbe2a 100644
--- a/qemu/tcg/tcg.h
+++ b/qemu/tcg/tcg.h
@@ -66,7 +66,8 @@ typedef uint64_t TCGRegSet;
 
 #if TCG_TARGET_REG_BITS == 32
 /* Turn some undef macros into false macros.  */
-#define TCG_TARGET_HAS_trunc_shr_i32    0
+#define TCG_TARGET_HAS_extrl_i64_i32    0
+#define TCG_TARGET_HAS_extrh_i64_i32    0
 #define TCG_TARGET_HAS_div_i64          0
 #define TCG_TARGET_HAS_rem_i64          0
 #define TCG_TARGET_HAS_div2_i64         0
@@ -128,6 +129,12 @@ typedef uint64_t TCGRegSet;
 # error "Missing unsigned widening multiply"
 #endif
 
+#ifndef TARGET_INSN_START_EXTRA_WORDS
+# define TARGET_INSN_START_WORDS 1
+#else
+# define TARGET_INSN_START_WORDS (1 + TARGET_INSN_START_EXTRA_WORDS)
+#endif
+
 typedef enum TCGOpcode {
 #define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name,
 #include "tcg-opc.h"
@@ -187,6 +194,7 @@ typedef struct TCGPool {
 #define TCG_POOL_CHUNK_SIZE 32768
 
 #define TCG_MAX_TEMPS 512
+#define TCG_MAX_INSNS 512
 
 /* when the size of the arguments of a called function is smaller than
    this value, they are statically allocated in the TB stack frame */
@@ -300,6 +308,14 @@ typedef tcg_target_ulong TCGArg;
 typedef struct TCGv_i32_d *TCGv_i32;
 typedef struct TCGv_i64_d *TCGv_i64;
 typedef struct TCGv_ptr_d *TCGv_ptr;
+typedef TCGv_ptr TCGv_env;
+#if TARGET_LONG_BITS == 32
+#define TCGv TCGv_i32
+#elif TARGET_LONG_BITS == 64
+#define TCGv TCGv_i64
+#else
+#error Unhandled TARGET_LONG_BITS value
+#endif
 
 static inline TCGv_i32 QEMU_ARTIFICIAL MAKE_TCGV_I32(intptr_t i)
 {
@@ -440,12 +456,13 @@ typedef enum TCGTempVal {
 } TCGTempVal;
 
 typedef struct TCGTemp {
-    unsigned int reg:8;
-    unsigned int mem_reg:8;
+    TCGReg reg:8;
     TCGTempVal val_type:8;
     TCGType base_type:8;
     TCGType type:8;
     unsigned int fixed_reg:1;
+    unsigned int indirect_reg:1;
+    unsigned int indirect_base:1;
     unsigned int mem_coherent:1;
     unsigned int mem_allocated:1;
     unsigned int temp_local:1; /* If true, the temp is saved across
@@ -454,6 +471,7 @@ typedef struct TCGTemp {
     unsigned int temp_allocated:1; /* never used for code gen */
 
     tcg_target_long val;
+    struct TCGTemp *mem_base;
     intptr_t mem_offset;
     const char *name;
 } TCGTemp;
@@ -507,7 +525,7 @@ struct TCGContext {
     intptr_t current_frame_offset;
     intptr_t frame_start;
     intptr_t frame_end;
-    int frame_reg;
+    TCGTemp *frame_temp;
 
     tcg_insn_unit *code_ptr;
 
@@ -524,6 +542,7 @@ struct TCGContext {
     int64_t del_op_count;
     int64_t code_in_len;
     int64_t code_out_len;
+    int64_t search_out_len;
     int64_t interm_time;
     int64_t code_time;
     int64_t la_time;
@@ -550,28 +569,28 @@ struct TCGContext {
     void *code_gen_prologue;
     void *code_gen_buffer;
     size_t code_gen_buffer_size;
-    /* threshold to flush the translated code buffer */
-    size_t code_gen_buffer_max_size;
     void *code_gen_ptr;
 
+    /* Threshold to flush the translated code buffer.  */
+    void *code_gen_highwater;
+
     TBContext tb_ctx;
 
-    /* The TCGBackendData structure is private to tcg-target.c.  */
+    /* The TCGBackendData structure is private to tcg-target.inc.c.  */
     struct TCGBackendData *be;
 
     TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
     TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
 
-    /* tells in which temporary a given register is. It does not take
-       into account fixed registers */
-    int reg_to_temp[TCG_TARGET_NB_REGS];
+    /* Tells which temporary holds a given register.
       It does not take into account fixed registers */
+    TCGTemp *reg_to_temp[TCG_TARGET_NB_REGS];
 
     TCGOp gen_op_buf[OPC_BUF_SIZE];
     TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
 
-    target_ulong gen_opc_pc[OPC_BUF_SIZE];
-    uint16_t gen_opc_icount[OPC_BUF_SIZE];
-    uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+    uint16_t gen_insn_end_off[TCG_MAX_INSNS];
+    target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
 };
 
 extern TCGContext tcg_ctx;
@@ -594,6 +613,10 @@ void *tcg_malloc_internal(TCGContext *s, int size);
 void tcg_pool_reset(TCGContext *s);
 void tcg_pool_delete(TCGContext *s);
 
+void tb_lock(void);
+void tb_unlock(void);
+void tb_lock_reset(void);
+
 static inline void *tcg_malloc(int size)
 {
     TCGContext *s = &tcg_ctx;
@@ -613,39 +636,54 @@ void tcg_context_init(TCGContext *s);
 void tcg_prologue_init(TCGContext *s);
 void tcg_func_start(TCGContext *s);
 
-int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf);
-int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
-                           long offset);
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb);
 
-void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size);
+void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size);
+
+int tcg_global_mem_new_internal(TCGType, TCGv_ptr, intptr_t, const char *);
+
+TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name);
+TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name);
 
-TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name);
-TCGv_i32 tcg_global_mem_new_i32(int reg, intptr_t offset, const char *name);
 TCGv_i32 tcg_temp_new_internal_i32(int temp_local);
+TCGv_i64 tcg_temp_new_internal_i64(int temp_local);
+
+void tcg_temp_free_i32(TCGv_i32 arg);
+void tcg_temp_free_i64(TCGv_i64 arg);
+
+static inline TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t offset,
+                                              const char *name)
+{
+    int idx = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name);
+    return MAKE_TCGV_I32(idx);
+}
+
 static inline TCGv_i32 tcg_temp_new_i32(void)
 {
     return tcg_temp_new_internal_i32(0);
 }
+
 static inline TCGv_i32 tcg_temp_local_new_i32(void)
 {
     return tcg_temp_new_internal_i32(1);
 }
-void tcg_temp_free_i32(TCGv_i32 arg);
-char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg);
 
-TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name);
-TCGv_i64 tcg_global_mem_new_i64(int reg, intptr_t offset, const char *name);
-TCGv_i64 tcg_temp_new_internal_i64(int temp_local);
+static inline TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t offset,
+                                              const char *name)
+{
+    int idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name);
+    return MAKE_TCGV_I64(idx);
+}
+
 static inline TCGv_i64 tcg_temp_new_i64(void)
 {
     return tcg_temp_new_internal_i64(0);
 }
+
 static inline TCGv_i64 tcg_temp_local_new_i64(void)
 {
     return tcg_temp_new_internal_i64(1);
 }
-void tcg_temp_free_i64(TCGv_i64 arg);
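The API change above is the one every translator front end must adapt to: memory globals are now created relative to another global (typically the env pointer) instead of a raw register index. A usage sketch under that assumption; CPUMyState and cpu_pc are hypothetical front-end names, and offsetof comes from <stddef.h>:

    static TCGv_ptr cpu_env;  /* created with tcg_global_reg_new_ptr() */
    static TCGv_i32 cpu_pc;

    static void my_globals_init(void)
    {
        cpu_pc = tcg_global_mem_new_i32(cpu_env,
                                        offsetof(CPUMyState, pc), "pc");
    }

Because the base is a TCGTemp rather than a fixed register number, a non-fixed base simply marks the global indirect and the allocator loads the base on demand, as seen in temp_load() earlier in this diff.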
-char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg);
 
 #if defined(CONFIG_DEBUG_TCG)
 /* If you call tcg_clear_temp_count() at the start of a section of
@@ -985,25 +1023,48 @@ void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr);
 
+uint8_t helper_ret_ldb_cmmu(CPUArchState *env, target_ulong addr,
+                            TCGMemOpIdx oi, uintptr_t retaddr);
+uint16_t helper_le_ldw_cmmu(CPUArchState *env, target_ulong addr,
+                            TCGMemOpIdx oi, uintptr_t retaddr);
+uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr,
+                            TCGMemOpIdx oi, uintptr_t retaddr);
+uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr,
+                            TCGMemOpIdx oi, uintptr_t retaddr);
+uint16_t helper_be_ldw_cmmu(CPUArchState *env, target_ulong addr,
+                            TCGMemOpIdx oi, uintptr_t retaddr);
+uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr,
+                            TCGMemOpIdx oi, uintptr_t retaddr);
+uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr,
+                            TCGMemOpIdx oi, uintptr_t retaddr);
+
 /* Temporary aliases until backends are converted.  */
 #ifdef TARGET_WORDS_BIGENDIAN
 # define helper_ret_ldsw_mmu  helper_be_ldsw_mmu
 # define helper_ret_lduw_mmu  helper_be_lduw_mmu
 # define helper_ret_ldsl_mmu  helper_be_ldsl_mmu
 # define helper_ret_ldul_mmu  helper_be_ldul_mmu
+# define helper_ret_ldl_mmu   helper_be_ldul_mmu
 # define helper_ret_ldq_mmu   helper_be_ldq_mmu
 # define helper_ret_stw_mmu   helper_be_stw_mmu
 # define helper_ret_stl_mmu   helper_be_stl_mmu
 # define helper_ret_stq_mmu   helper_be_stq_mmu
+# define helper_ret_ldw_cmmu  helper_be_ldw_cmmu
+# define helper_ret_ldl_cmmu  helper_be_ldl_cmmu
+# define helper_ret_ldq_cmmu  helper_be_ldq_cmmu
 #else
 # define helper_ret_ldsw_mmu  helper_le_ldsw_mmu
 # define helper_ret_lduw_mmu  helper_le_lduw_mmu
 # define helper_ret_ldsl_mmu  helper_le_ldsl_mmu
 # define helper_ret_ldul_mmu  helper_le_ldul_mmu
+# define helper_ret_ldl_mmu   helper_le_ldul_mmu
 # define helper_ret_ldq_mmu   helper_le_ldq_mmu
 # define helper_ret_stw_mmu   helper_le_stw_mmu
 # define helper_ret_stl_mmu   helper_le_stl_mmu
 # define helper_ret_stq_mmu   helper_le_stq_mmu
+# define helper_ret_ldw_cmmu  helper_le_ldw_cmmu
+# define helper_ret_ldl_cmmu  helper_le_ldl_cmmu
+# define helper_ret_ldq_cmmu  helper_le_ldq_cmmu
 #endif
 
 #endif /* CONFIG_SOFTMMU */
diff --git a/qemu/tcg/tci/README b/qemu/tcg/tci/README
index dc57f076b..3786b0915 100644
--- a/qemu/tcg/tci/README
+++ b/qemu/tcg/tci/README
@@ -21,7 +21,7 @@ This is what TCI (Tiny Code Interpreter) does.
 2) Implementation
 
 Like each TCG host frontend, TCI implements the code generator in
-tcg-target.c, tcg-target.h. Both files are in directory tcg/tci.
+tcg-target.inc.c, tcg-target.h. Both files are in directory tcg/tci.
 
 The additional file tcg/tci.c adds the interpreter.
 
@@ -123,7 +123,7 @@ u1 = linux-user-test works
   would also improve speed for hosts which support byte alignment).
 
 * A better disassembler for the pseudo code would be nice (a very primitive
-  disassembler is included in tcg-target.c).
+  disassembler is included in tcg-target.inc.c).
 
 * It might be useful to have a runtime option which selects the native TCG
   or TCI, so QEMU would have to include two TCGs. Today, selecting TCI
diff --git a/qemu/tcg/tci/tcg-target.h b/qemu/tcg/tci/tcg-target.h
index cbf3f9b5a..3942f9ccc 100644
--- a/qemu/tcg/tci/tcg-target.h
+++ b/qemu/tcg/tci/tcg-target.h
@@ -40,7 +40,6 @@
 #if !defined(TCG_TARGET_H)
 #define TCG_TARGET_H
 
-#include "config-host.h"
 #define TCG_TARGET_INTERPRETER 1
 #define TCG_TARGET_INSN_UNIT_SIZE 1
@@ -84,7 +83,8 @@
 #define TCG_TARGET_HAS_mulsh_i32        0
 
 #if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_trunc_shr_i32    0
+#define TCG_TARGET_HAS_extrl_i64_i32    0
+#define TCG_TARGET_HAS_extrh_i64_i32    0
 #define TCG_TARGET_HAS_bswap16_i64      1
 #define TCG_TARGET_HAS_bswap32_i64      1
 #define TCG_TARGET_HAS_bswap64_i64      1
diff --git a/qemu/tcg/tci/tcg-target.c b/qemu/tcg/tci/tcg-target.inc.c
index 83472dbcd..e2fc52a16 100644
--- a/qemu/tcg/tci/tcg-target.c
+++ b/qemu/tcg/tci/tcg-target.inc.c
@@ -210,6 +210,8 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
 #if TCG_TARGET_HAS_ext32u_i64
     { INDEX_op_ext32u_i64, { R, R } },
 #endif
+    { INDEX_op_ext_i32_i64, { R, R } },
+    { INDEX_op_extu_i32_i64, { R, R } },
 #if TCG_TARGET_HAS_bswap16_i64
     { INDEX_op_bswap16_i64, { R, R } },
 #endif
@@ -313,7 +315,7 @@ static const int tcg_target_call_oarg_regs[] = {
 #endif
 };
 
-#ifndef NDEBUG
+#ifdef CONFIG_DEBUG_TCG
 static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "r00",
     "r01",
@@ -358,9 +360,9 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
     /* tcg_out_reloc always uses the same type, addend.  */
-    assert(type == sizeof(tcg_target_long));
-    assert(addend == 0);
-    assert(value != 0);
+    tcg_debug_assert(type == sizeof(tcg_target_long));
+    tcg_debug_assert(addend == 0);
+    tcg_debug_assert(value != 0);
     if (TCG_TARGET_REG_BITS == 32) {
         tcg_patch32(code_ptr, value);
     } else {
@@ -417,7 +419,7 @@ static void tcg_out_op_t(TCGContext *s, TCGOpcode op)
 /* Write register. */
 static void tcg_out_r(TCGContext *s, TCGArg t0)
 {
-    assert(t0 < TCG_TARGET_NB_REGS);
+    tcg_debug_assert(t0 < TCG_TARGET_NB_REGS);
     tcg_out8(s, t0);
 }
 
@@ -425,7 +427,7 @@ static void tcg_out_r(TCGContext *s, TCGArg t0)
 static void tcg_out_ri(TCGContext *s, int const_arg, TCGArg arg)
 {
     if (const_arg) {
-        assert(const_arg == 1);
+        tcg_debug_assert(const_arg == 1);
         tcg_out8(s, TCG_CONST);
         tcg_out_i(s, arg);
     } else {
@@ -437,7 +439,7 @@ static void tcg_out_ri(TCGContext *s, int const_arg, TCGArg arg)
 static void tcg_out_ri32(TCGContext *s, int const_arg, TCGArg arg)
 {
     if (const_arg) {
-        assert(const_arg == 1);
+        tcg_debug_assert(const_arg == 1);
         tcg_out8(s, TCG_CONST);
         tcg_out32(s, arg);
     } else {
@@ -450,7 +452,7 @@ static void tcg_out_ri32(TCGContext *s, int const_arg, TCGArg arg)
 static void tcg_out_ri64(TCGContext *s, int const_arg, TCGArg arg)
 {
     if (const_arg) {
-        assert(const_arg == 1);
+        tcg_debug_assert(const_arg == 1);
         tcg_out8(s, TCG_CONST);
         tcg_out64(s, arg);
     } else {
@@ -464,7 +466,7 @@ static void tci_out_label(TCGContext *s, TCGLabel *label)
 {
     if (label->has_value) {
         tcg_out_i(s, label->u.value);
-        assert(label->u.value);
+        tcg_debug_assert(label->u.value);
     } else {
         tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0);
         s->code_ptr += sizeof(tcg_target_ulong);
@@ -481,12 +483,12 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
         tcg_out_r(s, arg1);
         tcg_out32(s, arg2);
     } else {
-        assert(type == TCG_TYPE_I64);
+        tcg_debug_assert(type == TCG_TYPE_I64);
 #if TCG_TARGET_REG_BITS == 64
         tcg_out_op_t(s, INDEX_op_ld_i64);
         tcg_out_r(s, ret);
         tcg_out_r(s, arg1);
-        assert(arg2 == (int32_t)arg2);
+        tcg_debug_assert(arg2 == (int32_t)arg2);
         tcg_out32(s, arg2);
 #else
         TODO();
@@ -498,7 +500,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
 {
     uint8_t *old_code_ptr = s->code_ptr;
-    assert(ret != arg);
+    tcg_debug_assert(ret != arg);
 #if TCG_TARGET_REG_BITS == 32
     tcg_out_op_t(s, INDEX_op_mov_i32);
 #else
@@ -519,7 +521,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
         tcg_out_r(s, t0);
         tcg_out32(s, arg32);
     } else {
-        assert(type == TCG_TYPE_I64);
+        tcg_debug_assert(type == TCG_TYPE_I64);
 #if TCG_TARGET_REG_BITS == 64
         tcg_out_op_t(s, INDEX_op_movi_i64);
         tcg_out_r(s, t0);
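The tcg_out_r/tcg_out_ri* helpers above define TCI's operand encoding: a register operand is a single byte, while a register-or-constant operand is either a register byte or a TCG_CONST tag followed by an immediate of the operand width. A hedged decode-side sketch of the same convention; tci_read_reg32 stands in for the interpreter's register fetch in tcg/tci.c, and the exact byte layout here is illustrative:

    #include <stdint.h>
    #include <string.h>

    static uint32_t read_ri32(const uint8_t **tb_ptr)
    {
        uint8_t b = *(*tb_ptr)++;
        uint32_t val;

        if (b == TCG_CONST) {
            memcpy(&val, *tb_ptr, 4);  /* immediate in host byte order */
            *tb_ptr += 4;
            return val;
        }
        return tci_read_reg32(b);      /* operand names a register */
    }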
@@ -553,14 +555,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
             /* Direct jump method. */
-            assert(args[0] < ARRAY_SIZE(s->tb_jmp_offset));
+            tcg_debug_assert(args[0] < ARRAY_SIZE(s->tb_jmp_offset));
             s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
             tcg_out32(s, 0);
         } else {
             /* Indirect jump method. */
             TODO();
         }
-        assert(args[0] < ARRAY_SIZE(s->tb_next_offset));
+        tcg_debug_assert(args[0] < ARRAY_SIZE(s->tb_next_offset));
         s->tb_next_offset[args[0]] = tcg_current_code_size(s);
         break;
     case INDEX_op_br:
@@ -611,7 +613,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_st_i64:
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
-        assert(args[2] == (int32_t)args[2]);
+        tcg_debug_assert(args[2] == (int32_t)args[2]);
         tcg_out32(s, args[2]);
         break;
     case INDEX_op_add_i32:
@@ -638,9 +640,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
         tcg_out_r(s, args[2]);
-        assert(args[3] <= UINT8_MAX);
+        tcg_debug_assert(args[3] <= UINT8_MAX);
         tcg_out8(s, args[3]);
-        assert(args[4] <= UINT8_MAX);
+        tcg_debug_assert(args[4] <= UINT8_MAX);
         tcg_out8(s, args[4]);
         break;
 
@@ -669,9 +671,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
         tcg_out_r(s, args[2]);
-        assert(args[3] <= UINT8_MAX);
+        tcg_debug_assert(args[3] <= UINT8_MAX);
         tcg_out8(s, args[3]);
-        assert(args[4] <= UINT8_MAX);
+        tcg_debug_assert(args[4] <= UINT8_MAX);
         tcg_out8(s, args[4]);
         break;
     case INDEX_op_div_i64:      /* Optional (TCG_TARGET_HAS_div_i64). */
@@ -701,6 +703,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_ext16u_i64:   /* Optional (TCG_TARGET_HAS_ext16u_i64). */
     case INDEX_op_ext32s_i64:   /* Optional (TCG_TARGET_HAS_ext32s_i64). */
     case INDEX_op_ext32u_i64:   /* Optional (TCG_TARGET_HAS_ext32u_i64). */
+    case INDEX_op_ext_i32_i64:
+    case INDEX_op_extu_i32_i64:
 #endif /* TCG_TARGET_REG_BITS == 64 */
     case INDEX_op_neg_i32:      /* Optional (TCG_TARGET_HAS_neg_i32). */
     case INDEX_op_not_i32:      /* Optional (TCG_TARGET_HAS_not_i32). */
@@ -815,7 +819,7 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
         tcg_out_r(s, arg1);
         tcg_out32(s, arg2);
     } else {
-        assert(type == TCG_TYPE_I64);
+        tcg_debug_assert(type == TCG_TYPE_I64);
 #if TCG_TARGET_REG_BITS == 64
         tcg_out_op_t(s, INDEX_op_st_i64);
         tcg_out_r(s, arg);
@@ -846,7 +850,7 @@ static void tcg_target_init(TCGContext *s)
 #endif
 
     /* The current code uses uint8_t for tcg operations. */
-    assert(ARRAY_SIZE(tcg_op_defs) <= UINT8_MAX);
+    tcg_debug_assert(tcg_op_defs_max <= UINT8_MAX);
 
     /* Registers available for 32 bit operations. */
     tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0,