// // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for full license information. // /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX XX emitX86.cpp XX XX XX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ #include "jitpch.h" #ifdef _MSC_VER #pragma hdrstop #endif #if defined(_TARGET_XARCH_) /*****************************************************************************/ /*****************************************************************************/ #include "instr.h" #include "emit.h" #include "codegen.h" bool IsSSE2Instruction(instruction ins) { return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_SSE2_INSTRUCTION); } bool IsSSEOrAVXInstruction(instruction ins) { #ifdef FEATURE_AVX_SUPPORT return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_AVX_INSTRUCTION); #else // !FEATURE_AVX_SUPPORT return IsSSE2Instruction(ins); #endif // !FEATURE_AVX_SUPPORT } bool emitter::IsAVXInstruction(instruction ins) { #ifdef FEATURE_AVX_SUPPORT return (UseAVX() && IsSSEOrAVXInstruction(ins)); #else return false; #endif } #define REX_PREFIX_MASK 0xFF00000000LL #ifdef FEATURE_AVX_SUPPORT // Returns true if the AVX instruction is a binary operator that requires 3 operands. // When we emit an instruction with only two operands, we will duplicate the destination // as a source. // TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to // be formalized by adding an additional field to instruction table to // to indicate whether a 3-operand instruction. bool emitter::IsThreeOperandBinaryAVXInstruction(instruction ins) { return IsAVXInstruction(ins) && ( ins == INS_cvtsi2ss || ins == INS_cvtsi2sd || ins == INS_cvtss2sd || ins == INS_cvtsd2ss || ins == INS_addss || ins == INS_addsd || ins == INS_subss || ins == INS_subsd || ins == INS_mulss || ins == INS_mulsd || ins == INS_divss || ins == INS_divsd || ins == INS_addps || ins == INS_addpd || ins == INS_subps || ins == INS_subpd || ins == INS_mulps || ins == INS_mulpd || ins == INS_cmpps || ins == INS_cmppd || ins == INS_andps || ins == INS_andpd || ins == INS_orps || ins == INS_orpd || ins == INS_xorps || ins == INS_xorpd || ins == INS_dpps || ins == INS_dppd || ins == INS_haddpd || ins == INS_por || ins == INS_pand || ins == INS_pandn || ins == INS_pcmpeqd || ins == INS_pcmpgtd || ins == INS_pcmpeqw || ins == INS_pcmpgtw || ins == INS_pcmpeqb || ins == INS_pcmpgtb || ins == INS_pcmpeqq || ins == INS_pcmpgtq || ins == INS_pmulld || ins == INS_pmullw || ins == INS_shufps || ins == INS_shufpd || ins == INS_minps || ins == INS_minss || ins == INS_minpd || ins == INS_minsd || ins == INS_divps || ins == INS_divpd || ins == INS_maxps || ins == INS_maxpd || ins == INS_maxss || ins == INS_maxsd || ins == INS_andnps || ins == INS_andnpd || ins == INS_paddb || ins == INS_paddw || ins == INS_paddd || ins == INS_paddq || ins == INS_psubb || ins == INS_psubw || ins == INS_psubd || ins == INS_psubq || ins == INS_pmuludq || ins == INS_pxor || ins == INS_pmaxub || ins == INS_pminub || ins == INS_pmaxsw || ins == INS_pminsw || ins == INS_insertps || ins == INS_vinsertf128 ); } // Returns true if the AVX instruction is a move operator that requires 3 operands. 
// When we emit an instruction with only two operands, we will duplicate the source // register in the vvvv field. This is because these merge sources into the dest. // TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to // be formalized by adding an additional field to the instruction table to // indicate whether it is a 3-operand instruction. bool emitter::IsThreeOperandMoveAVXInstruction(instruction ins) { return IsAVXInstruction(ins) && ( ins == INS_movlpd || ins == INS_movlps || ins == INS_movhpd || ins == INS_movhps || ins == INS_movss ); } #endif // Returns true if the AVX instruction is a 4-byte opcode. // Note that this should be true for any of the instructions in instrsXArch.h // that use the SSE38 or SSE3A macro. // TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this // needs to be addressed by expanding instruction encodings. bool Is4ByteAVXInstruction(instruction ins) { #ifdef FEATURE_AVX_SUPPORT return ( ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq || ins == INS_pcmpgtq || ins == INS_vbroadcastss || ins == INS_vbroadcastsd || ins == INS_vpbroadcastb || ins == INS_vpbroadcastw || ins == INS_vpbroadcastd || ins == INS_vpbroadcastq || ins == INS_vextractf128 || ins == INS_vinsertf128 || ins == INS_pmulld ); #else return false; #endif } #ifdef FEATURE_AVX_SUPPORT // Returns true if this instruction requires a VEX prefix. // All AVX instructions require a VEX prefix. bool emitter::TakesVexPrefix(instruction ins) { // Special case: vzeroupper requires a 2-byte VEX prefix if (ins == INS_vzeroupper) { return false; } return IsAVXInstruction(ins); } // Add base VEX prefix without setting W, R, X, or B bits // L bit will be set based on emitter attr. // // 3-byte VEX prefix = C4 // - R, X, B, W - bits to express corresponding REX prefixes // - m-mmmmm (5-bit) // 0-00001 - implied leading 0F opcode byte // 0-00010 - implied leading 0F 38 opcode bytes // 0-00011 - implied leading 0F 3A opcode bytes // Rest - reserved for future use; using them will result in an Undefined Instruction exception // // - vvvv (4-bits) - register specifier in 1's complement form; must be 1111 if unused // - L - scalar or AVX-128 bit operations (L=0), 256-bit operations (L=1) // - pp (2-bits) - opcode extension providing equivalent functionality of a SIMD size prefix // these prefixes are treated as mandatory when used with escape opcode 0Fh for // some SIMD instructions // 00 - None (0F - packed float) // 01 - 66 (66 0F - packed double) // 10 - F3 (F3 0F - scalar float) // 11 - F2 (F2 0F - scalar double) // // TODO-AMD64-CQ: for simplicity of implementation this routine always adds the 3-byte VEX // prefix. Based on the 'attr' param we could add the 2-byte VEX prefix in the case of scalar // and AVX-128 bit operations. #define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL #define LBIT_IN_3BYTE_VEX_PREFIX 0x00000400000000ULL size_t emitter::AddVexPrefix(instruction ins, size_t code, emitAttr attr) { // Only AVX instructions require a VEX prefix assert(IsAVXInstruction(ins)); // Shouldn't have already added a VEX prefix assert(!hasVexPrefix(code)); // Set the L bit to 1 for instructions that operate on 256 bits.
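// As a worked illustration of the constants above (decoded byte by byte):
//   C4 - 3-byte VEX escape byte
//   E0 - 1110 0000: R=1, X=1, B=1 (bit-inverted, so no extended registers yet),
//        m-mmmmm = 00000 (the implied-leading-byte field is filled in later
//        from the opcode's escape bytes)
//   78 - 0111 1000: W=0, vvvv=1111 (bit-inverted 'unused'), L=0, pp=00
// OR-ing in LBIT_IN_3BYTE_VEX_PREFIX changes that last byte to 7C, i.e. L=1 (256-bit).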
code |= DEFAULT_3BYTE_VEX_PREFIX; if (attr == EA_32BYTE) { code |= LBIT_IN_3BYTE_VEX_PREFIX; } return code; } #endif // FEATURE_AVX_SUPPORT // Returns true if this instruction, for the given EA_SIZE(attr), will require a REX.W prefix bool TakesRexWPrefix(instruction ins, emitAttr attr) { #ifdef _TARGET_AMD64_ // movsx should always sign extend out to 8 bytes just because we don't track // whether the dest should be 4 bytes or 8 bytes (attr indicates the size // of the source, not the dest). // A 4-byte movzx is equivalent to an 8 byte movzx, so it is not special // cased here. // // rex_jmp (jmp with a REX prefix) always requires the REX.W prefix. if (ins == INS_movsx || ins == INS_rex_jmp) { return true; } if (EA_SIZE(attr) != EA_8BYTE) { return false; } if (IsSSEOrAVXInstruction(ins)) { if (ins == INS_cvttsd2si || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si || ins == INS_cvtsi2sd || ins == INS_cvtsi2ss || ins == INS_mov_xmm2i || ins == INS_mov_i2xmm) { return true; } return false; } // TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these // opcodes... // These are all the instructions that default to 8-byte operand without the REX.W bit // With 1 special case: movzx, because the 4-byte version still zeros out the hi 4 bytes, // so we never need it if ((ins != INS_push) && (ins != INS_pop) && (ins != INS_movq) && (ins != INS_movzx) && (ins != INS_push_hide) && (ins != INS_pop_hide) && (ins != INS_ret) && (ins != INS_call) && !((ins >= INS_i_jmp) && (ins <= INS_l_jg))) { return true; } else { return false; } #else //!_TARGET_AMD64 = _TARGET_X86_ return false; #endif //!_TARGET_AMD64_ } // Returns true if using this register will require a REX.* prefix. // Since XMM registers overlap with YMM registers, this routine // can also be used to determine whether a YMM register is being used, if the // instruction in question is AVX. bool IsExtendedReg(regNumber reg) { #ifdef _TARGET_AMD64_ return ((reg >= REG_R8) && (reg <= REG_R15)) || ((reg >= REG_XMM8) && (reg <= REG_XMM15)); #else // X86 JIT operates in 32-bit mode and hence extended regs are not available. return false; #endif } // Returns true if using this register, for the given EA_SIZE(attr), will require a REX.* prefix bool IsExtendedReg(regNumber reg, emitAttr attr) { #ifdef _TARGET_AMD64_ // Not a register, so doesn't need a prefix if (reg > REG_XMM15) return false; // The opcode field only has 3 bits for the register; these high registers // need a 4th bit, which comes from the REX prefix (either REX.X, REX.R, or REX.B) if (IsExtendedReg(reg)) return true; if (EA_SIZE(attr) != EA_1BYTE) return false; // There are 12 one-byte registers addressable 'below' r8b: // al, cl, dl, bl, ah, ch, dh, bh, spl, bpl, sil, dil. // The first 4 are always addressable; the last 8 are divided into 2 sets: // ah, ch, dh, bh // -- or -- // spl, bpl, sil, dil // Both sets are encoded exactly the same; the difference is the presence // of a REX prefix, even a REX prefix with no other bits set (0x40). // So in order to get to the second set we need a REX prefix (but no bits). // // TODO-AMD64-CQ: if we ever want to start using the first set, we'll need a different way of // tracking/encoding registers. return (reg >= REG_RSP); #else // X86 JIT operates in 32-bit mode and hence extended regs are not available. return false; #endif } // Amd64: Since XMM registers overlap with YMM registers, this routine // can also be used to determine whether a YMM register is being used, in the case of AVX instructions.
// // X86: we have XMM0-XMM7 available but this routine cannot be used to // determine whether a reg is XMM because they share the same reg numbers // with integer registers. Hence always return false. bool IsXMMReg(regNumber reg) { #ifdef _TARGET_AMD64_ return (reg >= REG_XMM0) && (reg <= REG_XMM15); #else return false; #endif } // Returns bits to be encoded in instruction for the given register. regNumber RegEncoding(regNumber reg) { #ifdef _TARGET_AMD64_ // Amd64: XMM registers do not share the same reg numbers as integer registers. // But register encoding of integer and XMM registers is the same. // Therefore, subtract XMMBASE from regNumber to get the register encoding // in case of XMM registers. return (regNumber)((IsXMMReg(reg) ? reg-XMMBASE : reg) & 0x7); #else // X86: XMM registers share the same reg numbers as integer registers and // hence nothing to do to get reg encoding. return (regNumber) (reg & 0x7); #endif } // Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes // SSE2: separate 1-byte prefix gets added before opcode. // AVX: specific bits within VEX prefix need to be set in bit-inverted form. size_t emitter::AddRexWPrefix(instruction ins, size_t code) { #ifdef _TARGET_AMD64_ if (UseAVX() && IsAVXInstruction(ins)) { // W-bit is available only in 3-byte VEX prefix that starts with byte C4. assert(hasVexPrefix(code)); // W-bit is the only bit that is added in non bit-inverted form. return code | 0x00008000000000ULL; } return code | 0x4800000000ULL; #else assert(!"UNREACHED"); return code; #endif } #ifdef _TARGET_AMD64_ size_t emitter::AddRexRPrefix(instruction ins, size_t code) { if (UseAVX() && IsAVXInstruction(ins)) { // Right now support 3-byte VEX prefix assert(hasVexPrefix(code)); // R-bit is added in bit-inverted form. return code & 0xFF7FFFFFFFFFFFULL; } return code | 0x4400000000ULL; } size_t emitter::AddRexXPrefix(instruction ins, size_t code) { if (UseAVX() && IsAVXInstruction(ins)) { //Right now support 3-byte VEX prefix assert(hasVexPrefix(code)); // X-bit is added in bit-inverted form. return code & 0xFFBFFFFFFFFFFFULL; } return code | 0x4200000000ULL; } size_t emitter::AddRexBPrefix(instruction ins, size_t code) { if (UseAVX() && IsAVXInstruction(ins)) { // Right now support 3-byte VEX prefix assert(hasVexPrefix(code)); // B-bit is added in bit-inverted form. return code & 0xFFDFFFFFFFFFFFULL; } return code | 0x4100000000ULL; } // Adds REX prefix (0x40) without W, R, X or B bits set size_t emitter::AddRexPrefix(instruction ins, size_t code) { assert(!UseAVX() || !IsAVXInstruction(ins)); return code | 0x4000000000ULL; } bool isPrefix(BYTE b) { assert(b != 0); // Caller should check this assert(b != 0x67); // We don't use the address size prefix assert(b != 0x65); // The GS segment override prefix is emitted separately assert(b != 0x64); // The FS segment override prefix is emitted separately assert(b != 0xF0); // The lock prefix is emitted separately assert(b != 0x2E); // We don't use the CS segment override prefix assert(b != 0x3E); // Or the DS segment override prefix assert(b != 0x26); // Or the ES segment override prefix assert(b != 0x36); // Or the SS segment override prefix // That just leaves the size prefixes used in SSE opcodes: // Scalar Double Scalar Single Packed Double return ((b == 0xF2) || (b == 0xF3) || (b == 0x66)); } #endif //_TARGET_AMD64_ // Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise. 
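// As a sketch of what the routine below does, take cvtsi2sd, whose SSE
// encoding is F2 0F 2A (used here as an assumed example): under VEX, the F2
// size prefix is folded into the prefix as pp=11 and the 0F escape byte as
// m-mmmmm=00001, so only the final opcode byte (2A) is left to emit after
// the 3-byte VEX prefix.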
unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, size_t & code) { #ifdef _TARGET_AMD64_ if (hasVexPrefix(code)) { // Only AVX instructions should have a VEX prefix assert(UseAVX() && IsAVXInstruction(ins)); size_t vexPrefix = (code >> 32) & 0x00FFFFFF; code &= 0x00000000FFFFFFFFLL; WORD leadingBytes = 0; BYTE check = (code >> 24) & 0xFF; if (check != 0) { // 3-byte opcode: with the bytes ordered as 0x2211RM33 or // 4-byte opcode: with the bytes ordered as 0x22114433 // check for a prefix in the 11 position BYTE sizePrefix = (code >> 16) & 0xFF; if (sizePrefix != 0 && isPrefix(sizePrefix)) { // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits // // 00 - None (0F - packed float) // 01 - 66 (66 0F - packed double) // 10 - F3 (F3 0F - scalar float // 11 - F2 (F2 0F - scalar double) switch (sizePrefix) { case 0x66: vexPrefix |= 0x01; break; case 0xF3: vexPrefix |= 0x02; break; case 0xF2: vexPrefix |= 0x03; break; default: assert(!"unrecognized SIMD size prefix"); unreached(); } // Now the byte in the 22 position must be an escape byte 0F leadingBytes = check; assert(leadingBytes == 0x0F); // Get rid of both sizePrefix and escape byte code &= 0x0000FFFFLL; // Check the byte in the 33 position to see if it is 3A or 38. // In such a case escape bytes must be 0x0F3A or 0x0F38 check = code & 0xFF; if (check == 0x3A || check == 0x38) { leadingBytes = (leadingBytes << 8) | check; code &= 0x0000FF00LL; } } } else { // 2-byte opcode with the bytes ordered as 0x0011RM22 // the byte in position 11 must be an escape byte. leadingBytes = (code >> 16) & 0xFF; assert(leadingBytes == 0x0F || leadingBytes == 0x00); code &= 0xFFFF; } // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38 // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these // implied leading bytes switch (leadingBytes) { case 0x00: // there is no leading byte break; case 0x0F: vexPrefix |= 0x0100; break; case 0x0F38: vexPrefix |= 0x0200; break; case 0x0F3A: vexPrefix |= 0x0300; break; default: assert(!"encountered unknown leading bytes"); unreached(); } // At this point // VEX.2211RM33 got transformed as VEX.0000RM33 // VEX.0011RM22 got transformed as VEX.0000RM22 // // Now output VEX prefix leaving the 4-byte opcode emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF)); emitOutputByte(dst+1, ((vexPrefix >> 8) & 0xFF)); emitOutputByte(dst+2, vexPrefix & 0xFF); return 3; } else if (code > 0x00FFFFFFFFLL) { BYTE prefix = (code >> 32) & 0xFF; noway_assert(prefix >= 0x40 && prefix <= 0x4F); code &= 0x00000000FFFFFFFFLL; // TODO-AMD64-Cleanup: when we remove the prefixes (just the SSE opcodes right now) // we can remove this code as well // The REX prefix is required to come after all other prefixes. // Some of our 'opcodes' actually include some prefixes, if that // is the case, shift them over and place the REX prefix after // the other prefixes, and emit any prefix that got moved out. 
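// For example (illustrative): if the stored opcode includes a 66 size prefix,
// as in 66 0F 7E (movd), and a pending REX byte 0x48, the bytes must be emitted
// as 66 48 0F 7E - legacy prefix first, then REX immediately before the escape
// byte and opcode - never 48 66 0F 7E.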
BYTE check = (code >> 24) & 0xFF; if (check == 0) { // 3-byte opcode: with the bytes ordered as 0x00113322 // check for a prefix in the 11 position check = (code >> 16) & 0xFF; if (check != 0 && isPrefix(check)) { // Swap the rex prefix and whatever this prefix is code = (((DWORD)prefix << 16) | (code & 0x0000FFFFLL)); // and then emit the other prefix return emitOutputByte(dst, check); } } else { // 4-byte opcode with the bytes ordered as 0x22114433 // first check for a prefix in the 11 position BYTE check2 = (code >> 16) & 0xFF; if (isPrefix(check2)) { assert(!isPrefix(check)); // We currently don't use this, so it is untested if (isPrefix(check)) { // 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX // Change to c2rrc1XXXX, and emit check2 now code = (((size_t)prefix << 24) | ((size_t)check << 16) | (code & 0x0000FFFFLL)); } else { // 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX (check is part of the opcode) // Change to c2XXrrXXXX, and emit check2 now code = (((size_t)check << 24) | ((size_t)prefix << 16) | (code & 0x0000FFFFLL)); } return emitOutputByte(dst, check2); } } return emitOutputByte(dst, prefix); } #endif //_TARGET_AMD64_ return 0; } #ifdef _TARGET_AMD64_ /***************************************************************************** * Is the last instruction emitted a call instruction? */ bool emitter::emitIsLastInsCall() { if ((emitLastIns != nullptr) && (emitLastIns->idIns() == INS_call)) return true; return false; } /***************************************************************************** * We're about to create an epilog. If the last instruction we output was a 'call', * then we need to insert a NOP, to allow for proper exception-handling behavior. */ void emitter::emitOutputPreEpilogNOP() { if (emitIsLastInsCall()) { emitIns(INS_nop); } } #endif //_TARGET_AMD64_ // Size of rex prefix in bytes unsigned emitter::emitGetRexPrefixSize(instruction ins) { // For AVX instructions, the REX bits are part of the VEX prefix, // and hence require no additional byte to encode. if (IsAVXInstruction(ins)) return 0; // If not AVX, then we need 1 byte to encode the REX prefix. return 1; } // Size of vex prefix in bytes unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr) { // TODO-XArch-CQ: right now we default to the 3-byte VEX prefix. There is // scope for a size win by using the 2-byte VEX prefix for some of the // scalar, AVX-128 and most common AVX-256 instructions. if (IsAVXInstruction(ins)) return 3; // If not AVX, then we don't need to encode a VEX prefix. return 0; } // The VEX prefix encodes some bytes of the opcode and, as a result, the overall size of the instruction is reduced. // Therefore, estimating the size by adding the VEX prefix size and the size of the instruction opcode bytes will always overestimate it. // Instead, this routine adjusts the size of the VEX prefix based on the number of opcode bytes it encodes, so that the // instruction size estimate is accurate. // Basically this function will decrease the vexPrefixSize, // so that opcodeSize + vexPrefixAdjustedSize will be the right size.
// rightOpcodeSize + vexPrefixSize //=(opcodeSize - ExtraBytesSize) + vexPrefixSize //=opcodeSize + (vexPrefixSize - ExtraBytesSize) //=opcodeSize + vexPrefixAdjustedSize unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, size_t code) { #ifdef _TARGET_AMD64_ if (IsAVXInstruction(ins)) { unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr); // Currently the VEX prefix size is hard-coded as 3 bytes; // in future we should support the 2-byte VEX prefix. assert(vexPrefixAdjustedSize == 3); // In this case, the opcode will contain at least one escape-prefix byte, // so vexPrefixAdjustedSize is decremented by one. vexPrefixAdjustedSize -= 1; // Get the fourth byte of the opcode. // If this byte is non-zero, then we should check whether the opcode contains a SIMD prefix. BYTE check = (code >> 24) & 0xFF; if (check != 0) { // 3-byte opcode: with the bytes ordered as 0x2211RM33 or // 4-byte opcode: with the bytes ordered as 0x22114433 // The SIMD prefix, if present, is in the 11 position. BYTE sizePrefix = (code >> 16) & 0xFF; if (sizePrefix != 0 && isPrefix(sizePrefix)) { vexPrefixAdjustedSize -= 1; } // If the opcode size is 4 bytes, then the second escape prefix is in the fourth byte of the opcode. // But in that case the opcode size does not yet count the ModR/M byte: // opcodeSize + VexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR/MSize //=opcodeSize + VexPrefixAdjustedSize -1 + 1 //=opcodeSize + VexPrefixAdjustedSize // So although we may have a second escape-prefix byte, we don't decrease vexPrefixAdjustedSize. } return vexPrefixAdjustedSize; } #endif return 0; } // Get the size of the REX or VEX prefix emitted in the code unsigned emitter::emitGetPrefixSize(size_t code) { #ifdef FEATURE_AVX_SUPPORT if (code & VEX_PREFIX_MASK_3BYTE) { return 3; } else #endif if (code & REX_PREFIX_MASK) { return 1; } return 0; } #ifdef _TARGET_X86_ /***************************************************************************** * * Record a non-empty stack */ void emitter::emitMarkStackLvl(unsigned stackLevel) { assert(int(stackLevel) >= 0); assert(emitCurStackLvl == 0); assert(emitCurIG->igStkLvl == 0); assert(emitCurIGfreeNext == emitCurIGfreeBase); assert(stackLevel && stackLevel % sizeof(int) == 0); emitCurStackLvl = emitCurIG->igStkLvl = stackLevel; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } #endif /***************************************************************************** * * Get hold of the address mode displacement value for an indirect call. */ inline ssize_t emitter::emitGetInsCIdisp(instrDesc* id) { if (id->idIsLargeCall()) { return ((instrDescCGCA*)id)->idcDisp; } else { assert(!id->idIsLargeDsp()); assert(!id->idIsLargeCns()); return id->idAddr()->iiaAddrMode.amDisp; } } /***************************************************************************** * * The following table is used by the instIsFP()/instUse/DefFlags() helpers. */ #define INST_DEF_FL 0x20 // does the instruction set flags? #define INST_USE_FL 0x40 // does the instruction use flags?
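// For example (assuming the rf/wf columns in instrs.h are set accordingly):
// an entry for INS_add would carry only INST_DEF_FL, since add writes the
// flags without reading them, while INS_adc would carry both INST_USE_FL and
// INST_DEF_FL, since adc consumes the carry flag as well as producing flags.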
const BYTE CodeGenInterface::instInfo[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp), #define INST1(id, nm, fp, um, rf, wf, mr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp), #define INST2(id, nm, fp, um, rf, wf, mr, mi ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp), #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp), #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp), #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp), #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; /***************************************************************************** * * Initialize the table used by emitInsModeFormat(). */ const BYTE emitter::emitInsModeFmtTab[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) um, #define INST1(id, nm, fp, um, rf, wf, mr ) um, #define INST2(id, nm, fp, um, rf, wf, mr, mi ) um, #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) um, #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) um, #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) um, #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; #ifdef DEBUG unsigned const emitter::emitInsModeFmtCnt = sizeof(emitInsModeFmtTab)/ sizeof(emitInsModeFmtTab[0]); #endif /***************************************************************************** * * Combine the given base format with the update mode of the instruction. */ inline emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base) { assert(IF_RRD + IUM_RD == IF_RRD); assert(IF_RRD + IUM_WR == IF_RWR); assert(IF_RRD + IUM_RW == IF_RRW); return (insFormat)(base + emitInsUpdateMode(ins)); } /***************************************************************************** * * A version of emitInsModeFormat() that handles X87 floating-point instructions. */ #if FEATURE_STACK_FP_X87 emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base, insFormat FPld, insFormat FPst) { if (CodeGen::instIsFP(ins)) { assert(IF_TRD_SRD + 1 == IF_TWR_SRD); assert(IF_TRD_SRD + 2 == IF_TRW_SRD); assert(IF_TRD_MRD + 1 == IF_TWR_MRD); assert(IF_TRD_MRD + 2 == IF_TRW_MRD); assert(IF_TRD_ARD + 1 == IF_TWR_ARD); assert(IF_TRD_ARD + 2 == IF_TRW_ARD); switch (ins) { case INS_fst: case INS_fstp: case INS_fistp: case INS_fistpl: return (insFormat)(FPst ); case INS_fld: case INS_fild: return (insFormat)(FPld+1); case INS_fcomp: case INS_fcompp: case INS_fcomip: return (insFormat)(FPld ); default: return (insFormat)(FPld+2); } } else { return emitInsModeFormat(ins, base); } } #endif // FEATURE_STACK_FP_X87 // This is a helper we need due to VS Whidbey #254016 in order to determine // whether we cannot possibly be updating an integer register. This is not the best // solution, but the other ones (see bug) are going to be much more complicated. // The issue here is that on x86, the XMM registers use the same register numbers // as the general purpose registers, so we need to distinguish them. // We really only need this for x86 where this issue exists. bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id) { instruction ins = id->idIns(); // The following SSE2 instructions write to a general purpose integer register.
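// (For instance, cvttsd2si truncates a double held in an XMM register and
// writes the integer result to a general purpose register, so it updates an
// integer register even though it is an SSE2 instruction.)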
if (!IsSSEOrAVXInstruction(ins) || ins == INS_mov_xmm2i || ins == INS_cvttsd2si #ifdef _TARGET_AMD64_ || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si #endif // _TARGET_AMD64_ ) { return false; } return true; } /***************************************************************************** * * Returns the base encoding of the given CPU instruction. */ inline size_t insCode(instruction ins) { const static size_t insCodes[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) mr, #define INST1(id, nm, fp, um, rf, wf, mr ) mr, #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mr, #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mr, #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mr, #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mr, #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; assert((unsigned)ins < sizeof(insCodes)/sizeof(insCodes[0])); assert((insCodes[ins] != BAD_CODE)); return insCodes[ins]; } /***************************************************************************** * * Returns the "[r/m], 32-bit icon" encoding of the given CPU instruction. */ inline size_t insCodeMI(instruction ins) { const static size_t insCodesMI[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) #define INST1(id, nm, fp, um, rf, wf, mr ) #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mi, #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mi, #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mi, #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mi, #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; assert((unsigned)ins < sizeof(insCodesMI)/sizeof(insCodesMI[0])); assert((insCodesMI[ins] != BAD_CODE)); return insCodesMI[ins]; } /***************************************************************************** * * Returns the "reg, [r/m]" encoding of the given CPU instruction. */ inline size_t insCodeRM(instruction ins) { const static size_t insCodesRM[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) #define INST1(id, nm, fp, um, rf, wf, mr ) #define INST2(id, nm, fp, um, rf, wf, mr, mi ) #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) rm, #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) rm, #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rm, #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; assert((unsigned)ins < sizeof(insCodesRM)/sizeof(insCodesRM[0])); assert((insCodesRM[ins] != BAD_CODE)); return insCodesRM[ins]; } /***************************************************************************** * * Returns the "AL/AX/EAX, imm" accumulator encoding of the given instruction. */ inline size_t insCodeACC(instruction ins) { const static size_t insCodesACC[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) #define INST1(id, nm, fp, um, rf, wf, mr ) #define INST2(id, nm, fp, um, rf, wf, mr, mi ) #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) a4, #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) a4, #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; assert((unsigned)ins < sizeof(insCodesACC)/sizeof(insCodesACC[0])); assert((insCodesACC[ins] != BAD_CODE)); return insCodesACC[ins]; } /***************************************************************************** * * Returns the "register" encoding of the given CPU instruction. 
*/ inline size_t insCodeRR(instruction ins) { const static size_t insCodesRR[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) #define INST1(id, nm, fp, um, rf, wf, mr ) #define INST2(id, nm, fp, um, rf, wf, mr, mi ) #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rr, #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; assert((unsigned)ins < sizeof(insCodesRR)/sizeof(insCodesRR[0])); assert((insCodesRR[ins] != BAD_CODE)); return insCodesRR[ins]; } const static size_t insCodesMR[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) #define INST1(id, nm, fp, um, rf, wf, mr ) mr, #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mr, #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mr, #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mr, #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mr, #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; // Returns true iff the given CPU instruction has an MR encoding. inline size_t hasCodeMR(instruction ins) { assert((unsigned)ins < sizeof(insCodesMR)/sizeof(insCodesMR[0])); return((insCodesMR[ins] != BAD_CODE)); } /***************************************************************************** * * Returns the "[r/m], reg" or "[r/m]" encoding of the given CPU instruction. */ inline size_t insCodeMR(instruction ins) { assert((unsigned)ins < sizeof(insCodesMR)/sizeof(insCodesMR[0])); assert((insCodesMR[ins] != BAD_CODE)); return insCodesMR[ins]; } /***************************************************************************** * * Returns an encoding for the specified register to be used in the bit0-2 * part of an opcode. */ inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, size_t* code) { assert(reg < REG_STK); #ifndef LEGACY_BACKEND #ifdef _TARGET_AMD64_ // Either code is not NULL or reg is not an extended reg. // If reg is an extended reg, the instruction needs to be prefixed with 'REX', // which would require code != NULL. assert(code != nullptr || !IsExtendedReg(reg)); if (IsExtendedReg(reg)) { *code = AddRexBPrefix(ins, *code); // REX.B } else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr)) { // We are assuming that we only use/encode SPL, BPL, SIL and DIL, // not the corresponding AH, CH, DH, or BH *code = AddRexPrefix(ins, *code); // REX } #endif // _TARGET_AMD64_ reg = RegEncoding(reg); assert(reg < 8); return reg; #else // LEGACY_BACKEND assert(reg < 8); return reg; #endif // LEGACY_BACKEND } /***************************************************************************** * * Returns an encoding for the specified register to be used in the bit3-5 * part of an opcode. */ inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, size_t* code) { assert(reg < REG_STK); #ifndef LEGACY_BACKEND #ifdef _TARGET_AMD64_ // Either code is not NULL or reg is not an extended reg. // If reg is an extended reg, the instruction needs to be prefixed with 'REX', // which would require code != NULL.
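// (Illustrative example: encoding ECX - register value 1 - in the reg field
// contributes 1 << 3 = 0x08 to the ModRM byte, whereas R10 additionally needs
// REX.R, since the fourth bit of its encoding lives in the REX prefix.)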
assert(code != nullptr || !IsExtendedReg(reg)); if (IsExtendedReg(reg)) { *code = AddRexRPrefix(ins, *code); // REX.R } else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr)) { // We are assuming that we only use/encode SPL, BPL, SIL and DIL // not the corresponding AH, CH, DH, or BH *code = AddRexPrefix(ins, *code); // REX } #endif // _TARGET_AMD64_ reg = RegEncoding(reg); #ifdef _TARGET_AMD64_ assert(reg < REG_R8); #endif // !_TARGET_AMD64_ return(reg<< 3); #else // LEGACY_BACKEND assert(reg < 8); return(reg<< 3); #endif // LEGACY_BACKEND } /*********************************************************************************** * * Returns modified AVX opcode with the specified register encoded in bits 3-6 of * byte 2 of VEX prefix. */ inline size_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, size_t code) { #ifdef FEATURE_AVX_SUPPORT assert(reg < REG_STK); assert(IsAVXInstruction(ins)); assert(hasVexPrefix(code)); // Get 4-bit register encoding // RegEncoding() gives lower 3 bits // IsExtendedReg() gives MSB. size_t regBits = RegEncoding(reg); if (IsExtendedReg(reg)) { regBits |= 0x08; } // VEX prefix encodes register operand in 1's complement form // Shift count = 4-bytes of opcode + 0-2 bits assert(regBits <= 0xF); regBits <<= 35; return code ^ regBits; #else return code; #endif } /***************************************************************************** * * Returns an encoding for the specified register to be used in the bit3-5 * part of an SIB byte (unshifted). * Used exclusively to generate the REX.X bit and truncate the register. */ inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, size_t* code) { assert(reg < REG_STK); #ifdef _TARGET_AMD64_ // Either code is not NULL or reg is not an extended reg. // If reg is an extended reg, instruction needs to be prefixed with 'REX' // which would require code != NULL. assert(code != NULL || reg < REG_R8 || (reg >= REG_XMM0 && reg < REG_XMM8)); if (IsExtendedReg(reg)) { *code = AddRexXPrefix(ins, *code); // REX.X } reg = RegEncoding(reg); #endif assert(reg < 8); return reg; } /***************************************************************************** * * Returns the "[r/m]" opcode with the mod/RM field set to register. */ inline size_t emitter::insEncodeMRreg(instruction ins, size_t code) { // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding. if ((code & 0xFF00) == 0) { assert((code & 0xC000) == 0); code |= 0xC000; } return code; } /***************************************************************************** * * Returns the "[r/m], icon" opcode with the mod/RM field set to register. */ inline size_t insEncodeMIreg(instruction ins, size_t code) { assert((code & 0xC000) == 0); code |= 0xC000; return code; } /***************************************************************************** * * Returns the given "[r/m]" opcode with the mod/RM field set to register. */ inline size_t insEncodeRMreg(instruction ins, size_t code) { // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding. if ((code & 0xFF00) == 0) { assert((code & 0xC000) == 0); code |= 0xC000; } return code; } /***************************************************************************** * * Returns the "byte ptr [r/m]" opcode with the mod/RM field set to * the given register. 
*/ inline size_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, size_t code) { assert((code & 0xC000) == 0); code |= 0xC000; unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8; code |= regcode; return code; } /***************************************************************************** * * Returns the "byte ptr [r/m], icon" opcode with the mod/RM field set to * the given register. */ inline size_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, size_t code) { assert((code & 0xC000) == 0); code |= 0xC000; unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8; code |= regcode; return code; } /***************************************************************************** * * Returns true iff the given instruction does not have a "[r/m], icon" form, but *does* have a * "reg,reg,imm8" form. */ inline bool insNeedsRRIb(instruction ins) { // If this list gets longer, use a switch or a table. return ins == INS_imul; } /***************************************************************************** * * Returns the "reg,reg,imm8" opcode with both reg fields set to * the given register. */ inline size_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size) { assert(size == EA_4BYTE); // All we handle for now. assert(insNeedsRRIb(ins)); // If this list gets longer, use a switch, or a table lookup. size_t code = 0x69c0; unsigned regcode = insEncodeReg012(ins, reg, size, &code); // We use the same register as source and destination. (Could have another version that takes both regs...) code |= regcode; code |= (regcode << 3); return code; } /***************************************************************************** * * Returns the "+reg" opcode with the given register set into the low * nibble of the opcode. */ inline size_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size) { size_t code = insCodeRR(ins); unsigned regcode = insEncodeReg012(ins, reg, size, &code); code |= regcode; return code; } /***************************************************************************** * * Return the 'SS' field value for the given index scale factor.
*/ inline unsigned insSSval(unsigned scale) { assert(scale == 1 || scale == 2 || scale == 4 || scale == 8); const static BYTE scales[] = { 0x00, // 1 0x40, // 2 0xFF, // 3 0x80, // 4 0xFF, // 5 0xFF, // 6 0xFF, // 7 0xC0, // 8 }; return scales[scale-1]; } const instruction emitJumpKindInstructions[] = { INS_nop, #define JMP_SMALL(en, rev, ins) INS_##ins, #include "emitjmps.h" INS_call }; const emitJumpKind emitReverseJumpKinds[] = { EJ_NONE, #define JMP_SMALL(en, rev, ins) EJ_##rev, #include "emitjmps.h" }; /***************************************************************************** * Look up the instruction for a jump kind */ /*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind) { assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions)); return emitJumpKindInstructions[jumpKind]; } /***************************************************************************** * Reverse the conditional jump */ /* static */ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind) { assert(jumpKind < EJ_COUNT); return emitReverseJumpKinds[jumpKind]; } /***************************************************************************** * The size for these instructions is less than EA_4BYTE, * but the target register need not be byte-addressable */ inline bool emitInstHasNoCode(instruction ins) { if (ins == INS_align) return true; return false; } /***************************************************************************** * When encoding instructions that operate on byte registers * we have to ensure that we use a low register (EAX, EBX, ECX or EDX) * otherwise we will incorrectly encode the instruction */ bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumber reg2 /* = REG_NA */) { #if CPU_HAS_BYTE_REGS if (size != EA_1BYTE) // Not operating on a byte register is fine { return true; } if ((ins != INS_movsx) && // These two instructions support high register (ins != INS_movzx) ) // encodings for reg1 { // reg1 must be a byte-able register if ((genRegMask(reg1) & RBM_BYTE_REGS) == 0) { return false; } } // if reg2 is not REG_NA then reg2 must be a byte-able register if ((reg2 != REG_NA) && ((genRegMask(reg2) & RBM_BYTE_REGS) == 0)) { return false; } #endif // The instruction can be encoded return true; } /***************************************************************************** * * Estimate the size (in bytes of generated code) of the given instruction. */ inline UNATIVE_OFFSET emitter::emitInsSize(size_t code) { UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2; #ifdef _TARGET_AMD64_ size += emitGetPrefixSize(code); #endif return size; } inline UNATIVE_OFFSET emitter::emitInsSizeRM(instruction ins) { return emitInsSize(insCodeRM(ins)); } inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr) { emitAttr size = EA_SIZE(attr); UNATIVE_OFFSET sz; #ifdef _TARGET_AMD64_ // If Byte 4 (which is 0xFF00) is non-zero, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes. // This would probably be better expressed as a different format or something? 
if (insCodeRM(ins) & 0xFF00) { sz = 5; } else #endif // _TARGET_AMD64 { size_t code = insCodeRM(ins); sz = emitInsSize(insEncodeRMreg(ins, code)); } // Most 16-bit operand instructions will need a prefix if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx) { sz += 1; } // VEX prefix sz += emitGetVexPrefixAdjustedSize(ins, size, insCodeRM(ins)); // REX prefix if ((TakesRexWPrefix(ins, size) && ((ins != INS_xor) || (reg1 != reg2))) || IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr)) { sz += emitGetRexPrefixSize(ins); } return sz; } /*****************************************************************************/ inline UNATIVE_OFFSET emitter::emitInsSizeSV(size_t code, int var, int dsp) { UNATIVE_OFFSET size = emitInsSize(code); UNATIVE_OFFSET offs; bool offsIsUpperBound = true; bool EBPbased = true; /* Is this a temporary? */ if (var < 0) { /* An address off of ESP takes an extra byte */ if (!emitHasFramePtr) size++; #ifndef LEGACY_BACKEND // The offset is already assigned. Find the temp. TempDsc* tmp = emitComp->tmpFindNum(var, Compiler::TEMP_USAGE_USED); if (tmp == nullptr) { // It might be in the free lists, if we're working on zero initializing the temps. tmp = emitComp->tmpFindNum(var, Compiler::TEMP_USAGE_FREE); } assert(tmp != nullptr); offs = tmp->tdTempOffs(); // We only care about the magnitude of the offset here, to determine instruction size. if (emitComp->isFramePointerUsed()) { if ((int)offs < 0) { offs = -(int)offs; } } else { // SP-based offsets must already be positive. assert((int)offs >= 0); } #else // LEGACY_BACKEND /* We'll have to estimate the max. possible offset of this temp */ // TODO: Get an estimate of the temp offset instead of assuming // TODO: that any temp may be at the max. temp offset!!!!!!!!!! if (emitComp->lvaTempsHaveLargerOffsetThanVars()) { offs = emitLclSize + emitMaxTmpSize; } else { offs = emitMaxTmpSize; } offsIsUpperBound = false; #endif // LEGACY_BACKEND } else { /* Get the frame offset of the (non-temp) variable */ offs = dsp + emitComp->lvaFrameAddress(var, &EBPbased); /* An address off of ESP takes an extra byte */ if (!EBPbased) { ++size; } /* Is this a stack parameter reference? */ if ( emitComp->lvaIsParameter (var) #if !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI) && !emitComp->lvaIsRegArgument(var) #endif // !_TARGET_AMD64_ || UNIX_AMD64_ABI ) { /* If no EBP frame, arguments are off of ESP, above temps */ if (!EBPbased) { assert((int)offs >= 0); offsIsUpperBound = false; // since #temps can increase offs += emitMaxTmpSize; } } else { /* Locals off of EBP are at negative offsets */ if (EBPbased) { #ifdef _TARGET_AMD64_ // If localloc is not used, then ebp chaining is done and hence // offset of locals will be at negative offsets, Otherwise offsets // will be positive. In future, when RBP gets positioned in the // middle of the frame so as to optimize instruction encoding size, // the below asserts needs to be modified appropriately. if (emitComp->compLocallocUsed || emitComp->opts.compDbgEnC) { noway_assert((int)offs >= 0); } else #endif { // Dev10 804810 - failing this assert can lead to bad codegen and runtime crashes #ifdef UNIX_AMD64_ABI LclVarDsc* varDsc = emitComp->lvaTable + var; bool isRegPassedArg = varDsc->lvIsParam && varDsc->lvIsRegArg; // Register passed args could have a stack offset of 0. 
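// (Under the UNIX_AMD64_ABI convention assumed here, a parameter passed in a
// register may have been homed at frame offset 0, so the "negative offset"
// invariant checked below is relaxed for such args.)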
noway_assert((int)offs < 0 || isRegPassedArg); #else // !UNIX_AMD64_ABI noway_assert((int)offs < 0); #endif // !UNIX_AMD64_ABI } assert(emitComp->lvaTempsHaveLargerOffsetThanVars()); // lvaInlinedPInvokeFrameVar and lvaStubArgumentVar are placed below the temps if (unsigned(var) == emitComp->lvaInlinedPInvokeFrameVar || unsigned(var) == emitComp->lvaStubArgumentVar) { offs -= emitMaxTmpSize; } if ((int) offs < 0) { // offset is negative return size + ((int(offs) >= SCHAR_MIN) ? sizeof(char) : sizeof( int)); } #ifdef _TARGET_AMD64_ // This case arises for localloc frames else { return size + ((offs <= SCHAR_MAX) ? sizeof(char) : sizeof(int)); } #endif } if (emitComp->lvaTempsHaveLargerOffsetThanVars() == false) { offs += emitMaxTmpSize; } } } assert((int)offs >= 0); #if !FEATURE_FIXED_OUT_ARGS /* Are we addressing off of ESP? */ if (!emitHasFramePtr) { /* Adjust the effective offset if necessary */ if (emitCntStackDepth) offs += emitCurStackLvl; // we could (and used to) check for the special case [sp] here but the stack offset // estimator was off, and there is very little harm in overestimating for such a // rare case. } #endif // !FEATURE_FIXED_OUT_ARGS // printf("lcl = %04X, tmp = %04X, stk = %04X, offs = %04X\n", // emitLclSize, emitMaxTmpSize, emitCurStackLvl, offs); #ifdef _TARGET_AMD64_ bool useSmallEncoding = (SCHAR_MIN <= (int)offs) && ((int)offs <= SCHAR_MAX); #else bool useSmallEncoding = (offs <= size_t(SCHAR_MAX)); #endif #ifdef LEGACY_BACKEND /* If we are using a small encoding, there is a danger that we might end up having to use a larger encoding. Record 'offs' so that we can detect if such a situation occurs */ if (useSmallEncoding && !offsIsUpperBound) { if (emitGrowableMaxByteOffs < offs) { emitGrowableMaxByteOffs = offs; #ifdef DEBUG // Remember which instruction this is emitMaxByteOffsIdNum = emitInsCount; #endif } } #endif // LEGACY_BACKEND // If it is ESP based, and the offset is zero, we will not encode the disp part. if (!EBPbased && offs == 0) { return size; } else { return size + (useSmallEncoding ? sizeof(char) : sizeof( int)); } } inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, int var, int dsp, int val) { instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); UNATIVE_OFFSET prefix = 0; bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(valSize <= sizeof(int) || !id->idIsCnsReloc()); #endif // _TARGET_AMD64_ if (valSize > sizeof(int)) { valSize = sizeof(int); } #ifdef RELOC_SUPPORT if (id->idIsCnsReloc()) { valInByte = false; // relocs can't be placed in a byte assert(valSize == sizeof(int)); } #endif if (valInByte) { valSize = sizeof(char); } // 16-bit operand instructions need a prefix. // This referes to 66h size prefix override if (id->idOpSize() == EA_2BYTE) { prefix = 1; } return prefix + valSize + emitInsSizeSV(insCodeMI(ins), var, dsp); } /*****************************************************************************/ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code) { emitAttr attrSize = id->idOpSize(); instruction ins = id->idIns(); /* The displacement field is in an unusual place for calls */ ssize_t dsp = (ins == INS_call) ? 
emitGetInsCIdisp(id) : emitGetInsAmdAny(id); bool dspInByte = ((signed char)dsp == (ssize_t)dsp); bool dspIsZero = (dsp == 0); UNATIVE_OFFSET size; // Note that the values in reg and rgx are used in this method to decide // how many bytes will be needed by the address [reg+rgx+cns] // this includes the prefix bytes when reg or rgx are registers R8-R15 regNumber reg; regNumber rgx; // The idAddr field is a union and only some of the instruction formats use the iiaAddrMode variant // these are IF_AWR_*, IF_ARD_*, IF_ARW_* and IF_*_ARD // ideally these should really be the only idInsFmts that we see here // but we have some outliers to deal with: // emitIns_R_L adds IF_RWR_LABEL and calls emitInsSizeAM // emitInsRMW adds IF_MRW_CNS, IF_MRW_RRD and calls emitInsSizeAM switch (id->idInsFmt()) { case IF_RWR_LABEL: case IF_MRW_CNS: case IF_MRW_RRD: reg = REG_NA; rgx = REG_NA; break; default: reg = id->idAddr()->iiaAddrMode.amBaseReg; rgx = id->idAddr()->iiaAddrMode.amIndxReg; break; } #ifdef RELOC_SUPPORT if (id->idIsDspReloc()) { dspInByte = false; // relocs can't be placed in a byte dspIsZero = false; // relocs won't always be zero } #endif if (code & 0xFF000000) { size = 4; } else if(code & 0x00FF0000) { assert( (attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64 || (attrSize == EA_16BYTE) // only for x64 || (ins == INS_movzx) || (ins == INS_movsx)); size = 3; } else { size = 2; // Most 16-bit operands will require a size prefix . // This refers to 66h size prefix override. #if FEATURE_STACK_FP_X87 if ((attrSize == EA_2BYTE) && (ins != INS_fldcw) && (ins != INS_fnstcw)) #else // FEATURE_STACK_FP_X87 if (attrSize == EA_2BYTE) #endif // FEATURE_STACK_FP_X87 { size++; } } #ifdef _TARGET_AMD64_ size += emitGetVexPrefixAdjustedSize(ins, attrSize, code); if (code & REX_PREFIX_MASK) { // REX prefix size += emitGetRexPrefixSize(ins); } else if (TakesRexWPrefix(ins, attrSize)) { // REX.W prefix size += emitGetRexPrefixSize(ins); } else if (IsExtendedReg(reg, EA_PTRSIZE) || IsExtendedReg(rgx, EA_PTRSIZE) || IsExtendedReg(id->idReg1(), attrSize)) { // Should have a REX byte size += emitGetRexPrefixSize(ins); } #endif // _TARGET_AMD64_ if (rgx == REG_NA) { /* The address is of the form "[reg+disp]" */ switch (reg) { case REG_NA: /* The address is of the form "[disp]" */ size += sizeof(INT32); #ifdef _TARGET_AMD64_ // If id is not marked for reloc, add 1 additional byte for SIB that follows disp32 if (!id->idIsDspReloc()) { size++; } #endif return size; case REG_EBP: AMD64_ONLY(case REG_R13:) break; case REG_ESP: AMD64_ONLY(case REG_R12:) size++; __fallthrough; default: if (dspIsZero) return size; } /* Does the offset fit in a byte? */ if (dspInByte) size += sizeof(char); else size += sizeof(INT32); } else { /* An index register is present */ size++; /* Is the index value scaled? */ if (emitDecodeScale(id->idAddr()->iiaAddrMode.amScale) > 1) { /* Is there a base register? 
*/ if (reg != REG_NA) { /* The address is "[reg + {2/4/8} * rgx + icon]" */ if (dspIsZero && reg != REG_EBP AMD64_ONLY( && reg != REG_R13)) { /* The address is "[reg + {2/4/8} * rgx]" */ } else { /* The address is "[reg + {2/4/8} * rgx + disp]" */ if (dspInByte) size += sizeof(char); else size += sizeof(int ); } } else { /* The address is "[{2/4/8} * rgx + icon]" */ size += sizeof(INT32); } } else { if (dspIsZero && ((reg == REG_EBP) AMD64_ONLY( || (reg == REG_R13))) && ((rgx != REG_EBP) AMD64_ONLY( && (rgx != REG_R13)))) { /* Swap reg and rgx, such that reg is not EBP/R13 */ regNumber tmp = reg; id->idAddr()->iiaAddrMode.amBaseReg = reg = rgx; id->idAddr()->iiaAddrMode.amIndxReg = rgx = tmp; } /* The address is "[reg+rgx+dsp]" */ if (dspIsZero && reg != REG_EBP AMD64_ONLY( && reg != REG_R13)) { /* This is [reg+rgx]" */ } else { /* This is [reg+rgx+dsp]" */ if (dspInByte) size += sizeof(char); else size += sizeof(int ); } } } return size; } inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code, int val) { instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(valSize <= sizeof(INT32) || !id->idIsCnsReloc()); #endif // _TARGET_AMD64_ if (valSize > sizeof(INT32)) { valSize = sizeof(INT32); } #ifdef RELOC_SUPPORT if (id->idIsCnsReloc()) { valInByte = false; // relocs can't be placed in a byte assert(valSize == sizeof(INT32)); } #endif if (valInByte) { valSize = sizeof(char); } return valSize + emitInsSizeAM(id, code); } inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code) { instruction ins = id->idIns(); // fgMorph changes any statics that won't fit into 32-bit addresses // into constants with an indir, rather than GT_CLS_VAR // so we should only hit this path for statics that are RIP-relative UNATIVE_OFFSET size = sizeof(INT32); // Most 16-bit operand instructions will need a prefix. // This refers to 66h size prefix override. if (id->idOpSize() == EA_2BYTE && ins != INS_movzx && ins != INS_movsx) { size++; } return size + emitInsSize(code); } inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code, int val) { instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); #ifndef _TARGET_AMD64_ // occasionally longs get here on x86 if (valSize > sizeof(INT32)) valSize = sizeof(INT32); #endif // !_TARGET_AMD64_ #ifdef RELOC_SUPPORT if (id->idIsCnsReloc()) { valInByte = false; // relocs can't be placed in a byte assert(valSize == sizeof(INT32)); } #endif if (valInByte) { valSize = sizeof(char); } return valSize + emitInsSizeCV(id, code); } /***************************************************************************** * * Allocate instruction descriptors for instructions with address modes. 
*/ inline emitter::instrDesc* emitter::emitNewInstrAmd (emitAttr size, ssize_t dsp) { if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX) { instrDescAmd* id = emitAllocInstrAmd (size); id->idSetIsLargeDsp(); #ifdef DEBUG id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL; #endif id->idaAmdVal = dsp; return id; } else { instrDesc* id = emitAllocInstr (size); id->idAddr()->iiaAddrMode.amDisp = dsp; assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits return id; } } /***************************************************************************** * * Set the displacement field in an instruction. Only handles instrDescAmd type. */ inline void emitter::emitSetAmdDisp(instrDescAmd* id, ssize_t dsp) { if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX) { id->idSetIsLargeDsp(); #ifdef DEBUG id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL; #endif id->idaAmdVal = dsp; } else { id->idSetIsSmallDsp(); id->idAddr()->iiaAddrMode.amDisp = dsp; assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits } } /***************************************************************************** * * Allocate an instruction descriptor for an instruction that uses both * an address mode displacement and a constant. */ emitter::instrDesc* emitter::emitNewInstrAmdCns(emitAttr size, ssize_t dsp, int cns) { if (dsp >= AM_DISP_MIN && dsp <= AM_DISP_MAX) { if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS) { instrDesc* id = emitAllocInstr (size); id->idSmallCns(cns); id->idAddr()->iiaAddrMode.amDisp = dsp; assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits return id; } else { instrDescCns* id = emitAllocInstrCns (size); id->idSetIsLargeCns(); id->idcCnsVal = cns; id->idAddr()->iiaAddrMode.amDisp = dsp; assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits return id; } } else { if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS) { instrDescAmd* id = emitAllocInstrAmd (size); id->idSetIsLargeDsp(); #ifdef DEBUG id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL; #endif id->idaAmdVal = dsp; id->idSmallCns(cns); return id; } else { instrDescCnsAmd* id = emitAllocInstrCnsAmd(size); id->idSetIsLargeCns(); id->idacCnsVal = cns; id->idSetIsLargeDsp(); #ifdef DEBUG id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL; #endif id->idacAmdVal = dsp; return id; } } } /***************************************************************************** * * The next instruction will be a loop head entry point, * so insert a dummy instruction here to ensure that * the x86 I-cache alignment rule is followed. */ void emitter::emitLoopAlign() { /* Insert a pseudo-instruction to ensure that we align the next instruction properly */ instrDesc* id = emitNewInstrTiny(EA_1BYTE); id->idIns(INS_align); id->idCodeSize(15); // We may need to skip up to 15 bytes of code emitCurIGsize += 15; } /***************************************************************************** * * Add a NOP instruction of the given size. */ void emitter::emitIns_Nop(unsigned size) { assert(size <= 15); instrDesc* id = emitNewInstr(); id->idIns(INS_nop); id->idInsFmt(IF_NONE); id->idCodeSize(size); dispIns(id); emitCurIGsize += size; } /***************************************************************************** * * Add an instruction with no operands.
*/ #ifdef DEBUG static bool isX87InsWithNoOperands(instruction ins) { #if FEATURE_STACK_FP_X87 return ( ins == INS_f2xm1 || ins == INS_fchs || ins == INS_fld1 || ins == INS_fldl2e || ins == INS_fldz || ins == INS_fprem || ins == INS_frndint || ins == INS_fscale ); #else // !FEATURE_STACK_FP_X87 return false; #endif // !FEATURE_STACK_FP_X87 } #endif // DEBUG void emitter::emitIns(instruction ins) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstr(); size_t code = insCodeMR(ins); #ifdef DEBUG #if FEATURE_STACK_FP_X87 if (ins != INS_fabs && ins != INS_fsqrt && ins != INS_fsin && ins != INS_fcos) #endif // FEATURE_STACK_FP_X87 { // We cannot have #ifdef inside macro expansion. bool assertCond = (ins == INS_cdq || isX87InsWithNoOperands(ins) || ins == INS_int3 || ins == INS_lock || ins == INS_leave || ins == INS_movsb || ins == INS_movsd || ins == INS_movsp || ins == INS_nop || ins == INS_r_movsb || ins == INS_r_movsd || ins == INS_r_movsp || ins == INS_r_stosb || ins == INS_r_stosd || ins == INS_r_stosp || ins == INS_ret || ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp #ifdef _TARGET_AMD64_ || ins == INS_vzeroupper #endif ); assert(assertCond); } #endif //DEBUG #ifdef _TARGET_AMD64_ assert((code & REX_PREFIX_MASK) == 0); // Can't have a REX bit with no operands, right? #endif // _TARGET_AMD64_ if (code & 0xFF000000) sz = 2; // TODO-XArch-Bug?: Shouldn't this be 4? Or maybe we should assert that we don't see this case. else if (code & 0x00FF0000) sz = 3; else if (code & 0x0000FF00) sz = 2; else sz = 1; #ifdef _TARGET_AMD64_ // Account for 2-byte VEX prefix in case of vzeroupper if (ins == INS_vzeroupper) { sz += 2; } #endif insFormat fmt = IF_NONE; #if FEATURE_STACK_FP_X87 if (CodeGen::instIsFP(ins)) { fmt = emitInsModeFormat(ins, IF_TRD); } #endif // FEATURE_STACK_FP_X87 id->idIns(ins); id->idInsFmt(fmt); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } #if !defined(LEGACY_BACKEND) // Add an instruction with no operands, but whose encoding depends on the size // (Only CDQ/CQO currently) void emitter::emitIns(instruction ins, emitAttr attr) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstr(attr); size_t code = insCodeMR(ins); assert(ins == INS_cdq); assert((code & 0xFFFFFF00) == 0); sz = 1; insFormat fmt = IF_NONE; sz += emitGetVexPrefixAdjustedSize(ins, attr, code); if (TakesRexWPrefix(ins, attr)) sz += emitGetRexPrefixSize(ins); id->idIns(ins); id->idInsFmt(fmt); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } // fill in all the fields void emitter::emitHandleMemOp(GenTree* mem, instrDesc* id, bool isSrc) { GenTreeIndir* indir = mem->AsIndir(); GenTree* memBase = indir->Base(); if ((memBase != nullptr) && memBase->isContained() && (memBase->OperGet() == GT_CLS_VAR_ADDR)) { CORINFO_FIELD_HANDLE fldHnd = memBase->gtClsVar.gtClsVarHnd; // Statics always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) { // Contract: // fgMorphField() changes any statics that won't fit into 32-bit addresses into // constants with an indir, rather than GT_CLS_VAR, based on the reloc type hint given // by the VM. Hence the emitter should always mark GT_CLS_VAR_ADDR as relocatable. // // Data section constants: these get allocated close to the code block of the method and // are always addressable IP-relative. These too should be marked as relocatable.
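// For illustration: on AMD64 a static field access such as "mov eax, [classVar]"
// is emitted RIP-relative, so the 32-bit displacement recorded here has to be
// relocated once the final code address is known.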
id->idSetIsDspReloc(); } id->idAddr()->iiaFieldHnd = fldHnd; if (isSrc) { id->idInsFmt(IF_RRD_MRD); } else { id->idInsFmt(IF_MRD_RRD); } } else if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained()) { // Absolute addresses marked as contained should fit within the base of addr mode. assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp)); // Either not generating relocatable code or addr must be an icon handle assert(!emitComp->opts.compReloc || memBase->IsIconHandle()); if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp)) { id->idSetIsDspReloc(); } if (isSrc) { id->idInsFmt(IF_RRD_ARD); } else { id->idInsFmt(IF_ARD_RRD); } id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; assert(emitGetInsAmdAny(id) == memBase->AsIntConCommon()->IconValue()); } else { if (memBase != nullptr) { id->idAddr()->iiaAddrMode.amBaseReg = memBase->gtRegNum; } else { id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; } if (indir->HasIndex()) { id->idAddr()->iiaAddrMode.amIndxReg = indir->Index()->gtRegNum; } else { id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; } id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(indir->Scale()); // already set disp in ctor assert(emitGetInsAmdAny(id) == ssize_t(indir->Offset())); // make sure "disp" is stored properly } } // Takes care of storing all incoming register parameters // into their corresponding shadow slots (defined by the x64 ABI) void emitter::spillIntArgRegsToShadowSlots() { unsigned argNum; instrDesc* id; UNATIVE_OFFSET sz; assert(emitComp->compGeneratingProlog); for (argNum = 0; argNum < MAX_REG_ARG; ++argNum) { regNumber argReg = intArgRegs[argNum]; // The offsets for the shadow space start at RSP + 8 // (just above the caller's return address) int offset = (argNum + 1) * EA_PTRSIZE; id = emitNewInstrAmd(EA_PTRSIZE, offset); id->idIns(INS_mov); id->idInsFmt(IF_AWR_RRD); id->idAddr()->iiaAddrMode.amBaseReg = REG_SPBASE; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1); // The offset has already been set in the instrDesc ctor, // make sure we got it right.
assert(emitGetInsAmdAny(id) == ssize_t(offset)); id->idReg1(argReg); sz = emitInsSizeAM(id, insCodeMR(INS_mov)); id->idCodeSize(sz); emitCurIGsize += sz; } } // This is very similar to emitInsBinary and probably could be folded into the same function, // except that the requirements on the incoming parameter are different; // e.g. the memory op in the storeind case must NOT be contained void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node) { UNATIVE_OFFSET sz; instrDesc* id; switch (node->OperGet()) { case GT_IND: { GenTreeIndir* mem = node->AsIndir(); if (mem->HasBase() && mem->Base()->OperGet() == GT_CLS_VAR_ADDR) { emitIns_R_C(ins, attr, node->gtRegNum, mem->Base()->gtClsVar.gtClsVarHnd, 0); return; } else if (mem->Addr()->OperGet() == GT_LCL_VAR_ADDR) { GenTreeLclVarCommon* varNode = mem->Addr()->AsLclVarCommon(); emitIns_R_S(ins, attr, node->gtRegNum, varNode->GetLclNum(), 0); codeGen->genUpdateLife(varNode); return; } else { GenTreePtr addr = mem->Addr(); assert (addr->OperIsAddrMode() || addr->gtOper == GT_CLS_VAR_ADDR || (addr->IsCnsIntOrI() && addr->isContained()) || !addr->isContained()); size_t offset = mem->Offset(); id = emitNewInstrAmd(attr, offset); id->idIns(ins); id->idReg1(node->gtRegNum); id->idInsFmt(IF_RWR_ARD); emitHandleMemOp(node, id, true); // may overwrite format if (addr->IsCnsIntOrI() && addr->isContained()) { // Absolute addresses marked as contained should fit within the base of addr mode. assert(addr->AsIntConCommon()->FitsInAddrBase(emitComp)); // Case of "ins reg, [disp]" and should use IF_RWR_ARD as format id->idInsFmt(IF_RWR_ARD); } sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); } } break; case GT_STOREIND: { GenTreeIndir* mem = node->AsIndir(); GenTree* memBase = mem->Base(); size_t offset = mem->Offset(); GenTree* data = node->gtOp.gtOp2; if ((memBase != nullptr) && (memBase->OperGet() == GT_CLS_VAR_ADDR)) { if (data->isContained()) { emitIns_C_I(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0, (int) data->AsIntConCommon()->IconValue()); } else { emitIns_C_R(ins, attr, memBase->gtClsVar.gtClsVarHnd, data->gtRegNum, 0); } return; } if (mem->Addr()->OperGet() == GT_LCL_VAR_ADDR) { GenTreeLclVarCommon* varNode = memBase->AsLclVarCommon(); if (data->isContainedIntOrIImmed()) { emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int) data->AsIntConCommon()->IconValue()); codeGen->genUpdateLife(varNode); } else { assert(!data->isContained()); emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0); codeGen->genUpdateLife(varNode); } return; } if (data->isContainedIntOrIImmed()) { int icon = (int) data->AsIntConCommon()->IconValue(); id = emitNewInstrAmdCns(attr, offset, icon); id->idIns(ins); id->idInsFmt(IF_AWR_CNS); emitHandleMemOp(node, id, false); // may overwrite format if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained()) { // Absolute addresses marked as contained should fit within the base of addr mode.
assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp)); // Case of "ins [disp], immed " and should use IF_AWR_CNS as format id->idInsFmt(IF_AWR_CNS); } sz = emitInsSizeAM(id, insCodeMI(ins), icon); id->idCodeSize(sz); } else { id = emitNewInstrAmd(attr, offset); id->idIns(ins); id->idInsFmt(IF_AWR_RRD); emitHandleMemOp(node, id, false); // may overwrite format id->idReg1(node->gtOp.gtOp2->gtRegNum); sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); } } break; case GT_STORE_LCL_VAR: { GenTreeLclVarCommon* varNode = node->AsLclVarCommon(); GenTree* data = node->gtOp.gtOp1->gtEffectiveVal(); codeGen->inst_set_SV_var(varNode); assert(varNode->gtRegNum == REG_NA); // stack store if (data->isContainedIntOrIImmed()) { emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int) data->AsIntConCommon()->IconValue()); codeGen->genUpdateLife(varNode); } else { assert(!data->isContained()); emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0); codeGen->genUpdateLife(varNode); } } return; default: unreached(); } dispIns(id); emitCurIGsize += sz; } CORINFO_FIELD_HANDLE emitter::emitLiteralConst(ssize_t cnsValIn, emitAttr attr /*= EA_8BYTE*/) { NYI("emitLiteralConst"); return nullptr; } // Generates a float or double data section constant and returns field handle representing // the data offset to access the constant. This is called by emitInsBinary() in case // of contained float of double constants. CORINFO_FIELD_HANDLE emitter::emitFltOrDblConst(GenTreeDblCon* tree, emitAttr attr /*=EA_UNKNOWN*/) { if (attr == EA_UNKNOWN) { attr = emitTypeSize(tree->TypeGet()); } else { assert(emitTypeSize(tree->TypeGet()) == attr); } double constValue = tree->gtDblCon.gtDconVal; void *cnsAddr; float f; bool dblAlign; if (attr == EA_4BYTE) { f = forceCastToFloat(constValue); cnsAddr = &f; dblAlign = false; } else { cnsAddr = &constValue; dblAlign = true; } // Access to inline data is 'abstracted' by a special type of static member // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference // to constant data, not a real static field. UNATIVE_OFFSET cnsSize = (attr == EA_4BYTE) ? 4 : 8; UNATIVE_OFFSET cnum = emitDataConst(cnsAddr, cnsSize, dblAlign); return emitComp->eeFindJitDataOffs(cnum); } // The callee must call genConsumeReg() for all sources, including address registers // of both source and destination, and genProduceReg() for the destination register, if any. regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src) { // dst can only be a reg or modrm assert(!dst->isContained() || dst->isContainedIndir() || dst->isContainedLclField() || instrIs3opImul(ins)); // dst on these isn't really the dst // src can be anything but both src and dst cannot be addr modes // or at least cannot be contained addr modes if (dst->isContainedIndir()) assert(!src->isContainedIndir()); if (src->isContainedLclField()) assert(!dst->isContained()); // find which operand is a memory op (if any) // and what its base is GenTreeIndir* mem = nullptr; GenTree* memBase = nullptr; if (dst->isContainedIndir()) { mem = dst->AsIndir(); } else if (src->isContainedIndir()) { mem = src->AsIndir(); } if (mem) { memBase = mem->gtOp1; } // Find immed (if any) - it cannot be the dst // SSE2 instructions allow only the second operand to be a memory operand. 
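// For example, "addsd xmm0, qword ptr [rax]" is encodable, but there is no
// SSE2 form with the memory operand as the destination ("addsd [rax], xmm0").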
GenTreeIntConCommon* intConst = nullptr; GenTreeDblCon* dblConst = nullptr; if (src->isContainedIntOrIImmed()) { intConst = src->AsIntConCommon(); } else if (src->isContainedFltOrDblImmed()) { dblConst = src->AsDblCon(); } // find local field if any GenTreeLclFld* lclField = nullptr; if (src->isContainedLclField()) { lclField = src->AsLclFld(); } else if (dst->isLclField() && dst->gtRegNum == REG_NA) { lclField = dst->AsLclFld(); } // First handle the simple non-memory cases // if ((mem == nullptr) && (lclField == nullptr)) { if (intConst != nullptr) { // reg, immed assert(!dst->isContained()); emitIns_R_I(ins, attr, dst->gtRegNum, intConst->IconValue()); // TODO-XArch-Bug?: does the caller call regTracker.rsTrackRegTrash(dst->gtRegNum) or rsTrackRegIntCns(dst->gtRegNum, intConst->IconValue()) (as appropriate)? } else if (dblConst != nullptr) { // Emit a data section constant for float or double constant. CORINFO_FIELD_HANDLE hnd = emitFltOrDblConst(dblConst); emitIns_R_C(ins, attr, dst->gtRegNum, hnd, 0); } else { // reg, reg assert(!src->isContained() && !dst->isContained()); if (instrHasImplicitRegPairDest(ins)) { emitIns_R(ins, attr, src->gtRegNum); } else { emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum); } // TODO-XArch-Bug?: does the caller call regTracker.rsTrackRegTrash(dst->gtRegNum) or, for ins=MOV: regTracker.rsTrackRegCopy(dst->gtRegNum, src->gtRegNum); ? } return dst->gtRegNum; } // Next handle the cases where we have a stack based local memory operand // if (lclField) { unsigned offset = lclField->gtLclFld.gtLclOffs; unsigned varNum = lclField->gtLclVarCommon.gtLclNum; // Is the memory op in the source position? if (src->isContainedLclField()) { if (instrHasImplicitRegPairDest(ins)) { // src is a stack based local variable // dst is implicit - RDX:RAX emitIns_S(ins, attr, varNum, offset); } else { // src is a stack based local variable // dst is a register emitIns_R_S(ins, attr, dst->gtRegNum, varNum, offset); } } else // The memory op is in the dest position. { assert(dst->gtRegNum == REG_NA); // src could be int or reg if (src->isContainedIntOrIImmed()) { // src is a contained immediate // dst is a stack based local variable emitIns_S_I(ins, attr, varNum, offset, (int) src->gtIntConCommon.IconValue()); } else { // src is a register // dst is a stack based local variable assert(!src->isContained()); emitIns_S_R(ins, attr, src->gtRegNum, varNum, offset); } } return dst->gtRegNum; } // Now we are left with only the cases where the instruction has some kind of a memory operand // assert(mem != nullptr); // Next handle the class static variable cases // if (memBase->OperGet() == GT_CLS_VAR_ADDR) { // Is the memory op in the source position? if (mem == src) { if (instrHasImplicitRegPairDest(ins)) { // src is a class static variable // dst is implicit - RDX:RAX emitIns_C(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0); } else { // src is a class static variable // dst is a register emitIns_R_C(ins, attr, dst->gtRegNum, memBase->gtClsVar.gtClsVarHnd, 0); codeGen->genProduceReg(dst); } } else // The memory op is in the dest position.
{ if (src->isContained()) { // src is a contained immediate // dst is a class static variable emitIns_C_I(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0, (int) src->gtIntConCommon.IconValue()); } else { // src is a register // dst is a class static variable emitIns_C_R(ins, attr, memBase->gtClsVar.gtClsVarHnd, src->gtRegNum, 0); } } return dst->gtRegNum; } // Finally we handle addressing modes case [regBase + regIndex*scale + const] // // We will have to construct and fill in the instruction descriptor for this case // instrDesc* id = nullptr; // Is the src an immediate constant? if (intConst) { // [mem], imm id = emitNewInstrAmdCns(attr, mem->Offset(), (int) intConst->IconValue()); } else // [mem], reg OR reg, [mem] { size_t offset = mem->Offset(); id = emitNewInstrAmd(attr, offset); id->idIns(ins); GenTree* regTree = (src == mem) ? dst : src; // there must be one non-contained src assert(!regTree->isContained()); id->idReg1(regTree->gtRegNum); } assert(id != nullptr); id->idIns(ins); // Set the instruction. emitHandleMemOp(mem, id, mem == src); // Determine the instruction format // insFormat fmt = IF_NONE; if (mem == dst) { if (!src->isContained()) { fmt = emitInsModeFormat(ins, IF_ARD_RRD); } else { fmt = emitInsModeFormat(ins, IF_ARD_CNS); } } else { assert(!dst->isContained()); if (instrHasImplicitRegPairDest(ins)) { fmt = emitInsModeFormat(ins, IF_ARD); } else { fmt = emitInsModeFormat(ins, IF_RRD_ARD); } } assert(fmt != IF_NONE); id->idInsFmt(fmt); // Determine the instruction size // UNATIVE_OFFSET sz = 0; if (intConst) { sz = emitInsSizeAM(id, insCodeMI(ins), (int) intConst->IconValue()); } else { if (mem == dst) { sz = emitInsSizeAM(id, insCodeMR(ins)); } else // mem == src { if (instrHasImplicitRegPairDest(ins)) { sz = emitInsSizeAM(id, insCode(ins)); } else { sz = emitInsSizeAM(id, insCodeRM(ins)); } } } assert(sz != 0); regNumber result = REG_NA; if (src == mem) { result = dst->gtRegNum; } id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; return result; } /** Emit logic for Read-Modify-Write Instructions * Responsible for encoding a single instruction that will perform an operation of the form * *addr op= operand, for example: * ADD [RAX], RCX * * Preconditions: Lowering has taken care of recognizing the StoreInd pattern of * StoreInd( AddressTree, BinOp( Ind( AddressTree ), Operand ) ) * The address to store is already sitting in a register. * * This is a no-produce operation, meaning that no register output will * be produced for future use in the code stream. * * The caller is responsible for calling genConsumeReg() on all source registers, and * genProduceReg() on the target register, if any.
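 *
 * For illustration, a lowered tree of the shape
 *    StoreInd(addr, Add(Ind(addr), 5))
 * is emitted here as the single instruction "add dword ptr [rax], 5"
 * (assuming the address happens to live in RAX), rather than as a separate
 * load, add, and store.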
*/ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTree* dstAddr, GenTree* src) { assert(dstAddr->isIndir()); GenTreeIndir* indir = dstAddr->AsIndir(); GenTreePtr mem = indir->Addr(); assert(mem->gtSkipReloadOrCopy()->OperGet() == GT_LCL_VAR || mem->gtSkipReloadOrCopy()->OperGet() == GT_LCL_VAR_ADDR || mem->gtSkipReloadOrCopy()->OperGet() == GT_LEA || mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR || mem->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT); instrDesc* id = nullptr; UNATIVE_OFFSET sz; size_t offset = 0; if (mem->gtSkipReloadOrCopy()->OperGet() != GT_CLS_VAR_ADDR) { offset = indir->Offset(); } // find immed (if any) - it cannot be a dst GenTreeIntConCommon* intConst = nullptr; if (src->isContainedIntOrIImmed()) { intConst = src->AsIntConCommon(); } if (intConst != nullptr) { id = emitNewInstrAmdCns(attr, offset, (int) intConst->IconValue()); } else { // ind, reg id = emitNewInstrAmd(attr, offset); // there must be one non-contained src assert(!src->isContained()); id->idReg1(src->gtRegNum); // fmt is set below } id->idIns(ins); emitHandleMemOp(dstAddr, id, false); if (src->isContainedIntOrIImmed()) { if (mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR) { id->idInsFmt(IF_MRW_CNS); } else { id->idInsFmt(IF_ARW_CNS); } sz = emitInsSizeAM(id, insCodeMI(ins), (int) intConst->IconValue()); } else { if (mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR) { id->idInsFmt(IF_MRW_RRD); } else { id->idInsFmt(IF_ARW_RRD); } sz = emitInsSizeAM(id, insCodeMR(ins)); } id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } /** Emit logic for Read-Modify-Write Instructions on unary operators * Responsible for encoding a single instruction that will perform an operation of the form * *addr = UnaryOp( *addr ), for example: * NOT [RAX] * * Preconditions: Lowering has taken care of recognizing the StoreInd pattern of * StoreInd( AddressTree, UnOp( Ind( AddressTree ) ) ) * The address to store is already sitting in a register. * * This is a no-produce operation, meaning that no register output will * be produced for future use in the code stream. */ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTree* dstAddr) { assert(ins == INS_not || ins == INS_neg); assert(dstAddr->isIndir()); GenTreeIndir* indir = dstAddr->AsIndir(); GenTreePtr mem = indir->Addr(); assert(mem->gtSkipReloadOrCopy()->OperGet() == GT_LCL_VAR || mem->gtSkipReloadOrCopy()->OperGet() == GT_LCL_VAR_ADDR || mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR || mem->gtSkipReloadOrCopy()->OperGet() == GT_LEA || mem->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT); size_t offset = 0; if (mem->gtSkipReloadOrCopy()->OperGet() != GT_CLS_VAR_ADDR) { offset = indir->Offset(); } instrDesc* id = emitNewInstrAmd(attr, offset); emitHandleMemOp(dstAddr, id, true); id->idIns(ins); if (mem->OperGet() == GT_CLS_VAR_ADDR) { id->idInsFmt(IF_MRW); } else { id->idInsFmt(IF_ARW); } UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } #endif // !LEGACY_BACKEND #if FEATURE_STACK_FP_X87 /***************************************************************************** * * Add an instruction of the form "op ST(0),ST(n)".
*/ void emitter::emitIns_F0_F(instruction ins, unsigned fpreg) { UNATIVE_OFFSET sz = 2; instrDesc* id = emitNewInstr(); insFormat fmt = emitInsModeFormat(ins, IF_TRD_FRD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1((regNumber)fpreg); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * Add an instruction of the form "op ST(n),ST(0)". */ void emitter::emitIns_F_F0(instruction ins, unsigned fpreg) { UNATIVE_OFFSET sz = 2; instrDesc* id = emitNewInstr(); insFormat fmt = emitInsModeFormat(ins, IF_FRD_TRD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1((regNumber)fpreg); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } #endif // FEATURE_STACK_FP_X87 /***************************************************************************** * * Add an instruction referencing a single register. */ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) { emitAttr size = EA_SIZE(attr); assert(size <= EA_PTRSIZE); noway_assert(emitVerifyEncodable(ins, size, reg)); UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrTiny(attr); switch (ins) { case INS_inc: case INS_dec: #ifdef _TARGET_AMD64_ sz = 2; // x64 has no 1-byte opcode (it is the same encoding as the REX prefix) #else // !_TARGET_AMD64_ if (size == EA_1BYTE) sz = 2; // Use the long form as the small one has no 'w' bit else sz = 1; // Use short form #endif // !_TARGET_AMD64_ break; case INS_pop: case INS_pop_hide: case INS_push: case INS_push_hide: /* We don't currently push/pop small values */ assert(size == EA_PTRSIZE); sz = 1; break; default: /* All the sixteen INS_setCCs are contiguous. */ if (INS_seto <= ins && ins <= INS_setg) { // Rough check that we used the endpoints for the range check assert(INS_seto + 0xF == INS_setg); // The caller must specify EA_1BYTE for 'attr' assert(attr == EA_1BYTE); /* We expect this to always be a 'big' opcode */ assert(insEncodeMRreg(ins, reg, attr, insCodeMR(ins)) & 0x00FF0000); size = attr; sz = 3; break; } else { sz = 2; break; } } insFormat fmt = emitInsModeFormat(ins, IF_RRD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(reg); // 16-bit operand instructions will need a prefix. // This refers to 66h size prefix override. if (size == EA_2BYTE) { sz += 1; } // Vex bytes sz += emitGetVexPrefixAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins))); // REX byte if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, attr)) sz += emitGetRexPrefixSize(ins); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } /***************************************************************************** * * Add an instruction referencing a register and a constant. 
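 *
 * A few illustrative encodings that the size switch below accounts for:
 *    add eax, 4           ; imm8 form, 3 bytes (83 C0 04)
 *    add eax, 0x12345678  ; EAX short form + imm32, 5 bytes (05 78 56 34 12)
 *    shl eax, 3           ; IF_RRW_SHF, 3 bytes (C1 E0 03)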
*/ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val) { emitAttr size = EA_SIZE(attr); // Allow emitting SSE2/AVX SIMD instructions of R_I form that can specify EA_16BYTE or EA_32BYTE assert(size <= EA_PTRSIZE || IsSSEOrAVXInstruction(ins)); noway_assert(emitVerifyEncodable(ins, size, reg)); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(size < EA_8BYTE || ins == INS_mov || ((int)val == val && !EA_IS_CNS_RELOC(attr))); #endif UNATIVE_OFFSET sz; instrDesc* id; insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS); bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); // Figure out the size of the instruction switch (ins) { case INS_mov: #ifdef _TARGET_AMD64_ // mov reg, imm64 is equivalent to mov reg, imm32 if the high order bits are all 0 // and this isn't a reloc constant. if (((size > EA_4BYTE) && (0 == (val & 0xFFFFFFFF00000000LL))) && !EA_IS_CNS_RELOC(attr)) { attr = size = EA_4BYTE; } if (size > EA_4BYTE) { sz = 9; // Really it is 10, but we'll add one more later break; } #endif // _TARGET_AMD64_ sz = 5; break; case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: assert(val != 1); fmt = IF_RRW_SHF; sz = 3; val &= 0x7F; valInByte = true; // shift amount always placed in a byte break; default: if (EA_IS_CNS_RELOC(attr)) valInByte = false; // relocs can't be placed in a byte if (valInByte) { if (IsSSEOrAVXInstruction(ins)) sz = 5; else sz = 3; } else { if (reg == REG_EAX && !instrIs3opImul(ins)) { sz = 1; } else { sz = 2; } #ifdef _TARGET_AMD64_ if (size > EA_4BYTE) { // We special-case anything that takes a full 8-byte constant. sz += 4; } else #endif // _TARGET_AMD64_ { sz += EA_SIZE_IN_BYTES(attr); } } break; } // Vex prefix size sz += emitGetVexPrefixSize(ins, attr); // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a 64-bit sized // operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target register. So we also // need to check if that built-in register is an extended register. if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins)) { sz += emitGetRexPrefixSize(ins); } #ifdef _TARGET_X86_ assert(reg < 8); #endif id = emitNewInstrSC(attr, val); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(reg); // 16-bit operand instructions will need a prefix if (size == EA_2BYTE) sz += 1; id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (reg == REG_ESP) { if (emitCntStackDepth) { if (ins == INS_sub) { S_UINT32 newStackLvl(emitCurStackLvl); newStackLvl += S_UINT32(val); noway_assert(!newStackLvl.IsOverflow()); emitCurStackLvl = newStackLvl.Value(); if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_add) { S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val); noway_assert(!newStackLvl.IsOverflow()); emitCurStackLvl = newStackLvl.Value(); } } } #endif // !FEATURE_FIXED_OUT_ARGS } /***************************************************************************** * * Add an instruction referencing an integer constant. 
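 *
 * For example, "push 10" fits the imm8 form (6A 0A, 2 bytes) while
 * "push 1000" needs the imm32 form (68 E8 03 00 00, 5 bytes) - exactly the
 * "valInByte ? 2 : 5" choice made below.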
*/ void emitter::emitIns_I(instruction ins, emitAttr attr, int val ) { UNATIVE_OFFSET sz; instrDesc* id; bool valInByte = ((signed char)val == val); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif if (EA_IS_CNS_RELOC(attr)) valInByte = false; // relocs can't be placed in a byte switch (ins) { case INS_loop: case INS_jge: sz = 2; break; case INS_ret: sz = 3; break; case INS_push_hide: case INS_push: sz = valInByte ? 2 : 5; break; default: NO_WAY("unexpected instruction"); } id = emitNewInstrSC(attr, val); id->idIns(ins); id->idInsFmt(IF_CNS); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } #endif // !FEATURE_FIXED_OUT_ARGS } /***************************************************************************** * * Add a "jump through a table" instruction. */ void emitter::emitIns_IJ(emitAttr attr, regNumber reg, unsigned base) { assert(EA_SIZE(attr) == EA_4BYTE); UNATIVE_OFFSET sz = 3 + 4; const instruction ins = INS_i_jmp; if (IsExtendedReg(reg, attr)) sz += emitGetRexPrefixSize(ins); instrDesc* id = emitNewInstrAmd(attr, base); id->idIns(ins); id->idInsFmt(IF_ARD); id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = reg; id->idAddr()->iiaAddrMode.amScale = emitter::OPSZP; #ifdef DEBUG id->idDebugOnlyInfo()->idMemCookie = base; #endif id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * Add an instruction with a static data member operand. If 'size' is 0, the * instruction operates on the address of the static member instead of its * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]"). */ void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs) { #if RELOC_SUPPORT // Static always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); #endif UNATIVE_OFFSET sz; instrDesc* id; /* Are we pushing the offset of the class variable? */ if (EA_IS_OFFSET(attr)) { assert(ins == INS_push); sz = 1 + sizeof(void*); id = emitNewInstrDsp(EA_1BYTE, offs); id->idIns(ins); id->idInsFmt(IF_MRD_OFF); } else { #if FEATURE_STACK_FP_X87 insFormat fmt = emitInsModeFormat(ins, IF_MRD, IF_TRD_MRD, IF_MWR_TRD); #else // !FEATURE_STACK_FP_X87 insFormat fmt = emitInsModeFormat(ins, IF_MRD); #endif // !FEATURE_STACK_FP_X87 id = emitNewInstrDsp(attr, offs); id->idIns(ins); id->idInsFmt(fmt); sz = emitInsSizeCV(id, insCodeMR(ins)); } // Vex prefix size sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); if (TakesRexWPrefix(ins, attr)) { // REX.W prefix sz += emitGetRexPrefixSize(ins); } id->idAddr()->iiaFieldHnd = fldHnd; id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } /***************************************************************************** * * Add an instruction with two register operands. 
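 *
 * For example, emitIns_R_R(INS_xor, EA_4BYTE, REG_EAX, REG_EAX) emits the
 * classic 2-byte "xor eax, eax" register-zeroing idiom; 64-bit or extended
 * register operands grow the size via the REX accounting in emitInsSizeRR.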
*/ void emitter::emitIns_R_R (instruction ins, emitAttr attr, regNumber reg1, regNumber reg2) { /* We don't want to generate any useless mov instructions! */ assert(ins != INS_mov || reg1 != reg2); emitAttr size = EA_SIZE(attr); assert(size <= EA_32BYTE); noway_assert(emitVerifyEncodable(ins, size, reg1, reg2)); UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr); /* Special case: "XCHG" uses a different format */ insFormat fmt = (ins == INS_xchg) ? IF_RRW_RRW : emitInsModeFormat(ins, IF_RRD_RRD); instrDesc* id = emitNewInstrTiny(attr); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(reg1); id->idReg2(reg2); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * Add an instruction with two register operands and an integer constant. */ void emitter::emitIns_R_R_I (instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival) { // The SSE2 version requires 5 bytes and the AVX version 6 bytes UNATIVE_OFFSET sz = 4; if (IsSSEOrAVXInstruction(ins)) { sz = UseAVX() ? 6 : 5; } #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif instrDesc* id = emitNewInstrSC(attr, ival); // REX prefix if (IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr)) { sz += emitGetRexPrefixSize(ins); } id->idIns(ins); id->idInsFmt(IF_RRW_RRW_CNS); id->idReg1(reg1); id->idReg2(reg2); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } #ifdef FEATURE_AVX_SUPPORT /***************************************************************************** * * Add an instruction with three register operands. */ void emitter::emitIns_R_R_R (instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2) { assert(IsSSEOrAVXInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins)); // Currently the VEX prefix is only emitted in its 3-byte form. // size = VEX + opcode + ModR/M = 3 + 1 + 1 = 5 // TODO-XArch-CQ: We should create a function that can calculate the size of all kinds of AVX instructions in the future UNATIVE_OFFSET sz = 5; instrDesc* id = emitNewInstr(attr); id->idIns(ins); id->idInsFmt(IF_RWR_RRD_RRD); id->idReg1(targetReg); id->idReg2(reg1); id->idReg3(reg2); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } #endif /***************************************************************************** * * Add an instruction with a register + static member operands. */ void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs) { #if RELOC_SUPPORT // Statics always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); #endif emitAttr size = EA_SIZE(attr); assert(size <= EA_32BYTE); noway_assert(emitVerifyEncodable(ins, size, reg)); UNATIVE_OFFSET sz; instrDesc* id; // Are we MOV'ing the offset of the class variable into EAX? if (EA_IS_OFFSET(attr)) { id = emitNewInstrDsp(EA_1BYTE, offs); id->idIns(ins); id->idInsFmt(IF_RWR_MRD_OFF); assert(ins == INS_mov && reg == REG_EAX); // Special case: "mov eax, [addr]" is smaller sz = 1 + sizeof(void*); } else { insFormat fmt = emitInsModeFormat(ins, IF_RRD_MRD); id = emitNewInstrDsp(attr, offs); id->idIns(ins); id->idInsFmt(fmt); #ifdef _TARGET_X86_ // Special case: "mov eax, [addr]" is smaller.
// This case is not enabled for amd64 as it always uses RIP-relative addressing // and it results in a smaller instruction size than encoding the 64-bit addr in the // instruction. if (ins == INS_mov && reg == REG_EAX) { sz = 1 + sizeof(void*); if (size == EA_2BYTE) sz += 1; } else #endif //_TARGET_X86_ { sz = emitInsSizeCV(id, insCodeRM(ins)); } // Special case: mov reg, fs:[ddd] if (fldHnd == FLD_GLOBAL_FS) sz += 1; } // VEX prefix sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)); // REX prefix if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr)) { sz += emitGetRexPrefixSize(ins); } id->idReg1(reg); id->idCodeSize(sz); id->idAddr()->iiaFieldHnd = fldHnd; dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * Add an instruction with a static member + register operands. */ void emitter::emitIns_C_R (instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs) { #if RELOC_SUPPORT // Statics always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); #endif emitAttr size = EA_SIZE(attr); #if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87 // For x86 RyuJIT it is valid to storeind a double sized operand in an xmm reg to memory assert(size <= EA_8BYTE); #else assert(size <= EA_PTRSIZE); #endif noway_assert(emitVerifyEncodable(ins, size, reg)); instrDesc* id = emitNewInstrDsp(attr, offs); insFormat fmt = emitInsModeFormat(ins, IF_MRD_RRD); id->idIns(ins); id->idInsFmt(fmt); UNATIVE_OFFSET sz; #ifdef _TARGET_X86_ // Special case: "mov [addr], EAX" is smaller. // This case is not enabled for amd64 as it always uses RIP-relative addressing // and it results in a smaller instruction size than encoding the 64-bit addr in // the instruction. if (ins == INS_mov && reg == REG_EAX) { sz = 1 + sizeof(void*); if (size == EA_2BYTE) sz += 1; } else #endif //_TARGET_X86_ { sz = emitInsSizeCV(id, insCodeMR(ins)); } // Special case: mov fs:[ddd], reg if (fldHnd == FLD_GLOBAL_FS) { sz += 1; } // VEX prefix sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); // REX prefix if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr)) { sz += emitGetRexPrefixSize(ins); } id->idReg1(reg); id->idCodeSize(sz); id->idAddr()->iiaFieldHnd = fldHnd; dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * Add an instruction with a static member + constant.
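 *
 * For illustration, a constant shift of a static such as
 *    shl dword ptr [clsVar], 3
 * arrives here as INS_shl_N with val == 3 and is given the IF_MRW_SHF
 * format by the switch below.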
*/ void emitter::emitIns_C_I (instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val) { #if RELOC_SUPPORT // Statics always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); #endif insFormat fmt; switch (ins) { case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: assert(val != 1); fmt = IF_MRW_SHF; val &= 0x7F; break; default: fmt = emitInsModeFormat(ins, IF_MRD_CNS); break; } instrDesc* id = emitNewInstrCnsDsp(attr, val, offs); id->idIns(ins); id->idInsFmt(fmt); size_t code = insCodeMI(ins); UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val); #ifdef _TARGET_AMD64_ // Vex prefix sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins)); // REX prefix, if not already included in "code" if (TakesRexWPrefix(ins, attr) && (code & REX_PREFIX_MASK) == 0) { sz += emitGetRexPrefixSize(ins); } #endif // _TARGET_AMD64_ id->idAddr()->iiaFieldHnd = fldHnd; id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_J_S (instruction ins, emitAttr attr, BasicBlock* dst, int varx, int offs) { assert(ins == INS_mov); assert(dst->bbFlags & BBF_JMP_TARGET); instrDescLbl* id = emitNewInstrLbl(); id->idIns(ins); id->idInsFmt(IF_SWR_LABEL); id->idAddr()->iiaBBlabel = dst; /* The label reference is always long */ id->idjShort = 0; id->idjKeepLong = 1; /* Record the current IG and offset within it */ id->idjIG = emitCurIG; id->idjOffs = emitCurIGsize; /* Append this instruction to this IG's jump list */ id->idjNext = emitCurIGjmpList; emitCurIGjmpList = id; UNATIVE_OFFSET sz = sizeof(INT32) + emitInsSizeSV(insCodeMI(ins), varx, offs); id->dstLclVar.initLclVarAddr(varx, offs); #ifdef DEBUG id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; #endif #if EMITTER_STATS emitTotalIGjmps++; #endif #if RELOC_SUPPORT // Storing the address of a basicBlock will need a reloc // as the instruction uses the absolute address, // not a relative address. // // On Amd64, absolute code addresses should always go through a reloc // to be encoded as a RIP rel32 offset. #ifndef _TARGET_AMD64_ if (emitComp->opts.compReloc) #endif { id->idSetIsDspReloc(); } #endif //RELOC_SUPPORT id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * Add a label instruction. */ void emitter::emitIns_R_L (instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) { assert(ins == INS_lea); assert(dst->bbFlags & BBF_JMP_TARGET); instrDescJmp* id = emitNewInstrJmp(); id->idIns(ins); id->idReg1(reg); id->idInsFmt(IF_RWR_LABEL); id->idOpSize(EA_SIZE(attr)); // emitNewInstrJmp() sets the size (incorrectly) to EA_1BYTE id->idAddr()->iiaBBlabel = dst; /* The label reference is always long */ id->idjShort = 0; id->idjKeepLong = 1; /* Record the current IG and offset within it */ id->idjIG = emitCurIG; id->idjOffs = emitCurIGsize; /* Append this instruction to this IG's jump list */ id->idjNext = emitCurIGjmpList; emitCurIGjmpList = id; #ifdef DEBUG // Mark the catch return if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET) { id->idDebugOnlyInfo()->idCatchRet = true; } #endif // DEBUG #if EMITTER_STATS emitTotalIGjmps++; #endif UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); // Set the relocation flags - these give a hint to the zapper to perform // relocation of the specified 32-bit address.
id->idSetRelocFlags(attr); dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * The following add instructions referencing address modes. */ void emitter::emitIns_I_AR (instruction ins, emitAttr attr, int val, regNumber reg, int disp, int memCookie, void* clsCookie) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif insFormat fmt; switch (ins) { case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: #ifdef _TARGET_X86_ assert(val != 1); #endif fmt = IF_ARW_SHF; val &= 0x7F; break; default: fmt = emitInsModeFormat(ins, IF_ARD_CNS); break; } /* Useful if you want to trap moves with 0 constant if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE) { printf("MOV 0\n"); } */ UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmdCns(attr, disp, val); id->idIns(ins); id->idInsFmt(fmt); assert((memCookie == NULL) == (clsCookie == NULL)); #ifdef DEBUG id->idDebugOnlyInfo()->idMemCookie = memCookie; id->idDebugOnlyInfo()->idClsCookie = clsCookie; #endif id->idAddr()->iiaAddrMode.amBaseReg = reg; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMI(ins), val); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_I_AI (instruction ins, emitAttr attr, int val, ssize_t disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif insFormat fmt; switch (ins) { case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: assert(val != 1); fmt = IF_ARW_SHF; val &= 0x7F; break; default: fmt = emitInsModeFormat(ins, IF_ARD_CNS); break; } /* Useful if you want to trap moves with 0 constant if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE) { printf("MOV 0\n"); } */ UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmdCns(attr, disp, val); id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMI(ins), val); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_R_AR (instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp, int memCookie, void* clsCookie) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE) && (ireg != REG_NA)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); if (ins == INS_lea) { if (ireg == base && disp == 0) { // Maybe the emitter is not the common place for this optimization, but it's a better choke point // than all the emitIns(ins, tree) call sites, which we would otherwise have to analyze individually // return; } } UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); assert((memCookie == NULL) == (clsCookie == NULL)); #ifdef DEBUG
id->idDebugOnlyInfo()->idMemCookie = memCookie; id->idDebugOnlyInfo()->idClsCookie = clsCookie; #endif id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_R_AI (instruction ins, emitAttr attr, regNumber ireg, ssize_t disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_AR_R (instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp, int memCookie, void* clsCookie) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt; if (ireg == REG_NA) { #if FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD); #else // !FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD); #endif // !FEATURE_STACK_FP_X87 } else { fmt = emitInsModeFormat(ins, IF_ARD_RRD); assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); id->idReg1(ireg); } id->idIns(ins); id->idInsFmt(fmt); assert((memCookie == NULL) == (clsCookie == NULL)); #ifdef DEBUG id->idDebugOnlyInfo()->idMemCookie = memCookie; id->idDebugOnlyInfo()->idClsCookie = clsCookie; #endif id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } void emitter::emitIns_AI_R (instruction ins, emitAttr attr, regNumber ireg, ssize_t disp) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt; if (ireg == REG_NA) { #if FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD); #else // FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD); #endif // FEATURE_STACK_FP_X87 } else { fmt = emitInsModeFormat(ins, IF_ARD_RRD); assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); id->idReg1(ireg); } id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= 
emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } void emitter::emitIns_I_ARR (instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif insFormat fmt; switch (ins) { case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: assert(val != 1); fmt = IF_ARW_SHF; val &= 0x7F; break; default: fmt = emitInsModeFormat(ins, IF_ARD_CNS); break; } UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmdCns(attr, disp, val); id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = reg; id->idAddr()->iiaAddrMode.amIndxReg = rg2; id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMI(ins), val); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, int disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = index; id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_ARR_R (instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber index, int disp) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt; if (ireg == REG_NA) { #if FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD); #else // FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD); #endif // FEATURE_STACK_FP_X87 } else { fmt = emitInsModeFormat(ins, IF_ARD_RRD); assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); id->idReg1(ireg); } id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = reg; id->idAddr()->iiaAddrMode.amIndxReg = index; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1); assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } void emitter::emitIns_I_ARX (instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, unsigned mul, int disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 
4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif insFormat fmt; switch (ins) { case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: assert(val != 1); fmt = IF_ARW_SHF; val &= 0x7F; break; default: fmt = emitInsModeFormat(ins, IF_ARD_CNS); break; } UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmdCns(attr, disp, val); id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = reg; id->idAddr()->iiaAddrMode.amIndxReg = rg2; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMI(ins), val); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_R_ARX (instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = index; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_ARX_R (instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt; if (ireg == REG_NA) { #if FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD); #else // !FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD); #endif // !FEATURE_STACK_FP_X87 } else { fmt = emitInsModeFormat(ins, IF_ARD_RRD); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); id->idReg1(ireg); } id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = index; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } void emitter::emitIns_I_AX (instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif insFormat fmt; switch (ins) { case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: assert(val != 1); fmt = IF_ARW_SHF; val &= 0x7F; break; default: fmt = emitInsModeFormat(ins, IF_ARD_CNS); break; } UNATIVE_OFFSET sz; instrDesc* id = 
emitNewInstrAmdCns(attr, disp, val); id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = reg; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMI(ins), val); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_R_AX (instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = reg; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_AX_R (instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt; if (ireg == REG_NA) { #if FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD); #else // !FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD); #endif // !FEATURE_STACK_FP_X87 } else { fmt = emitInsModeFormat(ins, IF_ARD_RRD); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); id->idReg1(ireg); } id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = reg; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } /***************************************************************************** * * The following add instructions referencing stack-based local variables. 
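 *
 * For example (illustrative), spilling a register to a local's home slot is
 * requested as emitIns_S_R(INS_mov, EA_PTRSIZE, reg, varNum, 0); the actual
 * frame offset of varNum is resolved later, once the stack layout is final.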
*/ void emitter::emitIns_S (instruction ins, emitAttr attr, int varx, int offs) { instrDesc* id = emitNewInstr(attr); UNATIVE_OFFSET sz = emitInsSizeSV(insCodeMR(ins), varx, offs); #if FEATURE_STACK_FP_X87 insFormat fmt = emitInsModeFormat(ins, IF_SRD, IF_TRD_SRD, IF_SWR_TRD); #else // !FEATURE_STACK_FP_X87 insFormat fmt = emitInsModeFormat(ins, IF_SRD); #endif // !FEATURE_STACK_FP_X87 // 16-bit operand instructions will need a prefix if (EA_SIZE(attr) == EA_2BYTE) { sz += 1; } // VEX prefix sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); // 64-bit operand instructions will need a REX.W prefix if (TakesRexWPrefix(ins, attr)) { sz += emitGetRexPrefixSize(ins); } id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); id->idCodeSize(sz); #ifdef DEBUG id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; #endif dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } void emitter::emitIns_S_R (instruction ins, emitAttr attr, regNumber ireg, int varx, int offs) { instrDesc* id = emitNewInstr(attr); UNATIVE_OFFSET sz = emitInsSizeSV(insCodeMR(ins), varx, offs); insFormat fmt = emitInsModeFormat(ins, IF_SRD_RRD); // 16-bit operand instructions will need a prefix if (EA_SIZE(attr) == EA_2BYTE) { sz++; } // VEX prefix sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); // 64-bit operand instructions will need a REX.W prefix if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr)) { sz += emitGetRexPrefixSize(ins); } id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); id->idCodeSize(sz); #ifdef DEBUG id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; #endif dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_R_S (instruction ins, emitAttr attr, regNumber ireg, int varx, int offs) { emitAttr size = EA_SIZE(attr); noway_assert(emitVerifyEncodable(ins, size, ireg)); instrDesc* id = emitNewInstr(attr); UNATIVE_OFFSET sz = emitInsSizeSV(insCodeRM(ins), varx, offs); insFormat fmt = emitInsModeFormat(ins, IF_RRD_SRD); // Most 16-bit operand instructions need a prefix if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx) { sz++; } // VEX prefix sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)); // 64-bit operand instructions will need a REX.W prefix if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr)) { sz += emitGetRexPrefixSize(ins); } id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); id->idCodeSize(sz); #ifdef DEBUG id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; #endif dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_S_I (instruction ins, emitAttr attr, int varx, int offs, int val) { #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif insFormat fmt; switch (ins) { case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: assert(val != 1); fmt = IF_SRW_SHF; val &= 0x7F; break; default: fmt = emitInsModeFormat(ins, IF_SRD_CNS); break; } instrDesc* id = emitNewInstrCns(attr, val); 
id->idIns(ins); id->idInsFmt(fmt); UNATIVE_OFFSET sz = emitInsSizeSV(id, varx, offs, val); // VEX prefix sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins)); // 64-bit operand instructions will need a REX.W prefix if (TakesRexWPrefix(ins, attr)) { sz += emitGetRexPrefixSize(ins); } id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); id->idCodeSize(sz); #ifdef DEBUG id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; #endif dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * Record that a jump instruction uses the short encoding * */ void emitter::emitSetShortJump(instrDescJmp* id) { if (id->idjKeepLong) return; id->idjShort = true; } /***************************************************************************** * * Add a jmp instruction. */ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 */) { UNATIVE_OFFSET sz; instrDescJmp* id = emitNewInstrJmp(); assert(dst->bbFlags & BBF_JMP_TARGET); id->idIns(ins); id->idInsFmt(IF_LABEL); id->idAddr()->iiaBBlabel = dst; #ifdef DEBUG // Mark the finally call if (ins == INS_call && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY) { id->idDebugOnlyInfo()->idFinallyCall = true; } #endif // DEBUG /* Assume the jump will be long */ id->idjShort = 0; id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); /* Record the jump's IG and offset within it */ id->idjIG = emitCurIG; id->idjOffs = emitCurIGsize; /* Append this jump to this IG's jump list */ id->idjNext = emitCurIGjmpList; emitCurIGjmpList = id; #if EMITTER_STATS emitTotalIGjmps++; #endif /* Figure out the max. size of the jump/call instruction */ if (ins == INS_call) { sz = CALL_INST_SIZE; } else if (ins == INS_push || ins == INS_push_hide) { #if RELOC_SUPPORT // Pushing the address of a basicBlock will need a reloc // as the instruction uses the absolute address, // not a relative address if (emitComp->opts.compReloc) id->idSetIsDspReloc(); #endif sz = PUSH_INST_SIZE; } else { insGroup* tgt; /* This is a jump - assume the worst */ sz = (ins == INS_jmp) ? JMP_SIZE_LARGE : JCC_SIZE_LARGE; /* Can we guess at the jump distance? */ tgt = (insGroup*)emitCodeGetCookie(dst); if (tgt) { int extra; UNATIVE_OFFSET srcOffs; int jmpDist; assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL); /* This is a backward jump - figure out the distance */ srcOffs = emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL; /* Compute the distance estimate */ jmpDist = srcOffs - tgt->igOffs; assert((int)jmpDist > 0); /* How much beyond the max. short distance does the jump go? 
 */

        extra = jmpDist + JMP_DIST_SMALL_MAX_NEG;

#if DEBUG_EMIT
        if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
        {
            if (INTERESTING_JUMP_NUM == 0)
                printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
            printf("[0] Jump source is at %08X\n", srcOffs);
            printf("[0] Label block is at %08X\n", tgt->igOffs);
            printf("[0] Jump distance  - %04X\n", jmpDist);
            if (extra > 0)
                printf("[0] Distance excess = %d  \n", extra);
        }
#endif

        if (extra <= 0 && !id->idjKeepLong)
        {
            /* Wonderful - this jump surely will be short */

            emitSetShortJump(id);
            sz = JMP_SIZE_SMALL;
        }
    }
#if DEBUG_EMIT
    else
    {
        if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
        {
            if (INTERESTING_JUMP_NUM == 0)
                printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
            printf("[0] Jump source is at %04X/%08X\n", emitCurIGsize,
                   emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL);
            printf("[0] Label block is unknown\n");
        }
    }
#endif
    }

    id->idCodeSize(sz);

    dispIns(id);
    emitCurIGsize += sz;

#if !FEATURE_FIXED_OUT_ARGS
    if (ins == INS_push)
    {
        emitCurStackLvl += emitCntStackDepth;

        if (emitMaxStackDepth < emitCurStackLvl)
            emitMaxStackDepth = emitCurStackLvl;
    }
#endif // !FEATURE_FIXED_OUT_ARGS
}

/*****************************************************************************
 *
 *  Add a call instruction (direct or indirect).
 *      argSize<0 means that the caller will pop the arguments
 *
 *  The other arguments are interpreted depending on callType as shown:
 *  Unless otherwise specified, ireg, xreg, xmul, disp should have default values.
 *
 *  EC_FUNC_TOKEN       : addr is the method address
 *  EC_FUNC_TOKEN_INDIR : addr is the indirect method address
 *  EC_FUNC_ADDR        : addr is the absolute address of the function
 *  EC_FUNC_VIRTUAL     : "call [ireg+disp]"
 *
 *  If callType is one of these emitCallTypes, addr has to be NULL.
 *  EC_INDIR_R          : "call ireg".
 *  EC_INDIR_SR         : "call lcl" (eg. call [ebp-8]).
 *  EC_INDIR_C          : "call clsVar" (eg. call [clsVarAddr])
 *  EC_INDIR_ARD        : "call [ireg+xreg*xmul+disp]"
 *
 */

void emitter::emitIns_Call(EmitCallType          callType,
                           CORINFO_METHOD_HANDLE methHnd,
                           INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
                           void*            addr,
                           ssize_t          argSize,
                           emitAttr         retSize,
                           VARSET_VALARG_TP ptrVars,
                           regMaskTP        gcrefRegs,
                           regMaskTP        byrefRegs,
                           IL_OFFSETX       ilOffset /* = BAD_IL_OFFSET */,
                           regNumber        ireg     /* = REG_NA */,
                           regNumber        xreg     /* = REG_NA */,
                           unsigned         xmul     /* = 0 */,
                           ssize_t          disp     /* = 0 */,
                           bool             isJump   /* = false */,
                           bool             isNoGC   /* = false */)
{
    /* Sanity check the arguments depending on callType */

    assert(callType < EC_COUNT);
    assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_TOKEN_INDIR && callType != EC_FUNC_ADDR) ||
           (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
    assert(callType != EC_FUNC_VIRTUAL || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0));
    assert(callType < EC_INDIR_R || callType == EC_INDIR_ARD || callType == EC_INDIR_C || addr == NULL);
    assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
    assert(callType != EC_INDIR_SR ||
           (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp < (int)emitComp->lvaCount));
    assert(callType != EC_INDIR_C || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp != 0));

    // Our stack level should always be at least the number of argument bytes we push. Just
    // a sanity check.
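    // For example (illustrative): on x86, after pushing three 4-byte arguments the
    // current stack level is at least 12, and argSize will be 12 (or -12 when the
    // caller pops the arguments, per the comment above).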
    assert((unsigned) abs((signed)argSize) <= codeGen->genStackLevel);

#if STACK_PROBES
    if (emitComp->opts.compNeedStackProbes)
    {
        // If we've pushed more than JIT_RESERVED_STACK allows, do an additional stack probe.
        // Else, just make sure the prolog does a probe for us. The invariant we're trying
        // to get is that at any point we go out to unmanaged code, there is at least
        // CORINFO_STACKPROBE_DEPTH bytes of stack available.
        //
        // The reason why we are not doing one probe for the max size at the prolog
        // is that we don't have the max depth precomputed (it can depend on codegen),
        // and we need it at the time we generate locallocs.
        //
        // Compiler::lvaAssignFrameOffsets sets up compLclFrameSize, which takes into
        // account everything except for the arguments of a callee.
        //
        if ((sizeof(void*) +               // return address for call
             emitComp->genStackLevel +    // Current stack level. This gets reset on every
                                          // localloc and on the prolog (invariant is that
                                          // genStackLevel is 0 on basic block entry and exit and
                                          // after any alloca). genStackLevel will include any arguments
                                          // to the call, so we will insert an additional probe if
                                          // we've consumed more than JIT_RESERVED_STACK bytes
                                          // of stack, which is what the prolog probe covers (in
                                          // addition to the EE requested size)
             (emitComp->compHndBBtabCount * sizeof(void*)) // Hidden slots for calling finallys
             ) >= JIT_RESERVED_STACK)
        {
            // This happens when you have a call with a lot of arguments or a call is done
            // when there's a lot of stuff pushed on the stack (for example a call whose return
            // value is an argument of another call that has pushed stuff on the stack).
            // This shouldn't be very frequent.
            // For different values of JIT_RESERVED_STACK
            //
            // For mscorlib (109605 calls)
            //
            // 14190 probes in prologs (56760 bytes of code)
            //
            // JIT_RESERVED_STACK = 16 : 5452 extra probes
            // JIT_RESERVED_STACK = 32 : 1084 extra probes
            // JIT_RESERVED_STACK = 64 :    1 extra probe
            // JIT_RESERVED_STACK = 96 :    0 extra probes
            emitComp->genGenerateStackProbe();
        }
        else
        {
            if (emitComp->compGeneratingProlog || emitComp->compGeneratingEpilog)
            {
                if (emitComp->compStackProbePrologDone)
                {
                    // We already generated a probe and this call is not happening
                    // at a depth >= JIT_RESERVED_STACK, so nothing to do here
                }
                else
                {
                    // 3 possible ways to get here:
                    // - We are in an epilog and haven't generated a probe in the prolog.
                    //   This shouldn't happen as we don't generate any calls in epilog.
                    // - We are in the prolog, but doing a call before generating the probe.
                    //   This shouldn't happen at all.
                    // - We are in the prolog, did not generate a probe but now we need
                    //   to generate a probe because we need a call (eg: profiler). We'll
                    //   need a probe.
                    //
                    // In any case, we need a probe

                    // Ignore the profiler callback for now.
                    if (!emitComp->compIsProfilerHookNeeded())
                    {
                        assert(!"We do not expect to get here");
                        emitComp->genGenerateStackProbe();
                    }
                }
            }
            else
            {
                // We will need a probe and will generate it in the prolog
                emitComp->genNeedPrologStackProbe = true;
            }
        }
    }
#endif // STACK_PROBES

    int argCnt;

    UNATIVE_OFFSET sz;
    instrDesc*     id;

    /* This is the saved set of registers after a normal call */
    unsigned savedSet = RBM_CALLEE_SAVED;

    /* Some special helper calls have a different set of saved registers */
    if (isNoGC)
    {
        // Get the set of registers that this call kills and remove it from the saved set.
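        // For example (illustrative; the authoritative masks come from
        // compNoGCHelperCallKillSet): a no-GC helper that clobbers only a couple of
        // scratch registers leaves the GC/byref info in all other integer registers
        // valid across the call, which is why we start from RBM_ALLINT below rather
        // than RBM_CALLEE_SAVED.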
        savedSet = RBM_ALLINT & ~emitComp->compNoGCHelperCallKillSet(Compiler::eeGetHelperNum(methHnd));
    }
    else
    {
        assert(!emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
    }

    /* Trim out any callee-trashed registers from the live set */

    gcrefRegs &= savedSet;
    byrefRegs &= savedSet;

#ifdef DEBUG
    if (EMIT_GC_VERBOSE)
    {
        printf("\t\t\t\t\t\t\tCall: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
        dumpConvertedVarSet(emitComp, ptrVars);
        printf(", gcrefRegs=");
        printRegMaskInt(gcrefRegs);
        emitDispRegSet(gcrefRegs);
        printf(", byrefRegs=");
        printRegMaskInt(byrefRegs);
        emitDispRegSet(byrefRegs);
        printf("\n");
    }
#endif

    assert(argSize % sizeof(void*) == 0);
    argCnt = (int)(argSize / (ssize_t)sizeof(void*)); // we need a signed-divide

#ifdef DEBUGGING_SUPPORT
    /* Managed RetVal: emit sequence point for the call */
    if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
    {
        codeGen->genIPmappingAdd(ilOffset, false);
    }
#endif

    /*
        We need to allocate the appropriate instruction descriptor based
        on whether this is a direct/indirect call, and whether we need to
        record an updated set of live GC variables.

        The stats for a ton of classes are as follows:

            Direct call w/o  GC vars        220,216
            Indir. call w/o  GC vars        144,781

            Direct call with GC vars          9,440
            Indir. call with GC vars          5,768
     */

    if (callType >= EC_FUNC_VIRTUAL)
    {
        /* Indirect call, virtual calls */

        assert(callType == EC_FUNC_VIRTUAL || callType == EC_INDIR_R || callType == EC_INDIR_SR ||
               callType == EC_INDIR_C || callType == EC_INDIR_ARD);

        id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, retSize);
    }
    else
    {
        /* Helper/static/nonvirtual/function calls (direct or through handle),
           and calls to an absolute addr. */

        assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR || callType == EC_FUNC_ADDR);

        id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize);
    }

    /* Update the emitter's live GC ref sets */

    VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
    emitThisGCrefRegs = gcrefRegs;
    emitThisByrefRegs = byrefRegs;

    /* Set the instruction - special case: jumping to a function */

    instruction ins = INS_call;

    if (isJump)
    {
        assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR);
        if (callType == EC_FUNC_TOKEN)
            ins = INS_l_jmp;
        else
            ins = INS_i_jmp;
    }
    id->idIns(ins);

    id->idSetIsNoGC(isNoGC);

    // Record the address: method, indirection, or funcptr
    if (callType >= EC_FUNC_VIRTUAL)
    {
        // This is an indirect call (either a virtual call or func ptr call)

        switch (callType)
        {
        case EC_INDIR_C:
            // Indirect call using an absolute code address.
            // Must be marked as relocatable and is done at the
            // branch target location.
            goto CALL_ADDR_MODE;

        case EC_INDIR_R: // the address is in a register

            id->idSetIsCallRegPtr();

            __fallthrough;

        case EC_INDIR_ARD: // the address is an indirection

            goto CALL_ADDR_MODE;

        case EC_INDIR_SR: // the address is in a lcl var

            id->idInsFmt(IF_SRD);
            // disp is really a lclVarNum
            noway_assert((unsigned)disp == (size_t)disp);
            id->idAddr()->iiaLclVar.initLclVarAddr((unsigned)disp, 0);

            sz = emitInsSizeSV(insCodeMR(INS_call), (unsigned)disp, 0);
            break;

        case EC_FUNC_VIRTUAL:

        CALL_ADDR_MODE:

            // fall-through

            // The function is "ireg" if id->idIsCallRegPtr(),
            // else [ireg+xmul*xreg+disp]

            id->idInsFmt(IF_ARD);

            id->idAddr()->iiaAddrMode.amBaseReg = ireg;
            id->idAddr()->iiaAddrMode.amIndxReg = xreg;
            id->idAddr()->iiaAddrMode.amScale   = xmul ?
emitEncodeScale(xmul) : emitter::OPSZ1; sz = emitInsSizeAM(id, insCodeMR(INS_call)); if (ireg == REG_NA && xreg == REG_NA) { if (codeGen->genCodeIndirAddrNeedsReloc(disp)) { id->idSetIsDspReloc(); } #ifdef _TARGET_AMD64_ else { // An absolute indir address that doesn't need reloc should fit within 32-bits // to be encoded as offset relative to zero. This addr mode requires an extra // SIB byte noway_assert((int)addr == (size_t)addr); sz++; } #endif //_TARGET_AMD64_ } break; default: NO_WAY("unexpected instruction"); break; } } else if (callType == EC_FUNC_TOKEN_INDIR) { /* "call [method_addr]" */ assert(addr != nullptr); id->idInsFmt(IF_METHPTR); id->idAddr()->iiaAddr = (BYTE*)addr; sz = 6; #if RELOC_SUPPORT // Since this is an indirect call through a pointer and we don't // currently pass in emitAttr into this function, we query codegen // whether addr needs a reloc. if (codeGen->genCodeIndirAddrNeedsReloc((size_t)addr)) { id->idSetIsDspReloc(); } #ifdef _TARGET_AMD64_ else { // An absolute indir address that doesn't need reloc should fit within 32-bits // to be encoded as offset relative to zero. This addr mode requires an extra // SIB byte noway_assert((int)addr == (size_t)addr); sz++; } #endif //_TARGET_AMD64_ #endif //RELOC_SUPPORT } else { /* This is a simple direct call: "call helper/method/addr" */ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR); assert(addr != nullptr); id->idInsFmt(IF_METHOD); sz = 5; id->idAddr()->iiaAddr = (BYTE*)addr; if (callType == EC_FUNC_ADDR) { id->idSetIsCallAddr(); } #if RELOC_SUPPORT // Direct call to a method and no addr indirection is needed. if (codeGen->genCodeAddrNeedsReloc((size_t)addr)) { id->idSetIsDspReloc(); } #endif } #ifdef DEBUG if (emitComp->verbose&&0) { if (id->idIsLargeCall()) { if (callType >= EC_FUNC_VIRTUAL) printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum, VarSetOps::ToString(emitComp,((instrDescCGCA*)id)->idcGCvars)); else printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum, VarSetOps::ToString(emitComp,((instrDescCGCA*)id)->idcGCvars)); } } #endif #if defined(DEBUG) || defined(LATE_DISASM) id->idDebugOnlyInfo()->idMemCookie = (size_t) methHnd; // method token id->idDebugOnlyInfo()->idClsCookie = 0; id->idDebugOnlyInfo()->idCallSig = sigInfo; #endif #if defined(LATE_DISASM) if (addr != nullptr) { codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd); } #endif // defined(LATE_DISASM) id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS /* The call will pop the arguments */ if (emitCntStackDepth && argSize > 0) { noway_assert((ssize_t)emitCurStackLvl >= argSize); emitCurStackLvl -= (int)argSize; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } #ifdef DEBUG /***************************************************************************** * * The following called for each recorded instruction -- use for debugging. */ void emitter::emitInsSanityCheck(instrDesc* id) { // make certain you only try to put relocs on things that can have them. 
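    // In rough terms (an illustrative summary of the asserts below): displacement
    // relocs belong on formats that carry an address-mode or data-section displacement
    // (ID_OP_AMD, ID_OP_DSP and friends), while constant relocs belong on formats that
    // carry an immediate operand (ID_OP_CNS and friends).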
    ID_OPS idOp = (ID_OPS) emitFmtToOps[id->idInsFmt()];

    if ((idOp == ID_OP_SCNS) && id->idIsLargeCns())
    {
        idOp = ID_OP_CNS;
    }

    if (!id->idIsTiny())
    {
        if (id->idIsDspReloc())
        {
            assert(idOp == ID_OP_NONE || idOp == ID_OP_AMD || idOp == ID_OP_DSP || idOp == ID_OP_DSP_CNS ||
                   idOp == ID_OP_AMD_CNS || idOp == ID_OP_SPEC || idOp == ID_OP_CALL || idOp == ID_OP_JMP ||
                   idOp == ID_OP_LBL);
        }

        if (id->idIsCnsReloc())
        {
            assert(idOp == ID_OP_CNS || idOp == ID_OP_AMD_CNS || idOp == ID_OP_DSP_CNS || idOp == ID_OP_SPEC ||
                   idOp == ID_OP_CALL || idOp == ID_OP_JMP);
        }
    }
}
#endif

/*****************************************************************************
 *
 *  Return the allocated size (in bytes) of the given instruction descriptor.
 */

size_t emitter::emitSizeOfInsDsc(instrDesc* id)
{
    if (emitIsTinyInsDsc(id))
        return TINY_IDSC_SIZE;

    if (emitIsScnsInsDsc(id))
        return SMALL_IDSC_SIZE;

    assert((unsigned)id->idInsFmt() < emitFmtCount);

    ID_OPS idOp = (ID_OPS) emitFmtToOps[id->idInsFmt()];

    // An INS_call instruction may use a "fat" direct/indirect call descriptor
    // except for a local call to a label (i.e. call to a finally).
    // Only ID_OP_CALL and ID_OP_SPEC check for this, so we enforce that the
    // INS_call instruction always uses one of these idOps.

    if (id->idIns() == INS_call)
    {
        assert(idOp == ID_OP_CALL || // is a direct call
               idOp == ID_OP_SPEC || // is an indirect call
               idOp == ID_OP_JMP);   // is a local call to a finally clause
    }

    switch (idOp)
    {
    case ID_OP_NONE:
        break;

    case ID_OP_LBL:
        return sizeof(instrDescLbl);

    case ID_OP_JMP:
        return sizeof(instrDescJmp);

    case ID_OP_CALL:
    case ID_OP_SPEC:
        if (id->idIsLargeCall())
        {
            /* Must be a "fat" indirect call descriptor */
            return sizeof(instrDescCGCA);
        }

        __fallthrough;

    case ID_OP_SCNS:
    case ID_OP_CNS:
    case ID_OP_DSP:
    case ID_OP_DSP_CNS:
    case ID_OP_AMD:
    case ID_OP_AMD_CNS:
        if (id->idIsLargeCns())
        {
            if (id->idIsLargeDsp())
                return sizeof(instrDescCnsDsp);
            else
                return sizeof(instrDescCns);
        }
        else
        {
            if (id->idIsLargeDsp())
                return sizeof(instrDescDsp);
            else
                return sizeof(instrDesc);
        }

    default:
        NO_WAY("unexpected instruction descriptor format");
        break;
    }

    return sizeof(instrDesc);
}

/*****************************************************************************/
#ifdef DEBUG
/*****************************************************************************
 *
 *  Return a string that represents the given register.
*/ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName) { static char rb[2][128]; static unsigned char rbc = 0; const char* rn = emitComp->compRegVarName(reg, varName); #ifdef _TARGET_AMD64_ char suffix = '\0'; switch (EA_SIZE(attr)) { case EA_32BYTE: return emitYMMregName(reg); break; case EA_16BYTE: return emitXMMregName(reg); break; case EA_8BYTE: break; case EA_4BYTE: if (reg > REG_R15) break; if (reg > REG_RDI) { suffix = 'd'; goto APPEND_SUFFIX; } rbc = (rbc+1)%2; rb[rbc][0] = 'e'; rb[rbc][1] = rn[1]; rb[rbc][2] = rn[2]; rb[rbc][3] = 0; rn = rb[rbc]; break; case EA_2BYTE: if (reg > REG_RDI) { suffix = 'w'; goto APPEND_SUFFIX; } rn++; break; case EA_1BYTE: if (reg > REG_RDI) { suffix = 'b'; APPEND_SUFFIX: rbc = (rbc+1)%2; rb[rbc][0] = rn[0]; rb[rbc][1] = rn[1]; if (rn[2]) { assert(rn[3] == 0); rb[rbc][2] = rn[2]; rb[rbc][3] = suffix; rb[rbc][4] = 0; } else { rb[rbc][2] = suffix; rb[rbc][3] = 0; } } else { rbc = (rbc+1)%2; rb[rbc][0] = rn[1]; if (reg < 4) { rb[rbc][1] = 'l'; rb[rbc][2] = 0; } else { rb[rbc][1] = rn[2]; rb[rbc][2] = 'l'; rb[rbc][3] = 0; } } rn = rb[rbc]; break; default: break; } #endif // _TARGET_AMD64_ #ifdef _TARGET_X86_ assert(strlen(rn) >= 3); switch (EA_SIZE(attr)) { case EA_4BYTE: break; case EA_2BYTE: rn++; break; case EA_1BYTE: rbc = (rbc+1)%2; rb[rbc][0] = rn[1]; rb[rbc][1] = 'l'; strcpy_s(&rb[rbc][2], sizeof(rb[0])-2, rn+3); rn = rb[rbc]; break; default: break; } #endif // _TARGET_X86_ #if 0 // The following is useful if you want register names to be tagged with * or ^ representing gcref or byref, respectively, // however it's possibly not interesting most of the time. if (EA_IS_GCREF(attr) || EA_IS_BYREF(attr)) { if (rn != rb[rbc]) { rbc = (rbc+1)%2; strcpy_s(rb[rbc], sizeof(rb[rbc]), rn); rn = rb[rbc]; } if (EA_IS_GCREF(attr)) { strcat_s(rb[rbc], sizeof(rb[rbc]), "*"); } else if (EA_IS_BYREF(attr)) { strcat_s(rb[rbc], sizeof(rb[rbc]), "^"); } } #endif // 0 return rn; } /***************************************************************************** * * Return a string that represents the given FP register. */ const char* emitter::emitFPregName(unsigned reg, bool varName) { assert(reg < REG_COUNT); return emitComp->compFPregVarName((regNumber)(reg), varName); } /***************************************************************************** * * Return a string that represents the given XMM register. */ const char* emitter::emitXMMregName(unsigned reg) { static const char* const regNames[] = { #define REGDEF(name, rnum, mask, sname) "x" sname, #ifndef LEGACY_BACKEND #include "register.h" #else // LEGACY_BACKEND #include "registerxmm.h" #endif // LEGACY_BACKEND }; assert(reg < REG_COUNT); assert(reg < sizeof(regNames)/sizeof(regNames[0])); return regNames[reg]; } /***************************************************************************** * * Return a string that represents the given YMM register. */ const char* emitter::emitYMMregName(unsigned reg) { static const char* const regNames[] = { #define REGDEF(name, rnum, mask, sname) "y" sname, #ifndef LEGACY_BACKEND #include "register.h" #else // LEGACY_BACKEND #include "registerxmm.h" #endif // LEGACY_BACKEND }; assert(reg < REG_COUNT); assert(reg < sizeof(regNames)/sizeof(regNames[0])); return regNames[reg]; } /***************************************************************************** * * Display a static data member reference. 
 */

void emitter::emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc /* = false */)
{
    int doffs;

    /* Filter out the special case of fs:[offs] */

    // Munge any pointers if we want diff-able disassembly
    if (emitComp->opts.disDiffable)
    {
        ssize_t top12bits = (offs >> 20);
        if ((top12bits != 0) && (top12bits != -1))
            offs = 0xD1FFAB1E;
    }

    if (fldHnd == FLD_GLOBAL_FS)
    {
        printf("FS:[0x%04X]", offs);
        return;
    }

    if (fldHnd == FLD_GLOBAL_DS)
    {
        printf("[0x%04X]", offs);
        return;
    }

    printf("[");

    doffs = Compiler::eeGetJitDataOffs(fldHnd);

#ifdef RELOC_SUPPORT
    if (reloc)
        printf("reloc ");
#endif

    if (doffs >= 0)
    {
        if (doffs & 1)
            printf("@CNS%02u", doffs - 1);
        else
            printf("@RWD%02u", doffs);

        if (offs)
            printf("%+Id", offs);
    }
    else
    {
        printf("classVar[%#x]", emitComp->dspPtr(fldHnd));

        if (offs)
            printf("%+Id", offs);
    }

    printf("]");

    if (emitComp->opts.varNames && offs < 0)
    {
        printf("'%s", emitComp->eeGetFieldName(fldHnd));
        if (offs)
            printf("%+Id", offs);
        printf("'");
    }
}

/*****************************************************************************
 *
 *  Display a stack frame reference.
 */

void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
{
    int  addr;
    bool bEBP;

    printf("[");

    if (!asmfm || emitComp->lvaDoneFrameLayout == Compiler::NO_FRAME_LAYOUT)
    {
        if (varx < 0)
            printf("TEMP_%02u", -varx);
        else
            printf("V%02u", +varx);

        if (disp < 0)
            printf("-0x%X", -disp);
        else if (disp > 0)
            printf("+0x%X", +disp);
    }

    if (emitComp->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
    {
        if (!asmfm)
            printf(" ");

        addr = emitComp->lvaFrameAddress(varx, &bEBP) + disp;

        if (bEBP)
        {
            printf(STR_FPBASE);

            if (addr < 0)
                printf("-%02XH", -addr);
            else if (addr > 0)
                printf("+%02XH", addr);
        }
        else
        {
            /* Adjust the offset by the amount currently pushed on the stack */

            printf(STR_SPBASE);

            if (addr < 0)
                printf("-%02XH", -addr);
            else if (addr > 0)
                printf("+%02XH", addr);

#if !FEATURE_FIXED_OUT_ARGS
            if (emitCurStackLvl)
                printf("+%02XH", emitCurStackLvl);
#endif // !FEATURE_FIXED_OUT_ARGS
        }
    }

    printf("]");

    if (varx >= 0 && emitComp->opts.varNames)
    {
        LclVarDsc*  varDsc;
        const char* varName;

        assert((unsigned)varx < emitComp->lvaCount);
        varDsc  = emitComp->lvaTable + varx;
        varName = emitComp->compLocalVarName(varx, offs);

        if (varName)
        {
            printf("'%s", varName);

            if (disp < 0)
                printf("-%d", -disp);
            else if (disp > 0)
                printf("+%d", +disp);

            printf("'");
        }
    }
}

/*****************************************************************************
 *
 *  Display a reloc value.
 *  If we are formatting for an assembly listing, don't print the hex value,
 *  since it will prevent us from doing assembly diffs.
 */
void emitter::emitDispReloc(ssize_t value)
{
    if (emitComp->opts.disAsm)
    {
        printf("(reloc)");
    }
    else
    {
        printf("(reloc 0x%Ix)", emitComp->dspPtr(value));
    }
}

/*****************************************************************************
 *
 *  Display an address mode.
 */

void emitter::emitDispAddrMode(instrDesc* id, bool noDetail)
{
    bool         nsep = false;
    ssize_t      disp;

    unsigned     jtno = 0;
    dataSection* jdsc = 0;

    /* The displacement field is in an unusual place for calls */

    disp = (id->idIns() == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);

    /* Display a jump table label if this is a switch table jump */

    if (id->idIns() == INS_i_jmp)
    {
        UNATIVE_OFFSET offs = 0;

        /* Find the appropriate entry in the data section list */

        for (jdsc = emitConsDsc.dsdList, jtno = 0; jdsc; jdsc = jdsc->dsNext)
        {
            UNATIVE_OFFSET size = jdsc->dsSize;

            /* Is this a label table?
 */

            if (size & 1)
            {
                size--;
                jtno++;

                if (offs == id->idDebugOnlyInfo()->idMemCookie)
                    break;
            }

            offs += size;
        }

        /* If we've found a matching entry then this is a table jump */

        if (jdsc)
        {
#ifdef RELOC_SUPPORT
            if (id->idIsDspReloc())
            {
                printf("reloc ");
            }
#endif
            printf("J_M%03u_DS%02u", Compiler::s_compMethodsCount, id->idDebugOnlyInfo()->idMemCookie);
        }

        disp -= id->idDebugOnlyInfo()->idMemCookie;
    }

    bool frameRef = false;

    printf("[");

    if (id->idAddr()->iiaAddrMode.amBaseReg != REG_NA)
    {
        printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
        nsep = true;
        if (id->idAddr()->iiaAddrMode.amBaseReg == REG_ESP)
            frameRef = true;
        else if (emitComp->isFramePointerUsed() && id->idAddr()->iiaAddrMode.amBaseReg == REG_EBP)
            frameRef = true;
    }

    if (id->idAddr()->iiaAddrMode.amIndxReg != REG_NA)
    {
        size_t scale = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);

        if (nsep)
            printf("+");
        if (scale > 1)
            printf("%u*", scale);
        printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amIndxReg));
        nsep = true;
    }

#ifdef RELOC_SUPPORT
    if ((id->idIsDspReloc()) && (id->idIns() != INS_i_jmp))
    {
        if (nsep)
            printf("+");
        emitDispReloc(disp);
    }
    else
#endif
    {
        // Munge any pointers if we want diff-able disassembly
        if (emitComp->opts.disDiffable)
        {
            ssize_t top12bits = (disp >> 20);
            if ((top12bits != 0) && (top12bits != -1))
                disp = 0xD1FFAB1E;
        }

        if (disp > 0)
        {
            if (nsep)
                printf("+");
            if (frameRef)
                printf("%02XH", disp);
            else if (disp < 1000)
                printf("%d", disp);
            else if (disp <= 0xFFFF)
                printf("%04XH", disp);
            else
                printf("%08XH", disp);
        }
        else if (disp < 0)
        {
            if (frameRef)
                printf("-%02XH", -disp);
            else if (disp > -1000)
                printf("-%d", -disp);
            else if (disp >= -0xFFFF)
                printf("-%04XH", -disp);
            else if ((disp & 0x7F000000) != 0x7F000000)
                printf("%08XH", disp);
            else
                printf("-%08XH", -disp);
        }
        else if (!nsep)
        {
            printf("%04XH", disp);
        }
    }

    printf("]");

    if (id->idDebugOnlyInfo()->idClsCookie)
    {
        if (id->idIns() == INS_call)
            printf("%s", emitFncName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie));
        else
            printf("%s", emitFldName((CORINFO_FIELD_HANDLE)id->idDebugOnlyInfo()->idMemCookie));
    }
    // pretty print string if it looks like one
    else if (id->idGCref() == GCT_GCREF && id->idIns() == INS_mov &&
             id->idAddr()->iiaAddrMode.amBaseReg == REG_NA)
    {
        const wchar_t* str = emitComp->eeGetCPString(disp);
        if (str != 0)
            printf(" '%S'", str);
    }

    if (jdsc && !noDetail)
    {
        unsigned     cnt = (jdsc->dsSize - 1) / sizeof(void*);
        BasicBlock** bbp = (BasicBlock**)jdsc->dsCont;

#ifdef _TARGET_AMD64_
#define SIZE_LETTER "Q"
#else
#define SIZE_LETTER "D"
#endif
        printf("\n\n    J_M%03u_DS%02u LABEL   " SIZE_LETTER "WORD", Compiler::s_compMethodsCount, jtno);

        /* Display the label table (it's stored as "BasicBlock*" values) */

        do
        {
            insGroup* lab;

            /* Convert the BasicBlock* value to an IG address */

            lab = (insGroup*)emitCodeGetCookie(*bbp++);
            assert(lab);

            printf("\n            D" SIZE_LETTER "      G_M%03u_IG%02u", Compiler::s_compMethodsCount, lab->igNum);
        } while (--cnt);
    }
}

/*****************************************************************************
 *
 *  If the given instruction is a shift, display the 2nd operand.
*/ void emitter::emitDispShift(instruction ins, int cnt) { switch (ins) { case INS_rcl_1: case INS_rcr_1: case INS_rol_1: case INS_ror_1: case INS_shl_1: case INS_shr_1: case INS_sar_1: printf(", 1"); break; case INS_rcl: case INS_rcr: case INS_rol: case INS_ror: case INS_shl: case INS_shr: case INS_sar: printf(", cl"); break; case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: printf(", %d", cnt); break; default: break; } } /***************************************************************************** * * Display (optionally) the bytes for the instruction encoding in hex */ void emitter::emitDispInsHex(BYTE * code, size_t sz) { // We do not display the instruction hex if we want diff-able disassembly if (!emitComp->opts.disDiffable) { #ifdef _TARGET_AMD64_ // how many bytes per instruction we format for const size_t digits = 10; #else // _TARGET_X86 const size_t digits = 6; #endif printf(" "); for (unsigned i=0; i < sz; i++) { printf("%02X", (*((BYTE *) (code+i)))); } if (sz < digits) { printf("%.*s", 2 * (digits - sz), " "); } } } /***************************************************************************** * * Display the given instruction. */ void emitter::emitDispIns(instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig) { emitAttr attr; const char* sstr; instruction ins = id->idIns(); if (emitComp->verbose) { unsigned idNum = id->idDebugOnlyInfo()->idNum; printf("IN%04x: ", idNum); } #ifdef RELOC_SUPPORT # define ID_INFO_DSP_RELOC ((bool) (id->idIsDspReloc())) #else # define ID_INFO_DSP_RELOC false #endif /* Display a constant value if the instruction references one */ if (!isNew) { switch (id->idInsFmt()) { int offs; case IF_MRD_RRD: case IF_MWR_RRD: case IF_MRW_RRD: case IF_RRD_MRD: case IF_RWR_MRD: case IF_RRW_MRD: case IF_MRD_CNS: case IF_MWR_CNS: case IF_MRW_CNS: case IF_MRW_SHF: case IF_MRD: case IF_MWR: case IF_MRW: #if FEATURE_STACK_FP_X87 case IF_TRD_MRD: case IF_TWR_MRD: case IF_TRW_MRD: // case IF_MRD_TRD: case IF_MWR_TRD: // case IF_MRW_TRD: #endif // FEATURE_STACK_FP_X87 case IF_MRD_OFF: /* Is this actually a reference to a data section? */ offs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd); if (offs >= 0) { void* addr; /* Display a data section reference */ assert((unsigned)offs < emitConsDsc.dsdOffs); addr = emitConsBlock ? emitConsBlock + offs : nullptr; #if 0 // TODO-XArch-Cleanup: Fix or remove this code. /* Is the operand an integer or floating-point value? */ bool isFP = false; if (CodeGen::instIsFP(id->idIns())) { switch (id->idIns()) { case INS_fild: case INS_fildl: break; default: isFP = true; break; } } if (offs & 1) printf("@CNS%02u", offs); else printf("@RWD%02u", offs); printf(" "); if (addr) { addr = 0; // TODO-XArch-Bug?: // This was busted by switching the order // in which we output the code block vs. // the data blocks -- when we get here, // the data block has not been filled in // yet, so we'll display garbage. if (isFP) { if (id->idOpSize() == EA_4BYTE) printf("DF %f \n", addr ? *(float *)addr : 0); else printf("DQ %lf\n", addr ? *(double *)addr : 0); } else { if (id->idOpSize() <= EA_4BYTE) printf("DD %d \n", addr ? *(int *)addr : 0); else printf("DQ %D \n", addr ? 
*(__int64 *)addr : 0); } } #endif } break; default: break; } } // printf("[F=%s] " , emitIfName(id->idInsFmt())); // printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum); // printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth); // printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32)); // printf("[A=%08X] " , emitSimpleStkMask); // printf("[A=%08X] " , emitSimpleByrefStkMask); // printf("[L=%02u] " , id->idCodeSize()); if (!emitComp->opts.dspEmit && !isNew && !asmfm) doffs = true; /* Display the instruction offset */ emitDispInsOffs(offset, doffs); if (code != nullptr) { /* Display the instruction hex code */ emitDispInsHex(code, sz); } /* Display the instruction name */ sstr = codeGen->genInsName(ins); #ifdef FEATURE_AVX_SUPPORT if (IsAVXInstruction(ins)) { printf(" v%-8s", sstr); } else #endif // FEATURE_AVX_SUPPORT { printf(" %-9s", sstr); } #ifndef FEATURE_PAL if (strnlen_s(sstr, 10) > 8) #else // FEATURE_PAL if (strnlen(sstr, 10) > 8) #endif // FEATURE_PAL { printf(" "); } /* By now the size better be set to something */ assert(emitInstCodeSz(id) || emitInstHasNoCode(ins)); /* Figure out the operand size */ if (id->idGCref() == GCT_GCREF) { attr = EA_GCREF; sstr = "gword ptr "; } else if (id->idGCref() == GCT_BYREF) { attr = EA_BYREF; sstr = "bword ptr "; } else { attr = id->idOpSize(); sstr = codeGen->genSizeStr(attr); if (ins == INS_lea) { #ifdef _TARGET_AMD64_ assert((attr == EA_4BYTE) || (attr == EA_8BYTE)); #else assert(attr == EA_4BYTE); #endif sstr = ""; } } /* Now see what instruction format we've got */ // First print the implicit register usage if (instrHasImplicitRegPairDest(ins)) { printf("%s:%s, ", emitRegName(REG_EDX, id->idOpSize()), emitRegName(REG_EAX, id->idOpSize())); } else if (instrIs3opImul(ins)) { regNumber tgtReg = inst3opImulReg(ins); printf("%s, ", emitRegName(tgtReg, id->idOpSize())); } switch (id->idInsFmt()) { ssize_t val; ssize_t offs; CnsVal cnsVal; const char* methodName; case IF_CNS: val = emitGetInsSC(id); #ifdef _TARGET_AMD64_ // no 8-byte immediates allowed here! 
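        // That is, val must be representable as a sign-extended 32-bit immediate:
        // INT32_MIN <= val <= INT32_MAX (the hex bounds below).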
assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL)); #endif #ifdef RELOC_SUPPORT if (id->idIsCnsReloc()) emitDispReloc(val); else #endif { PRINT_CONSTANT: // Munge any pointers if we want diff-able disassembly if (emitComp->opts.disDiffable) { ssize_t top12bits = (val >> 20); if ((top12bits != 0) && (top12bits != -1)) val = 0xD1FFAB1E; } if ((val > -1000) && (val < 1000)) printf("%d", val); else if ((val > 0) || ((val & 0x7F000000) != 0x7F000000)) printf("0x%IX", val); else // (val < 0) printf("-0x%IX", -val); } break; case IF_ARD: case IF_AWR: case IF_ARW: #if FEATURE_STACK_FP_X87 case IF_TRD_ARD: case IF_TWR_ARD: case IF_TRW_ARD: // case IF_ARD_TRD: case IF_AWR_TRD: // case IF_ARW_TRD: #endif // FEATURE_STACK_FP_X87 if (ins == INS_call && id->idIsCallRegPtr()) { printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg)); break; } printf("%s", sstr); emitDispAddrMode(id, isNew); emitDispShift(ins); if (ins == INS_call) { assert(id->idInsFmt() == IF_ARD); /* Ignore indirect calls */ if (id->idDebugOnlyInfo()->idMemCookie == 0) break; assert(id->idDebugOnlyInfo()->idMemCookie); /* This is a virtual call */ methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); printf("%s", methodName); } break; case IF_RRD_ARD: case IF_RWR_ARD: case IF_RRW_ARD: if (IsAVXInstruction(ins)) { printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr); } else if (IsSSE2Instruction(ins)) { printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr); } else #ifdef _TARGET_AMD64_ if (ins == INS_movsxd) { printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr); } else #endif if (ins == INS_movsx || ins == INS_movzx) { printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr); } else { printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); } emitDispAddrMode(id); break; case IF_ARD_RRD: case IF_AWR_RRD: case IF_ARW_RRD: printf("%s", sstr); emitDispAddrMode(id); if (IsAVXInstruction(ins)) { printf(", %s", emitYMMregName((unsigned)id->idReg1())); } else if (IsSSE2Instruction(ins)) { printf(", %s", emitXMMregName((unsigned)id->idReg1())); } else { printf(", %s", emitRegName(id->idReg1(), attr)); } break; case IF_ARD_CNS: case IF_AWR_CNS: case IF_ARW_CNS: case IF_ARW_SHF: printf("%s", sstr); emitDispAddrMode(id); emitGetInsAmdCns(id, &cnsVal); val = cnsVal.cnsVal; #ifdef _TARGET_AMD64_ // no 8-byte immediates allowed here! 
        assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
#endif
        if (id->idInsFmt() == IF_ARW_SHF)
            emitDispShift(ins, (BYTE)val);
        else
        {
            printf(", ");
#ifdef RELOC_SUPPORT
            if (cnsVal.cnsReloc)
                emitDispReloc(val);
            else
#endif
                goto PRINT_CONSTANT;
        }
        break;

    case IF_SRD:
    case IF_SWR:
    case IF_SRW:
#if FEATURE_STACK_FP_X87
    case IF_TRD_SRD:
    case IF_TWR_SRD:
    case IF_TRW_SRD:
    // case IF_SRD_TRD:
    case IF_SWR_TRD:
    // case IF_SRW_TRD:
#endif // FEATURE_STACK_FP_X87

        printf("%s", sstr);

#if !FEATURE_FIXED_OUT_ARGS
        if (ins == INS_pop)
            emitCurStackLvl -= sizeof(int);
#endif

        emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
                         id->idDebugOnlyInfo()->idVarRefOffs, asmfm);

#if !FEATURE_FIXED_OUT_ARGS
        if (ins == INS_pop)
            emitCurStackLvl += sizeof(int);
#endif

        emitDispShift(ins);
        break;

    case IF_SRD_RRD:
    case IF_SWR_RRD:
    case IF_SRW_RRD:

        printf("%s", sstr);

        emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
                         id->idDebugOnlyInfo()->idVarRefOffs, asmfm);

        if (IsAVXInstruction(ins))
        {
            printf(", %s", emitYMMregName((unsigned)id->idReg1()));
        }
        else if (IsSSE2Instruction(ins))
        {
            printf(", %s", emitXMMregName((unsigned)id->idReg1()));
        }
        else
        {
            printf(", %s", emitRegName(id->idReg1(), attr));
        }
        break;

    case IF_SRD_CNS:
    case IF_SWR_CNS:
    case IF_SRW_CNS:
    case IF_SRW_SHF:

        printf("%s", sstr);

        emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
                         id->idDebugOnlyInfo()->idVarRefOffs, asmfm);

        emitGetInsCns(id, &cnsVal);
        val = cnsVal.cnsVal;
#ifdef _TARGET_AMD64_
        // no 8-byte immediates allowed here!
        assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
#endif
        if (id->idInsFmt() == IF_SRW_SHF)
            emitDispShift(ins, (BYTE)val);
        else
        {
            printf(", ");
#ifdef RELOC_SUPPORT
            if (cnsVal.cnsReloc)
            {
                emitDispReloc(val);
            }
            else
#endif
            {
                goto PRINT_CONSTANT;
            }
        }
        break;

    case IF_RRD_SRD:
    case IF_RWR_SRD:
    case IF_RRW_SRD:
        if (IsAVXInstruction(ins))
        {
            printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr);
        }
        else if (IsSSE2Instruction(ins))
        {
            printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr);
        }
        else
#ifdef _TARGET_AMD64_
        if (ins == INS_movsxd)
        {
            printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
        }
        else
#endif
        if (ins == INS_movsx || ins == INS_movzx)
        {
            printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr);
        }
        else
        {
            printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
        }

        emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
                         id->idDebugOnlyInfo()->idVarRefOffs, asmfm);

        break;

    case IF_RRD_RRD:
    case IF_RWR_RRD:
    case IF_RRW_RRD:
        if (ins == INS_mov_i2xmm)
        {
            printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), emitRegName(id->idReg2(), attr));
        }
        else if (ins == INS_mov_xmm2i)
        {
            printf("%s, %s", emitRegName(id->idReg2(), attr), emitXMMregName((unsigned)id->idReg1()));
        }
        else if (ins == INS_cvttsd2si)
        {
            printf(" %s, %s", emitRegName(id->idReg1(), attr), emitXMMregName((unsigned)id->idReg2()));
        }
        else if (IsAVXInstruction(ins))
        {
            printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), emitYMMregName((unsigned)id->idReg2()));
        }
        else if (IsSSE2Instruction(ins))
        {
            printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), emitXMMregName((unsigned)id->idReg2()));
        }
#ifdef _TARGET_AMD64_
        else if (ins == INS_movsxd)
        {
            printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), emitRegName(id->idReg2(), EA_4BYTE));
        }
#endif // _TARGET_AMD64_
        else if (ins == INS_movsx || ins == INS_movzx)
        {
            printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), emitRegName(id->idReg2(), attr));
        }
        else
        {
            printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr));
        }
        break;

    case IF_RRW_RRW:
        assert(ins == INS_xchg);
        printf("%s,", emitRegName(id->idReg1(), attr));
        printf(" %s", emitRegName(id->idReg2(), attr));
        break;

#ifdef FEATURE_AVX_SUPPORT
    case IF_RWR_RRD_RRD:
        assert(IsAVXInstruction(ins));
        assert(IsThreeOperandAVXInstruction(ins));
        printf("%s, ", emitRegName(id->idReg1(), attr));
        printf("%s, ", emitRegName(id->idReg2(), attr));
        printf("%s", emitRegName(id->idReg3(), attr));
        break;
#endif

    case IF_RRW_RRW_CNS:
        if (IsAVXInstruction(ins))
        {
            printf("%s,", emitYMMregName((unsigned)id->idReg1()));
            printf(" %s", emitYMMregName((unsigned)id->idReg2()));
        }
        else
        {
            printf("%s,", emitRegName(id->idReg1(), attr));
            printf(" %s", emitRegName(id->idReg2(), attr));
        }
        val = emitGetInsSC(id);
#ifdef _TARGET_AMD64_
        // no 8-byte immediates allowed here!
        assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
#endif
        printf(", ");
#ifdef RELOC_SUPPORT
        if (id->idIsCnsReloc())
            emitDispReloc(val);
        else
#endif
            goto PRINT_CONSTANT;
        break;

    case IF_RRD:
    case IF_RWR:
    case IF_RRW:
        printf("%s", emitRegName(id->idReg1(), attr));
        emitDispShift(ins);
        break;

    case IF_RRW_SHF:
        printf("%s", emitRegName(id->idReg1(), attr));
        emitDispShift(ins, (BYTE)emitGetInsSC(id));
        break;

    case IF_RRD_MRD:
    case IF_RWR_MRD:
    case IF_RRW_MRD:

        if (ins == INS_movsx || ins == INS_movzx)
            attr = EA_PTRSIZE;
#ifdef _TARGET_AMD64_
        else if (ins == INS_movsxd)
        {
            attr = EA_PTRSIZE;
        }
#endif
        if (IsAVXInstruction(ins))
        {
            printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr);
        }
        else if (IsSSE2Instruction(ins))
        {
            printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr);
        }
        else
        {
            printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
        }
        offs = emitGetInsDsp(id);
        emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
        break;

    case IF_RWR_MRD_OFF:

        printf("%s, %s", emitRegName(id->idReg1(), attr), "offset");
        offs = emitGetInsDsp(id);
        emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
        break;

    case IF_MRD_RRD:
    case IF_MWR_RRD:
    case IF_MRW_RRD:

        printf("%s", sstr);
        offs = emitGetInsDsp(id);
        emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
        printf(", %s", emitRegName(id->idReg1(), attr));
        break;

    case IF_MRD_CNS:
    case IF_MWR_CNS:
    case IF_MRW_CNS:
    case IF_MRW_SHF:

        printf("%s", sstr);
        offs = emitGetInsDsp(id);
        emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
        emitGetInsDcmCns(id, &cnsVal);
        val = cnsVal.cnsVal;
#ifdef _TARGET_AMD64_
        // no 8-byte immediates allowed here!
assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL)); #endif #ifdef RELOC_SUPPORT if (cnsVal.cnsReloc) emitDispReloc(val); else #endif if (id->idInsFmt() == IF_MRW_SHF) emitDispShift(ins, (BYTE)val); else { printf(", "); goto PRINT_CONSTANT; } break; case IF_MRD: case IF_MWR: case IF_MRW: #if FEATURE_STACK_FP_X87 case IF_TRD_MRD: case IF_TWR_MRD: case IF_TRW_MRD: // case IF_MRD_TRD: case IF_MWR_TRD: // case IF_MRW_TRD: #endif // FEATURE_STACK_FP_X87 printf("%s", sstr); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); emitDispShift(ins); break; case IF_MRD_OFF: printf("offset "); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); break; case IF_RRD_CNS: case IF_RWR_CNS: case IF_RRW_CNS: printf("%s, ", emitRegName(id->idReg1(), attr)); val = emitGetInsSC(id); #ifdef RELOC_SUPPORT if (id->idIsCnsReloc()) emitDispReloc(val); else #endif goto PRINT_CONSTANT; break; #if FEATURE_STACK_FP_X87 case IF_TRD_FRD: case IF_TWR_FRD: case IF_TRW_FRD: switch (ins) { case INS_fld: case INS_fxch: break; default: printf("%s, ", emitFPregName(0)); break; } printf("%s", emitFPregName((unsigned)id->idReg1())); break; case IF_FRD_TRD: case IF_FWR_TRD: case IF_FRW_TRD: printf("%s", emitFPregName((unsigned)id->idReg1())); if (ins != INS_fst && ins != INS_fstp) printf(", %s", emitFPregName(0)); break; #endif // FEATURE_STACK_FP_X87 case IF_LABEL: case IF_RWR_LABEL: case IF_SWR_LABEL: if (ins == INS_lea) { printf("%s, ", emitRegName(id->idReg1(), attr)); } else if (ins == INS_mov) { /* mov dword ptr [frame.callSiteReturnAddress], label */ assert(id->idInsFmt() == IF_SWR_LABEL); instrDescLbl* idlbl = (instrDescLbl*)id; emitDispFrameRef(idlbl->dstLclVar.lvaVarNum(), idlbl->dstLclVar.lvaOffset(), 0, asmfm); printf(", "); } if (((instrDescJmp*)id)->idjShort) printf("SHORT "); if (id->idIsBound()) { printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum); } else { printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum); } break; case IF_METHOD: case IF_METHPTR: if (id->idIsCallAddr()) { offs = (ssize_t)id->idAddr()->iiaAddr; methodName = ""; } else { offs = 0; methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); } if (id->idInsFmt() == IF_METHPTR) printf("["); if (offs) { if (id->idIsDspReloc()) printf("reloc "); printf("%08X", offs); } else { printf("%s", methodName); } if (id->idInsFmt() == IF_METHPTR) printf("]"); break; #if FEATURE_STACK_FP_X87 case IF_TRD: case IF_TWR: case IF_TRW: #endif // FEATURE_STACK_FP_X87 case IF_NONE: break; default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); break; } if (sz != 0 && sz != id->idCodeSize() && (!asmfm || emitComp->verbose)) { // Code size in the instrDesc is different from the actual code size we've been given! printf(" (ECS:%d, ACS:%d)", id->idCodeSize(), sz); } printf("\n"); } /*****************************************************************************/ #endif /***************************************************************************** * * Output nBytes bytes of NOP instructions */ static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes) { assert(nBytes <= 15); #ifndef _TARGET_AMD64_ // TODO-X86-CQ: when VIA C3 CPU's are out of circulation, switch to the // more efficient real NOP: 0x0F 0x1F +modR/M // Also can't use AMD recommended, multiple size prefixes (i.e. 
0x66 0x66 0x90 for 3 byte NOP) // because debugger and msdis don't like it, so maybe VIA doesn't either // So instead just stick to repeating single byte nops switch (nBytes) { case 15: *dst++ = 0x90; __fallthrough; case 14: *dst++ = 0x90; __fallthrough; case 13: *dst++ = 0x90; __fallthrough; case 12: *dst++ = 0x90; __fallthrough; case 11: *dst++ = 0x90; __fallthrough; case 10: *dst++ = 0x90; __fallthrough; case 9: *dst++ = 0x90; __fallthrough; case 8: *dst++ = 0x90; __fallthrough; case 7: *dst++ = 0x90; __fallthrough; case 6: *dst++ = 0x90; __fallthrough; case 5: *dst++ = 0x90; __fallthrough; case 4: *dst++ = 0x90; __fallthrough; case 3: *dst++ = 0x90; __fallthrough; case 2: *dst++ = 0x90; __fallthrough; case 1: *dst++ = 0x90; break; case 0: break; } #else // _TARGET_AMD64_ switch (nBytes) { case 2: *dst++ = 0x66; __fallthrough; case 1: *dst++ = 0x90; break; case 0: break; case 3: *dst++ = 0x0F; *dst++ = 0x1F; *dst++ = 0x00; break; case 4: *dst++ = 0x0F; *dst++ = 0x1F; *dst++ = 0x40; *dst++ = 0x00; break; case 6: *dst++ = 0x66; __fallthrough; case 5: *dst++ = 0x0F; *dst++ = 0x1F; *dst++ = 0x44; *dst++ = 0x00; *dst++ = 0x00; break; case 7: *dst++ = 0x0F; *dst++ = 0x1F; *dst++ = 0x80; *dst++ = 0x00; *dst++ = 0x00; *dst++ = 0x00; *dst++ = 0x00; break; case 15: // More than 3 prefixes is slower than just 2 NOPs dst = emitOutputNOP(emitOutputNOP(dst, 7), 8); break; case 14: // More than 3 prefixes is slower than just 2 NOPs dst = emitOutputNOP(emitOutputNOP(dst, 7), 7); break; case 13: // More than 3 prefixes is slower than just 2 NOPs dst = emitOutputNOP(emitOutputNOP(dst, 5), 8); break; case 12: // More than 3 prefixes is slower than just 2 NOPs dst = emitOutputNOP(emitOutputNOP(dst, 4), 8); break; case 11: *dst++ = 0x66; __fallthrough; case 10: *dst++ = 0x66; __fallthrough; case 9: *dst++ = 0x66; __fallthrough; case 8: *dst++ = 0x0F; *dst++ = 0x1F; *dst++ = 0x84; *dst++ = 0x00; *dst++ = 0x00; *dst++ = 0x00; *dst++ = 0x00; *dst++ = 0x00; break; } #endif // _TARGET_AMD64_ return dst; } /***************************************************************************** * * Output an instruction involving an address mode. */ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) { regNumber reg; regNumber rgx; ssize_t dsp; bool dspInByte; bool dspIsZero; instruction ins = id->idIns(); emitAttr size = id->idOpSize(); size_t opsz = EA_SIZE_IN_BYTES(size); // Get the base/index registers reg = id->idAddr()->iiaAddrMode.amBaseReg; rgx = id->idAddr()->iiaAddrMode.amIndxReg; // For INS_call the instruction size is actually the return value size if (ins == INS_call) { // Special case: call via a register if (id->idIsCallRegPtr()) { size_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call)); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, opcode); dst += emitOutputWord(dst, opcode); goto DONE; } // The displacement field is in an unusual place for calls dsp = emitGetInsCIdisp(id); #ifdef _TARGET_AMD64_ // Compute the REX prefix if it exists if (IsExtendedReg(reg, EA_PTRSIZE)) { insEncodeReg012(ins, reg, EA_PTRSIZE, &code); reg = RegEncoding(reg); } if (IsExtendedReg(rgx, EA_PTRSIZE)) { insEncodeRegSIB(ins, rgx, &code); rgx = RegEncoding(rgx); } // And emit the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); #endif // _TARGET_AMD64_ goto GOT_DSP; } // Is there a large constant operand? if (addc && (size > EA_1BYTE)) { ssize_t cval = addc->cnsVal; // Does the constant fit in a byte? 
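        // If it does, and the instruction has a sign-extended imm8 form, select that form
        // by setting the 's' bit (0x02) in the opcode below. E.g. (illustrative encodings)
        // 'add eax, 8' can then be emitted as 83 C0 08 rather than 81 C0 08 00 00 00.
        // mov and test have no such short form, hence their exclusion.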
        if ((signed char)cval == cval &&
#ifdef RELOC_SUPPORT
            addc->cnsReloc == false &&
#endif
            ins != INS_mov && ins != INS_test)
        {
            if (id->idInsFmt() != IF_ARW_SHF)
                code |= 2;

            opsz = 1;
        }
    }

    // Emit VEX prefix if required.
    // There are some callers who already add the VEX prefix and then call this routine.
    // Therefore, add the VEX prefix only if one is not already present.
    code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);

    // For this format, moves do not support a third operand, so we only need to handle the binary ops.
    if (IsThreeOperandBinaryAVXInstruction(ins))
    {
        // Encode the source operand's register in the 'vvvv' bits in 1's complement form.
        // For the two-operand form, the destination register (reg1) is duplicated as a source.
        code = insEncodeReg3456(ins, id->idReg1(), size, code);
    }

    // Add the REX.W prefix if required
    if (TakesRexWPrefix(ins, size))
    {
        code = AddRexWPrefix(ins, code);
    }

    if (IsExtendedReg(reg, EA_PTRSIZE))
    {
        insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
        reg = RegEncoding(reg);
    }

    if (IsExtendedReg(rgx, EA_PTRSIZE))
    {
        insEncodeRegSIB(ins, rgx, &code);
        rgx = RegEncoding(rgx);
    }

    // Is this a 'big' opcode?
    if (code & 0xFF000000)
    {
        // Output the REX prefix
        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        // Output the highest word of the opcode
        // We need to check again as in case of AVX instructions leading opcode bytes are stripped off
        // and encoded as part of VEX prefix.
        if (code & 0xFF000000)
        {
            dst += emitOutputWord(dst, code >> 16);
            code &= 0x0000FFFF;
        }
    }
    else if (code & 0x00FF0000)
    {
        // Output the REX prefix
        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        // Output the highest byte of the opcode
        if (code & 0x00FF0000)
        {
            dst += emitOutputByte(dst, code >> 16);
            code &= 0x0000FFFF;
        }

        // Use the large version if this is not a byte. This trick will not
        // work in case of SSE2 and AVX instructions.
        if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
            code++;
    }
    else if (CodeGen::instIsFP(ins))
    {
#if FEATURE_STACK_FP_X87
        assert(size == EA_4BYTE || size == EA_8BYTE || ins == INS_fldcw || ins == INS_fnstcw);
#else  // !FEATURE_STACK_FP_X87
        assert(size == EA_4BYTE || size == EA_8BYTE);
#endif // ! FEATURE_STACK_FP_X87

        if (size == EA_8BYTE)
            code += 4;
    }
    else if (!IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
    {
        /* Is the operand size larger than a byte? */

        switch (size)
        {
        case EA_1BYTE:
            break;

        case EA_2BYTE:

            /* Output a size prefix for a 16-bit operand */

            dst += emitOutputByte(dst, 0x66);

            __fallthrough;

        case EA_4BYTE:
            AMD64_ONLY(case EA_8BYTE:)

            /* Set the 'w' bit to get the large version */

            code |= 0x1;
            break;

#ifdef _TARGET_X86_
        case EA_8BYTE:

            /* Double operand - set the appropriate bit */

            code |= 0x04;
            break;
#endif // _TARGET_X86_

        default:
            NO_WAY("unexpected size");
            break;
        }
    }

    // Output the REX prefix
    dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

    // Get the displacement value
    dsp = emitGetInsAmdAny(id);

GOT_DSP:

    dspInByte = ((signed char)dsp == (ssize_t)dsp);
    dspIsZero = (dsp == 0);

#ifdef RELOC_SUPPORT
    if (id->idIsDspReloc())
    {
        dspInByte = false; // relocs can't be placed in a byte
    }
#endif

    // Is there a [scaled] index component?
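    // Quick ModRM refresher for the cases below (standard x86 encoding, stated here as a
    // reading aid): mod=00 with rm=101 means [disp32] (RIP-relative on AMD64); mod=01
    // adds a disp8 and mod=10 a disp32 to the base register; rm=100 means a SIB byte
    // follows (required for ESP-based and scaled-index forms); and a plain [EBP] must be
    // encoded as [EBP+0], since its mod=00 slot is taken by [disp32].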
    if (rgx == REG_NA)
    {
        // The address is of the form "[reg+disp]"
        switch (reg)
        {
        case REG_NA:

            if (id->idIsDspReloc())
            {
                INT32 addlDelta = 0;

                // The address is of the form "[disp]"
                // On x86   - disp is relative to zero
                // On Amd64 - disp is relative to RIP
                dst += emitOutputWord(dst, code | 0x0500);

                if (addc)
                {
                    // It is of the form "ins [disp], immed"
                    // For emitting relocation, we also need to take into account the
                    // additional bytes of code emitted for the immediate value.
                    ssize_t cval = addc->cnsVal;
#ifdef _TARGET_AMD64_
                    // all these opcodes only take a sign-extended 4-byte immediate
                    noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
#else
                    noway_assert(opsz <= 4);
#endif
                    switch (opsz)
                    {
                    case 0:
                    case 4:
                    case 8:
                        addlDelta = -4;
                        break;
                    case 2:
                        addlDelta = -2;
                        break;
                    case 1:
                        addlDelta = -1;
                        break;

                    default:
                        assert(!"unexpected operand size");
                        unreached();
                    }
                }

#ifdef _TARGET_AMD64_
                // We emit zero on Amd64, to avoid the assert in emitOutputLong()
                dst += emitOutputLong(dst, 0);
#else
                dst += emitOutputLong(dst, dsp);
#endif
                emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_DISP32, 0,
                                     addlDelta);
            }
            else
            {
#ifdef _TARGET_X86_
                dst += emitOutputWord(dst, code | 0x0500);
#else  //_TARGET_AMD64_
                // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
                // This addr mode should never be used while generating relocatable ngen code nor if
                // the addr can be encoded as a pc-relative address.
                noway_assert(!emitComp->opts.compReloc);
                noway_assert(codeGen->genAddrRelocTypeHint((size_t)dsp) != IMAGE_REL_BASED_REL32);
                noway_assert((int)dsp == dsp);

                // This requires specifying a SIB byte after the ModRM byte.
                dst += emitOutputWord(dst, code | 0x0400);
                dst += emitOutputByte(dst, 0x25);
#endif //_TARGET_AMD64_
                dst += emitOutputLong(dst, dsp);
            }
            break;

        case REG_EBP:

            // Does the offset fit in a byte?
            if (dspInByte)
            {
                dst += emitOutputWord(dst, code | 0x4500);
                dst += emitOutputByte(dst, dsp);
            }
            else
            {
                dst += emitOutputWord(dst, code | 0x8500);
                dst += emitOutputLong(dst, dsp);

#ifdef RELOC_SUPPORT
                if (id->idIsDspReloc())
                {
                    emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
                }
#endif
            }
            break;

        case REG_ESP:

#ifdef LEGACY_BACKEND
            // REG_ESP could be REG_R12, which applies to any instruction
            //
            // This assert isn't too helpful from the OptJit point of view
            //
            // a better question is why is it here at all
            //
            assert((ins == INS_lea) || (ins == INS_mov) || (ins == INS_test) || (ins == INS_cmp) ||
                   (ins == INS_fld && dspIsZero) || (ins == INS_fstp && dspIsZero) ||
                   (ins == INS_fistp && dspIsZero) || IsSSE2Instruction(ins) || IsAVXInstruction(ins) ||
                   (ins == INS_or));
#endif // LEGACY_BACKEND

            // Is the offset 0 or does it at least fit in a byte?
            if (dspIsZero)
            {
                dst += emitOutputWord(dst, code | 0x0400);
                dst += emitOutputByte(dst, 0x24);
            }
            else if (dspInByte)
            {
                dst += emitOutputWord(dst, code | 0x4400);
                dst += emitOutputByte(dst, 0x24);
                dst += emitOutputByte(dst, dsp);
            }
            else
            {
                dst += emitOutputWord(dst, code | 0x8400);
                dst += emitOutputByte(dst, 0x24);
                dst += emitOutputLong(dst, dsp);
#ifdef RELOC_SUPPORT
                if (id->idIsDspReloc())
                {
                    emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
                }
#endif
            }
            break;

        default:

            // Put the register in the opcode
            code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) << 8;

            // Is there a displacement?
            if (dspIsZero)
            {
                // This is simply "[reg]"
                dst += emitOutputWord(dst, code);
            }
            else
            {
                // This is "[reg+dsp]" -- does the offset fit in a byte?
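                // e.g. (illustrative) 'mov eax, [esi+8]' encodes as 8B 46 08 (mod=01,
                // disp8), while 'mov eax, [esi+0x12345]' needs 8B 86 45 23 01 00
                // (mod=10, disp32).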
                if (dspInByte)
                {
                    dst += emitOutputWord(dst, code | 0x4000);
                    dst += emitOutputByte(dst, dsp);
                }
                else
                {
                    dst += emitOutputWord(dst, code | 0x8000);
                    dst += emitOutputLong(dst, dsp);
#ifdef RELOC_SUPPORT
                    if (id->idIsDspReloc())
                    {
                        emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
                    }
#endif
                }
            }
            break;
        }
    }
    else
    {
        unsigned regByte;

        // We have a scaled index operand
        unsigned mul = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);

        // Is the index operand scaled?
        if (mul > 1)
        {
            // Is there a base register?
            if (reg != REG_NA)
            {
                // The address is "[reg + {2/4/8} * rgx + icon]"
                regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) |
                          insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);

                // Emit [ebp + {2/4/8} * rgx] as [ebp + {2/4/8} * rgx + 0]
                if (dspIsZero && reg != REG_EBP)
                {
                    // The address is "[reg + {2/4/8} * rgx]"
                    dst += emitOutputWord(dst, code | 0x0400);
                    dst += emitOutputByte(dst, regByte);
                }
                else
                {
                    // The address is "[reg + {2/4/8} * rgx + disp]"
                    if (dspInByte)
                    {
                        dst += emitOutputWord(dst, code | 0x4400);
                        dst += emitOutputByte(dst, regByte);
                        dst += emitOutputByte(dst, dsp);
                    }
                    else
                    {
                        dst += emitOutputWord(dst, code | 0x8400);
                        dst += emitOutputByte(dst, regByte);
                        dst += emitOutputLong(dst, dsp);

#ifdef RELOC_SUPPORT
                        if (id->idIsDspReloc())
                        {
                            emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
                        }
#endif
                    }
                }
            }
            else
            {
                // The address is "[{2/4/8} * rgx + icon]"
                regByte = insEncodeReg012(ins, REG_EBP, EA_PTRSIZE, nullptr) |
                          insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);

                dst += emitOutputWord(dst, code | 0x0400);
                dst += emitOutputByte(dst, regByte);

                // Special case: jump through a jump table
                if (ins == INS_i_jmp)
                    dsp += (size_t)emitConsBlock;

                dst += emitOutputLong(dst, dsp);
#ifdef RELOC_SUPPORT
                if (id->idIsDspReloc())
                {
                    emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
                }
#endif
            }
        }
        else
        {
            // The address is "[reg+rgx+dsp]"
            regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) |
                      insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr);

            if (dspIsZero && reg != REG_EBP)
            {
                // This is "[reg+rgx]"
                dst += emitOutputWord(dst, code | 0x0400);
                dst += emitOutputByte(dst, regByte);
            }
            else
            {
                // This is "[reg+rgx+dsp]" -- does the offset fit in a byte?
                if (dspInByte)
                {
                    dst += emitOutputWord(dst, code | 0x4400);
                    dst += emitOutputByte(dst, regByte);
                    dst += emitOutputByte(dst, dsp);
                }
                else
                {
                    dst += emitOutputWord(dst, code | 0x8400);
                    dst += emitOutputByte(dst, regByte);
                    dst += emitOutputLong(dst, dsp);

#ifdef RELOC_SUPPORT
                    if (id->idIsDspReloc())
                    {
                        emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
                    }
#endif
                }
            }
        }
    }

    // Now generate the constant value, if present
    if (addc)
    {
        ssize_t cval = addc->cnsVal;

#ifdef _TARGET_AMD64_
        // all these opcodes only take a sign-extended 4-byte immediate
        noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
#endif

        switch (opsz)
        {
        case 0:
        case 4:
        case 8:
            dst += emitOutputLong(dst, cval);
            break;
        case 2:
            dst += emitOutputWord(dst, cval);
            break;
        case 1:
            dst += emitOutputByte(dst, cval);
            break;

        default:
            assert(!"unexpected operand size");
        }

#ifdef RELOC_SUPPORT
        if (addc->cnsReloc)
        {
            emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
            assert(opsz == 4);
        }
#endif
    }

DONE:

    // Does this instruction operate on a GC ref value?
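    // If it does, keep the emitter's GC liveness info in sync: e.g. an IF_RWR_ARD load
    // of a gcref makes idReg1 live as a GC pointer at this code offset
    // (emitGCregLiveUpd), while writes of non-GC values kill the target register
    // (emitGCregDeadUpd) in the else path below.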
if (id->idGCref())
{
    switch (id->idInsFmt())
    {
    case IF_ARD:
    case IF_AWR:
    case IF_ARW:
        break;

    case IF_RRD_ARD:
        break;

    case IF_RWR_ARD:
        emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
        break;

    case IF_RRW_ARD:
        assert(id->idGCref() == GCT_BYREF);

#ifdef DEBUG
        regMaskTP regMask;
        regMask = genRegMask(id->idReg1());

        // VSW 335101
        // Either id->idReg1(), id->idAddr()->iiaAddrMode.amBaseReg, or id->idAddr()->iiaAddrMode.amIndxReg
        // could be a BYREF.
        // For example in the following case:
        //     mov EDX, bword ptr [EBP-78H]    ; EDX becomes BYREF after this instr.
        //     add EAX, bword ptr [EDX+8]      ; It is the EDX that's causing id->idGCref to be GCT_BYREF.
        //                                     ; EAX becomes BYREF after this instr.
        // DD 273707
        //     add EDX, bword ptr [036464E0H]  ; int + static field (technically a GCREF)=BYREF
        regMaskTP baseRegMask;
        if (reg == REG_NA)
        {
            baseRegMask = RBM_NONE;
        }
        else
        {
            baseRegMask = genRegMask(reg);
        }
        regMaskTP indexRegMask;
        if (rgx == REG_NA)
        {
            indexRegMask = RBM_NONE;
        }
        else
        {
            indexRegMask = genRegMask(rgx);
        }

        // r1 could have been a GCREF as GCREF + int=BYREF
        //                            or BYREF+/-int=BYREF
        assert(((reg == REG_NA) && (rgx == REG_NA) && (ins == INS_add || ins == INS_sub)) ||
               (((regMask | baseRegMask | indexRegMask) & emitThisGCrefRegs) && (ins == INS_add)) ||
               (((regMask | baseRegMask | indexRegMask) & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
#endif
        // Mark it as holding a GCT_BYREF
        emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
        break;

    case IF_ARD_RRD:
    case IF_AWR_RRD:
        break;

    case IF_ARD_CNS:
    case IF_AWR_CNS:
        break;

    case IF_ARW_RRD:
    case IF_ARW_CNS:
        assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
        break;

    default:
#ifdef DEBUG
        emitDispIns(id, false, false, false);
#endif
        assert(!"unexpected GC ref instruction format");
    }

    // mul can never produce a GC ref
    assert(!instrIs3opImul(ins));
    assert(ins != INS_mulEAX && ins != INS_imulEAX);
}
else
{
    if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
    {
    }
    else
    {
        switch (id->idInsFmt())
        {
        case IF_RWR_ARD:
            emitGCregDeadUpd(id->idReg1(), dst);
            break;
        default:
            break;
        }

        if (ins == INS_mulEAX || ins == INS_imulEAX)
        {
            emitGCregDeadUpd(REG_EAX, dst);
            emitGCregDeadUpd(REG_EDX, dst);
        }

        // For the three operand imul instruction the target register
        // is encoded in the opcode
        if (instrIs3opImul(ins))
        {
            regNumber tgtReg = inst3opImulReg(ins);
            emitGCregDeadUpd(tgtReg, dst);
        }
    }
}

return dst;
}

/*****************************************************************************
 *
 *  Output an instruction involving a stack frame value.
 */

BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
{
    int  adr;
    int  dsp;
    bool EBPbased;
    bool dspInByte;
    bool dspIsZero;

    instruction ins  = id->idIns();
    emitAttr    size = id->idOpSize();
    size_t      opsz = EA_SIZE_IN_BYTES(size);

    assert(ins != INS_imul || id->idReg1() == REG_EAX || size == EA_4BYTE || size == EA_8BYTE);

    // Is there a large constant operand?
    if (addc && (size > EA_1BYTE))
    {
        ssize_t cval = addc->cnsVal;

        // Does the constant fit in a byte?
        if ((signed char)cval == cval &&
#ifdef RELOC_SUPPORT
            addc->cnsReloc == false &&
#endif
            ins != INS_mov && ins != INS_test)
        {
            if (id->idInsFmt() != IF_SRW_SHF)
                code |= 2;

            opsz = 1;
        }
    }

    // Add VEX prefix if required.
    // There are some callers who already add VEX prefix and call this routine.
    // Therefore, add a VEX prefix only if one is not already present.
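    // (A hedged sketch of what the added prefix looks like: under AVX, a scalar op such
    // as "vaddss xmm1, xmm2, xmm3" with the 3-byte VEX prefix this emitter always emits
    // comes out as C4 E1 6A 58 CB -- C4, then RXB.m-mmmm = E1 (implied 0F), then
    // W.vvvv.L.pp = 6A (vvvv = ~2 = 1101, pp = 10 for the F3 prefix), opcode 58,
    // ModRM CB. The exact bytes naturally depend on the registers involved.)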
    code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);

    // Compute the REX prefix
    if (TakesRexWPrefix(ins, size))
    {
        code = AddRexWPrefix(ins, code);
    }

    // Special case emitting AVX instructions
    if (Is4ByteAVXInstruction(ins))
    {
        size_t regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        // Emit last opcode byte
        assert((code & 0xFF) == 0);
        dst += emitOutputByte(dst, (code >> 8) & 0xFF);
        code = regcode;
    }
    // Is this a 'big' opcode?
    else if (code & 0xFF000000)
    {
        // Output the REX prefix
        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        // Output the highest word of the opcode
        // We need to check again because in case of AVX instructions the leading
        // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
        if (code & 0xFF000000)
        {
            dst += emitOutputWord(dst, code >> 16);
            code &= 0x0000FFFF;
        }
    }
    else if (code & 0x00FF0000)
    {
        // Output the REX prefix
        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        // Output the highest byte of the opcode.
        // We need to check again because in case of AVX instructions the leading
        // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
        if (code & 0x00FF0000)
        {
            dst += emitOutputByte(dst, code >> 16);
            code &= 0x0000FFFF;
        }

        // Use the large version if this is not a byte
        if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) &&
            !IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
            code |= 0x1;
    }
    else if (CodeGen::instIsFP(ins))
    {
        assert(size == EA_4BYTE || size == EA_8BYTE);

        if (size == EA_8BYTE)
            code += 4;
    }
    else if (!IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
    {
        // Is the operand size larger than a byte?
        switch (size)
        {
        case EA_1BYTE:
            break;

        case EA_2BYTE:
            // Output a size prefix for a 16-bit operand
            dst += emitOutputByte(dst, 0x66);
            __fallthrough;

        case EA_4BYTE:
#ifdef _TARGET_AMD64_
        case EA_8BYTE:
#endif // _TARGET_AMD64_
            /* Set the 'w' size bit to indicate 32-bit operation
             * Note that incrementing "code" for INS_call (0xFF) would
             * overflow, whereas setting the lower bit to 1 just works out */
            code |= 0x01;
            break;

#ifdef _TARGET_X86_
        case EA_8BYTE:
            // Double operand - set the appropriate bit.
            // I don't know what a legitimate reason to end up in this case would be
            // considering that FP is taken care of above...
            // what is an instruction that takes a double which is not covered by the
            // above instIsFP? Of the list in instrsxarch, only INS_fprem
            code |= 0x04;
            NO_WAY("bad 8 byte op");
            break;
#endif // _TARGET_X86_

        default:
            NO_WAY("unexpected size");
            break;
        }
    }

    // Output the REX prefix
    dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

    // Figure out the variable's frame position
    int varNum = id->idAddr()->iiaLclVar.lvaVarNum();

    adr = emitComp->lvaFrameAddress(varNum, &EBPbased);
    dsp = adr + id->idAddr()->iiaLclVar.lvaOffset();

    dspInByte = ((signed char)dsp == (int)dsp);
    dspIsZero = (dsp == 0);

#ifdef RELOC_SUPPORT
    // for stack variables the dsp should never be a reloc
    assert(id->idIsDspReloc() == 0);
#endif

    if (EBPbased)
    {
        // EBP-based variable: does the offset fit in a byte?
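        // (e.g., for illustration: "mov eax, [ebp-8]" encodes as 8B 45 F8 with a disp8,
        //  while "mov eax, [ebp-100H]" needs 8B 85 00 FF FF FF with a full disp32)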
if (Is4ByteAVXInstruction(ins)) { if (dspInByte) { dst += emitOutputByte(dst, code | 0x45); dst += emitOutputByte(dst, dsp); } else { dst += emitOutputByte(dst, code | 0x85); dst += emitOutputLong(dst, dsp); } } else { if (dspInByte) { dst += emitOutputWord(dst, code | 0x4500); dst += emitOutputByte(dst, dsp); } else { dst += emitOutputWord(dst, code | 0x8500); dst += emitOutputLong(dst, dsp); } } } else { #if !FEATURE_FIXED_OUT_ARGS // Adjust the offset by the amount currently pushed on the CPU stack dsp += emitCurStackLvl; #endif dspInByte = ((signed char)dsp == (int)dsp); dspIsZero = (dsp == 0); // Does the offset fit in a byte? if (Is4ByteAVXInstruction(ins)) { if (dspInByte) { if (dspIsZero) { dst += emitOutputByte(dst, code | 0x04); dst += emitOutputByte(dst, 0x24); } else { dst += emitOutputByte(dst, code | 0x44); dst += emitOutputByte(dst, 0x24); dst += emitOutputByte(dst, dsp); } } else { dst += emitOutputByte(dst, code | 0x84); dst += emitOutputByte(dst, 0x24); dst += emitOutputLong(dst, dsp); } } else { if (dspInByte) { if (dspIsZero) { dst += emitOutputWord(dst, code | 0x0400); dst += emitOutputByte(dst, 0x24); } else { dst += emitOutputWord(dst, code | 0x4400); dst += emitOutputByte(dst, 0x24); dst += emitOutputByte(dst, dsp); } } else { dst += emitOutputWord(dst, code | 0x8400); dst += emitOutputByte(dst, 0x24); dst += emitOutputLong(dst, dsp); } } } // Now generate the constant value, if present if (addc) { ssize_t cval = addc->cnsVal; #ifdef _TARGET_AMD64_ // all these opcodes only take a sign-extended 4-byte immediate noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc)); #endif switch (opsz) { case 0: case 4: case 8: dst += emitOutputLong(dst, cval); break; case 2: dst += emitOutputWord(dst, cval); break; case 1: dst += emitOutputByte(dst, cval); break; default: assert(!"unexpected operand size"); } #ifdef RELOC_SUPPORT if (addc->cnsReloc) { emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW); assert(opsz == 4); } #endif } // Does this instruction operate on a GC ref value? 
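    // (For illustration: a store such as "mov gword ptr [ebp-10H], esi" -- format IF_SWR
    // with a GC type -- can be the birth point of a GC-live stack slot, which the IF_SWR
    // case below reports via emitGCvarLiveUpd.)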
    if (id->idGCref())
    {
        // Factor in the sub-variable offset
        adr += AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);

        switch (id->idInsFmt())
        {
        case IF_SRD:
            // Read  stack                    -- no change
            break;

        case IF_SWR: // Stack Write (So we need to update GC live for stack var)
            // Write stack                    -- GC var may be born
            emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
            break;

        case IF_SRD_CNS:
            // Read  stack                    -- no change
            break;

        case IF_SWR_CNS:
            // Write stack                    -- no change
            break;

        case IF_SRD_RRD:
        case IF_RRD_SRD:
            // Read  stack   , read  register -- no change
            break;

        case IF_RWR_SRD: // Register Write, Stack Read (So we need to update GC live for register)
            // Read  stack   , write register -- GC reg may be born
            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
            break;

        case IF_SWR_RRD: // Stack Write, Register Read (So we need to update GC live for stack var)
            // Read  register, write stack    -- GC var may be born
            emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
            break;

        case IF_RRW_SRD: // Register Read/Write, Stack Read (So we need to update GC live for register)
            // reg could have been a GCREF as GCREF + int=BYREF
            //                            or BYREF+/-int=BYREF
            assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
            break;

        case IF_SRW_CNS:
        case IF_SRW_RRD:
        // += -= of a byref, no change
        case IF_SRW:
            break;

        default:
#ifdef DEBUG
            emitDispIns(id, false, false, false);
#endif
            assert(!"unexpected GC ref instruction format");
        }
    }
    else
    {
        if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
        {
        }
        else
        {
            switch (id->idInsFmt())
            {
            case IF_RWR_SRD: // Register Write, Stack Read
            case IF_RRW_SRD: // Register Read/Write, Stack Read
                emitGCregDeadUpd(id->idReg1(), dst);
                break;
            default:
                break;
            }

            if (ins == INS_mulEAX || ins == INS_imulEAX)
            {
                emitGCregDeadUpd(REG_EAX, dst);
                emitGCregDeadUpd(REG_EDX, dst);
            }

            // For the three operand imul instruction the target register
            // is encoded in the opcode
            if (instrIs3opImul(ins))
            {
                regNumber tgtReg = inst3opImulReg(ins);
                emitGCregDeadUpd(tgtReg, dst);
            }
        }
    }

    return dst;
}

/*****************************************************************************
 *
 *  Output an instruction with a static data member (class variable).
 */

BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
{
    BYTE*                addr;
    CORINFO_FIELD_HANDLE fldh;
    ssize_t              offs;
    int                  doff;

    emitAttr    size      = id->idOpSize();
    size_t      opsz      = EA_SIZE_IN_BYTES(size);
    instruction ins       = id->idIns();
    bool        isMoffset = false;

    // Get hold of the field handle and offset
    fldh = id->idAddr()->iiaFieldHnd;
    offs = emitGetInsDsp(id);

    // Special case: mov reg, fs:[ddd]
    if (fldh == FLD_GLOBAL_FS)
    {
        dst += emitOutputByte(dst, 0x64);
    }

    // Compute VEX prefix
    // Some of its callers already add VEX prefix and then call this routine.
    // Therefore, add a VEX prefix only if it is not already present.
    code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);

    // Compute the REX prefix
    if (TakesRexWPrefix(ins, size))
    {
        code = AddRexWPrefix(ins, code);
    }

    // Is there a large constant operand?
    if (addc && (size > EA_1BYTE))
    {
        ssize_t cval = addc->cnsVal;
        // Does the constant fit in a byte?
        if ((signed char)cval == cval &&
#ifdef RELOC_SUPPORT
            addc->cnsReloc == false &&
#endif
            ins != INS_mov && ins != INS_test)
        {
            if (id->idInsFmt() != IF_MRW_SHF)
                code |= 2;

            opsz = 1;
        }
    }
#ifdef _TARGET_X86_
    else
    {
        // Special case: "mov eax, [addr]" and "mov [addr], eax"
        // Amd64: this is one case where addr can be 64-bit in size.
This is // currently unused or not enabled on amd64 as it always uses RIP // relative addressing which results in smaller instruction size. if (ins == INS_mov && id->idReg1() == REG_EAX) { switch (id->idInsFmt()) { case IF_RWR_MRD: assert((unsigned)code == (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500)); code &= ~((size_t)0xFFFFFFFF); code |= 0xA0; isMoffset = true; break; case IF_MWR_RRD: assert((unsigned)code == (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500)); code &= ~((size_t)0xFFFFFFFF); code |= 0xA2; isMoffset = true; break; default: break; } } } #endif //_TARGET_X86_ // Special case emitting AVX instructions if (Is4ByteAVXInstruction(ins)) { size_t regcode = insEncodeReg345(ins, id->idReg1(), size, &code); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); // Emit last opcode byte // TODO-XArch-CQ: Right now support 4-byte opcode instructions only assert((code & 0xFF) == 0); dst += emitOutputByte(dst, (code >> 8) & 0xFF); code = 0; // Emit Mod,R/M byte dst += emitOutputByte(dst, regcode | 0x05); } // Is this a 'big' opcode? else if (code & 0xFF000000) { // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); // Output the highest word of the opcode. // Check again since AVX instructions encode leading opcode bytes as part of VEX prefix. if (code & 0xFF000000) { dst += emitOutputWord(dst, code >> 16); } code &= 0x0000FFFF; } else if(code & 0x00FF0000) { // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); // Check again as VEX prefix would have encoded leading opcode byte if (code & 0x00FF0000) { dst += emitOutputByte(dst, code >> 16); code &= 0x0000FFFF; } if ((ins == INS_movsx || ins == INS_movzx || ins == INS_cmpxchg || ins == INS_xchg || ins == INS_xadd || insIsCMOV(ins)) && size != EA_1BYTE) { // movsx and movzx are 'big' opcodes but also have the 'w' bit code++; } } else if (CodeGen::instIsFP(ins)) { assert(size == EA_4BYTE || size == EA_8BYTE); if (size == EA_8BYTE) code += 4; } else { // Is the operand size larger than a byte? switch (size) { case EA_1BYTE: break; case EA_2BYTE: // Output a size prefix for a 16-bit operand dst += emitOutputByte(dst, 0x66); __fallthrough; case EA_4BYTE: #ifdef _TARGET_AMD64_ case EA_8BYTE: #endif // Set the 'w' bit to get the large version code |= 0x1; break; #ifdef _TARGET_X86_ case EA_8BYTE: // Double operand - set the appropriate bit code |= 0x04; break; #endif // _TARGET_X86_ default: assert(!"unexpected size"); } } // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); if (code) { if (id->idInsFmt() == IF_MRD_OFF || id->idInsFmt() == IF_RWR_MRD_OFF || isMoffset) dst += emitOutputByte(dst, code); else dst += emitOutputWord(dst, code); } // Do we have a constant or a static data member? doff = Compiler::eeGetJitDataOffs(fldh); if (doff >= 0) { addr = emitConsBlock + doff; int byteSize = EA_SIZE_IN_BYTES(size); #ifdef _TARGET_AMD64_ // this instruction has a fixed size (4) src. if (ins == INS_cvttss2si || ins == INS_cvtss2sd || ins == INS_vbroadcastss) byteSize = 4; // This has a fixed size (8) source. if (ins == INS_vbroadcastsd) byteSize = 8; #endif // Check that the offset is properly aligned (i.e. 
the ddd in [ddd]) assert((emitChkAlign==false) || (ins == INS_lea) || (((size_t)addr & (byteSize - 1)) == 0)); } else { // Special case: mov reg, fs:[ddd] or mov reg, [ddd] if (jitStaticFldIsGlobAddr(fldh)) { addr = nullptr; } else { addr = (BYTE*)emitComp->info.compCompHnd->getFieldAddress(fldh, nullptr); if (addr == nullptr) { NO_WAY("could not obtain address of static field"); } } } BYTE* target = (addr + offs); if (!isMoffset) { INT32 addlDelta = 0; if (addc) { // It is of the form "ins [disp], immed" // For emitting relocation, we also need to take into account of the // additional bytes of code emitted for immed val. ssize_t cval = addc->cnsVal; #ifdef _TARGET_AMD64_ // all these opcodes only take a sign-extended 4-byte immediate noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc)); #else noway_assert(opsz <= 4); #endif switch (opsz) { case 0: case 4: case 8: addlDelta = -4; break; case 2: addlDelta = -2; break; case 1: addlDelta = -1; break; default: assert(!"unexpected operand size"); unreached(); } } #ifdef _TARGET_AMD64_ // All static field and data section constant accesses should be marked as relocatable noway_assert(id->idIsDspReloc()); dst += emitOutputLong(dst, 0); #else //_TARGET_X86_ dst += emitOutputLong(dst, (int)target); #endif //_TARGET_X86_ #ifdef RELOC_SUPPORT if (id->idIsDspReloc()) { emitRecordRelocation((void*)(dst - sizeof(int)), target, IMAGE_REL_BASED_DISP32, 0, addlDelta); } #endif } else { #ifdef _TARGET_AMD64_ // This code path should never be hit on amd64 since it always uses RIP relative addressing. // In future if ever there is a need to enable this special case, also enable the logic // that sets isMoffset to true on amd64. unreached(); #else //_TARGET_X86_ dst += emitOutputSizeT(dst, (ssize_t)target); #ifdef RELOC_SUPPORT if (id->idIsDspReloc()) { emitRecordRelocation((void*)(dst - sizeof(void*)), target, IMAGE_REL_BASED_MOFFSET); } #endif #endif //_TARGET_X86_ } // Now generate the constant value, if present if (addc) { ssize_t cval = addc->cnsVal; #ifdef _TARGET_AMD64_ // all these opcodes only take a sign-extended 4-byte immediate noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc)); #endif switch (opsz) { case 0: case 4: case 8: dst += emitOutputLong(dst, cval); break; case 2: dst += emitOutputWord(dst, cval); break; case 1: dst += emitOutputByte(dst, cval); break; default: assert(!"unexpected operand size"); } #ifdef RELOC_SUPPORT if (addc->cnsReloc) { emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW); assert(opsz == 4); } #endif } // Does this instruction operate on a GC ref value? 
    if (id->idGCref())
    {
        switch (id->idInsFmt())
        {
        case IF_MRD:
        case IF_MRW:
        case IF_MWR:
            break;

        case IF_RRD_MRD:
            break;

        case IF_RWR_MRD:
            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
            break;

        case IF_MRD_RRD:
        case IF_MWR_RRD:
        case IF_MRW_RRD:
            break;

        case IF_MRD_CNS:
        case IF_MWR_CNS:
        case IF_MRW_CNS:
            break;

        case IF_RRW_MRD:
            assert(id->idGCref() == GCT_BYREF);
            assert(ins == INS_add || ins == INS_sub);

            // Mark it as holding a GCT_BYREF
            emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
            break;

        default:
#ifdef DEBUG
            emitDispIns(id, false, false, false);
#endif
            assert(!"unexpected GC ref instruction format");
        }
    }
    else
    {
        if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
        {
        }
        else
        {
            switch (id->idInsFmt())
            {
            case IF_RWR_MRD:
                emitGCregDeadUpd(id->idReg1(), dst);
                break;
            default:
                break;
            }

            if (ins == INS_mulEAX || ins == INS_imulEAX)
            {
                emitGCregDeadUpd(REG_EAX, dst);
                emitGCregDeadUpd(REG_EDX, dst);
            }

            // For the three operand imul instruction the target register
            // is encoded in the opcode
            if (instrIs3opImul(ins))
            {
                regNumber tgtReg = inst3opImulReg(ins);
                emitGCregDeadUpd(tgtReg, dst);
            }
        }
    }

    return dst;
}

/*****************************************************************************
 *
 *  Output an instruction with one register operand.
 */

BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
{
    size_t code;

    instruction ins  = id->idIns();
    regNumber   reg  = id->idReg1();
    emitAttr    size = id->idOpSize();

    // We would like to update GC info correctly
    assert(!IsSSE2Instruction(ins));
    assert(!IsAVXInstruction(ins));

    // Get the 'base' opcode
    switch (ins)
    {
    case INS_inc:
    case INS_dec:

#ifdef _TARGET_AMD64_
        if (true)
#else
        if (size == EA_1BYTE)
#endif
        {
            assert(INS_inc_l == INS_inc + 1);
            assert(INS_dec_l == INS_dec + 1);

            // Can't use the compact form, use the long form
            ins = (instruction)(ins + 1);
            if (size == EA_2BYTE)
            {
                // Output a size prefix for a 16-bit operand
                dst += emitOutputByte(dst, 0x66);
            }

            code = insCodeRR(ins);
            if (size != EA_1BYTE)
            {
                // Set the 'w' bit to get the large version
                code |= 0x1;
            }

            if (TakesRexWPrefix(ins, size))
            {
                code = AddRexWPrefix(ins, code);
            }

            // Register...
unsigned regcode = insEncodeReg012(ins, reg, size, &code); // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code | (regcode << 8)); } else { if (size == EA_2BYTE) { // Output a size prefix for a 16-bit operand dst += emitOutputByte(dst, 0x66); } dst += emitOutputByte(dst, insCodeRR(ins ) | insEncodeReg012(ins, reg, size, NULL)); } break; case INS_pop: case INS_pop_hide: case INS_push: case INS_push_hide: assert(size == EA_PTRSIZE); code = insEncodeOpreg(ins, reg, size); assert(!TakesVexPrefix(ins)); assert(!TakesRexWPrefix(ins, size)); // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); dst += emitOutputByte(dst, code); break; case INS_seto: case INS_setno: case INS_setb: case INS_setae: case INS_sete: case INS_setne: case INS_setbe: case INS_seta: case INS_sets: case INS_setns: case INS_setpe: case INS_setpo: case INS_setl: case INS_setge: case INS_setle: case INS_setg: assert(id->idGCref() == GCT_NONE); assert(size == EA_1BYTE); code = insEncodeMRreg(ins, reg, EA_1BYTE, insCodeMR(ins)); // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); // We expect this to always be a 'big' opcode assert(code & 0x00FF0000); dst += emitOutputByte(dst, code >> 16); dst += emitOutputWord(dst, code & 0x0000FFFF); break; case INS_mulEAX: case INS_imulEAX: // Kill off any GC refs in EAX or EDX emitGCregDeadUpd(REG_EAX, dst); emitGCregDeadUpd(REG_EDX, dst); __fallthrough; default: assert(id->idGCref() == GCT_NONE); code = insEncodeMRreg(ins, reg, size, insCodeMR(ins)); if (size != EA_1BYTE) { // Set the 'w' bit to get the large version code |= 0x1; if (size == EA_2BYTE) { // Output a size prefix for a 16-bit operand dst += emitOutputByte(dst, 0x66); } } code = AddVexPrefixIfNeeded(ins, code, size); if (TakesRexWPrefix(ins, size)) { code = AddRexWPrefix(ins, code); } // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code); break; } // Are we writing the register? if so then update the GC information switch (id->idInsFmt()) { case IF_RRD: break; case IF_RWR: if (id->idGCref()) emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); else emitGCregDeadUpd(id->idReg1(), dst); break; case IF_RRW: { #ifdef DEBUG regMaskTP regMask = genRegMask(reg); #endif if (id->idGCref()) { // The reg must currently be holding either a gcref or a byref // and the instruction must be inc or dec assert(((emitThisGCrefRegs | emitThisByrefRegs) & regMask) && (ins == INS_inc || ins == INS_dec || ins == INS_inc_l || ins == INS_dec_l)); assert(id->idGCref() == GCT_BYREF); // Mark it as holding a GCT_BYREF emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst); } else { // Can't use RRW to trash a GC ref. It's OK for unverifiable code // to trash Byrefs. assert((emitThisGCrefRegs & regMask) == 0); } } break; default: #ifdef DEBUG emitDispIns(id, false, false, false); #endif assert(!"unexpected instruction format"); break; } return dst; } /***************************************************************************** * * Output an instruction with two register operands. 
 */

BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
{
    size_t code;

    instruction ins  = id->idIns();
    regNumber   reg1 = id->idReg1();
    regNumber   reg2 = id->idReg2();
    emitAttr    size = id->idOpSize();

    // Get the 'base' opcode
    code = insCodeRM(ins);
    code = AddVexPrefixIfNeeded(ins, code, size);
    if (IsSSE2Instruction(ins) || IsAVXInstruction(ins))
    {
        code = insEncodeRMreg(ins, code);

        if (TakesRexWPrefix(ins, size))
        {
            code = AddRexWPrefix(ins, code);
        }
    }
    else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins)))
    {
        code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE);
#ifdef _TARGET_AMD64_
        assert((size < EA_4BYTE) || (insIsCMOV(ins)));
        if ((size == EA_8BYTE) || (ins == INS_movsx))
        {
            code = AddRexWPrefix(ins, code);
        }
    }
    else if (ins == INS_movsxd)
    {
        code = insEncodeRMreg(ins, code);
#endif // _TARGET_AMD64_
    }
    else
    {
        code = insEncodeMRreg(ins, insCodeMR(ins));

        if (ins != INS_test)
        {
            code |= 2;
        }

        switch (size)
        {
        case EA_1BYTE:
            noway_assert(RBM_BYTE_REGS & genRegMask(reg1));
            noway_assert(RBM_BYTE_REGS & genRegMask(reg2));
            break;

        case EA_2BYTE:
            // Output a size prefix for a 16-bit operand
            dst += emitOutputByte(dst, 0x66);
            __fallthrough;

        case EA_4BYTE:
            // Set the 'w' bit to get the large version
            code |= 0x1;
            break;

#ifdef _TARGET_AMD64_
        case EA_8BYTE:
            // TODO-AMD64-CQ: Better way to not emit REX.W when we don't need it
            // Don't need to zero out the high bits explicitly
            if ((ins != INS_xor) || (reg1 != reg2))
            {
                code = AddRexWPrefix(ins, code);
            }

            // Set the 'w' bit to get the large version
            code |= 0x1;
            break;
#endif // _TARGET_AMD64_

        default:
            assert(!"unexpected size");
        }
    }

    unsigned regCode = insEncodeReg345(ins, reg1, size, &code);
    regCode |= insEncodeReg012(ins, reg2, size, &code);

    // In case of AVX instructions that take 3 operands, we generally want to encode reg1
    // as first source. In this case, reg1 is both a source and a destination.
    // The exception is the "merge" 3-operand case, where we have a move instruction, such
    // as movss, and we want to merge the source with itself.
    //
    // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
    // now we use the single source as source1 and source2.
    if (IsThreeOperandBinaryAVXInstruction(ins))
    {
        // encode source/dest operand reg in 'vvvv' bits in 1's complement form
        code = insEncodeReg3456(ins, reg1, size, code);
    }
    else if (IsThreeOperandMoveAVXInstruction(ins))
    {
        // encode source operand reg in 'vvvv' bits in 1's complement form
        code = insEncodeReg3456(ins, reg2, size, code);
    }

    // Output the REX prefix
    dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

    // Is this a 'big' opcode?
    if (code & 0xFF000000)
    {
        // Output the highest word of the opcode
        dst += emitOutputWord(dst, code >> 16);
        code &= 0x0000FFFF;
    }
    else if (code & 0x00FF0000)
    {
        dst += emitOutputByte(dst, code >> 16);
        code &= 0x0000FFFF;
    }

    // If byte 4 is 0xC0, then it contains the Mod/RM encoding for a 3-byte
    // encoding.  Otherwise, this is an instruction with a 4-byte encoding,
    // and the Mod/RM encoding needs to go in the 5th byte.
    // TODO-XArch-CQ: Currently, this will only support registers in the 5th byte.
    // We probably need a different mechanism to identify the 4-byte encodings.
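    // (Sketch of the common register-register case: the Mod/RM byte is
    // 0xC0 | (reg345 << 3) | reg012, so e.g. "add ecx, edx" in the RM form is 03 CA,
    // where CA = C0 | (ecx=001)<<3 | (edx=010). The regCode computed above already has
    // the reg345 part shifted into place.)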
    if ((code & 0xFF) == 0x00)
    {
        // This case happens for AVX instructions only
        assert(IsAVXInstruction(ins));
        if ((code & 0xFF00) == 0xC000)
        {
            dst += emitOutputByte(dst, (0xC0 | regCode));
        }
        else
        {
            dst += emitOutputByte(dst, (code >> 8) & 0xFF);
            dst += emitOutputByte(dst, (0xC0 | regCode));
        }
    }
    else if ((code & 0xFF00) == 0xC000)
    {
        dst += emitOutputWord(dst, code | (regCode << 8));
    }
    else
    {
        dst += emitOutputWord(dst, code);
        dst += emitOutputByte(dst, (0xC0 | regCode));
    }

    // Does this instruction operate on a GC ref value?
    if (id->idGCref())
    {
        switch (id->idInsFmt())
        {
        case IF_RRD_RRD:
            break;

        case IF_RWR_RRD:
            if (emitSyncThisObjReg != REG_NA && emitIGisInProlog(emitCurIG) && reg2 == (int)REG_ARG_0)
            {
                // We're relocating "this" in the prolog
                assert(emitComp->lvaIsOriginalThisArg(0));
                assert(emitComp->lvaTable[0].lvRegister);
                assert(emitComp->lvaTable[0].lvRegNum == reg1);

                if (emitFullGCinfo)
                {
                    emitGCregLiveSet(id->idGCref(), genRegMask(reg1), dst, true);
                    break;
                }
                else
                {
                    /* If emitFullGCinfo==false, then we don't use any regPtrDsc's
                       and so we explicitly note the location of "this" in
                       GCEncode.cpp
                     */
                }
            }

            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
            break;

        case IF_RRW_RRD:
            switch (id->idIns())
            {
            /*
                This must be one of the following cases:

                xor reg, reg        to assign NULL

                and r1 , r2         if (ptr1 && ptr2) ...
                or  r1 , r2         if (ptr1 || ptr2) ...

                add r1 , r2         to compute a normal byref
                sub r1 , r2         to compute a strange byref (VC only)
            */
            case INS_xor:
                assert(id->idReg1() == id->idReg2());
                emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
                break;

            case INS_or:
            case INS_and:
                emitGCregDeadUpd(id->idReg1(), dst);
                break;

            case INS_add:
            case INS_sub:
                assert(id->idGCref() == GCT_BYREF);

#ifdef DEBUG
                regMaskTP regMask;
                regMask = genRegMask(reg1) | genRegMask(reg2);

                // r1/r2 could have been a GCREF as GCREF + int=BYREF
                //                               or BYREF+/-int=BYREF
                assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
                       ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
#endif
                // Mark r1 as holding a byref
                emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
                break;

            default:
#ifdef DEBUG
                emitDispIns(id, false, false, false);
#endif
                assert(!"unexpected GC reg update instruction");
            }

            break;

        case IF_RRW_RRW:
            // This must be "xchg reg1, reg2"
            assert(id->idIns() == INS_xchg);

            // If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC
            // register pointer mask.
#ifndef LEGACY_BACKEND
            GCtype gc1, gc2;

            gc1 = emitRegGCtype(reg1);
            gc2 = emitRegGCtype(reg2);

            if (gc1 != gc2)
            {
                // Kill the GC-info about the GC registers

                if (needsGC(gc1))
                    emitGCregDeadUpd(reg1, dst);

                if (needsGC(gc2))
                    emitGCregDeadUpd(reg2, dst);

                // Now, swap the info

                if (needsGC(gc1))
                    emitGCregLiveUpd(gc1, reg2, dst);

                if (needsGC(gc2))
                    emitGCregLiveUpd(gc2, reg1, dst);
            }
#endif // !LEGACY_BACKEND
            break;

        default:
#ifdef DEBUG
            emitDispIns(id, false, false, false);
#endif
            assert(!"unexpected GC ref instruction format");
        }
    }
    else
    {
        if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
        {
        }
        else
        {
            switch (id->idInsFmt())
            {
            case IF_RRD_CNS:
                // INS_mulEAX can not be used with any of these formats
                assert(ins != INS_mulEAX && ins != INS_imulEAX);

                // For the three operand imul instruction the target
                // register is encoded in the opcode
                if (instrIs3opImul(ins))
                {
                    regNumber tgtReg = inst3opImulReg(ins);
                    emitGCregDeadUpd(tgtReg, dst);
                }
                break;

            case IF_RWR_RRD:
            case IF_RRW_RRD:
                // INS_mov_xmm2i writes to reg2.
                if (ins == INS_mov_xmm2i)
                {
                    emitGCregDeadUpd(id->idReg2(), dst);
                }
                else
                {
                    emitGCregDeadUpd(id->idReg1(), dst);
                }
                break;

            default:
                break;
            }
        }
    }

    return dst;
}

#ifdef FEATURE_AVX_SUPPORT
BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
{
    size_t code;

    instruction ins = id->idIns();
    assert(IsAVXInstruction(ins));
    assert(IsThreeOperandAVXInstruction(ins));
    regNumber targetReg = id->idReg1();
    regNumber src1      = id->idReg2();
    regNumber src2      = id->idReg3();
    emitAttr  size      = id->idOpSize();

    code = insCodeRM(ins);
    code = AddVexPrefixIfNeeded(ins, code, size);
    code = insEncodeRMreg(ins, code);

    if (TakesRexWPrefix(ins, size))
    {
        code = AddRexWPrefix(ins, code);
    }

    unsigned regCode = insEncodeReg345(ins, targetReg, size, &code);
    regCode |= insEncodeReg012(ins, src2, size, &code);
    // encode source operand reg in 'vvvv' bits in 1's complement form
    code = insEncodeReg3456(ins, src1, size, code);

    // Output the REX prefix
    dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

    // Is this a 'big' opcode?
    if (code & 0xFF000000)
    {
        // Output the highest word of the opcode
        dst += emitOutputWord(dst, code >> 16);
        code &= 0x0000FFFF;
    }
    else if (code & 0x00FF0000)
    {
        dst += emitOutputByte(dst, code >> 16);
        code &= 0x0000FFFF;
    }

    // If byte 4 is 0xC0, then it contains the Mod/RM encoding for a 3-byte
    // encoding.  Otherwise, this is an instruction with a 4-byte encoding,
    // and the Mod/RM encoding needs to go in the 5th byte.
    // TODO-XArch-CQ: Currently, this will only support registers in the 5th byte.
    // We probably need a different mechanism to identify the 4-byte encodings.
    if ((code & 0xFF) == 0x00)
    {
        // This case happens for AVX instructions only
        assert(IsAVXInstruction(ins));
        if ((code & 0xFF00) == 0xC000)
        {
            dst += emitOutputByte(dst, (0xC0 | regCode));
        }
        else
        {
            dst += emitOutputByte(dst, (code >> 8) & 0xFF);
            dst += emitOutputByte(dst, (0xC0 | regCode));
        }
    }
    else if ((code & 0xFF00) == 0xC000)
    {
        dst += emitOutputWord(dst, code | (regCode << 8));
    }
    else
    {
        dst += emitOutputWord(dst, code);
        dst += emitOutputByte(dst, (0xC0 | regCode));
    }

    noway_assert(!id->idGCref());

    return dst;
}
#endif

/*****************************************************************************
 *
 *  Output an instruction with a register and constant operands.
 */

BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
{
    size_t      code;
    emitAttr    size      = id->idOpSize();
    instruction ins       = id->idIns();
    regNumber   reg       = id->idReg1();
    ssize_t     val       = emitGetInsSC(id);
    bool        valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);

#ifdef RELOC_SUPPORT
    if (id->idIsCnsReloc())
    {
        valInByte = false; // relocs can't be placed in a byte
    }
#endif

    noway_assert(emitVerifyEncodable(ins, size, reg));

#ifdef _TARGET_AMD64_
    if (IsSSEOrAVXInstruction(ins)) // Handle SSE2 instructions of the form "opcode reg, immed8"
    {
        assert(id->idGCref() == GCT_NONE);
        assert(valInByte);
        assert(ins == INS_psrldq || ins == INS_pslldq);

        // Get the 'base' opcode.
        code = insCodeMI(ins);
        code = AddVexPrefixIfNeeded(ins, code, size);
        code = insEncodeMIreg(ins, reg, size, code);
        assert(code & 0x00FF0000);

        if (TakesVexPrefix(ins))
        {
            // The 'vvvv' bits encode the destination register, which for this case (RI)
            // is the same as the source.
            code = insEncodeReg3456(ins, reg, size, code);
        }

        // In case of psrldq
        //  Reg/Opcode = 3
        //  R/M = reg1
        //
        // In case of pslldq
        //  Reg/Opcode = 7
        //  R/M = reg1
        regNumber regOpcode = (regNumber)((ins == INS_psrldq) ?
3 : 7); unsigned regcode = (insEncodeReg345(ins, regOpcode, size, &code) | insEncodeReg012(ins, reg, size, &code)) << 8; // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); if (code & 0xFF000000) dst += emitOutputWord(dst, code >> 16); else if (code & 0xFF0000) dst += emitOutputByte(dst, code >> 16); dst += emitOutputWord(dst, code | regcode); dst += emitOutputByte(dst, val); return dst; } #endif //_TARGET_AMD64_ // The 'mov' opcode is special if (ins == INS_mov) { code = insCodeACC(ins); assert(code < 0x100); code |= 0x08; // Set the 'w' bit unsigned regcode = insEncodeReg012(ins, reg, size, &code); code |= regcode; // This is INS_mov and will not take VEX prefix assert(!TakesVexPrefix(ins)); if (TakesRexWPrefix(ins, size)) { code = AddRexWPrefix(ins, code); } dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); dst += emitOutputByte(dst, code); if (size == EA_4BYTE) { dst += emitOutputLong(dst, val); } #ifdef _TARGET_AMD64_ else { assert(size == EA_PTRSIZE); dst += emitOutputSizeT(dst, val); } #endif #ifdef RELOC_SUPPORT if (id->idIsCnsReloc()) { emitRecordRelocation((void*)(dst - (unsigned)EA_SIZE(size)), (void*)(size_t)val, IMAGE_REL_BASED_MOFFSET); } #endif goto DONE; } // Decide which encoding is the shortest bool useSigned, useACC; if (reg == REG_EAX && !instrIs3opImul(ins)) { if (size == EA_1BYTE || (ins == INS_test)) { // For al, ACC encoding is always the smallest useSigned = false; useACC = true; } else { /* For ax/eax, we avoid ACC encoding for small constants as we * can emit the small constant and have it sign-extended. * For big constants, the ACC encoding is better as we can use * the 1 byte opcode */ if (valInByte) { // avoid using ACC encoding useSigned = true; useACC = false; } else { useSigned = false; useACC = true; } } } else { useACC = false; if (valInByte) useSigned = true; else useSigned = false; } // "test" has no 's' bit if (ins == INS_test) useSigned = false; // Get the 'base' opcode if (useACC) { assert(!useSigned); code = insCodeACC(ins); } else { assert(!useSigned || valInByte); // Some instructions (at least 'imul') do not have a // r/m, immed form, but do have a dstReg,srcReg,imm8 form. if (valInByte && useSigned && insNeedsRRIb(ins)) { code = insEncodeRRIb(ins, reg, size); } else { code = insCodeMI(ins); code = AddVexPrefixIfNeeded(ins, code, size); code = insEncodeMIreg(ins, reg, size, code); } } switch (size) { case EA_1BYTE: break; case EA_2BYTE: // Output a size prefix for a 16-bit operand dst += emitOutputByte(dst, 0x66); __fallthrough; case EA_4BYTE: // Set the 'w' bit to get the large version code |= 0x1; break; #ifdef _TARGET_AMD64_ case EA_8BYTE: /* Set the 'w' bit to get the large version */ /* and the REX.W bit to get the really large version */ code = AddRexWPrefix(ins, code); code |= 0x1; break; #endif default: assert(!"unexpected size"); } // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); // Does the value fit in a sign-extended byte? // Important! Only set the 's' bit when we have a size larger than EA_1BYTE. // Note: A sign-extending immediate when (size == EA_1BYTE) is invalid in 64-bit mode. if (useSigned && (size > EA_1BYTE)) { // We can just set the 's' bit, and issue an immediate byte code |= 0x2; // Set the 's' bit to use a sign-extended immediate byte. dst += emitOutputWord(dst, code); dst += emitOutputByte(dst, val); } else { // Can we use an accumulator (EAX) encoding? 
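        // (e.g., for illustration: "add eax, 1000H" can use the one-byte accumulator
        //  opcode, giving 05 00 10 00 00, whereas "add ecx, 1000H" costs a Mod/RM byte:
        //  81 C1 00 10 00 00)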
        if (useACC)
            dst += emitOutputByte(dst, code);
        else
            dst += emitOutputWord(dst, code);

        switch (size)
        {
        case EA_1BYTE:
            dst += emitOutputByte(dst, val);
            break;
        case EA_2BYTE:
            dst += emitOutputWord(dst, val);
            break;
        case EA_4BYTE:
            dst += emitOutputLong(dst, val);
            break;
#ifdef _TARGET_AMD64_
        case EA_8BYTE:
            dst += emitOutputLong(dst, val);
            break;
#endif // _TARGET_AMD64_
        default:
            break;
        }

#ifdef RELOC_SUPPORT
        if (id->idIsCnsReloc())
        {
            emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
            assert(size == EA_4BYTE);
        }
#endif
    }

DONE:

    // Does this instruction operate on a GC ref value?
    if (id->idGCref())
    {
        switch (id->idInsFmt())
        {
        case IF_RRD_CNS:
            break;

        case IF_RWR_CNS:
            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
            break;

        case IF_RRW_CNS:
            assert(id->idGCref() == GCT_BYREF);

#ifdef DEBUG
            regMaskTP regMask;
            regMask = genRegMask(reg);
            // FIXNOW review the other places and relax the assert there too

            // The reg must currently be holding either a gcref or a byref
            // GCT_GCREF+int = GCT_BYREF, and GCT_BYREF+/-int = GCT_BYREF
            if (emitThisGCrefRegs & regMask)
                assert(ins == INS_add);
            if (emitThisByrefRegs & regMask)
                assert(ins == INS_add || ins == INS_sub);
#endif
            // Mark it as holding a GCT_BYREF
            emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
            break;

        default:
#ifdef DEBUG
            emitDispIns(id, false, false, false);
#endif
            assert(!"unexpected GC ref instruction format");
        }

        // mul can never produce a GC ref
        assert(!instrIs3opImul(ins));
        assert(ins != INS_mulEAX && ins != INS_imulEAX);
    }
    else
    {
        switch (id->idInsFmt())
        {
        case IF_RRD_CNS:
            // INS_mulEAX can not be used with any of these formats
            assert(ins != INS_mulEAX && ins != INS_imulEAX);

            // For the three operand imul instruction the target
            // register is encoded in the opcode
            if (instrIs3opImul(ins))
            {
                regNumber tgtReg = inst3opImulReg(ins);
                emitGCregDeadUpd(tgtReg, dst);
            }
            break;

        case IF_RRW_CNS:
        case IF_RWR_CNS:
            assert(!instrIs3opImul(ins));
            emitGCregDeadUpd(id->idReg1(), dst);
            break;

        default:
#ifdef DEBUG
            emitDispIns(id, false, false, false);
#endif
            assert(!"unexpected GC ref instruction format");
        }
    }

    return dst;
}

/*****************************************************************************
 *
 *  Output an instruction with a constant operand.
 */

BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
{
    size_t      code;
    instruction ins       = id->idIns();
    emitAttr    size      = id->idOpSize();
    ssize_t     val       = emitGetInsSC(id);
    bool        valInByte = ((signed char)val == val);

    // We would like to update GC info correctly
    assert(!IsSSE2Instruction(ins));
    assert(!IsAVXInstruction(ins));

#ifdef _TARGET_AMD64_
    // all these opcodes take a sign-extended 4-byte immediate, max
    noway_assert(size < EA_8BYTE || ((int)val == val && !id->idIsCnsReloc()));
#endif

#ifdef RELOC_SUPPORT
    if (id->idIsCnsReloc())
    {
        valInByte = false; // relocs can't be placed in a byte

        // Of these instructions only the push instruction can have reloc
        assert(ins == INS_push || ins == INS_push_hide);
    }
#endif

    switch (ins)
    {
    case INS_jge:
        assert((val >= -128) && (val <= 127));
        dst += emitOutputByte(dst, insCode(ins));
        dst += emitOutputByte(dst, val);
        break;

    case INS_loop:
        assert((val >= -128) && (val <= 127));
        dst += emitOutputByte(dst, insCodeMI(ins));
        dst += emitOutputByte(dst, val);
        break;

    case INS_ret:
        assert(val);
        dst += emitOutputByte(dst, insCodeMI(ins));
        dst += emitOutputWord(dst, val);
        break;

    case INS_push_hide:
    case INS_push:
        code = insCodeMI(ins);

        // Does the operand fit in a byte?
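        // (e.g., for illustration: "push 8" fits the sign-extended imm8 form 6A 08,
        //  while "push 1000H" needs the full 68 00 10 00 00 encoding)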
        if (valInByte)
        {
            dst += emitOutputByte(dst, code | 2);
            dst += emitOutputByte(dst, val);
        }
        else
        {
            if (TakesRexWPrefix(ins, size))
            {
                code = AddRexWPrefix(ins, code);
                dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
            }

            dst += emitOutputByte(dst, code);
            dst += emitOutputLong(dst, val);
#ifdef RELOC_SUPPORT
            if (id->idIsCnsReloc())
            {
                emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
            }
#endif
        }

        // Did we push a GC ref value?
        if (id->idGCref())
        {
#ifdef DEBUG
            printf("UNDONE: record GCref push [cns]\n");
#endif
        }

        break;

    default:
        assert(!"unexpected instruction");
    }

    return dst;
}

/*****************************************************************************
 *
 *  Output a local jump instruction.
 *  This function also handles non-jumps that have jump-like characteristics, like
 *  RIP-relative LEA of a label that needs to get bound to an actual address and
 *  processed by branch shortening.
 */

BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i)
{
    unsigned srcOffs;
    unsigned dstOffs;
    ssize_t  distVal;

    instrDescJmp* id  = (instrDescJmp*)i;
    instruction   ins = id->idIns();
    bool          jmp;
    bool          relAddr = true; // does the instruction use relative-addressing?

    // SSE2 doesn't make any sense here
    assert(!IsSSE2Instruction(ins));
    assert(!IsAVXInstruction(ins));

    size_t ssz;
    size_t lsz;

    switch (ins)
    {
    default:
        ssz = JCC_SIZE_SMALL;
        lsz = JCC_SIZE_LARGE;
        jmp = true;
        break;

    case INS_jmp:
        ssz = JMP_SIZE_SMALL;
        lsz = JMP_SIZE_LARGE;
        jmp = true;
        break;

    case INS_call:
        ssz = lsz = CALL_INST_SIZE;
        jmp = false;
        break;

    case INS_push_hide:
    case INS_push:
        ssz = lsz = 5;
        jmp     = false;
        relAddr = false;
        break;

    case INS_mov:
    case INS_lea:
        ssz = lsz = id->idCodeSize();
        jmp     = false;
        relAddr = false;
        break;
    }

    // Figure out the distance to the target
    srcOffs = emitCurCodeOffs(dst);
    dstOffs = id->idAddr()->iiaIGlabel->igOffs;

    if (relAddr)
        distVal = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
    else
        distVal = (ssize_t)emitOffsetToPtr(dstOffs);

    if (dstOffs <= srcOffs)
    {
        // This is a backward jump - distance is known at this point
#if DEBUG_EMIT
        if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
        {
            size_t blkOffs = id->idjIG->igOffs;

            if (INTERESTING_JUMP_NUM == 0)
                printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
            printf("[3] Jump  block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
            printf("[3] Jump        is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
            printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
        }
#endif

        // Can we use a short jump?
        if (jmp && distVal - ssz >= (size_t)JMP_DIST_SMALL_MAX_NEG)
        {
            emitSetShortJump(id);
        }
    }
    else
    {
        // This is a forward jump - distance will be an upper limit
        emitFwdJumps = true;

        // The target offset will be closer by at least 'emitOffsAdj', but only if this
        // jump doesn't cross the hot-cold boundary.
        if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
        {
            dstOffs -= emitOffsAdj;
            distVal -= emitOffsAdj;
        }

        // Record the location of the jump for later patching
        id->idjOffs = dstOffs;

        // Are we overflowing the id->idjOffs bitfield?
if (id->idjOffs != dstOffs) IMPL_LIMITATION("Method is too large"); #if DEBUG_EMIT if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) { size_t blkOffs = id->idjIG->igOffs; if (INTERESTING_JUMP_NUM == 0) printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum); printf("[4] Jump block is at %08X\n" , blkOffs); printf("[4] Jump is at %08X\n" , srcOffs); printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs); } #endif // Can we use a short jump? if (jmp && distVal - ssz <= (size_t)JMP_DIST_SMALL_MAX_POS) { emitSetShortJump(id); } } // Adjust the offset to emit relative to the end of the instruction if (relAddr) distVal -= id->idjShort ? ssz : lsz; #ifdef DEBUG if (0&&emitComp->verbose) { size_t sz = id->idjShort ?ssz:lsz; int distValSize = id->idjShort ? 4 : 8; printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n", (dstOffs <= srcOffs)?"Fwd":"Bwd", emitComp->dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs+sz, distValSize, dstOffs, distVal); } #endif // What size jump should we use? if (id->idjShort) { // Short jump assert(!id->idjKeepLong); assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false); assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL); assert(JMP_SIZE_SMALL == 2); assert(jmp); if (emitInstCodeSz(id) != JMP_SIZE_SMALL) { emitOffsAdj += emitInstCodeSz(id) - JMP_SIZE_SMALL; #ifdef DEBUG if (emitComp->verbose) printf("; NOTE: size of jump [%08X] mis-predicted\n", emitComp->dspPtr(id)); #endif } dst += emitOutputByte(dst, insCode(ins)); // For forward jumps, record the address of the distance value id->idjTemp.idjAddr = (distVal > 0) ? dst : nullptr; dst += emitOutputByte(dst, distVal); } else { size_t code; // Long jump if (jmp) { assert(INS_jmp + (INS_l_jmp - INS_jmp) == INS_l_jmp); assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo ); assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb ); assert(INS_jae + (INS_l_jmp - INS_jmp) == INS_l_jae); assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je ); assert(INS_jne + (INS_l_jmp - INS_jmp) == INS_l_jne); assert(INS_jbe + (INS_l_jmp - INS_jmp) == INS_l_jbe); assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja ); assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js ); assert(INS_jns + (INS_l_jmp - INS_jmp) == INS_l_jns); assert(INS_jpe + (INS_l_jmp - INS_jmp) == INS_l_jpe); assert(INS_jpo + (INS_l_jmp - INS_jmp) == INS_l_jpo); assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl ); assert(INS_jge + (INS_l_jmp - INS_jmp) == INS_l_jge); assert(INS_jle + (INS_l_jmp - INS_jmp) == INS_l_jle); assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg ); code = insCode((instruction)(ins + (INS_l_jmp - INS_jmp))); } else if (ins == INS_push || ins == INS_push_hide) { assert(insCodeMI(INS_push) == 0x68); code = 0x68; } else if (ins == INS_mov) { // Make it look like IF_SWR_CNS so that emitOutputSV emits the r/m32 for us insFormat tmpInsFmt = id->idInsFmt(); insGroup* tmpIGlabel = id->idAddr()->iiaIGlabel; bool tmpDspReloc = id->idIsDspReloc(); id->idInsFmt(IF_SWR_CNS); id->idAddr()->iiaLclVar = ((instrDescLbl*)id)->dstLclVar; id->idSetIsDspReloc(false); dst = emitOutputSV(dst, id, insCodeMI(ins)); // Restore id fields with original values id->idInsFmt(tmpInsFmt); id->idAddr()->iiaIGlabel = tmpIGlabel; id->idSetIsDspReloc(tmpDspReloc); code = 0xCC; } else if (ins == INS_lea) { // Make an instrDesc that looks like IF_RWR_ARD so that emitOutputAM emits the r/m32 for us. // We basically are doing what emitIns_R_AI does. // TODO-XArch-Cleanup: revisit this. 
instrDescAmd idAmdStackLocal; instrDescAmd* idAmd = &idAmdStackLocal; *(instrDesc*)idAmd = *(instrDesc*)id; // copy all the "core" fields memset((BYTE*)idAmd + sizeof(instrDesc), 0, sizeof(instrDescAmd) - sizeof(instrDesc)); // zero out the tail that wasn't copied idAmd->idInsFmt(IF_RWR_ARD); idAmd->idAddr()->iiaAddrMode.amBaseReg = REG_NA; idAmd->idAddr()->iiaAddrMode.amIndxReg = REG_NA; emitSetAmdDisp(idAmd, distVal); // set the displacement idAmd->idSetIsDspReloc(id->idIsDspReloc()); assert(emitGetInsAmdAny(idAmd) == distVal); // make sure "disp" is stored properly UNATIVE_OFFSET sz = emitInsSizeAM(idAmd, insCodeRM(ins)); idAmd->idCodeSize(sz); code = insCodeRM(ins); code |= (insEncodeReg345(ins, id->idReg1(), EA_PTRSIZE, &code) << 8); dst = emitOutputAM(dst, idAmd, code, nullptr); code = 0xCC; // For forward jumps, record the address of the distance value // Hard-coded 4 here because we already output the displacement, as the last thing. id->idjTemp.idjAddr = (dstOffs > srcOffs) ? (dst - 4) : nullptr; // We're done return dst; } else { code = 0xE8; } if (ins != INS_mov) { dst += emitOutputByte(dst, code); if (code & 0xFF00) dst += emitOutputByte(dst, code >> 8); } // For forward jumps, record the address of the distance value id->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : NULL; dst += emitOutputLong(dst, distVal); #ifndef _TARGET_AMD64_ // all REL32 on AMD have to go through recordRelocation if (emitComp->opts.compReloc) #endif { if (!relAddr) { emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)distVal, IMAGE_REL_BASED_HIGHLOW); } else if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs)) { assert(id->idjKeepLong); emitRecordRelocation((void*)(dst - sizeof(INT32)), dst + distVal, IMAGE_REL_BASED_REL32); } } } // Local calls kill all registers if (ins == INS_call && (emitThisGCrefRegs|emitThisByrefRegs)) emitGCregDeadUpdMask(emitThisGCrefRegs|emitThisByrefRegs, dst); return dst; } /***************************************************************************** * * Append the machine code corresponding to the given instruction descriptor * to the code block at '*dp'; the base of the code block is 'bp', and 'ig' * is the instruction group that contains the instruction. Updates '*dp' to * point past the generated code, and returns the size of the instruction * descriptor in bytes. */ #ifdef _PREFAST_ #pragma warning(push) #pragma warning(disable:21000) // Suppress PREFast warning about overly large function #endif size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { assert(emitIssuing); BYTE* dst = *dp; size_t sz = sizeof(instrDesc); instruction ins = id->idIns(); unsigned char callInstrSize = 0; #ifdef DEBUG bool dspOffs = emitComp->opts.dspGCtbls; #endif // DEBUG emitAttr size = id->idOpSize(); assert(REG_NA == (int)REG_NA); assert(ins != INS_imul || size >= EA_4BYTE); // Has no 'w' bit assert(instrIs3opImul(id->idIns()) == 0 || size >= EA_4BYTE); // Has no 'w' bit VARSET_TP VARSET_INIT_NOCOPY(GCvars, VarSetOps::UninitVal()); // What instruction format have we got? 
switch (id->idInsFmt()) { size_t code; size_t regcode; int args; CnsVal cnsVal; BYTE* addr; bool recCall; regMaskTP gcrefRegs; regMaskTP byrefRegs; /********************************************************************/ /* No operands */ /********************************************************************/ case IF_NONE: // the loop alignment pseudo instruction if (ins == INS_align) { sz = TINY_IDSC_SIZE; dst = emitOutputNOP(dst, (-(int)(size_t)dst) & 0x0f); assert(((size_t)dst & 0x0f) == 0); break; } if (ins == INS_nop) { dst = emitOutputNOP(dst, id->idCodeSize()); break; } // the cdq instruction kills the EDX register implicitly if (ins == INS_cdq) emitGCregDeadUpd(REG_EDX, dst); __fallthrough; #if FEATURE_STACK_FP_X87 case IF_TRD: case IF_TWR: case IF_TRW: #endif // FEATURE_STACK_FP_X87 assert(id->idGCref() == GCT_NONE); code = insCodeMR(ins); #ifdef _TARGET_AMD64_ // Support only scalar AVX instructions and hence size is hard coded to 4-byte. code = AddVexPrefixIfNeeded(ins, code, EA_4BYTE); if (ins == INS_cdq && TakesRexWPrefix(ins,id->idOpSize())) { code = AddRexWPrefix(ins, code); } dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); #endif // Is this a 'big' opcode? if (code & 0xFF000000) { // The high word and then the low word dst += emitOutputWord(dst, code >> 16); code &= 0x0000FFFF; dst += emitOutputWord(dst, code); } else if (code & 0x00FF0000) { // The high byte and then the low word dst += emitOutputByte(dst, code >> 16); code &= 0x0000FFFF; dst += emitOutputWord(dst, code); } else if (code & 0xFF00) { // The 2 byte opcode dst += emitOutputWord(dst, code); } else { // The 1 byte opcode dst += emitOutputByte(dst, code); } break; /********************************************************************/ /* Simple constant, local label, method */ /********************************************************************/ case IF_CNS: dst = emitOutputIV(dst, id); sz = emitSizeOfInsDsc(id); break; case IF_LABEL: case IF_RWR_LABEL: case IF_SWR_LABEL: assert(id->idGCref() == GCT_NONE); assert(id->idIsBound()); // TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()? dst = emitOutputLJ(dst, id); sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp)); break; case IF_METHOD: case IF_METHPTR: // Assume we'll be recording this call recCall = true; // Get hold of the argument count and field Handle args = emitGetInsCDinfo(id); // Is this a "fat" call descriptor? if (id->idIsLargeCall()) { instrDescCGCA* idCall = (instrDescCGCA*) id; gcrefRegs = idCall->idcGcrefRegs; byrefRegs = idCall->idcByrefRegs; VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars); sz = sizeof(instrDescCGCA); } else { assert(!id->idIsLargeDsp()); assert(!id->idIsLargeCns()); gcrefRegs = emitDecodeCallGCregs(id); byrefRegs = 0; VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp)); sz = sizeof(instrDesc); } addr = (BYTE*)id->idAddr()->iiaAddr; assert(addr != nullptr); // Some helpers don't get recorded in GC tables if (id->idIsNoGC()) { recCall = false; } // What kind of a call do we have here? 
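        // (For contrast, illustrative encodings: a direct call is "E8 rel32" -- one
        // opcode byte plus a 32-bit pc-relative displacement -- while the
        // indirect-through-pointer form below is "FF 15 disp32", i.e. call dword
        // ptr [addr], where 15 is Mod/RM mod=00 /2 rm=101; on AMD64 that rm encoding
        // is RIP-relative, hence the reloc.)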
        if (id->idInsFmt() == IF_METHPTR)
        {
            // This is call indirect via a method pointer

            code = insCodeMR(ins);
            if (ins == INS_i_jmp)
            {
                code |= 1;
            }

            if (id->idIsDspReloc())
            {
                dst += emitOutputWord(dst, code | 0x0500);
#ifdef _TARGET_AMD64_
                dst += emitOutputLong(dst, 0);
#else
                dst += emitOutputLong(dst, (int)addr);
#endif
                emitRecordRelocation((void*)(dst - sizeof(int)), addr, IMAGE_REL_BASED_DISP32);
            }
            else
            {
#ifdef _TARGET_X86_
                dst += emitOutputWord(dst, code | 0x0500);
#else //_TARGET_AMD64_
                // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
                // This addr mode should never be used while generating relocatable ngen code nor if
                // the addr can be encoded as a pc-relative address.
                noway_assert(!emitComp->opts.compReloc);
                noway_assert(codeGen->genAddrRelocTypeHint((size_t)addr) != IMAGE_REL_BASED_REL32);
                noway_assert((int)addr == (ssize_t)addr);

                // This requires specifying a SIB byte after the ModRM byte.
                dst += emitOutputWord(dst, code | 0x0400);
                dst += emitOutputByte(dst, 0x25);
#endif //_TARGET_AMD64_
                dst += emitOutputLong(dst, (int)addr);
            }
            goto DONE_CALL;
        }

        // Else
        // This is call direct where we know the target, thus we can
        // use a direct call; the target to jump to is in iiaAddr.
        assert(id->idInsFmt() == IF_METHOD);

        // Output the call opcode followed by the target distance
        dst += (ins == INS_l_jmp) ? emitOutputByte(dst, insCode(ins)) : emitOutputByte(dst, insCodeMI(ins));

        ssize_t offset;
#ifdef _TARGET_AMD64_
        // All REL32 on Amd64 go through recordRelocation.  Here we will output zero to advance dst.
        offset = 0;
        assert(id->idIsDspReloc());
#else
        // Calculate PC relative displacement.
        // Although you think we should be using sizeof(void*), the x86 and x64 instruction set
        // only allow a 32-bit offset, so we correctly use sizeof(INT32)
        offset = addr - (dst + sizeof(INT32));
#endif

        dst += emitOutputLong(dst, offset);

#ifdef RELOC_SUPPORT
        if (id->idIsDspReloc())
        {
            emitRecordRelocation((void*)(dst - sizeof(INT32)), addr, IMAGE_REL_BASED_REL32);
        }
#endif

    DONE_CALL:

        /* We update the GC info before the call as the variables cannot be
           used by the call. Killing variables before the call helps with
           boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
           If we ever track aliased variables (which could be used by the
           call), we would have to keep them alive past the call.
         */
        assert(FitsIn<unsigned char>(dst - *dp));
        callInstrSize = static_cast<unsigned char>(dst - *dp);
        emitUpdateLiveGCvars(GCvars, *dp);

        // If the method returns a GC ref, mark EAX appropriately
        if (id->idGCref() == GCT_GCREF)
            gcrefRegs |= RBM_EAX;
        else if (id->idGCref() == GCT_BYREF)
            byrefRegs |= RBM_EAX;

        // If the GC register set has changed, report the new set
        if (gcrefRegs != emitThisGCrefRegs)
            emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);

        if (byrefRegs != emitThisByrefRegs)
            emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);

        if (recCall || args)
        {
            // For callee-pop, all arguments will be popped after the call.
            // For caller-pop, any GC arguments will go dead after the call.

            assert(callInstrSize != 0);

            if (args >= 0)
                emitStackPop(dst, /*isCall*/ true, callInstrSize, args);
            else
                emitStackKillArgs(dst, -args, callInstrSize);
        }

        // Do we need to record a call location for GC purposes?
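        // (Call sites are recorded so the GC can find the live GC refs at the return
        // address; when emitFullGCinfo is set, liveness is reported via the full
        // pointer-tracking tables instead, so no separate per-call-site record is
        // needed -- this is our reading of the condition below.)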
if (!emitFullGCinfo && recCall) { assert(callInstrSize != 0); emitRecordGCcall(dst, callInstrSize); } #ifdef DEBUG if (ins == INS_call) { emitRecordCallSite(emitCurCodeOffs(*dp), id->idDebugOnlyInfo()->idCallSig, (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); } #endif // DEBUG break; /********************************************************************/ /* One register operand */ /********************************************************************/ case IF_RRD: case IF_RWR: case IF_RRW: dst = emitOutputR(dst, id); sz = TINY_IDSC_SIZE; break; /********************************************************************/ /* Register and register/constant */ /********************************************************************/ case IF_RRW_SHF: code = insCodeMR(ins); // Emit the VEX prefix if it exists code = AddVexPrefixIfNeeded(ins, code, size); code = insEncodeMRreg(ins, id->idReg1(), size, code); // set the W bit if (size != EA_1BYTE) { code |= 1; } // Emit the REX prefix if it exists if (TakesRexWPrefix(ins, size)) { code = AddRexWPrefix(ins, code); } // Output a size prefix for a 16-bit operand if (size == EA_2BYTE) { dst += emitOutputByte(dst, 0x66); } dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code); dst += emitOutputByte(dst, emitGetInsSC(id)); sz = emitSizeOfInsDsc(id); break; case IF_RRD_RRD: case IF_RWR_RRD: case IF_RRW_RRD: case IF_RRW_RRW: dst = emitOutputRR(dst, id); sz = TINY_IDSC_SIZE; break; case IF_RRD_CNS: case IF_RWR_CNS: case IF_RRW_CNS: dst = emitOutputRI(dst, id); sz = emitSizeOfInsDsc(id); break; #ifdef FEATURE_AVX_SUPPORT case IF_RWR_RRD_RRD: dst = emitOutputRRR(dst, id); sz = emitSizeOfInsDsc(id); break; #endif case IF_RRW_RRW_CNS: assert(id->idGCref() == GCT_NONE); // Get the 'base' opcode (it's a big one) // Also, determine which operand goes where in the ModRM byte. regNumber mReg; regNumber rReg; // if (ins == INS_shld || ins == INS_shrd || ins == INS_vextractf128 || ins == INS_vinsertf128) if (hasCodeMR(ins)) { code = insCodeMR(ins); // Emit the VEX prefix if it exists code = AddVexPrefixIfNeeded(ins, code, size); code = insEncodeMRreg(ins, code); mReg = id->idReg1(); rReg = id->idReg2(); } else { code = insCodeRM(ins); // Emit the VEX prefix if it exists code = AddVexPrefixIfNeeded(ins, code, size); code = insEncodeRMreg(ins, code); mReg = id->idReg2(); rReg = id->idReg1(); } assert(code & 0x00FF0000); #ifdef FEATURE_AVX_SUPPORT if (TakesVexPrefix(ins)) { if (IsThreeOperandBinaryAVXInstruction(ins)) { // Encode source/dest operand reg in 'vvvv' bits in 1's complement form // This code will have to change when we support 3 operands. // For now, we always overload this source with the destination (always reg1). // (Though we will need to handle the few ops that can have the 'vvvv' bits as destination, // e.g. pslldq, when/if we support those instructions with 2 registers.) // (see x64 manual Table 2-9. Instructions with a VEX.vvvv destination) code = insEncodeReg3456(ins, id->idReg1(), size, code); } else if (IsThreeOperandMoveAVXInstruction(ins)) { // This is a "merge" move instruction. 
    case IF_RRD_RRD:
    case IF_RWR_RRD:
    case IF_RRW_RRD:
    case IF_RRW_RRW:
        dst = emitOutputRR(dst, id);
        sz = TINY_IDSC_SIZE;
        break;

    case IF_RRD_CNS:
    case IF_RWR_CNS:
    case IF_RRW_CNS:
        dst = emitOutputRI(dst, id);
        sz = emitSizeOfInsDsc(id);
        break;

#ifdef FEATURE_AVX_SUPPORT
    case IF_RWR_RRD_RRD:
        dst = emitOutputRRR(dst, id);
        sz = emitSizeOfInsDsc(id);
        break;
#endif

    case IF_RRW_RRW_CNS:
        assert(id->idGCref() == GCT_NONE);

        // Get the 'base' opcode (it's a big one)
        // Also, determine which operand goes where in the ModRM byte.
        regNumber mReg;
        regNumber rReg;
        // if (ins == INS_shld || ins == INS_shrd || ins == INS_vextractf128 || ins == INS_vinsertf128)
        if (hasCodeMR(ins))
        {
            code = insCodeMR(ins);
            // Emit the VEX prefix if it exists
            code = AddVexPrefixIfNeeded(ins, code, size);
            code = insEncodeMRreg(ins, code);
            mReg = id->idReg1();
            rReg = id->idReg2();
        }
        else
        {
            code = insCodeRM(ins);
            // Emit the VEX prefix if it exists
            code = AddVexPrefixIfNeeded(ins, code, size);
            code = insEncodeRMreg(ins, code);
            mReg = id->idReg2();
            rReg = id->idReg1();
        }
        assert(code & 0x00FF0000);

#ifdef FEATURE_AVX_SUPPORT
        if (TakesVexPrefix(ins))
        {
            if (IsThreeOperandBinaryAVXInstruction(ins))
            {
                // Encode source/dest operand reg in 'vvvv' bits in 1's complement form
                // This code will have to change when we support 3 operands.
                // For now, we always overload this source with the destination (always reg1).
                // (Though we will need to handle the few ops that can have the 'vvvv' bits as destination,
                // e.g. pslldq, when/if we support those instructions with 2 registers.)
                // (see x64 manual Table 2-9. Instructions with a VEX.vvvv destination)
                code = insEncodeReg3456(ins, id->idReg1(), size, code);
            }
            else if (IsThreeOperandMoveAVXInstruction(ins))
            {
                // This is a "merge" move instruction.
                // Encode source operand reg in 'vvvv' bits in 1's complement form
                code = insEncodeReg3456(ins, id->idReg2(), size, code);
            }
        }
#endif // FEATURE_AVX_SUPPORT

        regcode = (insEncodeReg345(ins, rReg, size, &code) |
                   insEncodeReg012(ins, mReg, size, &code)) << 8;

        // Output the REX prefix
        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        if (UseAVX() && Is4ByteAVXInstruction(ins))
        {
            // We just need to output the last byte of the opcode.
            assert((code & 0xFF) == 0);
            assert((code & 0xFF00) != 0xC000);
            dst += emitOutputByte(dst, (code >> 8) & 0xFF);
            code = 0;
        }
        else if (code & 0xFF000000)
        {
            dst += emitOutputWord(dst, code >> 16);
            code &= 0x0000FFFF;
        }
        else if (code & 0x00FF0000)
        {
            dst += emitOutputByte(dst, code >> 16);
            code &= 0x0000FFFF;
        }

        // Note that regcode is shifted by 8-bits above to align with RM byte.
        if (code != 0)
        {
            assert((code & 0xFF00) == 0xC000);
            dst += emitOutputWord(dst, code | regcode);
        }
        else
        {
            // This case occurs for AVX instructions.
            // Note that regcode is left shifted by 8-bits.
            assert(Is4ByteAVXInstruction(ins));
            dst += emitOutputByte(dst, 0xC0 | (regcode >> 8));
        }

        dst += emitOutputByte(dst, emitGetInsSC(id));
        sz = emitSizeOfInsDsc(id);
        break;
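        // Illustrative note for the 'vvvv' handling above: when the caller requests
        // the two-operand form of a three-operand AVX binary op, e.g.
        // "shufps xmm1, xmm2, imm8", the destination (reg1, here xmm1) is duplicated
        // into the VEX 'vvvv' field so the encoded instruction is effectively
        // "vshufps xmm1, xmm1, xmm2, imm8". (Example registers are made up.)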
    /********************************************************************/
    /*                      Address mode operand                        */
    /********************************************************************/

    case IF_ARD:
    case IF_AWR:
    case IF_ARW:

#if FEATURE_STACK_FP_X87
    case IF_TRD_ARD:
    case IF_TWR_ARD:
    case IF_TRW_ARD:
    // case IF_ARD_TRD:
    case IF_AWR_TRD:
    // case IF_ARW_TRD:
#endif // FEATURE_STACK_FP_X87

        dst = emitCodeWithInstructionSize(dst, emitOutputAM(dst, id, insCodeMR(ins)), &callInstrSize);

        switch (ins)
        {
        case INS_call:

        IND_CALL:
            // Get hold of the argument count and method handle
            args = emitGetInsCIargs(id);

            // Is this a "fat" call descriptor?
            if (id->idIsLargeCall())
            {
                instrDescCGCA* idCall = (instrDescCGCA*)id;

                gcrefRegs = idCall->idcGcrefRegs;
                byrefRegs = idCall->idcByrefRegs;
                VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
                sz = sizeof(instrDescCGCA);
            }
            else
            {
                assert(!id->idIsLargeDsp());
                assert(!id->idIsLargeCns());

                gcrefRegs = emitDecodeCallGCregs(id);
                byrefRegs = 0;
                VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
                sz = sizeof(instrDesc);
            }

            recCall = true;

            goto DONE_CALL;

        default:
            sz = emitSizeOfInsDsc(id);
            break;
        }
        break;

    case IF_RRD_ARD:
    case IF_RWR_ARD:
    case IF_RRW_ARD:
        code = insCodeRM(ins);
        code = AddVexPrefixIfNeeded(ins, code, size);
        regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
        dst = emitOutputAM(dst, id, code | regcode);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_ARD_RRD:
    case IF_AWR_RRD:
    case IF_ARW_RRD:
        code = insCodeMR(ins);
        code = AddVexPrefixIfNeeded(ins, code, size);
        regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
        dst = emitOutputAM(dst, id, code | regcode);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_ARD_CNS:
    case IF_AWR_CNS:
    case IF_ARW_CNS:
        emitGetInsAmdCns(id, &cnsVal);
        dst = emitOutputAM(dst, id, insCodeMI(ins), &cnsVal);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_ARW_SHF:
        emitGetInsAmdCns(id, &cnsVal);
        dst = emitOutputAM(dst, id, insCodeMR(ins), &cnsVal);
        sz = emitSizeOfInsDsc(id);
        break;

    /********************************************************************/
    /*                      Stack-based operand                         */
    /********************************************************************/

    case IF_SRD:
    case IF_SWR:
    case IF_SRW:

#if FEATURE_STACK_FP_X87
    case IF_TRD_SRD:
    case IF_TWR_SRD:
    case IF_TRW_SRD:
    // case IF_SRD_TRD:
    case IF_SWR_TRD:
    // case IF_SRW_TRD:
#endif // FEATURE_STACK_FP_X87

        assert(ins != INS_pop_hide);
        if (ins == INS_pop)
        {
            // The offset in "pop [ESP+xxx]" is relative to the new ESP value

#if !FEATURE_FIXED_OUT_ARGS
            emitCurStackLvl -= sizeof(int);
#endif
            dst = emitOutputSV(dst, id, insCodeMR(ins));

#if !FEATURE_FIXED_OUT_ARGS
            emitCurStackLvl += sizeof(int);
#endif
            break;
        }

        dst = emitCodeWithInstructionSize(dst, emitOutputSV(dst, id, insCodeMR(ins)), &callInstrSize);

        if (ins == INS_call)
            goto IND_CALL;
        break;
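        // Illustrative example for the INS_pop adjustment above (made-up offset):
        // for "pop [ESP+8]" the processor increments ESP *before* computing the
        // effective address of the destination, so temporarily biasing
        // emitCurStackLvl by sizeof(int) keeps the emitted displacement consistent
        // with the post-pop ESP value.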
    case IF_SRD_CNS:
    case IF_SWR_CNS:
    case IF_SRW_CNS:
        emitGetInsCns(id, &cnsVal);
        dst = emitOutputSV(dst, id, insCodeMI(ins), &cnsVal);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_SRW_SHF:
        emitGetInsCns(id, &cnsVal);
        dst = emitOutputSV(dst, id, insCodeMR(ins), &cnsVal);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_RRD_SRD:
    case IF_RWR_SRD:
    case IF_RRW_SRD:
        code = insCodeRM(ins);

        // 4-byte AVX instructions are special cased inside emitOutputSV
        // since they do not have space to encode the ModRM byte.
        if (Is4ByteAVXInstruction(ins))
        {
            dst = emitOutputSV(dst, id, code);
        }
        else
        {
            code = AddVexPrefixIfNeeded(ins, code, size);

            // In case of AVX instructions that take 3 operands, encode reg1 as first source.
            // Note that reg1 is both a source and a destination.
            //
            // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
            // now we use the single source as source1 and source2.
            // For this format, moves do not support a third operand, so we only need to handle the binary ops.
            if (IsThreeOperandBinaryAVXInstruction(ins))
            {
                // encode source operand reg in 'vvvv' bits in 1's complement form
                code = insEncodeReg3456(ins, id->idReg1(), size, code);
            }

            regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
            dst = emitOutputSV(dst, id, code | regcode);
        }
        break;

    case IF_SRD_RRD:
    case IF_SWR_RRD:
    case IF_SRW_RRD:
        code = insCodeMR(ins);
        code = AddVexPrefixIfNeeded(ins, code, size);

        // In case of AVX instructions that take 3 operands, encode reg1 as first source.
        // Note that reg1 is both a source and a destination.
        //
        // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
        // now we use the single source as source1 and source2.
        // For this format, moves do not support a third operand, so we only need to handle the binary ops.
        if (IsThreeOperandBinaryAVXInstruction(ins))
        {
            // encode source operand reg in 'vvvv' bits in 1's complement form
            code = insEncodeReg3456(ins, id->idReg1(), size, code);
        }

        regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
        dst = emitOutputSV(dst, id, code | regcode);
        break;

    /********************************************************************/
    /*                     Direct memory address                        */
    /********************************************************************/

    case IF_MRD:
    case IF_MRW:
    case IF_MWR:

#if FEATURE_STACK_FP_X87
    case IF_TRD_MRD:
    case IF_TWR_MRD:
    case IF_TRW_MRD:
    // case IF_MRD_TRD:
    case IF_MWR_TRD:
    // case IF_MRW_TRD:
#endif // FEATURE_STACK_FP_X87

        noway_assert(ins != INS_call);
        dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_MRD_OFF:
        dst = emitOutputCV(dst, id, insCodeMI(ins));
        break;

    case IF_RRD_MRD:
    case IF_RWR_MRD:
    case IF_RRW_MRD:
        code = insCodeRM(ins);
        // Special case 4-byte AVX instructions
        if (Is4ByteAVXInstruction(ins))
        {
            dst = emitOutputCV(dst, id, code);
        }
        else
        {
            code = AddVexPrefixIfNeeded(ins, code, size);

            // In case of AVX instructions that take 3 operands, encode reg1 as first source.
            // Note that reg1 is both a source and a destination.
            //
            // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
            // now we use the single source as source1 and source2.
            // For this format, moves do not support a third operand, so we only need to handle the binary ops.
            if (IsThreeOperandBinaryAVXInstruction(ins))
            {
                // encode source operand reg in 'vvvv' bits in 1's complement form
                code = insEncodeReg3456(ins, id->idReg1(), size, code);
            }

            regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
            dst = emitOutputCV(dst, id, code | regcode | 0x0500);
        }
        sz = emitSizeOfInsDsc(id);
        break;
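        // Note on the Is4ByteAVXInstruction special cases above (explanatory only):
        // these opcodes carry 0F 38 / 0F 3A escape bytes, so all four opcode bytes
        // are significant and 'code' has no spare byte in which to pre-merge the
        // ModRM register bits; emitOutputSV/emitOutputCV derive those bits themselves.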
    case IF_RWR_MRD_OFF:
        code = insCode(ins);
        code = AddVexPrefixIfNeeded(ins, code, size);

        // In case of AVX instructions that take 3 operands, encode reg1 as first source.
        // Note that reg1 is both a source and a destination.
        //
        // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
        // now we use the single source as source1 and source2.
        // For this format, moves do not support a third operand, so we only need to handle the binary ops.
        if (IsThreeOperandBinaryAVXInstruction(ins))
        {
            // encode source operand reg in 'vvvv' bits in 1's complement form
            code = insEncodeReg3456(ins, id->idReg1(), size, code);
        }

        regcode = insEncodeReg012(id->idIns(), id->idReg1(), size, &code);
        dst = emitOutputCV(dst, id, code | 0x30 | regcode);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_MRD_RRD:
    case IF_MWR_RRD:
    case IF_MRW_RRD:
        code = insCodeMR(ins);
#ifdef FEATURE_AVX_SUPPORT
        code = AddVexPrefixIfNeeded(ins, code, size);

        // In case of AVX instructions that take 3 operands, encode reg1 as first source.
        // Note that reg1 is both a source and a destination.
        //
        // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
        // now we use the single source as source1 and source2.
        // For this format, moves do not support a third operand, so we only need to handle the binary ops.
        if (IsThreeOperandBinaryAVXInstruction(ins))
        {
            // encode source operand reg in 'vvvv' bits in 1's complement form
            code = insEncodeReg3456(ins, id->idReg1(), size, code);
        }
#endif // FEATURE_AVX_SUPPORT

        regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
        dst = emitOutputCV(dst, id, code | regcode | 0x0500);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_MRD_CNS:
    case IF_MWR_CNS:
    case IF_MRW_CNS:
        emitGetInsDcmCns(id, &cnsVal);
        dst = emitOutputCV(dst, id, insCodeMI(ins) | 0x0500, &cnsVal);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_MRW_SHF:
        emitGetInsDcmCns(id, &cnsVal);
        dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500, &cnsVal);
        sz = emitSizeOfInsDsc(id);
        break;

#if FEATURE_STACK_FP_X87

    /********************************************************************/
    /*                  FP coprocessor stack operands                   */
    /********************************************************************/

    case IF_TRD_FRD:
    case IF_TWR_FRD:
    case IF_TRW_FRD:
        assert(id->idGCref() == GCT_NONE);
        dst += emitOutputWord(dst, insCodeMR(ins) | 0xC000 | (id->idReg1() << 8));
        break;

    case IF_FRD_TRD:
    case IF_FWR_TRD:
    case IF_FRW_TRD:
        assert(id->idGCref() == GCT_NONE);
        dst += emitOutputWord(dst, insCodeMR(ins) | 0xC004 | (id->idReg1() << 8));
        break;

#endif // FEATURE_STACK_FP_X87

    /********************************************************************/
    /*                              oops                                */
    /********************************************************************/

    default:

#ifdef DEBUG
        printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
        assert(!"don't know how to encode this instruction");
#endif
        break;
    }

    // Make sure we set the instruction descriptor size correctly
    assert(sz == emitSizeOfInsDsc(id));
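    // Illustrative example of the stack-level tracking below (x86, 4-byte slots,
    // made-up constant): "sub esp, 8" is recorded as pushing 8 / sizeof(void*) = 2
    // slots and "add esp, 8" as popping 2 slots, so emitCurStackLvl stays in sync
    // with the explicit push/pop instructions.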
#if !FEATURE_FIXED_OUT_ARGS

    // Make sure we keep the current stack level up to date
    if (!emitIGisInProlog(ig) && !emitIGisInEpilog(ig))
    {
        switch (ins)
        {
        case INS_push:
            // Please note: {INS_push_hide,IF_LABEL} is used to push the address of the
            // finally block for calling it locally for an op_leave.
            emitStackPush(dst, id->idGCref());
            break;

        case INS_pop:
            emitStackPop(dst, false, /*callInstrSize*/0, 1);
            break;

        case INS_sub:
            // Check for "sub ESP, icon"
            if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
            {
                assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
                emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / sizeof(void*)));
            }
            break;

        case INS_add:
            // Check for "add ESP, icon"
            if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
            {
                assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
                emitStackPop(dst, /*isCall*/false, /*callInstrSize*/0, (unsigned)(emitGetInsSC(id) / sizeof(void*)));
            }
            break;

        default:
            break;
        }
    }

#endif // !FEATURE_FIXED_OUT_ARGS

    assert((int)emitCurStackLvl >= 0);

    // Only epilog "instructions" and some pseudo-instrs
    // are allowed not to generate any code
    assert(*dp != dst || emitInstHasNoCode(ins));

#ifdef DEBUG
    if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
    {
        emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));
    }

    if (emitComp->compDebugBreak)
    {
        // Setting JitEmitPrintRefRegs=1 will print out emitThisGCrefRegs and emitThisByrefRegs
        // at the beginning of this method.
        static ConfigDWORD fJitEmitPrintRefRegs;
        if (fJitEmitPrintRefRegs.val(CLRConfig::INTERNAL_JitEmitPrintRefRegs) != 0)
        {
            printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n",
                   id->idDebugOnlyInfo()->idNum);
            printf("  emitThisGCrefRegs(0x%p)=", emitComp->dspPtr(&emitThisGCrefRegs));
            printRegMaskInt(emitThisGCrefRegs);
            emitDispRegSet(emitThisGCrefRegs);
            printf("\n");
            printf("  emitThisByrefRegs(0x%p)=", emitComp->dspPtr(&emitThisByrefRegs));
            printRegMaskInt(emitThisByrefRegs);
            emitDispRegSet(emitThisByrefRegs);
            printf("\n");
        }

        // For example, setting JitBreakEmitOutputInstr=a6 will break when this method is called for
        // emitting instruction a6 (i.e. IN00a6 in jitdump).
        static ConfigDWORD fJitBreakEmitOutputInstr;
        if ((unsigned)fJitBreakEmitOutputInstr.val(CLRConfig::INTERNAL_JitBreakEmitOutputInstr) == id->idDebugOnlyInfo()->idNum)
        {
            assert(!"JitBreakEmitOutputInstr reached");
        }
    }
#endif

#ifdef TRANSLATE_PDB
    if (*dp != dst)
    {
        // only map instruction groups to instruction groups
        MapCode(id->idDebugOnlyInfo()->idilStart, *dp);
    }
#endif

    *dp = dst;

#ifdef DEBUG
    if (ins == INS_mulEAX || ins == INS_imulEAX)
    {
        // INS_mulEAX has an implicit target of Edx:Eax. Make sure
        // that we detected this and cleared its GC-status.
        assert(((RBM_EAX|RBM_EDX) & (emitThisGCrefRegs|emitThisByrefRegs)) == 0);
    }

    if (instrIs3opImul(ins))
    {
        // The target of the 3-operand imul is implicitly encoded. Make sure
        // that we detected the implicit register and cleared its GC-status.
        regMaskTP regMask = genRegMask(inst3opImulReg(ins));
        assert((regMask & (emitThisGCrefRegs|emitThisByrefRegs)) == 0);
    }
#endif

    return sz;
}

#ifdef _PREFAST_
#pragma warning(pop)
#endif

/*****************************************************************************/
/*****************************************************************************/

#endif // defined(_TARGET_XARCH_)