diff options
Diffstat (limited to 'src/jit/emitxarch.cpp')
-rw-r--r-- | src/jit/emitxarch.cpp | 710 |
1 files changed, 318 insertions, 392 deletions
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index d43f766ee8..b6bacfa520 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -30,6 +30,15 @@ bool IsSSE2Instruction(instruction ins) return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_SSE2_INSTRUCTION); } +bool IsSSE4Instruction(instruction ins) +{ +#ifdef LEGACY_BACKEND + return false; +#else + return (ins >= INS_FIRST_SSE4_INSTRUCTION && ins <= INS_LAST_SSE4_INSTRUCTION); +#endif +} + bool IsSSEOrAVXInstruction(instruction ins) { #ifdef FEATURE_AVX_SUPPORT @@ -48,7 +57,9 @@ bool emitter::IsAVXInstruction(instruction ins) #endif } +#ifdef _TARGET_AMD64_ #define REX_PREFIX_MASK 0xFF00000000LL +#endif // _TARGET_AMD64_ #ifdef FEATURE_AVX_SUPPORT // Returns true if the AVX instruction is a binary operator that requires 3 operands. @@ -75,10 +86,8 @@ bool emitter::IsThreeOperandBinaryAVXInstruction(instruction ins) ins == INS_maxss || ins == INS_maxsd || ins == INS_andnps || ins == INS_andnpd || ins == INS_paddb || ins == INS_paddw || ins == INS_paddd || ins == INS_paddq || ins == INS_psubb || ins == INS_psubw || ins == INS_psubd || ins == INS_psubq || ins == INS_pmuludq || ins == INS_pxor || ins == INS_pmaxub || - ins == INS_pminub || ins == INS_pmaxsw || ins == INS_pminsw || ins == INS_insertps || ins == INS_vinsertf128 || - ins == INS_punpckldq - - ); + ins == INS_pminub || ins == INS_pmaxsw || ins == INS_pminsw || ins == INS_insertps || + ins == INS_vinsertf128 || ins == INS_punpckldq || ins == INS_phaddd); } // Returns true if the AVX instruction is a move operator that requires 3 operands. @@ -92,22 +101,45 @@ bool emitter::IsThreeOperandMoveAVXInstruction(instruction ins) return IsAVXInstruction(ins) && (ins == INS_movlpd || ins == INS_movlps || ins == INS_movhpd || ins == INS_movhps || ins == INS_movss); } -#endif // FEATURE_AVX_SUPPORT -// Returns true if the AVX instruction is a 4-byte opcode. 
+// ------------------------------------------------------------------------------ +// Is4ByteAVXInstruction: Returns true if the AVX instruction is a 4-byte opcode. +// +// Arguments: +// ins - instruction +// // Note that this should be true for any of the instructions in instrsXArch.h // that use the SSE38 or SSE3A macro. +// // TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this // needs to be addressed by expanding instruction encodings. -bool Is4ByteAVXInstruction(instruction ins) +bool emitter::Is4ByteAVXInstruction(instruction ins) { -#ifdef FEATURE_AVX_SUPPORT - return (ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq || ins == INS_pcmpgtq || + return UseAVX() && + (ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq || ins == INS_pcmpgtq || ins == INS_vbroadcastss || ins == INS_vbroadcastsd || ins == INS_vpbroadcastb || ins == INS_vpbroadcastw || ins == INS_vpbroadcastd || ins == INS_vpbroadcastq || ins == INS_vextractf128 || ins == INS_vinsertf128 || - ins == INS_pmulld); -#else + ins == INS_pmulld || ins == INS_ptest || ins == INS_phaddd); +} +#endif // FEATURE_AVX_SUPPORT + +// ------------------------------------------------------------------- +// Is4ByteSSE4Instruction: Returns true if the SSE4 instruction +// is a 4-byte opcode. +// +// Arguments: +// ins - instruction +// +// Note that this should be true for any of the instructions in instrsXArch.h +// that use the SSE38 or SSE3A macro. +bool emitter::Is4ByteSSE4Instruction(instruction ins) +{ +#ifdef LEGACY_BACKEND + // On legacy backend SSE3_4 is not enabled. return false; +#else + return UseSSE3_4() && (ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq || + ins == INS_pcmpgtq || ins == INS_pmulld || ins == INS_ptest || ins == INS_phaddd); #endif } @@ -150,8 +182,9 @@ bool emitter::TakesVexPrefix(instruction ins) // prefix. 
Based on 'attr' param we could add 2-byte VEX prefix in case of scalar // and AVX-128 bit operations. #define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL -#define LBIT_IN_3BYTE_VEX_PREFIX 0X00000400000000ULL -size_t emitter::AddVexPrefix(instruction ins, size_t code, emitAttr attr) +#define DEFAULT_3BYTE_VEX_PREFIX_MASK 0xFFFFFF00000000ULL +#define LBIT_IN_3BYTE_VEX_PREFIX 0x00000400000000ULL +emitter::code_t emitter::AddVexPrefix(instruction ins, code_t code, emitAttr attr) { // Only AVX instructions require VEX prefix assert(IsAVXInstruction(ins)); @@ -160,6 +193,7 @@ size_t emitter::AddVexPrefix(instruction ins, size_t code, emitAttr attr) assert(!hasVexPrefix(code)); // Set L bit to 1 in case of instructions that operate on 256-bits. + assert((code & DEFAULT_3BYTE_VEX_PREFIX_MASK) == 0); code |= DEFAULT_3BYTE_VEX_PREFIX; if (attr == EA_32BYTE) { @@ -296,25 +330,25 @@ bool IsXMMReg(regNumber reg) } // Returns bits to be encoded in instruction for the given register. -regNumber RegEncoding(regNumber reg) +unsigned RegEncoding(regNumber reg) { #ifndef LEGACY_BACKEND // XMM registers do not share the same reg numbers as integer registers. // But register encoding of integer and XMM registers is the same. // Therefore, subtract XMMBASE from regNumber to get the register encoding // in case of XMM registers. - return (regNumber)((IsXMMReg(reg) ? reg - XMMBASE : reg) & 0x7); + return (unsigned)((IsXMMReg(reg) ? reg - XMMBASE : reg) & 0x7); #else // LEGACY_BACKEND // Legacy X86: XMM registers share the same reg numbers as integer registers and // hence nothing to do to get reg encoding. - return (regNumber)(reg & 0x7); + return (unsigned)(reg & 0x7); #endif // LEGACY_BACKEND } // Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes // SSE2: separate 1-byte prefix gets added before opcode. // AVX: specific bits within VEX prefix need to be set in bit-inverted form. 
-size_t emitter::AddRexWPrefix(instruction ins, size_t code) +emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code) { #ifdef _TARGET_AMD64_ if (UseAVX() && IsAVXInstruction(ins)) @@ -335,7 +369,7 @@ size_t emitter::AddRexWPrefix(instruction ins, size_t code) #ifdef _TARGET_AMD64_ -size_t emitter::AddRexRPrefix(instruction ins, size_t code) +emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code) { if (UseAVX() && IsAVXInstruction(ins)) { @@ -349,7 +383,7 @@ size_t emitter::AddRexRPrefix(instruction ins, size_t code) return code | 0x4400000000ULL; } -size_t emitter::AddRexXPrefix(instruction ins, size_t code) +emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code) { if (UseAVX() && IsAVXInstruction(ins)) { @@ -363,7 +397,7 @@ size_t emitter::AddRexXPrefix(instruction ins, size_t code) return code | 0x4200000000ULL; } -size_t emitter::AddRexBPrefix(instruction ins, size_t code) +emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code) { if (UseAVX() && IsAVXInstruction(ins)) { @@ -378,12 +412,14 @@ size_t emitter::AddRexBPrefix(instruction ins, size_t code) } // Adds REX prefix (0x40) without W, R, X or B bits set -size_t emitter::AddRexPrefix(instruction ins, size_t code) +emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code) { assert(!UseAVX() || !IsAVXInstruction(ins)); return code | 0x4000000000ULL; } +#endif //_TARGET_AMD64_ + bool isPrefix(BYTE b) { assert(b != 0); // Caller should check this @@ -401,17 +437,15 @@ bool isPrefix(BYTE b) return ((b == 0xF2) || (b == 0xF3) || (b == 0x66)); } -#endif //_TARGET_AMD64_ - // Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise. -unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, size_t& code) +unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code) { -#ifdef _TARGET_AMD64_ // TODO-x86: This needs to be enabled for AVX support on x86. 
+#ifdef FEATURE_AVX_SUPPORT if (hasVexPrefix(code)) { // Only AVX instructions should have a VEX prefix assert(UseAVX() && IsAVXInstruction(ins)); - size_t vexPrefix = (code >> 32) & 0x00FFFFFF; + code_t vexPrefix = (code >> 32) & 0x00FFFFFF; code &= 0x00000000FFFFFFFFLL; WORD leadingBytes = 0; @@ -504,7 +538,10 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, s emitOutputByte(dst + 2, vexPrefix & 0xFF); return 3; } - else if (code > 0x00FFFFFFFFLL) +#endif // FEATURE_AVX_SUPPORT + +#ifdef _TARGET_AMD64_ + if (code > 0x00FFFFFFFFLL) { BYTE prefix = (code >> 32) & 0xFF; noway_assert(prefix >= 0x40 && prefix <= 0x4F); @@ -543,13 +580,13 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, s { // 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX // Change to c2rrc1XXXX, and emit check2 now - code = (((size_t)prefix << 24) | ((size_t)check << 16) | (code & 0x0000FFFFLL)); + code = (((code_t)prefix << 24) | ((code_t)check << 16) | (code & 0x0000FFFFLL)); } else { // 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX, (check is part of the opcode) // Change to c2XXrrXXXX, and emit check2 now - code = (((size_t)check << 24) | ((size_t)prefix << 16) | (code & 0x0000FFFFLL)); + code = (((code_t)check << 24) | ((code_t)prefix << 16) | (code & 0x0000FFFFLL)); } return emitOutputByte(dst, check2); } @@ -593,7 +630,6 @@ void emitter::emitOutputPreEpilogNOP() // Size of rex prefix in bytes unsigned emitter::emitGetRexPrefixSize(instruction ins) { - // In case of AVX instructions, REX prefixes are part of VEX prefix. // And hence requires no additional byte to encode REX prefixes. 
if (IsAVXInstruction(ins)) @@ -630,7 +666,7 @@ unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr) //=(opcodeSize - ExtrabytesSize) + vexPrefixSize //=opcodeSize + (vexPrefixSize - ExtrabytesSize) //=opcodeSize + vexPrefixAdjustedSize -unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, size_t code) +unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, code_t code) { #ifdef FEATURE_AVX_SUPPORT if (IsAVXInstruction(ins)) @@ -674,19 +710,19 @@ unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, s } // Get size of rex or vex prefix emitted in code -unsigned emitter::emitGetPrefixSize(size_t code) +unsigned emitter::emitGetPrefixSize(code_t code) { -#ifdef FEATURE_AVX_SUPPORT - if (code & VEX_PREFIX_MASK_3BYTE) + if (hasVexPrefix(code)) { return 3; } - else -#endif - if (code & REX_PREFIX_MASK) + +#ifdef _TARGET_AMD64_ + if (code & REX_PREFIX_MASK) { return 1; } +#endif // _TARGET_AMD64_ return 0; } @@ -1058,7 +1094,7 @@ size_t insCodesMR[] = // clang-format on // Returns true iff the give CPU instruction has an MR encoding. -inline size_t hasCodeMR(instruction ins) +inline bool hasCodeMR(instruction ins) { assert((unsigned)ins < sizeof(insCodesMR) / sizeof(insCodesMR[0])); return ((insCodesMR[ins] != BAD_CODE)); @@ -1083,7 +1119,7 @@ inline size_t insCodeMR(instruction ins) * part of an opcode. 
*/ -inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, size_t* code) +inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code) { assert(reg < REG_STK); @@ -1106,16 +1142,16 @@ inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAtt } #endif // _TARGET_AMD64_ - reg = RegEncoding(reg); - assert(reg < 8); - return reg; + unsigned regBits = RegEncoding(reg); #else // LEGACY_BACKEND - assert(reg < 8); - return reg; + unsigned regBits = reg; #endif // LEGACY_BACKEND + + assert(regBits < 8); + return regBits; } /***************************************************************************** @@ -1124,7 +1160,7 @@ inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAtt * part of an opcode. */ -inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, size_t* code) +inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code) { assert(reg < REG_STK); @@ -1147,14 +1183,16 @@ inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAtt } #endif // _TARGET_AMD64_ - reg = RegEncoding(reg); - assert(reg < 8); - return (reg << 3); + unsigned regBits = RegEncoding(reg); + +#else // LEGACY_BACKEND + + unsigned regBits = reg; -#else // LEGACY_BACKEND - assert(reg < 8); - return (reg << 3); #endif // LEGACY_BACKEND + + assert(regBits < 8); + return (regBits << 3); } /*********************************************************************************** @@ -1162,7 +1200,7 @@ inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAtt * Returns modified AVX opcode with the specified register encoded in bits 3-6 of * byte 2 of VEX prefix. 
*/ -inline size_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, size_t code) +inline emitter::code_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, code_t code) { #ifdef FEATURE_AVX_SUPPORT assert(reg < REG_STK); @@ -1172,7 +1210,7 @@ inline size_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr // Get 4-bit register encoding // RegEncoding() gives lower 3 bits // IsExtendedReg() gives MSB. - size_t regBits = RegEncoding(reg); + code_t regBits = RegEncoding(reg); if (IsExtendedReg(reg)) { regBits |= 0x08; @@ -1196,7 +1234,7 @@ inline size_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr * Used exclusively to generate the REX.X bit and truncate the register. */ -inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, size_t* code) +inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, code_t* code) { assert(reg < REG_STK); @@ -1210,11 +1248,13 @@ inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, size_t* { *code = AddRexXPrefix(ins, *code); // REX.X } - reg = RegEncoding(reg); -#endif + unsigned regBits = RegEncoding(reg); +#else // !_TARGET_AMD64_ + unsigned regBits = reg; +#endif // !_TARGET_AMD64_ - assert(reg < 8); - return reg; + assert(regBits < 8); + return regBits; } /***************************************************************************** @@ -1222,7 +1262,7 @@ inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, size_t* * Returns the "[r/m]" opcode with the mod/RM field set to register. */ -inline size_t emitter::insEncodeMRreg(instruction ins, size_t code) +inline emitter::code_t emitter::insEncodeMRreg(instruction ins, code_t code) { // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding. 
@@ -1237,22 +1277,10 @@ inline size_t emitter::insEncodeMRreg(instruction ins, size_t code) /***************************************************************************** * - * Returns the "[r/m], icon" opcode with the mod/RM field set to register. - */ - -inline size_t insEncodeMIreg(instruction ins, size_t code) -{ - assert((code & 0xC000) == 0); - code |= 0xC000; - return code; -} - -/***************************************************************************** - * * Returns the given "[r/m]" opcode with the mod/RM field set to register. */ -inline size_t insEncodeRMreg(instruction ins, size_t code) +inline emitter::code_t emitter::insEncodeRMreg(instruction ins, code_t code) { // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding. @@ -1270,7 +1298,7 @@ inline size_t insEncodeRMreg(instruction ins, size_t code) * the given register. */ -inline size_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, size_t code) +inline emitter::code_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, code_t code) { assert((code & 0xC000) == 0); code |= 0xC000; @@ -1285,7 +1313,7 @@ inline size_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr s * the given register. */ -inline size_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, size_t code) +inline emitter::code_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, code_t code) { assert((code & 0xC000) == 0); code |= 0xC000; @@ -1310,12 +1338,12 @@ inline bool insNeedsRRIb(instruction ins) * Returns the "reg,reg,imm8" opcode with both the reg's set to the * the given register. */ -inline size_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size) +inline emitter::code_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size) { assert(size == EA_4BYTE); // All we handle for now. 
assert(insNeedsRRIb(ins)); // If this list gets longer, use a switch, or a table lookup. - size_t code = 0x69c0; + code_t code = 0x69c0; unsigned regcode = insEncodeReg012(ins, reg, size, &code); // We use the same register as source and destination. (Could have another version that does both regs...) code |= regcode; @@ -1329,9 +1357,9 @@ inline size_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr si * nibble of the opcode */ -inline size_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size) +inline emitter::code_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size) { - size_t code = insCodeRR(ins); + code_t code = insCodeRR(ins); unsigned regcode = insEncodeReg012(ins, reg, size, &code); code |= regcode; return code; @@ -1342,7 +1370,7 @@ inline size_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr s * Return the 'SS' field value for the given index scale factor. */ -inline unsigned insSSval(unsigned scale) +inline unsigned emitter::insSSval(unsigned scale) { assert(scale == 1 || scale == 2 || scale == 4 || scale == 8); @@ -1447,7 +1475,7 @@ bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1 * Estimate the size (in bytes of generated code) of the given instruction. */ -inline UNATIVE_OFFSET emitter::emitInsSize(size_t code) +inline UNATIVE_OFFSET emitter::emitInsSize(code_t code) { UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2; #ifdef _TARGET_AMD64_ @@ -1466,18 +1494,17 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, re emitAttr size = EA_SIZE(attr); UNATIVE_OFFSET sz; -#ifdef _TARGET_AMD64_ - // If Byte 4 (which is 0xFF00) is non-zero, that's where the RM encoding goes. + + // If Byte 4 (which is 0xFF00) is zero, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes. 
// This would probably be better expressed as a different format or something? - if (insCodeRM(ins) & 0xFF00) + if ((insCodeRM(ins) & 0xFF00) != 0) { sz = 5; } else -#endif // _TARGET_AMD64_ { - size_t code = insCodeRM(ins); + code_t code = insCodeRM(ins); sz = emitInsSize(insEncodeRMreg(ins, code)); } @@ -1502,7 +1529,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, re /*****************************************************************************/ -inline UNATIVE_OFFSET emitter::emitInsSizeSV(size_t code, int var, int dsp) +inline UNATIVE_OFFSET emitter::emitInsSizeSV(code_t code, int var, int dsp) { UNATIVE_OFFSET size = emitInsSize(code); UNATIVE_OFFSET offs; @@ -1777,7 +1804,7 @@ static bool baseRegisterRequiresDisplacement(regNumber base) #endif } -UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code) +UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) { emitAttr attrSize = id->idOpSize(); instruction ins = id->idIns(); @@ -1994,7 +2021,7 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code) return size; } -inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code, int val) +inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val) { instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); @@ -2027,7 +2054,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code, int val return valSize + emitInsSizeAM(id, code); } -inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code) +inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code) { instruction ins = id->idIns(); @@ -2047,7 +2074,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code) return size + emitInsSize(code); } -inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code, int val) +inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val) { instruction 
ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); @@ -2252,7 +2279,7 @@ void emitter::emitIns(instruction ins) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstr(); - size_t code = insCodeMR(ins); + code_t code = insCodeMR(ins); #ifdef DEBUG #if FEATURE_STACK_FP_X87 @@ -2328,7 +2355,7 @@ void emitter::emitIns(instruction ins, emitAttr attr) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstr(attr); - size_t code = insCodeMR(ins); + code_t code = insCodeMR(ins); assert(ins == INS_cdq); assert((code & 0xFFFFFF00) == 0); sz = 1; @@ -2499,8 +2526,9 @@ void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, // Absolute addresses marked as contained should fit within the base of addr mode. assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp)); - // Either not generating relocatable code or addr must be an icon handle - assert(!emitComp->opts.compReloc || memBase->IsIconHandle()); + // Either not generating relocatable code, or addr must be an icon handle, or the + // constant is zero (which we won't generate a relocation for). + assert(!emitComp->opts.compReloc || memBase->IsIconHandle() || memBase->IsIntegralConst(0)); if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp)) { @@ -2904,6 +2932,19 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G varNum = tmpDsc->tdTempNum(); offset = 0; } + else + { + // At this point we must have a memory operand that is a contained indir: if we do not, we should have handled + // this instruction above in the reg/imm or reg/reg case. + assert(mem != nullptr); + assert(memBase != nullptr); + + if (memBase->OperGet() == GT_LCL_VAR_ADDR) + { + varNum = memBase->AsLclVarCommon()->GetLclNum(); + offset = 0; + } + } // Spill temp numbers are negative and start with -1 // which also happens to be BAD_VAR_NUM. 
For this reason @@ -2911,7 +2952,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G if (varNum != BAD_VAR_NUM || tmpDsc != nullptr) { // Is the memory op in the source position? - if (src->isContainedLclField() || src->isContainedLclVar() || src->isContainedSpillTemp()) + if (src->isContainedMemoryOp()) { if (instrHasImplicitRegPairDest(ins)) { @@ -3351,22 +3392,7 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } /***************************************************************************** @@ -3484,7 +3510,7 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t sz += emitGetRexPrefixSize(ins); } -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND) assert(reg < 8); #endif @@ -3504,34 +3530,10 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - if (reg == REG_ESP) { - if (emitCntStackDepth) - { - if (ins == INS_sub) - { - S_UINT32 newStackLvl(emitCurStackLvl); - newStackLvl += S_UINT32(val); - noway_assert(!newStackLvl.IsOverflow()); - - emitCurStackLvl = newStackLvl.Value(); - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_add) - { - S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val); - noway_assert(!newStackLvl.IsOverflow()); - - emitCurStackLvl = newStackLvl.Value(); - } - } + emitAdjustStackDepth(ins, val); } - -#endif // !FEATURE_FIXED_OUT_ARGS } 
/***************************************************************************** @@ -3584,17 +3586,7 @@ void emitter::emitIns_I(instruction ins, emitAttr attr, int val) dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } /***************************************************************************** @@ -3693,22 +3685,7 @@ void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fld dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } /***************************************************************************** @@ -3757,11 +3734,14 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival) { - // SSE2 version requires 5 bytes and AVX version 6 bytes + // SSE2 version requires 5 bytes and SSE4/AVX version 6 bytes UNATIVE_OFFSET sz = 4; if (IsSSEOrAVXInstruction(ins)) { - sz = UseAVX() ? 6 : 5; + // AVX: 3 byte VEX prefix + 1 byte opcode + 1 byte ModR/M + 1 byte immediate + // SSE4: 4 byte opcode + 1 byte ModR/M + 1 byte immediate + // SSE2: 3 byte opcode + 1 byte ModR/M + 1 byte immediate + sz = (UseAVX() || UseSSE3_4()) ? 
6 : 5; } #ifdef _TARGET_AMD64_ @@ -4014,7 +3994,7 @@ void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE f id->idIns(ins); id->idInsFmt(fmt); - size_t code = insCodeMI(ins); + code_t code = insCodeMI(ins); UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val); #ifdef _TARGET_AMD64_ @@ -4387,22 +4367,7 @@ void emitter::emitIns_AR_R( dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp) @@ -4443,22 +4408,7 @@ void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } void emitter::emitIns_I_ARR(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp) @@ -4575,22 +4525,7 @@ void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regN dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } void 
emitter::emitIns_I_ARX( @@ -4711,22 +4646,7 @@ void emitter::emitIns_ARX_R( dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } void emitter::emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp) @@ -4842,22 +4762,7 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } /***************************************************************************** @@ -4901,22 +4806,7 @@ void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs) @@ -5197,8 +5087,23 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 dispIns(id); emitCurIGsize += sz; + emitAdjustStackDepthPushPop(ins); +} + #if !FEATURE_FIXED_OUT_ARGS 
+//------------------------------------------------------------------------ +// emitAdjustStackDepthPushPop: Adjust the current and maximum stack depth. +// +// Arguments: +// ins - the instruction. Only INS_push and INS_pop adjust the stack depth. +// +// Notes: +// 1. Alters emitCurStackLvl and possibly emitMaxStackDepth. +// 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere) +// +void emitter::emitAdjustStackDepthPushPop(instruction ins) +{ if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; @@ -5206,10 +5111,53 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } + else if (ins == INS_pop) + { + emitCurStackLvl -= emitCntStackDepth; + assert((int)emitCurStackLvl >= 0); + } +} -#endif // !FEATURE_FIXED_OUT_ARGS +//------------------------------------------------------------------------ +// emitAdjustStackDepth: Adjust the current and maximum stack depth. +// +// Arguments: +// ins - the instruction. Only INS_add and INS_sub adjust the stack depth. +// It is assumed that the add/sub is on the stack pointer. +// val - the number of bytes to add to or subtract from the stack pointer. +// +// Notes: +// 1. Alters emitCurStackLvl and possibly emitMaxStackDepth. +// 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere) +// +void emitter::emitAdjustStackDepth(instruction ins, ssize_t val) +{ + // If we're in the prolog or epilog, or otherwise not tracking the stack depth, just return. 
+ if (emitCntStackDepth == 0) + return; + + if (ins == INS_sub) + { + S_UINT32 newStackLvl(emitCurStackLvl); + newStackLvl += S_UINT32(val); + noway_assert(!newStackLvl.IsOverflow()); + + emitCurStackLvl = newStackLvl.Value(); + + if (emitMaxStackDepth < emitCurStackLvl) + emitMaxStackDepth = emitCurStackLvl; + } + else if (ins == INS_add) + { + S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val); + noway_assert(!newStackLvl.IsOverflow()); + + emitCurStackLvl = newStackLvl.Value(); + } } +#endif // !FEATURE_FIXED_OUT_ARGS + /***************************************************************************** * * Add a call instruction (direct or indirect). @@ -5393,13 +5341,11 @@ void emitter::emitIns_Call(EmitCallType callType, assert(argSize % sizeof(void*) == 0); argCnt = (int)(argSize / (ssize_t)sizeof(void*)); // we need a signed-divide -#ifdef DEBUGGING_SUPPORT /* Managed RetVal: emit sequence point for the call */ if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET) { codeGen->genIPmappingAdd(ilOffset, false); } -#endif /* We need to allocate the appropriate instruction descriptor based @@ -5793,9 +5739,18 @@ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName) return emitXMMregName(reg); case EA_8BYTE: + if ((REG_XMM0 <= reg) && (reg <= REG_XMM15)) + { + return emitXMMregName(reg); + } break; case EA_4BYTE: + if ((REG_XMM0 <= reg) && (reg <= REG_XMM15)) + { + return emitXMMregName(reg); + } + if (reg > REG_R15) { break; @@ -5880,10 +5835,24 @@ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName) case EA_16BYTE: return emitXMMregName(reg); -#endif // LEGACY_BACKEND + case EA_8BYTE: + if ((REG_XMM0 <= reg) && (reg <= REG_XMM7)) + { + return emitXMMregName(reg); + } + break; + + case EA_4BYTE: + if ((REG_XMM0 <= reg) && (reg <= REG_XMM7)) + { + return emitXMMregName(reg); + } + break; +#else // LEGACY_BACKEND case EA_4BYTE: break; +#endif // LEGACY_BACKEND case EA_2BYTE: rn++; @@ -6661,9 +6630,9 @@ 
void emitter::emitDispIns( printf(" %-9s", sstr); } #ifndef FEATURE_PAL - if (strnlen_s(sstr, 10) > 8) + if (strnlen_s(sstr, 10) >= 8) #else // FEATURE_PAL - if (strnlen(sstr, 10) > 8) + if (strnlen(sstr, 10) >= 8) #endif // FEATURE_PAL { printf(" "); @@ -6808,17 +6777,8 @@ void emitter::emitDispIns( case IF_RRD_ARD: case IF_RWR_ARD: case IF_RRW_ARD: - if (IsAVXInstruction(ins)) - { - printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr); - } - else if (IsSSE2Instruction(ins)) - { - printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr); - } - else #ifdef _TARGET_AMD64_ - if (ins == INS_movsxd) + if (ins == INS_movsxd) { printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr); } @@ -6841,18 +6801,7 @@ void emitter::emitDispIns( printf("%s", sstr); emitDispAddrMode(id); - if (IsAVXInstruction(ins)) - { - printf(", %s", emitYMMregName((unsigned)id->idReg1())); - } - else if (IsSSE2Instruction(ins)) - { - printf(", %s", emitXMMregName((unsigned)id->idReg1())); - } - else - { - printf(", %s", emitRegName(id->idReg1(), attr)); - } + printf(", %s", emitRegName(id->idReg1(), attr)); break; case IF_ARD_CNS: @@ -6930,18 +6879,7 @@ void emitter::emitDispIns( emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs, asmfm); - if (IsAVXInstruction(ins)) - { - printf(", %s", emitYMMregName((unsigned)id->idReg1())); - } - else if (IsSSE2Instruction(ins)) - { - printf(", %s", emitXMMregName((unsigned)id->idReg1())); - } - else - { - printf(", %s", emitRegName(id->idReg1(), attr)); - } + printf(", %s", emitRegName(id->idReg1(), attr)); break; case IF_SRD_CNS: @@ -6983,17 +6921,8 @@ void emitter::emitDispIns( case IF_RRD_SRD: case IF_RWR_SRD: case IF_RRW_SRD: - if (IsAVXInstruction(ins)) - { - printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr); - } - else if (IsSSE2Instruction(ins)) - { - printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr); - } - else #ifdef 
_TARGET_AMD64_ - if (ins == INS_movsxd) + if (ins == INS_movsxd) { printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr); } @@ -7016,36 +6945,31 @@ void emitter::emitDispIns( case IF_RRD_RRD: case IF_RWR_RRD: case IF_RRW_RRD: - if (ins == INS_mov_i2xmm) { - printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), emitRegName(id->idReg2(), attr)); + printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr)); } else if (ins == INS_mov_xmm2i) { - printf("%s, %s", emitRegName(id->idReg2(), attr), emitXMMregName((unsigned)id->idReg1())); + printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), EA_16BYTE)); + } + else if (ins == INS_pmovmskb) + { + printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr)); } #ifndef LEGACY_BACKEND else if ((ins == INS_cvtsi2ss) || (ins == INS_cvtsi2sd)) { - printf(" %s, %s", emitXMMregName((unsigned)id->idReg1()), emitRegName(id->idReg2(), attr)); + printf(" %s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr)); } #endif else if ((ins == INS_cvttsd2si) #ifndef LEGACY_BACKEND || (ins == INS_cvtss2si) || (ins == INS_cvtsd2si) || (ins == INS_cvttss2si) #endif - ) - { - printf(" %s, %s", emitRegName(id->idReg1(), attr), emitXMMregName((unsigned)id->idReg2())); - } - else if (IsAVXInstruction(ins)) - { - printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), emitYMMregName((unsigned)id->idReg2())); - } - else if (IsSSE2Instruction(ins)) + || 0) { - printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), emitXMMregName((unsigned)id->idReg2())); + printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); } #ifdef _TARGET_AMD64_ else if (ins == INS_movsxd) @@ -7079,16 +7003,8 @@ void emitter::emitDispIns( break; #endif case IF_RRW_RRW_CNS: - if (IsAVXInstruction(ins)) - { - printf("%s,", emitYMMregName((unsigned)id->idReg1()), attr); - printf(" %s", emitYMMregName((unsigned)id->idReg2()), attr); - } - 
else - { - printf("%s,", emitRegName(id->idReg1(), attr)); - printf(" %s", emitRegName(id->idReg2(), attr)); - } + printf("%s,", emitRegName(id->idReg1(), attr)); + printf(" %s", emitRegName(id->idReg2(), attr)); val = emitGetInsSC(id); #ifdef _TARGET_AMD64_ // no 8-byte immediates allowed here! @@ -7133,18 +7049,7 @@ void emitter::emitDispIns( attr = EA_PTRSIZE; } #endif - if (IsAVXInstruction(ins)) - { - printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr); - } - else if (IsSSE2Instruction(ins)) - { - printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr); - } - else - { - printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); - } + printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); break; @@ -7521,7 +7426,7 @@ static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes) * Output an instruction involving an address mode. */ -BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) +BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) { regNumber reg; regNumber rgx; @@ -7543,7 +7448,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) // Special case: call via a register if (id->idIsCallRegPtr()) { - size_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call)); + code_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call)); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, opcode); dst += emitOutputWord(dst, opcode); @@ -7559,13 +7464,15 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) if (IsExtendedReg(reg, EA_PTRSIZE)) { insEncodeReg012(ins, reg, EA_PTRSIZE, &code); - reg = RegEncoding(reg); + // TODO-Cleanup: stop casting RegEncoding() back to a regNumber. 
+ reg = (regNumber)RegEncoding(reg); } if (IsExtendedReg(rgx, EA_PTRSIZE)) { insEncodeRegSIB(ins, rgx, &code); - rgx = RegEncoding(rgx); + // TODO-Cleanup: stop casting RegEncoding() back to a regNumber. + rgx = (regNumber)RegEncoding(rgx); } // And emit the REX prefix @@ -7605,7 +7512,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) // For this format, moves do not support a third operand, so we only need to handle the binary ops. if (IsThreeOperandBinaryAVXInstruction(ins)) { - // Encode source operand reg in 'vvvv' bits in 1's compliement form + // Encode source operand reg in 'vvvv' bits in 1's complement form // The order of operands are reversed, therefore use reg2 as the source. code = insEncodeReg3456(ins, id->idReg1(), size, code); } @@ -7619,13 +7526,15 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) if (IsExtendedReg(reg, EA_PTRSIZE)) { insEncodeReg012(ins, reg, EA_PTRSIZE, &code); - reg = RegEncoding(reg); + // TODO-Cleanup: stop casting RegEncoding() back to a regNumber. + reg = (regNumber)RegEncoding(reg); } if (IsExtendedReg(rgx, EA_PTRSIZE)) { insEncodeRegSIB(ins, rgx, &code); - rgx = RegEncoding(rgx); + // TODO-Cleanup: stop casting RegEncoding() back to a regNumber. + rgx = (regNumber)RegEncoding(rgx); } // Is this a 'big' opcode? @@ -8185,7 +8094,7 @@ DONE: * Output an instruction involving a stack frame value. 
*/ -BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) +BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) { int adr; int dsp; @@ -8234,7 +8143,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) // Special case emitting AVX instructions if (Is4ByteAVXInstruction(ins)) { - size_t regcode = insEncodeReg345(ins, id->idReg1(), size, &code); + unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); // Emit last opcode byte @@ -8581,7 +8490,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) * Output an instruction with a static data member (class variable). */ -BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) +BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) { BYTE* addr; CORINFO_FIELD_HANDLE fldh; @@ -8646,20 +8555,18 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) { case IF_RWR_MRD: - assert((unsigned)code == - (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500)); + assert(code == (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500)); - code &= ~((size_t)0xFFFFFFFF); + code &= ~((code_t)0xFFFFFFFF); code |= 0xA0; isMoffset = true; break; case IF_MWR_RRD: - assert((unsigned)code == - (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500)); + assert(code == (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500)); - code &= ~((size_t)0xFFFFFFFF); + code &= ~((code_t)0xFFFFFFFF); code |= 0xA2; isMoffset = true; break; @@ -8674,7 +8581,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) // Special case emitting AVX instructions if (Is4ByteAVXInstruction(ins)) { - size_t regcode = insEncodeReg345(ins, id->idReg1(), size, &code); + 
unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); // Emit last opcode byte @@ -9017,7 +8924,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) { - size_t code; + code_t code; instruction ins = id->idIns(); regNumber reg = id->idReg1(); @@ -9228,7 +9135,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) { - size_t code; + code_t code; instruction ins = id->idIns(); regNumber reg1 = id->idReg1(); @@ -9238,7 +9145,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // Get the 'base' opcode code = insCodeRM(ins); code = AddVexPrefixIfNeeded(ins, code, size); - if (IsSSE2Instruction(ins) || IsAVXInstruction(ins)) + if (IsSSEOrAVXInstruction(ins)) { code = insEncodeRMreg(ins, code); @@ -9322,12 +9229,12 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // now we use the single source as source1 and source2. 
if (IsThreeOperandBinaryAVXInstruction(ins)) { - // encode source/dest operand reg in 'vvvv' bits in 1's compliement form + // encode source/dest operand reg in 'vvvv' bits in 1's complement form code = insEncodeReg3456(ins, reg1, size, code); } else if (IsThreeOperandMoveAVXInstruction(ins)) { - // encode source operand reg in 'vvvv' bits in 1's compliement form + // encode source operand reg in 'vvvv' bits in 1's complement form code = insEncodeReg3456(ins, reg2, size, code); } @@ -9340,6 +9247,13 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // Output the highest word of the opcode dst += emitOutputWord(dst, code >> 16); code &= 0x0000FFFF; + + if (Is4ByteSSE4Instruction(ins)) + { + // Output 3rd byte of the opcode + dst += emitOutputByte(dst, code); + code &= 0xFF00; + } } else if (code & 0x00FF0000) { @@ -9349,13 +9263,13 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // If byte 4 is 0xC0, then it contains the Mod/RM encoding for a 3-byte // encoding. Otherwise, this is an instruction with a 4-byte encoding, - // and the MOd/RM encoding needs to go in the 5th byte. + // and the Mod/RM encoding needs to go in the 5th byte. // TODO-XArch-CQ: Currently, this will only support registers in the 5th byte. // We probably need a different mechanism to identify the 4-byte encodings. 
if ((code & 0xFF) == 0x00) { - // This case happens for AVX instructions only - assert(IsAVXInstruction(ins)); + // This case happens for SSE4/AVX instructions only + assert(IsAVXInstruction(ins) || IsSSE4Instruction(ins)); if ((code & 0xFF00) == 0xC000) { dst += emitOutputByte(dst, (0xC0 | regCode)); @@ -9560,7 +9474,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) #ifdef FEATURE_AVX_SUPPORT BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) { - size_t code; + code_t code; instruction ins = id->idIns(); assert(IsAVXInstruction(ins)); @@ -9642,7 +9556,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) { - size_t code; + code_t code; emitAttr size = id->idOpSize(); instruction ins = id->idIns(); regNumber reg = id->idReg1(); @@ -10004,7 +9918,7 @@ DONE: BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id) { - size_t code; + code_t code; instruction ins = id->idIns(); emitAttr size = id->idOpSize(); ssize_t val = emitGetInsSC(id); @@ -10286,27 +10200,29 @@ BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i) } else { - size_t code; + code_t code; // Long jump if (jmp) { + // clang-format off assert(INS_jmp + (INS_l_jmp - INS_jmp) == INS_l_jmp); - assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo); - assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb); + assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo); + assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb); assert(INS_jae + (INS_l_jmp - INS_jmp) == INS_l_jae); - assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je); + assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je); assert(INS_jne + (INS_l_jmp - INS_jmp) == INS_l_jne); assert(INS_jbe + (INS_l_jmp - INS_jmp) == INS_l_jbe); - assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja); - assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js); + assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja); + assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js); assert(INS_jns + (INS_l_jmp - INS_jmp) == INS_l_jns); 
assert(INS_jpe + (INS_l_jmp - INS_jmp) == INS_l_jpe); assert(INS_jpo + (INS_l_jmp - INS_jmp) == INS_l_jpo); - assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl); + assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl); assert(INS_jge + (INS_l_jmp - INS_jmp) == INS_l_jge); assert(INS_jle + (INS_l_jmp - INS_jmp) == INS_l_jle); - assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg); + assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg); + // clang-format on code = insCode((instruction)(ins + (INS_l_jmp - INS_jmp))); } @@ -10452,10 +10368,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // What instruction format have we got? switch (id->idInsFmt()) { - size_t code; - size_t regcode; - int args; - CnsVal cnsVal; + code_t code; + unsigned regcode; + int args; + CnsVal cnsVal; BYTE* addr; bool recCall; @@ -10792,6 +10708,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutputWord(dst, code); dst += emitOutputByte(dst, emitGetInsSC(id)); sz = emitSizeOfInsDsc(id); + + // Update GC info. + assert(!id->idGCref()); + emitGCregDeadUpd(id->idReg1(), dst); break; case IF_RRD_RRD: @@ -10871,7 +10791,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); - if (UseAVX() && Is4ByteAVXInstruction(ins)) + if (Is4ByteAVXInstruction(ins)) { // We just need to output the last byte of the opcode. assert((code & 0xFF) == 0); @@ -10883,6 +10803,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { dst += emitOutputWord(dst, code >> 16); code &= 0x0000FFFF; + + if (Is4ByteSSE4Instruction(ins)) + { + dst += emitOutputWord(dst, code); + code = 0; + } } else if (code & 0x00FF0000) { @@ -10898,9 +10824,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) } else { - // This case occurs for AVX instructions. + // This case occurs for SSE4/AVX instructions. 
// Note that regcode is left shifted by 8-bits. - assert(Is4ByteAVXInstruction(ins)); + assert(Is4ByteAVXInstruction(ins) || Is4ByteSSE4Instruction(ins)); dst += emitOutputByte(dst, 0xC0 | (regcode >> 8)); } |