diff options
-rw-r--r-- | src/jit/emitfmtsxarch.h | 4 | ||||
-rw-r--r-- | src/jit/emitxarch.cpp | 101 | ||||
-rw-r--r-- | src/jit/emitxarch.h | 1 | ||||
-rw-r--r-- | src/jit/hwintrinsiccodegenxarch.cpp | 2 | ||||
-rw-r--r-- | src/jit/instrsxarch.h | 4 |
5 files changed, 109 insertions, 3 deletions
diff --git a/src/jit/emitfmtsxarch.h b/src/jit/emitfmtsxarch.h index b7ab38f0c0..4d97c4d8c5 100644 --- a/src/jit/emitfmtsxarch.h +++ b/src/jit/emitfmtsxarch.h @@ -139,6 +139,8 @@ IF_DEF(MRD_CNS, IS_GM_RD, DSP_CNS) // read [mem], const IF_DEF(MWR_CNS, IS_GM_WR, DSP_CNS) // write [mem], const IF_DEF(MRW_CNS, IS_GM_RW, DSP_CNS) // r/w [mem], const +IF_DEF(MWR_RRD_CNS, IS_GM_WR|IS_R1_RD, DSP_CNS) // write [mem], read reg, const + IF_DEF(MRW_SHF, IS_GM_RW, DSP_CNS) // shift [mem], const //---------------------------------------------------------------------------- @@ -194,6 +196,8 @@ IF_DEF(ARD_CNS, IS_AM_RD, AMD_CNS) // read [adr], const IF_DEF(AWR_CNS, IS_AM_WR, AMD_CNS) // write [adr], const IF_DEF(ARW_CNS, IS_AM_RW, AMD_CNS) // r/w [adr], const +IF_DEF(AWR_RRD_CNS, IS_AM_WR|IS_R1_RD, AMD_CNS) // write [adr], read reg, const + IF_DEF(ARW_SHF, IS_AM_RW, AMD_CNS) // shift [adr], const diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 117bc21c2e..094776b17d 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -2730,6 +2730,9 @@ emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt) case IF_ARW_CNS: return IF_MRW_CNS; + case IF_AWR_RRD_CNS: + return IF_MWR_RRD_CNS; + case IF_ARW_SHF: return IF_MRW_SHF; @@ -5067,6 +5070,32 @@ void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNu emitAdjustStackDepthPushPop(ins); } +#ifndef LEGACY_BACKEND +void emitter::emitIns_AR_R_I(instruction ins, emitAttr attr, regNumber base, int disp, regNumber ireg, int ival) +{ + assert(ins == INS_vextracti128 || ins == INS_vextractf128); + assert(base != REG_NA); + assert(ireg != REG_NA); + UNATIVE_OFFSET sz; + instrDesc* id = emitNewInstrAmdCns(attr, disp, ival); + + id->idIns(ins); + id->idInsFmt(IF_AWR_RRD_CNS); + id->idAddr()->iiaAddrMode.amBaseReg = base; + id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; + id->idReg1(ireg); + + assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly + + // the code size of "vextracti/f128 [mem], ymm, imm8" is 6 byte + sz = 6; + id->idCodeSize(sz); + + dispIns(id); + emitCurIGsize += sz; +} +#endif + void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp) { UNATIVE_OFFSET sz; @@ -7790,6 +7819,32 @@ void emitter::emitDispIns( break; } + case IF_AWR_RRD_CNS: + { + assert(ins == INS_vextracti128 || ins == INS_vextractf128); + // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr" + sstr = codeGen->genSizeStr(EA_ATTR(16)); + printf(sstr); + emitDispAddrMode(id); + printf(", %s", emitRegName(id->idReg1(), attr)); + + emitGetInsAmdCns(id, &cnsVal); + + val = cnsVal.cnsVal; + printf(", "); + + if (cnsVal.cnsReloc) + { + emitDispReloc(val); + } + else + { + goto PRINT_CONSTANT; + } + + break; + } + case IF_RWR_RRD_ARD: printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr); emitDispAddrMode(id); @@ -8166,6 +8221,32 @@ void emitter::emitDispIns( break; } + case IF_MWR_RRD_CNS: + { + assert(ins == INS_vextracti128 || ins == INS_vextractf128); + // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr" + sstr = codeGen->genSizeStr(EA_ATTR(16)); + printf(sstr); + offs = emitGetInsDsp(id); + emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); + printf(", %s", emitRegName(id->idReg1(), attr)); + emitGetInsDcmCns(id, &cnsVal); + + val = cnsVal.cnsVal; + printf(", "); + + if (cnsVal.cnsReloc) + { + emitDispReloc(val); + } + else + { + goto PRINT_CONSTANT; + } + + break; + } + case IF_RWR_RRD_MRD: printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr); offs = emitGetInsDsp(id); @@ -12218,6 +12299,15 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) sz = emitSizeOfInsDsc(id); break; + case IF_AWR_RRD_CNS: + assert(ins == INS_vextracti128 || ins == INS_vextractf128); + assert(UseVEXEncoding()); + emitGetInsAmdCns(id, &cnsVal); + code = insCodeMR(ins); + dst = emitOutputAM(dst, id, code, &cnsVal); + sz = emitSizeOfInsDsc(id); + break; + case IF_RRD_ARD: case IF_RWR_ARD: case IF_RRW_ARD: @@ -12530,6 +12620,17 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) sz = emitSizeOfInsDsc(id); break; + case IF_MWR_RRD_CNS: + assert(ins == INS_vextracti128 || ins == INS_vextractf128); + assert(UseVEXEncoding()); + emitGetInsDcmCns(id, &cnsVal); + code = insCodeMR(ins); + // only AVX2 vextracti128 and AVX vextractf128 can reach this path, + // they do not need VEX.vvvv to encode the register operand + dst = emitOutputCV(dst, id, code, &cnsVal); + sz = emitSizeOfInsDsc(id); + break; + case IF_RRD_MRD: case IF_RWR_MRD: case IF_RRW_MRD: diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h index a5bc303719..4cfa24fcde 100644 --- a/src/jit/emitxarch.h +++ b/src/jit/emitxarch.h @@ -416,6 +416,7 @@ void emitIns_R_R_A_I( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt); void emitIns_R_R_AR_I( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival); +void emitIns_AR_R_I(instruction ins, emitAttr attr, regNumber base, int disp, regNumber ireg, int ival); #endif // !LEGACY_BACKEND void emitIns_R_R_C_I( diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 7005da47f8..acf013e207 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -1473,7 +1473,7 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node) { if (intrinsicID == NI_AVX_ExtractVector128 || intrinsicID == NI_AVX2_ExtractVector128) { - emit->emitIns_R_AR_I(ins, attr, op2Reg, op1Reg, 0, (int)i); + emit->emitIns_AR_R_I(ins, attr, op1Reg, 0, op2Reg, (int)i); } else if (op2->TypeGet() == TYP_I_IMPL) { diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index 23af97a282..8d210d9f2e 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -474,8 +474,8 @@ INST3( vpbroadcastb, "pbroadcastb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS INST3( vpbroadcastw, "pbroadcastw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x79)) // Broadcast int16 value from reg/memory to entire ymm register INST3( vpbroadcastd, "pbroadcastd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x58)) // Broadcast int32 value from reg/memory to entire ymm register INST3( vpbroadcastq, "pbroadcastq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x59)) // Broadcast int64 value from reg/memory to entire ymm register -INST3( vextractf128, "extractf128" , 0, IUM_WR, 0, 0, SSE3A(0x19), BAD_CODE, SSE3A(0x19)) // Extract 128-bit packed floating point values -INST3( vextracti128, "extracti128" , 0, IUM_WR, 0, 0, SSE3A(0x39), BAD_CODE, SSE3A(0x39)) // Extract 128-bit packed integer values +INST3( vextractf128, "extractf128" , 0, IUM_WR, 0, 0, SSE3A(0x19), BAD_CODE, BAD_CODE) // Extract 128-bit packed floating point values +INST3( vextracti128, "extracti128" , 0, IUM_WR, 0, 0, SSE3A(0x39), BAD_CODE, BAD_CODE) // Extract 128-bit packed integer values INST3( vinsertf128, "insertf128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x18)) // Insert 128-bit packed floating point values INST3( vinserti128, "inserti128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x38)) // Insert 128-bit packed integer values INST3( vzeroupper, "zeroupper" , 0, IUM_WR, 0, 0, 0xC577F8, BAD_CODE, BAD_CODE) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix) |