diff options
author | Tanner Gooding <tagoo@outlook.com> | 2018-08-07 13:31:30 -0700 |
---|---|---|
committer | Tanner Gooding <tagoo@outlook.com> | 2018-08-09 12:35:54 -0700 |
commit | 2110b70551bac4f4ad65fcbbdfd853eea9000b4e (patch) | |
tree | fc5cfac5e5e61758f6f8b307ca6c1e8e7c173d31 /src/jit | |
parent | ce175cbb849378c2ef1985ff3994cd0d82f3d8fe (diff) | |
download | coreclr-2110b70551bac4f4ad65fcbbdfd853eea9000b4e.tar.gz coreclr-2110b70551bac4f4ad65fcbbdfd853eea9000b4e.tar.bz2 coreclr-2110b70551bac4f4ad65fcbbdfd853eea9000b4e.zip |
Implementing the Avx.MaskStore intrinsics
Diffstat (limited to 'src/jit')
-rw-r--r-- | src/jit/emitfmtsxarch.h | 2 | ||||
-rw-r--r-- | src/jit/emitxarch.cpp | 91 | ||||
-rw-r--r-- | src/jit/emitxarch.h | 2 | ||||
-rw-r--r-- | src/jit/hwintrinsiccodegenxarch.cpp | 21 | ||||
-rw-r--r-- | src/jit/hwintrinsiclistxarch.h | 1 | ||||
-rw-r--r-- | src/jit/hwintrinsicxarch.cpp | 17 | ||||
-rw-r--r-- | src/jit/instrsxarch.h | 4 |
7 files changed, 118 insertions, 20 deletions
diff --git a/src/jit/emitfmtsxarch.h b/src/jit/emitfmtsxarch.h index 190a6e0ea9..b070b3d083 100644 --- a/src/jit/emitfmtsxarch.h +++ b/src/jit/emitfmtsxarch.h @@ -195,6 +195,8 @@ IF_DEF(ARD_RRD, IS_AM_RD|IS_R1_RD, AMD ) // read [adr], read IF_DEF(AWR_RRD, IS_AM_WR|IS_R1_RD, AMD ) // write [adr], read reg IF_DEF(ARW_RRD, IS_AM_RW|IS_R1_RD, AMD ) // r/w [adr], read reg +IF_DEF(AWR_RRD_RRD, IS_AM_WR|IS_R1_RD|IS_R2_RD, AMD ) // write [adr], read reg, read reg + IF_DEF(ARD_CNS, IS_AM_RD, AMD_CNS) // read [adr], const IF_DEF(AWR_CNS, IS_AM_WR, AMD_CNS) // write [adr], const IF_DEF(ARW_CNS, IS_AM_RW, AMD_CNS) // r/w [adr], const diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 673bb550f8..08860079c4 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -4148,6 +4148,42 @@ void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int off emitCurIGsize += sz; } +//------------------------------------------------------------------------ +// emitIns_AR_R_R: emits the code for an instruction that takes a base memory register, two register operands +// and that does not return a value +// +// Arguments: +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op2Reg -- The register of the second operand +// op3Reg -- The register of the third operand +// base -- The base register used for the memory address (first operand) +// offs -- The offset from base +// +void emitter::emitIns_AR_R_R( + instruction ins, emitAttr attr, regNumber op2Reg, regNumber op3Reg, regNumber base, int offs) +{ + assert(IsSSEOrAVXInstruction(ins)); + assert(IsThreeOperandAVXInstruction(ins)); + + instrDesc* id = emitNewInstrAmd(attr, offs); + + id->idIns(ins); + id->idReg1(op2Reg); + id->idReg2(op3Reg); + + id->idInsFmt(IF_AWR_RRD_RRD); + id->idAddr()->iiaAddrMode.amBaseReg = base; + id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; + + UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins)); + id->idCodeSize(sz); + + dispIns(id); + emitCurIGsize += sz; +} + void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir) { ssize_t offs = indir->Offset(); @@ -8586,6 +8622,15 @@ void emitter::emitDispIns( printf(", %s", emitRegName(id->idReg1(), attr)); break; + case IF_AWR_RRD_RRD: + { + printf("%s", sstr); + emitDispAddrMode(id); + printf(", %s", emitRegName(id->idReg1(), attr)); + printf(", %s", emitRegName(id->idReg2(), attr)); + break; + } + case IF_ARD_CNS: case IF_AWR_CNS: case IF_ARW_CNS: @@ -9412,12 +9457,23 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) { if (IsDstDstSrcAVXInstruction(ins)) { - regNumber src1 = id->idReg2(); + regNumber src1 = REG_NA; - if ((id->idInsFmt() != IF_RWR_RRD_ARD) && (id->idInsFmt() != IF_RWR_RRD_ARD_CNS) && - (id->idInsFmt() != IF_RWR_RRD_ARD_RRD)) + switch (id->idInsFmt()) { - src1 = id->idReg1(); + case IF_RWR_RRD_ARD: + case IF_RWR_RRD_ARD_CNS: + case IF_RWR_RRD_ARD_RRD: + { + src1 = id->idReg2(); + break; + } + + default: + { + src1 = id->idReg1(); + break; + } } // encode source operand reg in 'vvvv' bits in 1's complement form @@ -9469,7 +9525,20 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } if (reg345 == REG_NA) { - reg345 = id->idReg1(); + switch (id->idInsFmt()) + { + case IF_AWR_RRD_RRD: + { + reg345 = id->idReg2(); + break; + } + + default: + { + reg345 = id->idReg1(); + break; + } + } } unsigned regcode = insEncodeReg345(ins, reg345, size, &code); @@ -10100,6 +10169,9 @@ DONE: case IF_AWR_RRD: break; + case IF_AWR_RRD_RRD: + break; + case IF_ARD_CNS: case IF_AWR_CNS: break; @@ -13080,6 +13152,15 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) sz = emitSizeOfInsDsc(id); break; + case IF_AWR_RRD_RRD: + { + code = insCodeMR(ins); + code = AddVexPrefixIfNeeded(ins, code, size); + dst = emitOutputAM(dst, id, code); + sz = emitSizeOfInsDsc(id); + break; + } + case IF_ARD_CNS: case IF_AWR_CNS: case IF_ARW_CNS: diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h index e7a93a1bc6..3ec962f613 100644 --- a/src/jit/emitxarch.h +++ b/src/jit/emitxarch.h @@ -319,6 +319,8 @@ void emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg void emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs); +void emitIns_AR_R_R(instruction ins, emitAttr attr, regNumber op2Reg, regNumber op3Reg, regNumber base, int offs); + void emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir); void emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival); diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 42302840ff..ad1be6a061 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -253,15 +253,22 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else if (category == HW_Category_MemoryStore) { - assert(intrinsicId == NI_SSE2_MaskMove); - assert(targetReg == REG_NA); - - // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI - if (op3Reg != REG_EDI) + if (intrinsicId == NI_AVX_MaskStore) + { + emit->emitIns_AR_R_R(ins, simdSize, op2Reg, op3Reg, op1Reg, 0); + } + else { - emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg); + assert(intrinsicId == NI_SSE2_MaskMove); + assert(targetReg == REG_NA); + + // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI + if (op3Reg != REG_EDI) + { + emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg); + } + emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg); } - emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg); } else { diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h index ff6f2ff372..f085ff545f 100644 --- a/src/jit/hwintrinsiclistxarch.h +++ b/src/jit/hwintrinsiclistxarch.h @@ -368,6 +368,7 @@ HARDWARE_INTRINSIC(AVX_LoadVector256, "LoadVector2 HARDWARE_INTRINSIC(AVX_Max, "Max", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX_Min, "Min", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX_MaskLoad, "MaskLoad", AVX, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd}, HW_Category_MemoryLoad, HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize) +HARDWARE_INTRINSIC(AVX_MaskStore, "MaskStore", AVX, -1, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVX_MoveMask, "MoveMask", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX_Multiply, "Multiply", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX_Or, "Or", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp index 9032138e31..3cf1ac6664 100644 --- a/src/jit/hwintrinsicxarch.cpp +++ b/src/jit/hwintrinsicxarch.cpp @@ -184,11 +184,16 @@ unsigned HWIntrinsicInfo::lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORI { typeHnd = sig->retTypeSigClass; } - else + else if (HWIntrinsicInfo::BaseTypeFromFirstArg(id)) { - assert(HWIntrinsicInfo::BaseTypeFromFirstArg(id)); typeHnd = comp->info.compCompHnd->getArgClass(sig, sig->args); } + else + { + assert(HWIntrinsicInfo::BaseTypeFromSecondArg(id)); + CORINFO_ARG_LIST_HANDLE secondArg = comp->info.compCompHnd->getArgNext(sig->args); + typeHnd = comp->info.compCompHnd->getArgClass(sig, secondArg); + } unsigned simdSize = 0; var_types baseType = comp->getBaseTypeAndSizeOfSIMDType(typeHnd, &simdSize); @@ -385,7 +390,6 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(InstructionSet isa) } // These ISAs are partially implemented - case InstructionSet_AVX: case InstructionSet_AVX2: case InstructionSet_BMI1: case InstructionSet_BMI2: @@ -395,14 +399,15 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(InstructionSet isa) } // These ISAs are fully implemented + case InstructionSet_AVX: + case InstructionSet_FMA: + case InstructionSet_LZCNT: + case InstructionSet_POPCNT: case InstructionSet_SSE: case InstructionSet_SSE2: case InstructionSet_SSE3: case InstructionSet_SSSE3: case InstructionSet_SSE41: - case InstructionSet_FMA: - case InstructionSet_LZCNT: - case InstructionSet_POPCNT: { return true; } diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index 87fd76a5d6..e38638aa40 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -498,8 +498,8 @@ INST3( vpermilpdvar, "permilpdvar" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS INST3( vperm2f128, "perm2f128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x06)) // Permute Floating-Point Values INST3(vbroadcastf128,"broadcastf128",0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1A)) // Broadcast packed float values read from memory to entire ymm register INST3(vbroadcasti128,"broadcasti128",0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x5A)) // Broadcast packed integer values read from memory to entire ymm register -INST3(vmaskmovps, "maskmovps" ,0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2C)) // Conditional SIMD Packed Loads Float -INST3(vmaskmovpd, "maskmovpd" ,0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2D)) // Conditional SIMD Packed Loads Double +INST3(vmaskmovps, "maskmovps" ,0, IUM_WR, 0, 0, SSE38(0x2E), BAD_CODE, SSE38(0x2C)) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores +INST3(vmaskmovpd, "maskmovpd" ,0, IUM_WR, 0, 0, SSE38(0x2F), BAD_CODE, SSE38(0x2D)) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores INST3(FIRST_FMA_INSTRUCTION, "FIRST_FMA_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE) // enum name FP updmode rf wf MR MI RM |