diff options
author | Jacek Blaszczynski <biosciencenow@outlook.com> | 2018-03-08 08:27:10 +0100 |
---|---|---|
committer | Jacek Blaszczynski <biosciencenow@outlook.com> | 2018-03-13 22:39:59 +0100 |
commit | 30833856615e0ff202fe97223dc80f3d2445d382 (patch) | |
tree | 4e4e0fc06ae1935461aaffbbc7f99c97e678c7e5 | |
parent | 1f48b29ed0c9da48ca2d30df2a308fb620d8c22f (diff) | |
download | coreclr-30833856615e0ff202fe97223dc80f3d2445d382.tar.gz coreclr-30833856615e0ff202fe97223dc80f3d2445d382.tar.bz2 coreclr-30833856615e0ff202fe97223dc80f3d2445d382.zip |
Implement SSE2 StoreNonTemporal HW intrinsic - complete SSE2 ISA
-rw-r--r-- | src/jit/emitxarch.cpp | 21 | ||||
-rw-r--r-- | src/jit/hwintrinsiccodegenxarch.cpp | 12 | ||||
-rw-r--r-- | src/jit/hwintrinsiclistxarch.h | 1 | ||||
-rw-r--r-- | src/jit/hwintrinsicxarch.cpp | 12 | ||||
-rw-r--r-- | src/jit/instrsxarch.h | 1 |
5 files changed, 40 insertions, 7 deletions
```diff
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 3f0b23dd79..4e1bec97fb 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -312,11 +312,12 @@ bool emitter::Is4ByteSSE4OrAVXInstruction(instruction ins)
 bool emitter::TakesVexPrefix(instruction ins)
 {
     // special case vzeroupper as it requires 2-byte VEX prefix
-    // special case the fencing and the prefetch instructions as they never take a VEX prefix
+    // special case the fencing, movnti and the prefetch instructions as they never take a VEX prefix
     switch (ins)
     {
         case INS_lfence:
         case INS_mfence:
+        case INS_movnti:
         case INS_prefetchnta:
         case INS_prefetcht0:
         case INS_prefetcht1:
@@ -418,13 +419,21 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
 
     if (IsSSEOrAVXInstruction(ins))
     {
-        if (ins == INS_cvttsd2si || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si ||
-            ins == INS_cvtsi2sd || ins == INS_cvtsi2ss || ins == INS_mov_xmm2i || ins == INS_mov_i2xmm)
+        switch (ins)
         {
-            return true;
+            case INS_cvttsd2si:
+            case INS_cvttss2si:
+            case INS_cvtsd2si:
+            case INS_cvtss2si:
+            case INS_cvtsi2sd:
+            case INS_cvtsi2ss:
+            case INS_mov_xmm2i:
+            case INS_mov_i2xmm:
+            case INS_movnti:
+                return true;
+            default:
+                return false;
         }
-
-        return false;
     }
 
     // TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp
index 77ba37c633..c83e941513 100644
--- a/src/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/jit/hwintrinsiccodegenxarch.cpp
@@ -1084,6 +1084,18 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
             break;
         }
 
+        case NI_SSE2_StoreNonTemporal:
+        {
+            assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG);
+            assert(op1 != nullptr);
+            assert(op2 != nullptr);
+
+            op2Reg = op2->gtRegNum;
+            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
+            emit->emitIns_AR_R(ins, emitTypeSize(baseType), op2Reg, op1Reg, 0);
+            break;
+        }
+
         default:
             unreached();
             break;
diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h
index 32fa63b52a..8a33946728 100644
--- a/src/jit/hwintrinsiclistxarch.h
+++ b/src/jit/hwintrinsiclistxarch.h
@@ -229,6 +229,7 @@ HARDWARE_INTRINSIC(SSE2_StoreAligned, "StoreAlign
 HARDWARE_INTRINSIC(SSE2_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", SSE2, -1, 16, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_invalid, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_StoreHigh, "StoreHigh", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_StoreLow, "StoreLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_StoreNonTemporal, "StoreNonTemporal", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti, INS_movnti, INS_movnti, INS_movnti, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoRMWSemantics|HW_Flag_SecondArgMaybe64Bit)
 HARDWARE_INTRINSIC(SSE2_StoreScalar, "StoreScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsdsse2}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_Subtract, "Subtract", SSE2, -1, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_SubtractSaturate, "SubtractSaturate", SSE2, -1, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp
index 869361770c..35a42dd954 100644
--- a/src/jit/hwintrinsicxarch.cpp
+++ b/src/jit/hwintrinsicxarch.cpp
@@ -545,7 +545,6 @@ bool Compiler::isFullyImplmentedISAClass(InstructionSet isa)
 {
     switch (isa)
     {
-        case InstructionSet_SSE2:
         case InstructionSet_SSE42:
         case InstructionSet_AVX:
         case InstructionSet_AVX2:
@@ -557,6 +556,7 @@ bool Compiler::isFullyImplmentedISAClass(InstructionSet isa)
             return false;
 
         case InstructionSet_SSE:
+        case InstructionSet_SSE2:
         case InstructionSet_SSE3:
         case InstructionSet_SSSE3:
         case InstructionSet_SSE41:
@@ -1012,6 +1012,16 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic,
             break;
         }
 
+        case NI_SSE2_StoreNonTemporal:
+        {
+            assert(sig->numArgs == 2);
+            assert(JITtype2varType(sig->retType) == TYP_VOID);
+            op2 = impPopStack().val;
+            op1 = impPopStack().val;
+            retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, NI_SSE2_StoreNonTemporal, op2->TypeGet(), 0);
+            break;
+        }
+
         default:
             JITDUMP("Not implemented hardware intrinsic");
             break;
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index b5b88da982..c0cd91d6a2 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -196,6 +196,7 @@ INST3( cvttsd2si, "cvttsd2si" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE
 
 #ifndef LEGACY_BACKEND
 INST3( movntdq, "movntdq" , 0, IUM_WR, 0, 0, PCKDBL(0xE7), BAD_CODE, BAD_CODE)
+INST3( movnti, "movnti" , 0, IUM_WR, 0, 0, PCKFLT(0xC3), BAD_CODE, BAD_CODE)
 INST3( movntpd, "movntpd" , 0, IUM_WR, 0, 0, PCKDBL(0x2B), BAD_CODE, BAD_CODE)
 INST3( movntps, "movntps" , 0, IUM_WR, 0, 0, PCKFLT(0x2B), BAD_CODE, BAD_CODE)
 INST3( movdqu, "movdqu" , 0, IUM_WR, 0, 0, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F))
```