summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jacek Blaszczynski <biosciencenow@outlook.com> 2018-03-08 08:27:10 +0100
committer Jacek Blaszczynski <biosciencenow@outlook.com> 2018-03-13 22:39:59 +0100
commit 30833856615e0ff202fe97223dc80f3d2445d382 (patch)
tree 4e4e0fc06ae1935461aaffbbc7f99c97e678c7e5
parent 1f48b29ed0c9da48ca2d30df2a308fb620d8c22f (diff)
download coreclr-30833856615e0ff202fe97223dc80f3d2445d382.tar.gz
coreclr-30833856615e0ff202fe97223dc80f3d2445d382.tar.bz2
coreclr-30833856615e0ff202fe97223dc80f3d2445d382.zip
Implement SSE2 StoreNonTemporal HW intrinsic - complete SSE2 ISA
-rw-r--r-- src/jit/emitxarch.cpp 21
-rw-r--r-- src/jit/hwintrinsiccodegenxarch.cpp 12
-rw-r--r-- src/jit/hwintrinsiclistxarch.h 1
-rw-r--r-- src/jit/hwintrinsicxarch.cpp 12
-rw-r--r-- src/jit/instrsxarch.h 1
5 files changed, 40 insertions, 7 deletions
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 3f0b23dd79..4e1bec97fb 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -312,11 +312,12 @@ bool emitter::Is4ByteSSE4OrAVXInstruction(instruction ins)
bool emitter::TakesVexPrefix(instruction ins)
{
// special case vzeroupper as it requires 2-byte VEX prefix
- // special case the fencing and the prefetch instructions as they never take a VEX prefix
+ // special case the fencing, movnti and the prefetch instructions as they never take a VEX prefix
switch (ins)
{
case INS_lfence:
case INS_mfence:
+ case INS_movnti:
case INS_prefetchnta:
case INS_prefetcht0:
case INS_prefetcht1:
@@ -418,13 +419,21 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
if (IsSSEOrAVXInstruction(ins))
{
- if (ins == INS_cvttsd2si || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si ||
- ins == INS_cvtsi2sd || ins == INS_cvtsi2ss || ins == INS_mov_xmm2i || ins == INS_mov_i2xmm)
+ switch (ins)
{
- return true;
+ case INS_cvttsd2si:
+ case INS_cvttss2si:
+ case INS_cvtsd2si:
+ case INS_cvtss2si:
+ case INS_cvtsi2sd:
+ case INS_cvtsi2ss:
+ case INS_mov_xmm2i:
+ case INS_mov_i2xmm:
+ case INS_movnti:
+ return true;
+ default:
+ return false;
}
-
- return false;
}
// TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp
index 77ba37c633..c83e941513 100644
--- a/src/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/jit/hwintrinsiccodegenxarch.cpp
@@ -1084,6 +1084,18 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
break;
}
+ case NI_SSE2_StoreNonTemporal:
+ {
+ assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG);
+ assert(op1 != nullptr);
+ assert(op2 != nullptr);
+
+ op2Reg = op2->gtRegNum;
+ instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
+ emit->emitIns_AR_R(ins, emitTypeSize(baseType), op2Reg, op1Reg, 0);
+ break;
+ }
+
default:
unreached();
break;
diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h
index 32fa63b52a..8a33946728 100644
--- a/src/jit/hwintrinsiclistxarch.h
+++ b/src/jit/hwintrinsiclistxarch.h
@@ -229,6 +229,7 @@ HARDWARE_INTRINSIC(SSE2_StoreAligned, "StoreAlign
HARDWARE_INTRINSIC(SSE2_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", SSE2, -1, 16, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_invalid, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_StoreHigh, "StoreHigh", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_StoreLow, "StoreLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_StoreNonTemporal, "StoreNonTemporal", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti, INS_movnti, INS_movnti, INS_movnti, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoRMWSemantics|HW_Flag_SecondArgMaybe64Bit)
HARDWARE_INTRINSIC(SSE2_StoreScalar, "StoreScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsdsse2}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_Subtract, "Subtract", SSE2, -1, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_SubtractSaturate, "SubtractSaturate", SSE2, -1, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp
index 869361770c..35a42dd954 100644
--- a/src/jit/hwintrinsicxarch.cpp
+++ b/src/jit/hwintrinsicxarch.cpp
@@ -545,7 +545,6 @@ bool Compiler::isFullyImplmentedISAClass(InstructionSet isa)
{
switch (isa)
{
- case InstructionSet_SSE2:
case InstructionSet_SSE42:
case InstructionSet_AVX:
case InstructionSet_AVX2:
@@ -557,6 +556,7 @@ bool Compiler::isFullyImplmentedISAClass(InstructionSet isa)
return false;
case InstructionSet_SSE:
+ case InstructionSet_SSE2:
case InstructionSet_SSE3:
case InstructionSet_SSSE3:
case InstructionSet_SSE41:
@@ -1012,6 +1012,16 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic,
break;
}
+ case NI_SSE2_StoreNonTemporal:
+ {
+ assert(sig->numArgs == 2);
+ assert(JITtype2varType(sig->retType) == TYP_VOID);
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
+ retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, NI_SSE2_StoreNonTemporal, op2->TypeGet(), 0);
+ break;
+ }
+
default:
JITDUMP("Not implemented hardware intrinsic");
break;
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index b5b88da982..c0cd91d6a2 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -196,6 +196,7 @@ INST3( cvttsd2si, "cvttsd2si" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE
#ifndef LEGACY_BACKEND
INST3( movntdq, "movntdq" , 0, IUM_WR, 0, 0, PCKDBL(0xE7), BAD_CODE, BAD_CODE)
+INST3( movnti, "movnti" , 0, IUM_WR, 0, 0, PCKFLT(0xC3), BAD_CODE, BAD_CODE)
INST3( movntpd, "movntpd" , 0, IUM_WR, 0, 0, PCKDBL(0x2B), BAD_CODE, BAD_CODE)
INST3( movntps, "movntps" , 0, IUM_WR, 0, 0, PCKFLT(0x2B), BAD_CODE, BAD_CODE)
INST3( movdqu, "movdqu" , 0, IUM_WR, 0, 0, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F))