summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSivarv <sivarv@microsoft.com>2016-12-21 17:34:39 -0800
committerGitHub <noreply@github.com>2016-12-21 17:34:39 -0800
commite88965d26433025ff36b9164b06833ca2253b61c (patch)
tree503dc10f4bc434ab497b5e83d166d3273d1696bc
parent3f9ec2a41a809ae6e39e727871d1d7d6c29106b7 (diff)
parent7c887cfd237e767454c7a8eca51aac5a6c4357aa (diff)
downloadcoreclr-e88965d26433025ff36b9164b06833ca2253b61c.tar.gz
coreclr-e88965d26433025ff36b9164b06833ca2253b61c.tar.bz2
coreclr-e88965d26433025ff36b9164b06833ca2253b61c.zip
Merge pull request #8706 from sivarv/simdAbs
Use Pabsd/pabsw/pabsb instructions for Abs SIMD intrinsic on SSE3_4 and above targets.
-rw-r--r--src/jit/instrsxarch.h3
-rw-r--r--src/jit/lowerxarch.cpp11
-rw-r--r--src/jit/simd.cpp32
-rw-r--r--src/jit/simdcodegenxarch.cpp22
-rw-r--r--src/jit/simdintrinsiclist.h2
-rw-r--r--tests/src/JIT/SIMD/VectorAbs.cs15
6 files changed, 74 insertions, 11 deletions
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index 4317334bf2..8ab3a845ba 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -320,6 +320,9 @@ INST3( pcmpgtq, "pcmpgtq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS
INST3( pmulld, "pmulld" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x40)) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result
INST3( ptest, "ptest" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x17)) // Packed logical compare
INST3( phaddd, "phaddd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x02)) // Packed horizontal add
+INST3( pabsb, "pabsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1C)) // Packed absolute value of bytes
+INST3( pabsw, "pabsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1D)) // Packed absolute value of 16-bit integers
+INST3( pabsd, "pabsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1E)) // Packed absolute value of 32-bit integers
INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 9792e0ed2e..bf5d29c596 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -2791,9 +2791,14 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
break;
case SIMDIntrinsicAbs:
- // This gets implemented as bitwise-And operation with a mask
- // and hence should never see it here.
- unreached();
+ // float/double vectors: This gets implemented as a bitwise-And operation
+ // with a mask and hence should never be seen here.
+ //
+ // Must be a Vector<int>, Vector<short> or Vector<sbyte>
+ assert(simdTree->gtSIMDBaseType == TYP_INT || simdTree->gtSIMDBaseType == TYP_SHORT ||
+ simdTree->gtSIMDBaseType == TYP_BYTE);
+ assert(comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ info->srcCount = 1;
break;
case SIMDIntrinsicSqrt:
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
index 39664c47bf..914ad94a48 100644
--- a/src/jit/simd.cpp
+++ b/src/jit/simd.cpp
@@ -2444,9 +2444,31 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
case SIMDIntrinsicAbs:
{
+#ifdef _TARGET_XARCH_
+ // TODO-CQ-XARCH: we should be able to use conditional select
+ // to compute abs(v) as follows when there is no direct support:
+ // BitVector = v < vector.Zero
+ // result = ConditionalSelect(BitVector, vector.Zero - v, v)
+ if (getSIMDInstructionSet() == InstructionSet_SSE2)
+ {
+ // SSE2 doesn't support abs on signed integer type vectors.
+ if (baseType == TYP_LONG || baseType == TYP_INT || baseType == TYP_SHORT || baseType == TYP_BYTE)
+ {
+ return nullptr;
+ }
+ }
+ else
+ {
+ assert(getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ if (baseType == TYP_LONG)
+ {
+ // SSE3_4/AVX2 don't support abs on long type vector.
+ return nullptr;
+ }
+ }
+
op1 = impSIMDPopStack(simdType);
-#ifdef _TARGET_XARCH_
if (varTypeIsFloating(baseType))
{
// Abs(vf) = vf & new SIMDVector<float>(0x7fffffff);
@@ -2479,10 +2501,10 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
}
else
{
- // SSE/AVX doesn't support abs on signed integer vectors and hence
- // should never be seen as an intrinsic here. See SIMDIntrinsicList.h
- // for supported base types for this intrinsic.
- unreached();
+ assert(getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ assert(baseType != TYP_LONG);
+
+ retVal = gtNewSIMDNode(simdType, op1, SIMDIntrinsicAbs, baseType, size);
}
#else // !_TARGET_XARCH_
diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp
index ec933fd5d7..c816fd0691 100644
--- a/src/jit/simdcodegenxarch.cpp
+++ b/src/jit/simdcodegenxarch.cpp
@@ -272,6 +272,24 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
}
break;
+ case SIMDIntrinsicAbs:
+ if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)
+ {
+ if (baseType == TYP_INT)
+ {
+ result = INS_pabsd;
+ }
+ else if (baseType == TYP_SHORT)
+ {
+ result = INS_pabsw;
+ }
+ else if (baseType == TYP_BYTE)
+ {
+ result = INS_pabsb;
+ }
+ }
+ break;
+
case SIMDIntrinsicEqual:
if (baseType == TYP_FLOAT)
{
@@ -811,7 +829,8 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
//
void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
{
- assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast);
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAbs);
GenTree* op1 = simdNode->gtGetOp1();
var_types baseType = simdNode->gtSIMDBaseType;
@@ -2274,6 +2293,7 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
case SIMDIntrinsicSqrt:
case SIMDIntrinsicCast:
+ case SIMDIntrinsicAbs:
genSIMDIntrinsicUnOp(simdNode);
break;
diff --git a/src/jit/simdintrinsiclist.h b/src/jit/simdintrinsiclist.h
index c81f7b4bf0..c717c145cf 100644
--- a/src/jit/simdintrinsiclist.h
+++ b/src/jit/simdintrinsiclist.h
@@ -90,7 +90,7 @@ SIMD_INTRINSIC("op_Multiply", false, Mul,
SIMD_INTRINSIC("op_Division", false, Div, "/", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
// Abs and SquareRoot are recognized as intrinsics only in case of float or double vectors
-SIMD_INTRINSIC("Abs", false, Abs, "abs", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_CHAR, TYP_UBYTE, TYP_UINT, TYP_ULONG, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("Abs", false, Abs, "abs", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_CHAR, TYP_UBYTE, TYP_UINT, TYP_ULONG, TYP_INT, TYP_SHORT, TYP_BYTE, TYP_UNDEF})
SIMD_INTRINSIC("SquareRoot", false, Sqrt, "sqrt", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
// Min and max methods are recognized as intrinsics only in case of float or double vectors
diff --git a/tests/src/JIT/SIMD/VectorAbs.cs b/tests/src/JIT/SIMD/VectorAbs.cs
index 64a7bde17f..ab18272b18 100644
--- a/tests/src/JIT/SIMD/VectorAbs.cs
+++ b/tests/src/JIT/SIMD/VectorAbs.cs
@@ -76,6 +76,8 @@ internal partial class VectorTest
if (VectorAbsTest<Double>.VectorAbs(-1d, 1d) != Pass) returnVal = Fail;
if (VectorAbsTest<int>.VectorAbs(-1, 1) != Pass) returnVal = Fail;
if (VectorAbsTest<long>.VectorAbs(-1, 1) != Pass) returnVal = Fail;
+ if (VectorAbsTest<short>.VectorAbs((short)-1, (short)1) != Pass) returnVal = Fail;
+ if (VectorAbsTest<sbyte>.VectorAbs((sbyte)-1, (sbyte)1) != Pass) returnVal = Fail;
if (Vector4Test.VectorAbs() != Pass) returnVal = Fail;
if (Vector3Test.VectorAbs() != Pass) returnVal = Fail;
if (Vector2Test.VectorAbs() != Pass) returnVal = Fail;
@@ -87,7 +89,10 @@ internal partial class VectorTest
JitLog jitLog = new JitLog();
if (!jitLog.Check("Abs", "Single")) returnVal = Fail;
if (!jitLog.Check("Abs", "Double")) returnVal = Fail;
- // Abs is not an intrinsic for Int32 and Int64, but IS for UInt32 and UInt64
+ // SSE2: Abs is not an intrinsic for SByte, Int16, Int32 and Int64, but IS for UInt32 and UInt64
+ // SSE3_4: Int64 is the only type for which Abs is not an intrinsic.
+ // Since right now there is no way to know the SIMD instruction set used by the JIT,
+ // we check the signed integer cases conservatively, only when Vector<int>.Count == 8 (see below).
if (!jitLog.Check("System.Numerics.Vector4:Abs")) returnVal = Fail;
if (!jitLog.Check("System.Numerics.Vector3:Abs")) returnVal = Fail;
if (!jitLog.Check("System.Numerics.Vector2:Abs")) returnVal = Fail;
@@ -95,6 +100,14 @@ internal partial class VectorTest
if (!jitLog.Check("Abs", "Byte")) returnVal = Fail;
if (!jitLog.Check("Abs", "UInt32")) returnVal = Fail;
if (!jitLog.Check("Abs", "UInt64")) returnVal = Fail;
+
+ // AVX: Int64 is the only type for which Abs is not an intrinsic.
+ if (Vector<int>.Count == 8)
+ {
+ if (!jitLog.Check("Abs", "Int32")) returnVal = Fail;
+ if (!jitLog.Check("Abs", "Int16")) returnVal = Fail;
+ if (!jitLog.Check("Abs", "SByte")) returnVal = Fail;
+ }
jitLog.Dispose();
return returnVal;