summaryrefslogtreecommitdiff
path: root/src/jit
diff options
context:
space:
mode:
authorsivarv <sivarv@microsoft.com>2016-12-21 15:17:08 -0800
committersivarv <sivarv@microsoft.com>2016-12-21 15:50:56 -0800
commit7c887cfd237e767454c7a8eca51aac5a6c4357aa (patch)
tree7783601d8d271cd2ceedad0216c6193576512ffe /src/jit
parent77561c87ed48f5af122aecc104f6a0b6a6382121 (diff)
downloadcoreclr-7c887cfd237e767454c7a8eca51aac5a6c4357aa.tar.gz
coreclr-7c887cfd237e767454c7a8eca51aac5a6c4357aa.tar.bz2
coreclr-7c887cfd237e767454c7a8eca51aac5a6c4357aa.zip
Use Pabsd/pabsw/pabsb instructions for Abs SIMD intrinsic on SSE4 and above targets.
Diffstat (limited to 'src/jit')
-rw-r--r--src/jit/instrsxarch.h3
-rw-r--r--src/jit/lowerxarch.cpp11
-rw-r--r--src/jit/simd.cpp32
-rw-r--r--src/jit/simdcodegenxarch.cpp22
-rw-r--r--src/jit/simdintrinsiclist.h2
5 files changed, 60 insertions, 10 deletions
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index 4317334bf2..8ab3a845ba 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -320,6 +320,9 @@ INST3( pcmpgtq, "pcmpgtq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS
INST3( pmulld, "pmulld" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x40)) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result
INST3( ptest, "ptest" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x17)) // Packed logical compare
INST3( phaddd, "phaddd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x02)) // Packed horizontal add
+INST3( pabsb, "pabsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1C)) // Packed absolute value of bytes
+INST3( pabsw, "pabsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1D)) // Packed absolute value of 16-bit integers
+INST3( pabsd, "pabsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1E)) // Packed absolute value of 32-bit integers
INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 9792e0ed2e..bf5d29c596 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -2791,9 +2791,14 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
break;
case SIMDIntrinsicAbs:
- // This gets implemented as bitwise-And operation with a mask
- // and hence should never see it here.
- unreached();
+ // float/double vectors: Abs gets implemented as a bitwise-And operation
+ // with a mask and hence should never be seen here.
+ //
+ // Must be a Vector<int>, Vector<short> or Vector<sbyte>.
+ assert(simdTree->gtSIMDBaseType == TYP_INT || simdTree->gtSIMDBaseType == TYP_SHORT ||
+ simdTree->gtSIMDBaseType == TYP_BYTE);
+ assert(comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ info->srcCount = 1;
break;
case SIMDIntrinsicSqrt:
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
index 39664c47bf..914ad94a48 100644
--- a/src/jit/simd.cpp
+++ b/src/jit/simd.cpp
@@ -2444,9 +2444,31 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
case SIMDIntrinsicAbs:
{
+#ifdef _TARGET_XARCH_
+ // TODO-CQ-XARCH: we should be able to use conditional select
+ // to compute abs(v) as follows when there is no direct support:
+ // BitVector = v < vector.Zero
+ // result = ConditionalSelect(BitVector, vector.Zero - v, v)
+ if (getSIMDInstructionSet() == InstructionSet_SSE2)
+ {
+ // SSE2 doesn't support abs on signed integer type vectors.
+ if (baseType == TYP_LONG || baseType == TYP_INT || baseType == TYP_SHORT || baseType == TYP_BYTE)
+ {
+ return nullptr;
+ }
+ }
+ else
+ {
+ assert(getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ if (baseType == TYP_LONG)
+ {
+ // SSE3_4/AVX2 don't support abs on long type vectors.
+ return nullptr;
+ }
+ }
+
op1 = impSIMDPopStack(simdType);
-#ifdef _TARGET_XARCH_
if (varTypeIsFloating(baseType))
{
// Abs(vf) = vf & new SIMDVector<float>(0x7fffffff);
@@ -2479,10 +2501,10 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
}
else
{
- // SSE/AVX doesn't support abs on signed integer vectors and hence
- // should never be seen as an intrinsic here. See SIMDIntrinsicList.h
- // for supported base types for this intrinsic.
- unreached();
+ assert(getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ assert(baseType != TYP_LONG);
+
+ retVal = gtNewSIMDNode(simdType, op1, SIMDIntrinsicAbs, baseType, size);
}
#else // !_TARGET_XARCH_
diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp
index ec933fd5d7..c816fd0691 100644
--- a/src/jit/simdcodegenxarch.cpp
+++ b/src/jit/simdcodegenxarch.cpp
@@ -272,6 +272,24 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
}
break;
+ case SIMDIntrinsicAbs:
+ if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)
+ {
+ if (baseType == TYP_INT)
+ {
+ result = INS_pabsd;
+ }
+ else if (baseType == TYP_SHORT)
+ {
+ result = INS_pabsw;
+ }
+ else if (baseType == TYP_BYTE)
+ {
+ result = INS_pabsb;
+ }
+ }
+ break;
+
case SIMDIntrinsicEqual:
if (baseType == TYP_FLOAT)
{
@@ -811,7 +829,8 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
//
void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
{
- assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast);
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAbs);
GenTree* op1 = simdNode->gtGetOp1();
var_types baseType = simdNode->gtSIMDBaseType;
@@ -2274,6 +2293,7 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
case SIMDIntrinsicSqrt:
case SIMDIntrinsicCast:
+ case SIMDIntrinsicAbs:
genSIMDIntrinsicUnOp(simdNode);
break;
diff --git a/src/jit/simdintrinsiclist.h b/src/jit/simdintrinsiclist.h
index c81f7b4bf0..c717c145cf 100644
--- a/src/jit/simdintrinsiclist.h
+++ b/src/jit/simdintrinsiclist.h
@@ -90,7 +90,7 @@ SIMD_INTRINSIC("op_Multiply", false, Mul,
SIMD_INTRINSIC("op_Division", false, Div, "/", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
// SquareRoot is recognized as an intrinsic only in case of float or double vectors; Abs is recognized for the base types listed in its entry below
-SIMD_INTRINSIC("Abs", false, Abs, "abs", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_CHAR, TYP_UBYTE, TYP_UINT, TYP_ULONG, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("Abs", false, Abs, "abs", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_CHAR, TYP_UBYTE, TYP_UINT, TYP_ULONG, TYP_INT, TYP_SHORT, TYP_BYTE, TYP_UNDEF})
SIMD_INTRINSIC("SquareRoot", false, Sqrt, "sqrt", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
// Min and max methods are recognized as intrinsics only in case of float or double vectors