diff options
author | sivarv <sivarv@microsoft.com> | 2016-12-21 15:17:08 -0800 |
---|---|---|
committer | sivarv <sivarv@microsoft.com> | 2016-12-21 15:50:56 -0800 |
commit | 7c887cfd237e767454c7a8eca51aac5a6c4357aa (patch) | |
tree | 7783601d8d271cd2ceedad0216c6193576512ffe /src/jit | |
parent | 77561c87ed48f5af122aecc104f6a0b6a6382121 (diff) | |
download | coreclr-7c887cfd237e767454c7a8eca51aac5a6c4357aa.tar.gz coreclr-7c887cfd237e767454c7a8eca51aac5a6c4357aa.tar.bz2 coreclr-7c887cfd237e767454c7a8eca51aac5a6c4357aa.zip |
Use pabsd/pabsw/pabsb instructions for the Abs SIMD intrinsic on SSE4 and above targets.
Diffstat (limited to 'src/jit')
-rw-r--r-- | src/jit/instrsxarch.h | 3 | ||||
-rw-r--r-- | src/jit/lowerxarch.cpp | 11 | ||||
-rw-r--r-- | src/jit/simd.cpp | 32 | ||||
-rw-r--r-- | src/jit/simdcodegenxarch.cpp | 22 | ||||
-rw-r--r-- | src/jit/simdintrinsiclist.h | 2 |
5 files changed, 60 insertions, 10 deletions
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index 4317334bf2..8ab3a845ba 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -320,6 +320,9 @@ INST3( pcmpgtq, "pcmpgtq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS INST3( pmulld, "pmulld" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x40)) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result INST3( ptest, "ptest" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x17)) // Packed logical compare INST3( phaddd, "phaddd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x02)) // Packed horizontal add +INST3( pabsb, "pabsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1C)) // Packed absolute value of bytes +INST3( pabsw, "pabsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1D)) // Packed absolute value of 16-bit integers +INST3( pabsd, "pabsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1E)) // Packed absolute value of 32-bit integers INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE) INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE) diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 9792e0ed2e..bf5d29c596 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -2791,9 +2791,14 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree) break; case SIMDIntrinsicAbs: - // This gets implemented as bitwise-And operation with a mask - // and hence should never see it here. - unreached(); + // float/double vectors: This gets implemented as bitwise-And operation + // with a mask and hence should never see here. 
+ // + // Must be a Vector<int> or Vector<short> Vector<sbyte> + assert(simdTree->gtSIMDBaseType == TYP_INT || simdTree->gtSIMDBaseType == TYP_SHORT || + simdTree->gtSIMDBaseType == TYP_BYTE); + assert(comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4); + info->srcCount = 1; break; case SIMDIntrinsicSqrt: diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp index 39664c47bf..914ad94a48 100644 --- a/src/jit/simd.cpp +++ b/src/jit/simd.cpp @@ -2444,9 +2444,31 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, case SIMDIntrinsicAbs: { +#ifdef _TARGET_XARCH_ + // TODO-CQ-XARCH: we should be able to use conditional select + // to compute abs(v) as follows when there is no direct support: + // BitVector = v < vector.Zero + // result = ConditionalSelect(BitVector, vector.Zero - v, v) + if (getSIMDInstructionSet() == InstructionSet_SSE2) + { + // SSE2 doesn't support abs on signed integer type vectors. + if (baseType == TYP_LONG || baseType == TYP_INT || baseType == TYP_SHORT || baseType == TYP_BYTE) + { + return nullptr; + } + } + else + { + assert(getSIMDInstructionSet() >= InstructionSet_SSE3_4); + if (baseType == TYP_LONG) + { + // SSE3_4/AVX2 don't support abs on long type vector. + return nullptr; + } + } + op1 = impSIMDPopStack(simdType); -#ifdef _TARGET_XARCH_ if (varTypeIsFloating(baseType)) { // Abs(vf) = vf & new SIMDVector<float>(0x7fffffff); @@ -2479,10 +2501,10 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, } else { - // SSE/AVX doesn't support abs on signed integer vectors and hence - // should never be seen as an intrinsic here. See SIMDIntrinsicList.h - // for supported base types for this intrinsic. 
- unreached(); + assert(getSIMDInstructionSet() >= InstructionSet_SSE3_4); + assert(baseType != TYP_LONG); + + retVal = gtNewSIMDNode(simdType, op1, SIMDIntrinsicAbs, baseType, size); } #else // !_TARGET_XARCH_ diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp index ec933fd5d7..c816fd0691 100644 --- a/src/jit/simdcodegenxarch.cpp +++ b/src/jit/simdcodegenxarch.cpp @@ -272,6 +272,24 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type } break; + case SIMDIntrinsicAbs: + if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4) + { + if (baseType == TYP_INT) + { + result = INS_pabsd; + } + else if (baseType == TYP_SHORT) + { + result = INS_pabsw; + } + else if (baseType == TYP_BYTE) + { + result = INS_pabsb; + } + } + break; + case SIMDIntrinsicEqual: if (baseType == TYP_FLOAT) { @@ -811,7 +829,8 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast); + assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast || + simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAbs); GenTree* op1 = simdNode->gtGetOp1(); var_types baseType = simdNode->gtSIMDBaseType; @@ -2274,6 +2293,7 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) case SIMDIntrinsicSqrt: case SIMDIntrinsicCast: + case SIMDIntrinsicAbs: genSIMDIntrinsicUnOp(simdNode); break; diff --git a/src/jit/simdintrinsiclist.h b/src/jit/simdintrinsiclist.h index c81f7b4bf0..c717c145cf 100644 --- a/src/jit/simdintrinsiclist.h +++ b/src/jit/simdintrinsiclist.h @@ -90,7 +90,7 @@ SIMD_INTRINSIC("op_Multiply", false, Mul, SIMD_INTRINSIC("op_Division", false, Div, "/", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) // Abs and 
SquareRoot are recognized as intrinsics only in case of float or double vectors -SIMD_INTRINSIC("Abs", false, Abs, "abs", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_CHAR, TYP_UBYTE, TYP_UINT, TYP_ULONG, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) +SIMD_INTRINSIC("Abs", false, Abs, "abs", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_CHAR, TYP_UBYTE, TYP_UINT, TYP_ULONG, TYP_INT, TYP_SHORT, TYP_BYTE, TYP_UNDEF}) SIMD_INTRINSIC("SquareRoot", false, Sqrt, "sqrt", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) // Min and max methods are recognized as intrinsics only in case of float or double vectors |