summaryrefslogtreecommitdiff
path: root/src/jit/hwintrinsicxarch.cpp
diff options
context:
space:
mode:
authorFei Peng <fei.peng@intel.com>2018-03-19 00:16:34 -0700
committerTanner Gooding <tagoo@outlook.com>2018-03-22 09:03:09 -0700
commit98e924c6f58a4c349df674668f6f96611425e0bf (patch)
tree648938812d3aee0de432755b6830e65ea5426320 /src/jit/hwintrinsicxarch.cpp
parentd334e0e467500615e7ff9c4f3b311a722d407821 (diff)
downloadcoreclr-98e924c6f58a4c349df674668f6f96611425e0bf.tar.gz
coreclr-98e924c6f58a4c349df674668f6f96611425e0bf.tar.bz2
coreclr-98e924c6f58a4c349df674668f6f96611425e0bf.zip
Optimize AVX Insert/Extract intrinsics
Diffstat (limited to 'src/jit/hwintrinsicxarch.cpp')
-rw-r--r--src/jit/hwintrinsicxarch.cpp95
1 files changed, 95 insertions, 0 deletions
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp
index c52958a170..75eaf1f0fe 100644
--- a/src/jit/hwintrinsicxarch.cpp
+++ b/src/jit/hwintrinsicxarch.cpp
@@ -1058,6 +1058,26 @@ GenTree* Compiler::impSSE42Intrinsic(NamedIntrinsic intrinsic,
return retNode;
}
+//------------------------------------------------------------------------
+// normalizeAndGetHalfIndex: compute the half index of a Vector256<baseType>
+// and normalize the index to the specific range
+//
+// Arguments:
+// indexPtr -- OUT parameter, the pointer to the original index value
+// baseType -- the base type of the Vector256<T>
+//
+// Return Value:
+// return the middle index of a Vector256<baseType>
+// return the normalized index via indexPtr
+//
+static int normalizeAndGetHalfIndex(int* indexPtr, var_types baseType)
+{
+ assert(varTypeIsArithmetic(baseType));
+ // clear the unused bits to normalize the index into the range of [0, length of Vector256<baseType>)
+ *indexPtr = (*indexPtr) & (32 / genTypeSize(baseType) - 1);
+ return (16 / genTypeSize(baseType));
+}
+
GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
@@ -1071,6 +1091,81 @@ GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic,
switch (intrinsic)
{
+ case NI_AVX_Extract:
+ {
+ // Avx.Extract executes software implementation when the imm8 argument is not compile-time constant
+ assert(!mustExpand);
+
+ GenTree* lastOp = impPopStack().val;
+ GenTree* vectorOp = impSIMDPopStack(TYP_SIMD32);
+ assert(lastOp->IsCnsIntOrI());
+ int ival = (int)lastOp->AsIntCon()->IconValue();
+ baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
+ var_types retType = JITtype2varType(sig->retType);
+ assert(varTypeIsArithmetic(baseType));
+
+ int midIndex = normalizeAndGetHalfIndex(&ival, baseType);
+ NamedIntrinsic extractIntrinsic = varTypeIsShort(baseType) ? NI_SSE2_Extract : NI_SSE41_Extract;
+ GenTree* half = nullptr;
+
+ if (ival >= midIndex)
+ {
+ half = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128,
+ baseType, 32);
+ ival -= midIndex;
+ }
+ else
+ {
+ half = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_AVX_GetLowerHalf, baseType, 32);
+ }
+
+ retNode = gtNewSimdHWIntrinsicNode(retType, half, gtNewIconNode(ival), extractIntrinsic, baseType, 16);
+ break;
+ }
+
+ case NI_AVX_Insert:
+ {
+ // Avx.Insert executes software implementation when the imm8 argument is not compile-time constant
+ assert(!mustExpand);
+
+ GenTree* lastOp = impPopStack().val;
+ GenTree* dataOp = impPopStack().val;
+ GenTree* vectorOp = impSIMDPopStack(TYP_SIMD32);
+ assert(lastOp->IsCnsIntOrI());
+ int ival = (int)lastOp->AsIntCon()->IconValue();
+ baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
+ assert(varTypeIsArithmetic(baseType));
+
+ int midIndex = normalizeAndGetHalfIndex(&ival, baseType);
+ NamedIntrinsic insertIntrinsic = varTypeIsShort(baseType) ? NI_SSE2_Insert : NI_SSE41_Insert;
+
+ GenTree* clonedVectorOp;
+ vectorOp =
+ impCloneExpr(vectorOp, &clonedVectorOp, info.compCompHnd->getArgClass(sig, sig->args),
+ (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("AVX Insert clones the vector operand"));
+
+ if (ival >= midIndex)
+ {
+ GenTree* halfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1),
+ NI_AVX_ExtractVector128, baseType, 32);
+ GenTree* ModifiedHalfVector =
+ gtNewSimdHWIntrinsicNode(TYP_SIMD16, halfVector, dataOp, gtNewIconNode(ival - midIndex),
+ insertIntrinsic, baseType, 16);
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, clonedVectorOp, ModifiedHalfVector, gtNewIconNode(1),
+ NI_AVX_InsertVector128, baseType, 32);
+ }
+ else
+ {
+ GenTree* halfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_AVX_GetLowerHalf, baseType, 32);
+ GenTree* ModifiedHalfVector =
+ gtNewSimdHWIntrinsicNode(TYP_SIMD32, halfVector, dataOp, gtNewIconNode(ival), insertIntrinsic,
+ baseType, 16);
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, clonedVectorOp, ModifiedHalfVector, gtNewIconNode(15),
+ NI_AVX_Blend, TYP_FLOAT, 32);
+ }
+ break;
+ }
+
case NI_AVX_ExtractVector128:
case NI_AVX2_ExtractVector128:
{