Diffstat (limited to 'src/jit/simd.cpp')
-rw-r--r--  src/jit/simd.cpp | 137
1 file changed, 87 insertions(+), 50 deletions(-)
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
index 1f0c867b55..39664c47bf 100644
--- a/src/jit/simd.cpp
+++ b/src/jit/simd.cpp
@@ -77,10 +77,10 @@ int Compiler::getSIMDVectorLength(CORINFO_CLASS_HANDLE typeHnd)
//
int Compiler::getSIMDTypeAlignment(var_types simdType)
{
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_XARCH_
// Fixed length vectors have the following alignment preference
- // Vector2/3 = 8 byte alignment
- // Vector4 = 16-byte alignment
+ // Vector2 = 8 byte alignment
+ // Vector3/4 = 16-byte alignment
unsigned size = genTypeSize(simdType);
// preferred alignment for SSE2 128-bit vectors is 16-bytes
@@ -88,13 +88,16 @@ int Compiler::getSIMDTypeAlignment(var_types simdType)
if (size == 8)
{
return 8;
}
-
- // As per Intel manual, AVX vectors preferred alignment is 32-bytes but on Amd64
- // RSP/EBP is aligned at 16-bytes, therefore to align SIMD types at 32-bytes we need even
- // RSP/EBP to be 32-byte aligned. It is not clear whether additional stack space used in
- // aligning stack is worth the benefit and for now will use 16-byte alignment for AVX
- // 256-bit vectors with unaligned load/stores to/from memory.
- return 16;
+ else if (size <= 16)
+ {
+ assert((size == 12) || (size == 16));
+ return 16;
+ }
+ else
+ {
+ assert(size == 32);
+ return 32;
+ }
#else
assert(!"getSIMDTypeAlignment() unimplemented on target arch");
unreached();
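
The new logic reduces to a small size-to-alignment mapping. As a point of reference, here is a minimal standalone C++ sketch of the same rules (illustrative only, not the JIT's actual helper; sizes assume Vector2 = 8, Vector3 = 12, Vector4/128-bit vectors = 16, and AVX 256-bit vectors = 32 bytes):

```cpp
#include <cassert>

// Preferred alignment for a SIMD value, keyed purely off its size in bytes.
int simdAlignmentForSize(unsigned size)
{
    if (size == 8)
    {
        return 8; // Vector2: two floats
    }
    else if (size <= 16)
    {
        assert((size == 12) || (size == 16)); // Vector3/Vector4/SSE2 vectors
        return 16;
    }
    else
    {
        assert(size == 32); // AVX 256-bit vectors
        return 32;
    }
}
```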
@@ -391,7 +394,6 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in
CORINFO_CLASS_HANDLE typeHnd = *inOutTypeHnd;
*baseType = getBaseTypeAndSizeOfSIMDType(typeHnd, sizeBytes);
- bool isHWAcceleratedIntrinsic = false;
if (typeHnd == SIMDVectorHandle)
{
// All of the supported intrinsics on this static class take a first argument that's a vector,
@@ -424,6 +426,16 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in
return nullptr;
}
+#ifdef _TARGET_X86_
+ // NYI: support LONG type SIMD intrinsics. Need support in long decomposition.
+ // (Don't use NYI fallback mechanism; just call the function.)
+ if ((*baseType == TYP_LONG) || (*baseType == TYP_ULONG))
+ {
+ JITDUMP("NYI: x86 long base type SIMD intrinsics\n");
+ return nullptr;
+ }
+#endif // _TARGET_X86_
+
// account for implicit "this" arg
*argCount = sig->numArgs;
if (sig->hasThis())
@@ -525,7 +537,8 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in
// We don't check anything in that case.
if (!isThisPtr || !isNewObj)
{
- GenTreePtr arg = impStackTop(stackIndex).val;
+ GenTreePtr arg = impStackTop(stackIndex).val;
+ var_types argType = arg->TypeGet();
var_types expectedArgType;
if (argIndex < fixedArgCnt)
@@ -540,6 +553,7 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in
{
// The type of the argument will be genActualType(*baseType).
expectedArgType = genActualType(*baseType);
+ argType = genActualType(argType);
}
}
else
@@ -547,7 +561,6 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in
expectedArgType = *baseType;
}
- var_types argType = arg->TypeGet();
if (!isThisPtr && argType == TYP_I_IMPL)
{
// The reference implementation has a constructor that takes a pointer.
@@ -715,7 +728,7 @@ GenTreeSIMD* Compiler::impSIMDGetFixed(var_types simdType, var_types baseType, u
return simdTree;
}
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_XARCH_
// impSIMDLongRelOpEqual: transforms operands and returns the SIMD intrinsic to be applied on
// transformed operands to obtain == comparison result.
//
@@ -741,7 +754,7 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd,
//
// Equality(v1, v2):
// tmp = (v1 == v2) i.e. compare for equality as if v1 and v2 are vector<int>
- // result = BitwiseAnd(t, shuffle(t, (2, 3, 1 0)))
+ // result = BitwiseAnd(t, shuffle(t, (2, 3, 0, 1)))
// Shuffle is meant to swap the comparison results of low-32-bits and high 32-bits of respective long elements.
// Compare vector<long> as if they were vector<int> and assign the result to a temp
@@ -755,7 +768,7 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd,
// op2 = Shuffle(tmp, 0xB1)
// IntrinsicId = BitwiseAnd
*pOp1 = gtNewOperNode(GT_COMMA, simdType, asg, tmp);
- *pOp2 = gtNewSIMDNode(simdType, gtNewLclvNode(lclNum, simdType), gtNewIconNode(SHUFFLE_ZWYX, TYP_INT),
+ *pOp2 = gtNewSIMDNode(simdType, gtNewLclvNode(lclNum, simdType), gtNewIconNode(SHUFFLE_ZWXY, TYP_INT),
SIMDIntrinsicShuffleSSE2, TYP_INT, size);
return SIMDIntrinsicBitwiseAnd;
}
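
This transformation exists because SSE2 has no 64-bit integer compare (`pcmpeqq` only arrives with SSE4.1). A hedged SSE2 sketch of the same idea using compiler intrinsics, where the function name is illustrative and `_MM_SHUFFLE(2, 3, 0, 1)` encodes the same 0xB1 control byte as SHUFFLE_ZWXY:

```cpp
#include <emmintrin.h>

// Emulate a per-long equality compare on SSE2: compare as four ints, then
// AND each 32-bit result with its partner lane, so a long element is
// all-ones only when both of its halves matched.
__m128i cmpeq_epi64_sse2(__m128i v1, __m128i v2)
{
    __m128i tmp     = _mm_cmpeq_epi32(v1, v2);                         // 32-bit lane-wise ==
    __m128i swapped = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2, 3, 0, 1)); // swap lanes (0,1) and (2,3)
    return _mm_and_si128(tmp, swapped);
}
```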
@@ -971,7 +984,7 @@ SIMDIntrinsicID Compiler::impSIMDIntegralRelOpGreaterThanOrEqual(
return SIMDIntrinsicBitwiseOr;
}
-#endif //_TARGET_AMD64_
+#endif // _TARGET_XARCH_
// Transforms operands and returns the SIMD intrinsic to be applied on
// transformed operands to obtain given relop result.
@@ -999,7 +1012,7 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
assert(isRelOpSIMDIntrinsic(relOpIntrinsicId));
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_XARCH_
SIMDIntrinsicID intrinsicID = relOpIntrinsicId;
var_types baseType = *inOutBaseType;
@@ -1076,7 +1089,7 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
//
// We need to treat op1 and op2 as signed for comparison purpose after
// the transformation.
- ssize_t constVal = 0;
+ __int64 constVal = 0;
switch (baseType)
{
case TYP_UBYTE:
@@ -1105,9 +1118,19 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
if (intrinsicID != SIMDIntrinsicEqual)
{
// For constructing const vector use either long or int base type.
- var_types tempBaseType = (baseType == TYP_ULONG) ? TYP_LONG : TYP_INT;
- GenTree* initVal = gtNewIconNode(constVal);
- initVal->gtType = tempBaseType;
+ var_types tempBaseType;
+ GenTree* initVal;
+ if (baseType == TYP_ULONG)
+ {
+ tempBaseType = TYP_LONG;
+ initVal = gtNewLconNode(constVal);
+ }
+ else
+ {
+ tempBaseType = TYP_INT;
+ initVal = gtNewIconNode((ssize_t)constVal);
+ }
+ initVal->gtType = tempBaseType;
GenTree* constVector = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, tempBaseType, size);
// Assign constVector to a temp, since we intend to use it more than once
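
For context, the surrounding transformation biases both operands with the `constVector` built above (e.g. 0x80000000 per uint element) so that a signed compare then yields the unsigned ordering; flipping the sign bit with XOR is arithmetically equivalent to the per-element subtraction. A sketch of the equivalent operation with SSE2 intrinsics, assuming a TYP_UINT base type (function name illustrative):

```cpp
#include <emmintrin.h>

// Unsigned 32-bit greater-than on SSE2, which only has signed compares:
// bias both operands into the signed range, then compare as signed.
__m128i cmpgt_epu32_sse2(__m128i v1, __m128i v2)
{
    const __m128i signBit = _mm_set1_epi32((int)0x80000000); // constVector analogue
    __m128i b1 = _mm_xor_si128(v1, signBit);
    __m128i b2 = _mm_xor_si128(v2, signBit);
    return _mm_cmpgt_epi32(b1, b2);
}
```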
@@ -1127,10 +1150,10 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
}
return intrinsicID;
-#else
+#else // !_TARGET_XARCH_
assert(!"impSIMDRelOp() unimplemented on target arch");
unreached();
-#endif //_TARGET_AMD64_
+#endif // !_TARGET_XARCH_
}
// Creates a GT_SIMD tree for Select operation
@@ -1210,7 +1233,7 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId,
var_types simdType = op1->TypeGet();
assert(op2->TypeGet() == simdType);
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_XARCH_
// SSE2 has direct support for float/double/signed word/unsigned byte.
// For other integer types we compute min/max as follows
//
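
The compare-and-select composition referenced above can be written directly with SSE2 intrinsics. A sketch for signed 32-bit min, one of the element types with no native SSE2 min instruction (function name illustrative):

```cpp
#include <emmintrin.h>

// min(a, b) per int lane: build an all-ones mask where a > b, then blend.
__m128i min_epi32_sse2(__m128i a, __m128i b)
{
    __m128i mask = _mm_cmpgt_epi32(a, b);
    return _mm_or_si128(_mm_and_si128(mask, b),     // take b where a > b
                        _mm_andnot_si128(mask, a)); // take a elsewhere
}
```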
@@ -1347,10 +1370,10 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId,
assert(simdTree != nullptr);
return simdTree;
-#else
+#else // !_TARGET_XARCH_
assert(!"impSIMDMinMax() unimplemented on target arch");
unreached();
-#endif //_TARGET_AMD64_
+#endif // !_TARGET_XARCH_
}
//------------------------------------------------------------------------
@@ -1791,6 +1814,8 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
int length = getSIMDVectorLength(clsHnd);
GenTreeIntCon* intConstTree = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, length);
retVal = intConstTree;
+
+ intConstTree->gtFlags |= GTF_ICON_SIMD_COUNT;
}
break;
@@ -2223,7 +2248,11 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
assert(op2->TypeGet() == simdType);
simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpEquality, baseType, size);
- retVal = simdTree;
+ if (simdType == TYP_SIMD12)
+ {
+ simdTree->gtFlags |= GTF_SIMD12_OP;
+ }
+ retVal = simdTree;
}
break;
@@ -2234,7 +2263,11 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
op2 = impSIMDPopStack(simdType);
op1 = impSIMDPopStack(simdType, instMethod);
simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpInEquality, baseType, size);
- retVal = simdTree;
+ if (simdType == TYP_SIMD12)
+ {
+ simdTree->gtFlags |= GTF_SIMD12_OP;
+ }
+ retVal = simdTree;
}
break;
@@ -2262,7 +2295,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
case SIMDIntrinsicBitwiseOr:
case SIMDIntrinsicBitwiseXor:
{
-#if defined(_TARGET_AMD64_) && defined(DEBUG)
+#if defined(_TARGET_XARCH_) && defined(DEBUG)
// check for the cases where we don't support intrinsics.
// This check should be done before we make modifications to type stack.
// Note that this is more of a double safety check for robustness since
@@ -2290,7 +2323,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
return nullptr;
}
}
-#endif //_TARGET_AMD64_ && DEBUG
+#endif // _TARGET_XARCH_ && DEBUG
// op1 is the first operand; if instance method, op1 is "this" arg
// op2 is the second operand
@@ -2331,9 +2364,9 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
{
// op1 is a SIMD variable that is "this" arg
// op2 is an index of TYP_INT
- op2 = impSIMDPopStack(TYP_INT);
- op1 = impSIMDPopStack(simdType, instMethod);
- unsigned int vectorLength = getSIMDVectorLength(size, baseType);
+ op2 = impSIMDPopStack(TYP_INT);
+ op1 = impSIMDPopStack(simdType, instMethod);
+ int vectorLength = getSIMDVectorLength(size, baseType);
if (!op2->IsCnsIntOrI() || op2->AsIntCon()->gtIconVal >= vectorLength)
{
// We need to bounds-check the length of the vector.
@@ -2366,15 +2399,15 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
case SIMDIntrinsicDotProduct:
{
-#if defined(_TARGET_AMD64_) && defined(DEBUG)
- // Right now dot product is supported only for float vectors.
- // See SIMDIntrinsicList.h for supported base types for this intrinsic.
- if (!varTypeIsFloating(baseType))
+#if defined(_TARGET_XARCH_)
+ // Right now dot product is supported only for float/double vectors and
+ // int vectors on SSE4/AVX.
+ if (!varTypeIsFloating(baseType) &&
+ !(baseType == TYP_INT && getSIMDInstructionSet() >= InstructionSet_SSE3_4))
{
- assert(!"Dot product on integer type vectors not supported");
return nullptr;
}
-#endif //_TARGET_AMD64_ && DEBUG
+#endif // _TARGET_XARCH_
// op1 is a SIMD variable that is the first source and also "this" arg.
// op2 is a SIMD variable which is the second source.
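
On the new gating: a float dot product maps naturally to SSE4.1's `dpps`, while the TYP_INT case needs `pmulld` (also SSE4.1) plus horizontal adds, hence the `InstructionSet_SSE3_4` check above. A hedged sketch of the float case via intrinsics (illustrative, not what the JIT literally emits):

```cpp
#include <smmintrin.h>

// Vector4 dot product in one instruction: immediate 0xF1 multiplies all four
// lanes (high nibble) and places the sum in lane 0 (low nibble).
float dot4_sse41(__m128 v1, __m128 v2)
{
    return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0xF1));
}
```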
@@ -2382,13 +2415,17 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
op1 = impSIMDPopStack(simdType, instMethod);
simdTree = gtNewSIMDNode(baseType, op1, op2, simdIntrinsicID, baseType, size);
- retVal = simdTree;
+ if (simdType == TYP_SIMD12)
+ {
+ simdTree->gtFlags |= GTF_SIMD12_OP;
+ }
+ retVal = simdTree;
}
break;
case SIMDIntrinsicSqrt:
{
-#if defined(_TARGET_AMD64_) && defined(DEBUG)
+#if defined(_TARGET_XARCH_) && defined(DEBUG)
// SSE/AVX doesn't support sqrt on integer type vectors and hence
// should never be seen as an intrinsic here. See SIMDIntrinsicList.h
// for supported base types for this intrinsic.
@@ -2397,7 +2434,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
assert(!"Sqrt not supported on integer vectors\n");
return nullptr;
}
-#endif // _TARGET_AMD64_ && DEBUG
+#endif // _TARGET_XARCH_ && DEBUG
op1 = impSIMDPopStack(simdType);
@@ -2409,7 +2446,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
{
op1 = impSIMDPopStack(simdType);
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_XARCH_
if (varTypeIsFloating(baseType))
{
// Abs(vf) = vf & new SIMDVector<float>(0x7fffffff);
@@ -2448,10 +2485,10 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
unreached();
}
-#else //!_TARGET_AMD64_
- assert(!"Abs intrinsic on non-Amd64 target not implemented");
+#else // !_TARGET_XARCH_
+ assert(!"Abs intrinsic on non-xarch target not implemented");
unreached();
-#endif //!_TARGET_AMD64_
+#endif // !_TARGET_XARCH_
}
break;
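
The float path quoted above ("Abs(vf) = vf & new SIMDVector<float>(0x7fffffff)") is the standard sign-mask trick: clearing the IEEE sign bit makes every lane non-negative. A minimal intrinsics sketch:

```cpp
#include <emmintrin.h>

// Per-lane float absolute value: AND away bit 31 of every element.
__m128 abs_ps(__m128 v)
{
    const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
    return _mm_and_ps(v, mask);
}
```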
@@ -2524,15 +2561,15 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
return nullptr;
}
-#ifdef _TARGET_AMD64_
- // Amd64: also indicate that we use floating point registers.
+#ifdef _TARGET_XARCH_
+ // XArch: also indicate that we use floating point registers.
// The need for setting this here is that a method may not have SIMD
// type lclvars, but might be exercising SIMD intrinsics on fields of
// SIMD type.
//
// e.g. public Vector<float> ComplexVecFloat::sqabs() { return this.r * this.r + this.i * this.i; }
compFloatingPointUsed = true;
-#endif
+#endif // _TARGET_XARCH_
// At this point, we have a tree that we are going to store into a destination.
// TODO-1stClassStructs: This should be a simple store or assignment, and should not require