summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/jit/hwintrinsiccodegenxarch.cpp61
-rw-r--r--src/jit/hwintrinsiclistxarch.h3
-rw-r--r--src/jit/hwintrinsicxarch.cpp23
-rw-r--r--src/jit/lsraxarch.cpp14
4 files changed, 97 insertions, 4 deletions
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp
index 1d6380ebe0..7005da47f8 100644
--- a/src/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/jit/hwintrinsiccodegenxarch.cpp
@@ -1318,6 +1318,67 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node)
break;
}
+ case NI_AVX_SetAllVector256:
+ {
+ assert(op1 != nullptr);
+ assert(op2 == nullptr);
+ op1Reg = op1->gtRegNum;
+ if (varTypeIsIntegral(baseType))
+ {
+ // If the argument is a integer, it needs to be moved into a XMM register
+ regNumber tmpXMM = node->ExtractTempReg();
+ emit->emitIns_R_R(INS_mov_i2xmm, emitActualTypeSize(baseType), tmpXMM, op1Reg);
+ op1Reg = tmpXMM;
+ }
+
+ if (compiler->compSupports(InstructionSet_AVX2))
+ {
+ // generate broadcast instructions if AVX2 is available
+ emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD32), targetReg, op1Reg);
+ }
+ else
+ {
+ // duplicate the scalar argument to XMM register
+ switch (baseType)
+ {
+ case TYP_FLOAT:
+ emit->emitIns_SIMD_R_R_I(INS_vpermilps, emitTypeSize(TYP_SIMD16), op1Reg, op1Reg, 0);
+ break;
+ case TYP_DOUBLE:
+ emit->emitIns_R_R(INS_movddup, emitTypeSize(TYP_SIMD16), op1Reg, op1Reg);
+ break;
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ {
+ regNumber tmpZeroReg = node->GetSingleTempReg();
+ emit->emitIns_R_R(INS_pxor, emitTypeSize(TYP_SIMD16), tmpZeroReg, tmpZeroReg);
+ emit->emitIns_SIMD_R_R_R(INS_pshufb, emitTypeSize(TYP_SIMD16), op1Reg, op1Reg, tmpZeroReg);
+ break;
+ }
+ case TYP_SHORT:
+ case TYP_USHORT:
+ emit->emitIns_SIMD_R_R_I(INS_pshuflw, emitTypeSize(TYP_SIMD16), op1Reg, op1Reg, 0);
+ emit->emitIns_SIMD_R_R_I(INS_pshufd, emitTypeSize(TYP_SIMD16), op1Reg, op1Reg, 80);
+ break;
+ case TYP_INT:
+ case TYP_UINT:
+ emit->emitIns_SIMD_R_R_I(INS_pshufd, emitTypeSize(TYP_SIMD16), op1Reg, op1Reg, 0);
+ break;
+ case TYP_LONG:
+ case TYP_ULONG:
+ emit->emitIns_SIMD_R_R_I(INS_pshufd, emitTypeSize(TYP_SIMD16), op1Reg, op1Reg, 68);
+ break;
+
+ default:
+ unreached();
+ break;
+ }
+ // duplicate the XMM register to YMM register
+ emit->emitIns_SIMD_R_R_R_I(INS_vinsertf128, emitTypeSize(TYP_SIMD32), targetReg, op1Reg, op1Reg, 1);
+ }
+ break;
+ }
+
case NI_AVX_ExtendToVector256:
{
// ExtendToVector256 has zero-extend semantics in order to ensure it is deterministic
diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h
index 1ce47f6e36..b7a769a4db 100644
--- a/src/jit/hwintrinsiclistxarch.h
+++ b/src/jit/hwintrinsiclistxarch.h
@@ -348,6 +348,7 @@ HARDWARE_INTRINSIC(AVX_Divide, "Divide",
HARDWARE_INTRINSIC(AVX_DotProduct, "DotProduct", AVX, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX_DuplicateEvenIndexed, "DuplicateEvenIndexed", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_DuplicateOddIndexed, "DuplicateOddIndexed", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX_Extract, "Extract", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg|HW_Flag_FullRangeIMM|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(AVX_ExtendToVector256, "ExtendToVector256", AVX, -1, 32, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_Helper, HW_Flag_OneTypeGeneric|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_ExtractVector128, "ExtractVector128", AVX, -1, 32, -1, {INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128, INS_vextractf128},HW_Category_IMM, HW_Flag_OneTypeGeneric|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX_Floor, "Floor", AVX, 9, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
@@ -375,6 +376,8 @@ HARDWARE_INTRINSIC(AVX_RoundToNearestInteger, "RoundToNea
HARDWARE_INTRINSIC(AVX_RoundToNegativeInfinity, "RoundToNegativeInfinity", AVX, 9, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_RoundToPositiveInfinity, "RoundToPositiveInfinity", AVX, 10, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_RoundToZero, "RoundToZero", AVX, 11, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX_SetVector256, "SetVector256", AVX, -1, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SecondArgMaybe64Bit)
+HARDWARE_INTRINSIC(AVX_SetAllVector256, "SetAllVector256", AVX, -1, 32, 1, {INS_vpbroadcastb,INS_vpbroadcastb,INS_vpbroadcastw,INS_vpbroadcastw,INS_vpbroadcastd,INS_vpbroadcastd,INS_vpbroadcastq,INS_vpbroadcastq,INS_vbroadcastss,INS_vbroadcastsd},HW_Category_Helper, HW_Flag_MultiIns|HW_Flag_SpecialImport|HW_Flag_OneTypeGeneric)
HARDWARE_INTRINSIC(AVX_SetZeroVector256, "SetZeroVector256", AVX, -1, 32, 0, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_xorps, INS_xorpd}, HW_Category_Helper, HW_Flag_OneTypeGeneric|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_Shuffle, "Shuffle", AVX, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_NoRMWSemantics|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX_Sqrt, "Sqrt", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp
index f06ca0d585..1c80714918 100644
--- a/src/jit/hwintrinsicxarch.cpp
+++ b/src/jit/hwintrinsicxarch.cpp
@@ -1108,11 +1108,11 @@ GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic,
NamedIntrinsic extractIntrinsic = varTypeIsShort(baseType) ? NI_SSE2_Extract : NI_SSE41_Extract;
GenTree* half = nullptr;
- if (ival >= halfIndex)
+ if (ival >= midIndex)
{
half = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128,
baseType, 32);
- ival -= halfIndex;
+ ival -= midIndex;
}
else
{
@@ -1144,12 +1144,12 @@ GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic,
impCloneExpr(vectorOp, &clonedVectorOp, info.compCompHnd->getArgClass(sig, sig->args),
(unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("AVX Insert clones the vector operand"));
- if (ival >= halfIndex)
+ if (ival >= midIndex)
{
GenTree* halfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1),
NI_AVX_ExtractVector128, baseType, 32);
GenTree* ModifiedHalfVector =
- gtNewSimdHWIntrinsicNode(TYP_SIMD16, halfVector, dataOp, gtNewIconNode(ival - halfIndex),
+ gtNewSimdHWIntrinsicNode(TYP_SIMD16, halfVector, dataOp, gtNewIconNode(ival - midIndex),
insertIntrinsic, baseType, 16);
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, clonedVectorOp, ModifiedHalfVector, gtNewIconNode(1),
NI_AVX_InsertVector128, baseType, 32);
@@ -1215,6 +1215,21 @@ GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic,
break;
}
+ case NI_AVX_SetAllVector256:
+ {
+ baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
+#ifdef _TARGET_X86_
+ // TODO-XARCH: support long/ulong on 32-bit platfroms
+ if (varTypeIsLong(baseType))
+ {
+ return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
+ }
+#endif
+ GenTree* arg = impPopStack().val;
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, arg, NI_AVX_SetAllVector256, baseType, 32);
+ break;
+ }
+
case NI_AVX_ExtractVector128:
case NI_AVX2_ExtractVector128:
{
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index 2d7e6564e3..09cc9ca96e 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -2368,6 +2368,20 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
break;
}
+ case NI_AVX_SetAllVector256:
+ {
+ if (varTypeIsIntegral(baseType))
+ {
+ info->internalFloatCount = 1;
+ if (!compiler->compSupports(InstructionSet_AVX2) && varTypeIsByte(baseType))
+ {
+ info->internalFloatCount += 1;
+ }
+ info->setInternalCandidates(this, allSIMDRegs());
+ }
+ break;
+ }
+
case NI_SSE2_MaskMove:
{
// SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI