summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJacek Blaszczynski <biosciencenow@outlook.com>2018-02-28 06:08:55 +0100
committerJacek Blaszczynski <biosciencenow@outlook.com>2018-03-02 23:48:49 +0100
commit978a5494fafede99d1ea9e8ca691b873e15885d8 (patch)
tree8e214548af5cb2ae5144b55822fea860de8ef92c /src
parentac2cea83feefcf86fdf50f43821f544e8e521968 (diff)
downloadcoreclr-978a5494fafede99d1ea9e8ca691b873e15885d8.tar.gz
coreclr-978a5494fafede99d1ea9e8ca691b873e15885d8.tar.bz2
coreclr-978a5494fafede99d1ea9e8ca691b873e15885d8.zip
Implement Shuffle* SSE2 hardware intrinsics
Diffstat (limited to 'src')
-rw-r--r--src/jit/compiler.h2
-rw-r--r--src/jit/emitxarch.cpp19
-rw-r--r--src/jit/hwintrinsiccodegenxarch.cpp2
-rw-r--r--src/jit/hwintrinsiclistxarch.h11
-rw-r--r--src/jit/hwintrinsicxarch.cpp53
-rw-r--r--src/jit/instrsxarch.h6
-rw-r--r--src/jit/lowerxarch.cpp2
-rw-r--r--src/jit/lsraxarch.cpp6
8 files changed, 81 insertions, 20 deletions
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 7bee3bfcdd..20f71b701a 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -3107,7 +3107,7 @@ protected:
bool isScalarISA(InstructionSet isa);
static int ivalOfHWIntrinsic(NamedIntrinsic intrinsic);
unsigned simdSizeOfHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_INFO* sig);
- static int numArgsOfHWIntrinsic(NamedIntrinsic intrinsic);
+ static int numArgsOfHWIntrinsic(NamedIntrinsic intrinsic, GenTreeHWIntrinsic* node = nullptr);
static GenTree* lastOpOfHWIntrinsic(GenTreeHWIntrinsic* node, int numArgs);
static instruction insOfHWIntrinsic(NamedIntrinsic intrinsic, var_types type);
static HWIntrinsicCategory categoryOfHWIntrinsic(NamedIntrinsic intrinsic);
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 4ffe342c99..0bd85f256b 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -5535,15 +5535,28 @@ static bool isSseShift(instruction ins)
}
}
-static bool isSSEExtract(instruction ins)
+//------------------------------------------------------------------------
+// IsDstSrcImmAvxInstruction: check if instruction has RM R I format
+// for all encodings: EVEX, VEX and legacy SSE
+//
+// Arguments:
+// instruction -- processor instruction to check
+//
+// Return Value:
+// true if instruction has RRI format
+//
+static bool IsDstSrcImmAvxInstruction(instruction ins)
{
switch (ins)
{
+ case INS_extractps:
case INS_pextrb:
case INS_pextrw:
case INS_pextrd:
case INS_pextrq:
- case INS_extractps:
+ case INS_pshufd:
+ case INS_pshufhw:
+ case INS_pshuflw:
return true;
default:
return false;
@@ -5554,7 +5567,7 @@ void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber reg,
{
// TODO-XARCH refactoring emitIns_R_R_I to handle SSE2/AVX2 shift as well as emitIns_R_I
bool isShift = isSseShift(ins);
- if (isSSEExtract(ins) || (UseVEXEncoding() && !isShift))
+ if (IsDstSrcImmAvxInstruction(ins) || (UseVEXEncoding() && !isShift))
{
emitIns_R_R_I(ins, attr, reg, reg1, ival);
}
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp
index 2f0e5f8c41..c4d554662f 100644
--- a/src/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/jit/hwintrinsiccodegenxarch.cpp
@@ -56,7 +56,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
HWIntrinsicCategory category = Compiler::categoryOfHWIntrinsic(intrinsicID);
HWIntrinsicFlag flags = Compiler::flagsOfHWIntrinsic(intrinsicID);
int ival = Compiler::ivalOfHWIntrinsic(intrinsicID);
- int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID);
+ int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID, node);
assert((flags & HW_Flag_NoCodeGen) == 0);
diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h
index 5365c6e3d0..c426071c7c 100644
--- a/src/jit/hwintrinsiclistxarch.h
+++ b/src/jit/hwintrinsiclistxarch.h
@@ -194,7 +194,7 @@ HARDWARE_INTRINSIC(SSE2_LoadAlignedVector128, "LoadAligne
HARDWARE_INTRINSIC(SSE2_LoadFence, "LoadFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_LoadScalarVector128, "LoadScalarVector128", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_movd, INS_movq, INS_movq, INS_invalid, INS_movsdsse2}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_LoadVector128, "LoadVector128", SSE2, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_invalid, INS_movupd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_MaskMove, "MaskMove", SSE2, -1, 16, 3, {INS_maskmovdqu,INS_maskmovdqu,INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_MaskMove, "MaskMove", SSE2, -1, 16, 3, {INS_maskmovdqu,INS_maskmovdqu,INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_Max, "Max", SSE2, -1, 16, 2, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2_MemoryFence, "MemoryFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_MaxScalar, "MaxScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
@@ -217,14 +217,17 @@ HARDWARE_INTRINSIC(SSE2_ShiftLeftLogical128BitLane, "ShiftLeftL
HARDWARE_INTRINSIC(SSE2_ShiftRightArithmetic, "ShiftRightArithmetic", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE2_ShiftRightLogical, "ShiftRightLogical", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE2_ShiftRightLogical128BitLane, "ShiftRightLogical128BitLane", SSE2, -1, 16, 2, {INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2_Shuffle, "Shuffle", SSE2, -1, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2_ShuffleHigh, "ShuffleHigh", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2_ShuffleLow, "ShuffleLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE2_Sqrt, "Sqrt", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_SqrtScalar, "SqrtScalar", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_Store, "Store", SSE2, -1, 16, 2, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_invalid, INS_movupd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_StoreAligned, "StoreAligned", SSE2, -1, 16, 2, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_invalid, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", SSE2, -1, 16, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_invalid, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_StoreScalar, "StoreScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsdsse2}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_StoreLow, "StoreLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_StoreHigh, "StoreHigh", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_StoreLow, "StoreLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_StoreScalar, "StoreScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsdsse2}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_Subtract, "Subtract", SSE2, -1, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_SubtractSaturate, "SubtractSaturate", SSE2, -1, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_SubtractScalar, "SubtractScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
@@ -320,7 +323,7 @@ HARDWARE_INTRINSIC(SSE42_CompareGreaterThan, "CompareGre
// AVX Intrinsics
HARDWARE_INTRINSIC(AVX_IsSupported, "get_IsSupported", AVX, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX_Add, "Add", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX_AddSubtract, "AddSubtract", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX_AddSubtract, "AddSubtract", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX_And, "And", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX_AndNot, "AndNot", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX_Blend, "Blend", AVX, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd}, HW_Category_IMM, HW_Flag_FullRangeIMM)
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp
index 86a9489be0..ef64c4e35c 100644
--- a/src/jit/hwintrinsicxarch.cpp
+++ b/src/jit/hwintrinsicxarch.cpp
@@ -219,19 +219,64 @@ unsigned Compiler::simdSizeOfHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_I
}
//------------------------------------------------------------------------
-// numArgsOfHWIntrinsic: get the number of arguments
+// numArgsOfHWIntrinsic: get the number of arguments based on table and
+// if numArgs is -1 check number of arguments using GenTreeHWIntrinsic
+// node unless it is nullptr
//
// Arguments:
-// intrinsic -- id of the intrinsic function.
+// intrinsic -- id of the intrinsic function
+// node -- GenTreeHWIntrinsic* node with nullptr default value
//
// Return Value:
// number of arguments
//
-int Compiler::numArgsOfHWIntrinsic(NamedIntrinsic intrinsic)
+int Compiler::numArgsOfHWIntrinsic(NamedIntrinsic intrinsic, GenTreeHWIntrinsic* node)
{
assert(intrinsic != NI_Illegal);
assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
- return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].numArgs;
+
+ int numArgs = hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].numArgs;
+ if (numArgs >= 0)
+ {
+ return numArgs;
+ }
+
+ noway_assert(node != nullptr);
+ assert(numArgs == -1);
+
+ GenTree* op1 = node->gtGetOp1();
+ GenTree* op2 = node->gtGetOp2();
+
+ if (op2 != nullptr)
+ {
+ return 2;
+ }
+
+ if (op1 != nullptr)
+ {
+ if (op1->OperIsList())
+ {
+ numArgs = 0;
+ GenTreeArgList* list = op1->AsArgList();
+
+ while (list != nullptr)
+ {
+ numArgs++;
+ list = list->Rest();
+ }
+
+ assert(numArgs > 0);
+ return numArgs;
+ }
+ else
+ {
+ return 1;
+ }
+ }
+ else
+ {
+ return 0;
+ }
}
//------------------------------------------------------------------------
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index d448afc977..71da647163 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -369,8 +369,10 @@ INST3( pcmpeqb, "pcmpeqb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE,
INST3( pcmpgtb, "pcmpgtb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x64)) // Packed compare 8-bit signed integers for greater than
INST3( pshufd, "pshufd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x70)) // Packed shuffle of 32-bit integers
+INST3( pshufhw, "pshufhw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x70)) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
+INST3( pshuflw, "pshuflw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x70)) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
INST3( pextrw, "pextrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC5)) // Extract 16-bit value into a r32 with zero extended to 32-bits
-INST3( pinsrw, "pinsrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC4)) // packed insert word
+INST3( pinsrw, "pinsrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC4)) // Insert word at index
INST3( punpckhbw, "punpckhbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x68)) // Packed logical (unsigned) widen ubyte to ushort (hi)
INST3( punpcklbw, "punpcklbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x60)) // Packed logical (unsigned) widen ubyte to ushort (lo)
@@ -448,7 +450,7 @@ INST3( phsubsw, "phsubsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS
INST3( lddqu, "lddqu" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xF0)) // Load Unaligned integer
INST3( movntdqa, "movntdqa" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2A)) // Load Double Quadword Non-Temporal Aligned Hint
INST3( movddup, "movddup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x12)) // Replicate Double FP Values
-INST3( movsldup, "movsldup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x12)) // Replicate even-indexed Single FP Values
+INST3( movsldup, "movsldup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x12)) // Replicate even-indexed Single FP Values
INST3( movshdup, "movshdup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x16)) // Replicate odd-indexed Single FP Values
INST3( phminposuw, "phminposuw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x41)) // Packed Horizontal Word Minimum
INST3( mpsadbw, "mpsadbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x42)) // Compute Multiple Packed Sums of Absolute Difference
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 3f1deb825f..45799e3267 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -2367,7 +2367,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
HWIntrinsicCategory category = Compiler::categoryOfHWIntrinsic(intrinsicID);
HWIntrinsicFlag flags = Compiler::flagsOfHWIntrinsic(intrinsicID);
- int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID);
+ int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID, node);
GenTree* op1 = node->gtGetOp1();
GenTree* op2 = node->gtGetOp2();
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index fdb875a827..296d558fba 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -2259,7 +2259,7 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
InstructionSet isa = Compiler::isaOfHWIntrinsic(intrinsicID);
HWIntrinsicCategory category = Compiler::categoryOfHWIntrinsic(intrinsicID);
HWIntrinsicFlag flags = Compiler::flagsOfHWIntrinsic(intrinsicID);
- int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID);
+ int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID, intrinsicTree);
if (isa == InstructionSet_AVX || isa == InstructionSet_AVX2)
{
@@ -2321,11 +2321,9 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
assert((flags & HW_Flag_NoCodeGen) == 0);
- assert(numArgs != 0);
- assert(numArgs != 1);
-
if (info->srcCount >= 2)
{
+ assert(numArgs >= 2);
LocationInfoListNode* op2Info = useList.Begin()->Next();
op2Info->info.isDelayFree = true;
info->hasDelayFreeSrc = true;