summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTanner Gooding <tagoo@outlook.com>2018-02-03 09:28:56 -0800
committerTanner Gooding <tagoo@outlook.com>2018-02-05 21:52:56 -0800
commit7c11e00b125330371e83582e0e6cc99b40a65962 (patch)
tree27dac425cba9196abcec528084d0362e34e2b89e
parentae3b92ad2ade04709abb6d4846018d0a26b872c8 (diff)
downloadcoreclr-7c11e00b125330371e83582e0e6cc99b40a65962.tar.gz
coreclr-7c11e00b125330371e83582e0e6cc99b40a65962.tar.bz2
coreclr-7c11e00b125330371e83582e0e6cc99b40a65962.zip
Adding support for the StoreFence/Prefetch* APIs and the new Sse scalar overloads.
-rw-r--r--src/jit/emitxarch.cpp30
-rw-r--r--src/jit/emitxarch.h2
-rw-r--r--src/jit/hwintrinsiccodegenxarch.cpp50
-rw-r--r--src/jit/hwintrinsiclistxarch.h185
-rw-r--r--src/jit/hwintrinsicxarch.cpp40
-rw-r--r--src/jit/instrsxarch.h6
6 files changed, 218 insertions, 95 deletions
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index e697b7bd57..2ae99e8edf 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -267,7 +267,9 @@ bool emitter::Is4ByteSSE4Instruction(instruction ins)
bool emitter::TakesVexPrefix(instruction ins)
{
// special case vzeroupper as it requires 2-byte VEX prefix
- if (ins == INS_vzeroupper)
+ // special case sfence and the prefetch instructions as they never take a VEX prefix
+ if ((ins == INS_vzeroupper) || (ins == INS_sfence) || (ins == INS_prefetcht0) || (ins == INS_prefetcht1) ||
+ (ins == INS_prefetcht2) || (ins == INS_prefetchnta))
{
return false;
}
@@ -2027,7 +2029,9 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64
|| (attrSize == EA_16BYTE) || (attrSize == EA_32BYTE) // only for x64
- || (ins == INS_movzx) || (ins == INS_movsx));
+ || (ins == INS_movzx) || (ins == INS_movsx) || (ins == INS_prefetcht0) ||
+ (ins == INS_prefetcht1) // the prefetch instructions are always 3 bytes
+ || (ins == INS_prefetcht2) || (ins == INS_prefetchnta)); // and have part of their modr/m byte hardcoded
size = 3;
}
else
@@ -2469,7 +2473,8 @@ void emitter::emitIns(instruction ins)
ins == INS_r_stosb || ins == INS_r_stosd || ins == INS_r_stosp || ins == INS_ret ||
ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp
#ifndef LEGACY_BACKEND
- || ins == INS_vzeroupper
+ // These instructions take zero operands
+ || ins == INS_vzeroupper || ins == INS_sfence
#endif
);
@@ -3971,6 +3976,25 @@ void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regN
}
#ifndef LEGACY_BACKEND
+void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs)
+{
+ assert(ins == INS_prefetcht0 || ins == INS_prefetcht1 || ins == INS_prefetcht2 || ins == INS_prefetchnta);
+
+ instrDesc* id = emitNewInstrAmd(attr, offs);
+
+ id->idIns(ins);
+
+ id->idInsFmt(IF_ARD);
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, insFormat fmt)
{
ssize_t offs = indir->Offset();
diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
index 8542767438..395934ed5f 100644
--- a/src/jit/emitxarch.h
+++ b/src/jit/emitxarch.h
@@ -382,6 +382,8 @@ void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2)
void emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival);
#ifndef LEGACY_BACKEND
+void emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs);
+
void emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, insFormat fmt);
void emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival);
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp
index 1aea1f1605..0f9ece7254 100644
--- a/src/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/jit/hwintrinsiccodegenxarch.cpp
@@ -438,7 +438,6 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
regNumber targetReg = node->gtRegNum;
var_types targetType = node->TypeGet();
var_types baseType = node->gtSIMDBaseType;
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
regNumber op1Reg = REG_NA;
regNumber op2Reg = REG_NA;
@@ -464,6 +463,7 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
genHWIntrinsic_R_R_RM(node, ins);
break;
}
+
case NI_SSE_CompareEqualOrderedScalar:
case NI_SSE_CompareEqualUnorderedScalar:
{
@@ -471,6 +471,7 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
op2Reg = op2->gtRegNum;
regNumber tmpReg = node->GetSingleTempReg();
+ instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
@@ -486,6 +487,7 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
assert(baseType == TYP_FLOAT);
op2Reg = op2->gtRegNum;
+ instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
@@ -498,6 +500,7 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
assert(baseType == TYP_FLOAT);
op2Reg = op2->gtRegNum;
+ instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
@@ -510,6 +513,7 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
assert(baseType == TYP_FLOAT);
op2Reg = op2->gtRegNum;
+ instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
@@ -522,6 +526,7 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
assert(baseType == TYP_FLOAT);
op2Reg = op2->gtRegNum;
+ instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
@@ -536,6 +541,7 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
regNumber tmpReg = node->GetSingleTempReg();
+ instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
@@ -551,6 +557,7 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
assert(op2 == nullptr);
if (op1Reg != targetReg)
{
+ instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg);
}
break;
@@ -561,10 +568,42 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
assert(baseType == TYP_FLOAT);
assert(op2 == nullptr);
+ instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
break;
}
+ case NI_SSE_Prefetch0:
+ case NI_SSE_Prefetch1:
+ case NI_SSE_Prefetch2:
+ case NI_SSE_PrefetchNonTemporal:
+ {
+ assert(baseType == TYP_UBYTE);
+ assert(op2 == nullptr);
+
+ instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
+ emit->emitIns_AR(ins, emitTypeSize(baseType), op1Reg, 0);
+ break;
+ }
+
+ case NI_SSE_ReciprocalScalar:
+ case NI_SSE_ReciprocalSqrtScalar:
+ case NI_SSE_SqrtScalar:
+ {
+ assert(baseType == TYP_FLOAT);
+ instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
+
+ if (op2 == nullptr)
+ {
+ emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op1Reg);
+ }
+ else
+ {
+ genHWIntrinsic_R_R_RM(node, ins);
+ }
+ break;
+ }
+
case NI_SSE_SetScalarVector128:
{
assert(baseType == TYP_FLOAT);
@@ -680,6 +719,15 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
break;
}
+ case NI_SSE_StoreFence:
+ {
+ assert(baseType == TYP_VOID);
+ assert(op1 == nullptr);
+ assert(op2 == nullptr);
+ emit->emitIns(INS_sfence);
+ break;
+ }
+
default:
unreached();
break;
diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h
index 62388380f0..09c3b32fcf 100644
--- a/src/jit/hwintrinsiclistxarch.h
+++ b/src/jit/hwintrinsiclistxarch.h
@@ -23,98 +23,103 @@
9) Each intrinsic has one or more flags with type of `enum HWIntrinsicFlag`
*/
// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-// Intrinsic ID Function name ISA ival SIMD size NumArg instructions Category Flags
-// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
+// Intrinsic ID Function name ISA ival SIMD size NumArg instructions Category Flags
+// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// SSE Intrinsics
-HARDWARE_INTRINSIC(SSE_IsSupported, "get_IsSupported", SSE, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Add, "Add", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_AddScalar, "AddScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_And, "And", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_AndNot, "AndNot", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareEqual, "CompareEqual", SSE, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_CompareEqualScalar, "CompareEqualScalar", SSE, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThan, "CompareGreaterThan", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanScalar, "CompareGreaterThanScalar", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqual, "CompareGreaterThanOrEqual", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualScalar, "CompareGreaterThanOrEqualScalar", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_CompareLessThan, "CompareLessThan", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_CompareLessThanScalar, "CompareLessThanScalar", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqual, "CompareLessThanOrEqual", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualScalar, "CompareLessThanOrEqualScalar", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_CompareNotEqual, "CompareNotEqual", SSE, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_CompareNotEqualScalar, "CompareNotEqualScalar", SSE, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_CompareNotGreaterThan, "CompareNotGreaterThan", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanScalar, "CompareNotGreaterThanScalar", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqual, "CompareNotGreaterThanOrEqual", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqualScalar, "CompareNotGreaterThanOrEqualScalar", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareNotLessThan, "CompareNotLessThan", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareNotLessThanScalar, "CompareNotLessThanScalar", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareNotLessThanOrEqual, "CompareNotLessThanOrEqual", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareNotLessThanOrEqualScalar, "CompareNotLessThanOrEqualScalar", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareOrdered, "CompareOrdered", SSE, 7, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareOrderedScalar, "CompareOrderedScalar", SSE, 7, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareUnordered, "CompareUnordered", SSE, 3, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareUnorderedScalar, "CompareUnorderedScalar", SSE, 3, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_ConvertToInt32, "ConvertToInt32", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertToInt64, "ConvertToInt64", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertToSingle, "ConvertToSingle", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_Helper, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertScalarToVector128Single, "ConvertScalarToVector128Single", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss, INS_invalid}, HW_Category_Special, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertToInt32WithTruncation, "ConvertToInt32WithTruncation", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertToInt64WithTruncation, "ConvertToInt64WithTruncation", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Divide, "Divide", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_DivideScalar, "DivideScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_LoadAlignedVector128, "LoadAlignedVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_LoadHigh, "LoadHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_LoadLow, "LoadLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_LoadScalarVector128, "LoadScalarVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_LoadVector128, "LoadVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Max, "Max", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_MaxScalar, "MaxScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_Min, "Min", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_MinScalar, "MinScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_MoveHighToLow, "MoveHighToLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_MoveLowToHigh, "MoveLowToHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_MoveMask, "MoveMask", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_MoveScalar, "MoveScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_Multiply, "Multiply", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_MultiplyScalar, "MultiplyScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_Or, "Or", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_Reciprocal, "Reciprocal", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ReciprocalScalar, "ReciprocalScalar", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_ReciprocalSqrt, "ReciprocalSqrt", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ReciprocalSqrtScalar, "ReciprocalSqrtScalar", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_SetAllVector128, "SetAllVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(SSE_SetScalarVector128, "SetScalarVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_Helper, HW_Flag_MultiIns)
-HARDWARE_INTRINSIC(SSE_SetVector128, "SetVector128", SSE, -1, 16, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(SSE_SetZeroVector128, "SetZeroVector128", SSE, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_invalid}, HW_Category_Helper, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Shuffle, "Shuffle", SSE, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE_Sqrt, "Sqrt", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_SqrtScalar, "SqrtScalar", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_StaticCast, "StaticCast", SSE, -1, 16, 1, {INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps}, HW_Category_Helper, HW_Flag_TwoTypeGeneric)
-HARDWARE_INTRINSIC(SSE_Store, "Store", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_StoreAligned, "StoreAligned", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_StoreHigh, "StoreHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_StoreLow, "StoreLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_StoreScalar, "StoreScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Subtract, "Subtract", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_SubtractScalar, "SubtractScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_UnpackHigh, "UnpackHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_UnpackLow, "UnpackLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Xor, "Xor", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_IsSupported, "get_IsSupported", SSE, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_Add, "Add", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_AddScalar, "AddScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_And, "And", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_AndNot, "AndNot", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareEqual, "CompareEqual", SSE, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareEqualScalar, "CompareEqualScalar", SSE, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThan, "CompareGreaterThan", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanScalar, "CompareGreaterThanScalar", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqual, "CompareGreaterThanOrEqual", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualScalar, "CompareGreaterThanOrEqualScalar", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareLessThan, "CompareLessThan", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareLessThanScalar, "CompareLessThanScalar", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqual, "CompareLessThanOrEqual", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualScalar, "CompareLessThanOrEqualScalar", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareNotEqual, "CompareNotEqual", SSE, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareNotEqualScalar, "CompareNotEqualScalar", SSE, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareNotGreaterThan, "CompareNotGreaterThan", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanScalar, "CompareNotGreaterThanScalar", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqual, "CompareNotGreaterThanOrEqual", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqualScalar, "CompareNotGreaterThanOrEqualScalar", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_CompareNotLessThan, "CompareNotLessThan", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareNotLessThanScalar, "CompareNotLessThanScalar", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_CompareNotLessThanOrEqual, "CompareNotLessThanOrEqual", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareNotLessThanOrEqualScalar, "CompareNotLessThanOrEqualScalar", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_CompareOrdered, "CompareOrdered", SSE, 7, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareOrderedScalar, "CompareOrderedScalar", SSE, 7, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_CompareUnordered, "CompareUnordered", SSE, 3, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareUnorderedScalar, "CompareUnorderedScalar", SSE, 3, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_ConvertToInt32, "ConvertToInt32", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_ConvertToInt64, "ConvertToInt64", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_ConvertToSingle, "ConvertToSingle", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_Helper, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_ConvertScalarToVector128Single, "ConvertScalarToVector128Single", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss, INS_invalid}, HW_Category_Special, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_ConvertToInt32WithTruncation, "ConvertToInt32WithTruncation", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_ConvertToInt64WithTruncation, "ConvertToInt64WithTruncation", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_Divide, "Divide", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_DivideScalar, "DivideScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_LoadAlignedVector128, "LoadAlignedVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_LoadHigh, "LoadHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_LoadLow, "LoadLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_LoadScalarVector128, "LoadScalarVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_LoadVector128, "LoadVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_Max, "Max", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_MaxScalar, "MaxScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_Min, "Min", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_MinScalar, "MinScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_MoveHighToLow, "MoveHighToLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_MoveLowToHigh, "MoveLowToHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_MoveMask, "MoveMask", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_MoveScalar, "MoveScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_Multiply, "Multiply", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_MultiplyScalar, "MultiplyScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_Or, "Or", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_Prefetch0, "Prefetch0", SSE, -1, 0, 1, {INS_invalid, INS_prefetcht0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_Prefetch1, "Prefetch1", SSE, -1, 0, 1, {INS_invalid, INS_prefetcht1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_Prefetch2, "Prefetch2", SSE, -1, 0, 1, {INS_invalid, INS_prefetcht2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_PrefetchNonTemporal, "PrefetchNonTemporal", SSE, -1, 0, 1, {INS_invalid, INS_prefetchnta, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_Reciprocal, "Reciprocal", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_ReciprocalScalar, "ReciprocalScalar", SSE, -1, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpss, INS_invalid}, HW_Category_Special, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_ReciprocalSqrt, "ReciprocalSqrt", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_ReciprocalSqrtScalar, "ReciprocalSqrtScalar", SSE, -1, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtss, INS_invalid}, HW_Category_Special, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_SetAllVector128, "SetAllVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(SSE_SetScalarVector128, "SetScalarVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_Helper, HW_Flag_MultiIns)
+HARDWARE_INTRINSIC(SSE_SetVector128, "SetVector128", SSE, -1, 16, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(SSE_SetZeroVector128, "SetZeroVector128", SSE, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_invalid}, HW_Category_Helper, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_Shuffle, "Shuffle", SSE, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE_Sqrt, "Sqrt", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_SqrtScalar, "SqrtScalar", SSE, -1, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_invalid}, HW_Category_Special, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_StaticCast, "StaticCast", SSE, -1, 16, 1, {INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps}, HW_Category_Helper, HW_Flag_TwoTypeGeneric)
+HARDWARE_INTRINSIC(SSE_Store, "Store", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_StoreAligned, "StoreAligned", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_StoreFence, "StoreFence", SSE, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_StoreHigh, "StoreHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_StoreLow, "StoreLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_StoreScalar, "StoreScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_Subtract, "Subtract", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_SubtractScalar, "SubtractScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_UnpackHigh, "UnpackHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_UnpackLow, "UnpackLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_Xor, "Xor", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Intrinsic ID Function name ISA ival SIMD size NumArg instructions Category Flags
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp
index 5f16dd00be..64101e90a6 100644
--- a/src/jit/hwintrinsicxarch.cpp
+++ b/src/jit/hwintrinsicxarch.cpp
@@ -616,7 +616,10 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic,
GenTree* op3 = nullptr;
GenTree* op4 = nullptr;
int simdSize = simdSizeOfHWIntrinsic(intrinsic, sig);
- assert(simdSize == 16);
+
+ // The Prefetch and StoreFence intrinsics don't take any SIMD operands
+ // and have a simdSize of 0
+ assert((simdSize == 16) || (simdSize == 0));
switch (intrinsic)
{
@@ -662,6 +665,23 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic,
break;
}
+ case NI_SSE_ReciprocalScalar:
+ case NI_SSE_ReciprocalSqrtScalar:
+ case NI_SSE_SqrtScalar:
+ {
+ assert((sig->numArgs == 1) || (sig->numArgs == 2));
+ assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
+
+ if (sig->numArgs == 2)
+ {
+ op2 = impSIMDPopStack(TYP_SIMD16);
+ }
+
+ op1 = impSIMDPopStack(TYP_SIMD16);
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, simdSize);
+ break;
+ }
+
case NI_SSE_MoveMask:
assert(sig->numArgs == 1);
assert(JITtype2varType(sig->retType) == TYP_INT);
@@ -670,6 +690,18 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic,
retNode = gtNewSimdHWIntrinsicNode(TYP_INT, op1, intrinsic, TYP_FLOAT, simdSize);
break;
+ case NI_SSE_Prefetch0:
+ case NI_SSE_Prefetch1:
+ case NI_SSE_Prefetch2:
+ case NI_SSE_PrefetchNonTemporal:
+ {
+ assert(sig->numArgs == 1);
+ assert(JITtype2varType(sig->retType) == TYP_VOID);
+ op1 = impPopStack().val;
+ retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, intrinsic, TYP_UBYTE, 0);
+ break;
+ }
+
case NI_SSE_SetAllVector128:
assert(sig->numArgs == 1);
assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
@@ -678,6 +710,12 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic,
TYP_FLOAT, simdSize);
break;
+ case NI_SSE_StoreFence:
+ assert(sig->numArgs == 0);
+ assert(JITtype2varType(sig->retType) == TYP_VOID);
+ retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, intrinsic, TYP_VOID, 0);
+ break;
+
default:
JITDUMP("Not implemented hardware intrinsic");
break;
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index 8a016d3e9d..690cfe2d7d 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -218,6 +218,12 @@ INST3( shufpd, "shufpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCK
INST3( punpckhdq, "punpckhdq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6A))
+INST3( prefetchnta, "prefetchnta" , 0, IUM_RD, 0, 0, 0x000F0018, BAD_CODE, BAD_CODE)
+INST3( prefetcht0, "prefetcht0" , 0, IUM_RD, 0, 0, 0x000F0818, BAD_CODE, BAD_CODE)
+INST3( prefetcht1, "prefetcht1" , 0, IUM_RD, 0, 0, 0x000F1018, BAD_CODE, BAD_CODE)
+INST3( prefetcht2, "prefetcht2" , 0, IUM_RD, 0, 0, 0x000F1818, BAD_CODE, BAD_CODE)
+INST3( sfence, "sfence" , 0, IUM_RD, 0, 0, 0x000FF8AE, BAD_CODE, BAD_CODE)
+
// SSE 2 arith
INST3( addps, "addps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x58)) // Add packed singles
INST3( addss, "addss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x58)) // Add scalar singles