summaryrefslogtreecommitdiff
path: root/src/jit/hwintrinsiclistxarch.h
diff options
context:
space:
mode:
authorCarol Eidt <carol.eidt@microsoft.com>2018-03-02 15:41:05 +0100
committerGitHub <noreply@github.com>2018-03-02 15:41:05 +0100
commit1116aa7dab525983af74134fc13c4899e0bf5648 (patch)
treeb8c20bc42d28d1a47868c7b1264afd6fb49ba57f /src/jit/hwintrinsiclistxarch.h
parent369ef400aad1a4a2adbe815d450608a02b6b9acb (diff)
parentc767b4d9b330c9e8a9c51e92aa01f2fceedec181 (diff)
downloadcoreclr-1116aa7dab525983af74134fc13c4899e0bf5648.tar.gz
coreclr-1116aa7dab525983af74134fc13c4899e0bf5648.tar.bz2
coreclr-1116aa7dab525983af74134fc13c4899e0bf5648.zip
Merge pull request #16646 from fiigii/insertextract
Implement SSE4.1 Insert/Extract and simplify SSE/SSE2 intrinsic
Diffstat (limited to 'src/jit/hwintrinsiclistxarch.h')
-rw-r--r--src/jit/hwintrinsiclistxarch.h130
1 files changed, 67 insertions, 63 deletions
diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h
index 69366edec3..5365c6e3d0 100644
--- a/src/jit/hwintrinsiclistxarch.h
+++ b/src/jit/hwintrinsiclistxarch.h
@@ -33,29 +33,29 @@ HARDWARE_INTRINSIC(SSE_AddScalar, "AddScalar"
HARDWARE_INTRINSIC(SSE_And, "And", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE_AndNot, "AndNot", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_CompareEqual, "CompareEqual", SSE, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareEqualScalar, "CompareEqualScalar", SSE, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareGreaterThan, "CompareGreaterThan", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareGreaterThanScalar, "CompareGreaterThanScalar", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqual, "CompareGreaterThanOrEqual", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualScalar, "CompareGreaterThanOrEqualScalar", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareLessThan, "CompareLessThan", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareLessThanScalar, "CompareLessThanScalar", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqual, "CompareLessThanOrEqual", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualScalar, "CompareLessThanOrEqualScalar", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareNotEqual, "CompareNotEqual", SSE, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareNotEqualScalar, "CompareNotEqualScalar", SSE, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareNotGreaterThan, "CompareNotGreaterThan", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanScalar, "CompareNotGreaterThanScalar", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqual, "CompareNotGreaterThanOrEqual", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
@@ -68,12 +68,12 @@ HARDWARE_INTRINSIC(SSE_CompareOrdered, "CompareOrd
HARDWARE_INTRINSIC(SSE_CompareOrderedScalar, "CompareOrderedScalar", SSE, 7, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE_CompareUnordered, "CompareUnordered", SSE, 3, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_CompareUnorderedScalar, "CompareUnorderedScalar", SSE, 3, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_ConvertToInt32, "ConvertToInt32", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ConvertToInt64, "ConvertToInt64", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ConvertToSingle, "ConvertToSingle", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_Helper, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ConvertScalarToVector128Single, "ConvertScalarToVector128Single", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ConvertToInt32WithTruncation, "ConvertToInt32WithTruncation", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ConvertToInt64WithTruncation, "ConvertToInt64WithTruncation", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertToInt32, "ConvertToInt32", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertToInt64, "ConvertToInt64", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertToSingle, "ConvertToSingle", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertScalarToVector128Single, "ConvertScalarToVector128Single", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SecondArgMaybe64Bit|HW_Flag_BaseTypeFromFirstArg|HW_Flag_CopyUpperBits|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertToInt32WithTruncation, "ConvertToInt32WithTruncation", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertToInt64WithTruncation, "ConvertToInt64WithTruncation", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_64BitOnly|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_Divide, "Divide", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_DivideScalar, "DivideScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE_LoadAlignedVector128, "LoadAlignedVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
@@ -134,29 +134,29 @@ HARDWARE_INTRINSIC(SSE2_And, "And",
HARDWARE_INTRINSIC(SSE2_AndNot, "AndNot", SSE2, -1, 16, 2, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_invalid, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_Average, "Average", SSE2, -1, 16, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2_CompareEqual, "CompareEqual", SSE2, 0, 16, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareEqualScalar, "CompareEqualScalar", SSE2, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareGreaterThan, "CompareGreaterThan", SSE2, 6, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE2, -1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE2, -1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareGreaterThanScalar, "CompareGreaterThanScalar", SSE2, 6, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE2, -1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE2, -1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqual, "CompareGreaterThanOrEqual", SSE2, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualScalar, "CompareGreaterThanOrEqualScalar", SSE2, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareLessThan, "CompareLessThan", SSE2, 1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_Special, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanScalar, "CompareLessThanScalar", SSE2, 1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromArg|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanScalar, "CompareLessThanScalar", SSE2, 1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqual, "CompareLessThanOrEqual", SSE2, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualScalar, "CompareLessThanOrEqualScalar", SSE2, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareNotEqual, "CompareNotEqual", SSE2, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareNotEqualScalar, "CompareNotEqualScalar", SSE2, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThan, "CompareNotGreaterThan", SSE2, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThanScalar, "CompareNotGreaterThanScalar", SSE2, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThanOrEqual, "CompareNotGreaterThanOrEqual", SSE2, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
@@ -169,25 +169,27 @@ HARDWARE_INTRINSIC(SSE2_CompareOrdered, "CompareOrd
HARDWARE_INTRINSIC(SSE2_CompareOrderedScalar, "CompareOrderedScalar", SSE2, 7, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2_CompareUnordered, "CompareUnordered", SSE2, 3, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_CompareUnorderedScalar, "CompareUnorderedScalar", SSE2, 3, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_ConvertToDouble, "ConvertToDouble", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsd}, HW_Category_Helper, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt32, "ConvertToInt32", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_Special, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt32WithTruncation, "ConvertToInt32WithTruncation", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt64, "ConvertToInt64", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_Special, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt64WithTruncation, "ConvertToInt64WithTruncation", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToUInt32, "ConvertToUInt32", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToUInt64, "ConvertToUInt64", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Double, "ConvertToVector128Double", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Double, "ConvertScalarToVector128Double", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd, INS_invalid, INS_cvtsi2sd, INS_invalid, INS_cvtss2sd, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32, "ConvertToVector128Int32", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToDouble, "ConvertToDouble", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsd}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToInt32, "ConvertToInt32", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToInt32WithTruncation, "ConvertToInt32WithTruncation", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToInt64, "ConvertToInt64", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToInt64WithTruncation, "ConvertToInt64WithTruncation", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_64BitOnly|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToUInt32, "ConvertToUInt32", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToUInt64, "ConvertToUInt64", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToVector128Double, "ConvertToVector128Double", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Double, "ConvertScalarToVector128Double", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd, INS_invalid, INS_cvtsi2sd, INS_invalid, INS_cvtss2sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SecondArgMaybe64Bit|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32, "ConvertToVector128Int32", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Int32, "ConvertScalarToVector128Int32", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_i2xmm, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32WithTruncation, "ConvertToVector128Int32WithTruncation", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Int64, "ConvertScalarToVector128Int64", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_i2xmm, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Single, "ConvertToVector128Single", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Single, "ConvertScalarToVector128Single", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss}, HW_Category_Special, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32WithTruncation, "ConvertToVector128Int32WithTruncation", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Int64, "ConvertScalarToVector128Int64", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_i2xmm, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_64BitOnly|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToVector128Single, "ConvertToVector128Single", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Single, "ConvertScalarToVector128Single", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128UInt32, "ConvertScalarToVector128UInt32", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_i2xmm, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128UInt64, "ConvertScalarToVector128UInt64", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_i2xmm, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128UInt64, "ConvertScalarToVector128UInt64", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_i2xmm, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_64BitOnly|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_Divide, "Divide", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_DivideScalar, "DivideScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2_Extract, "Extract", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_pextrw, INS_pextrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_Insert, "Insert", SSE2, -1, 16, 3, {INS_invalid, INS_invalid, INS_pinsrw, INS_pinsrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE2_LoadAlignedVector128, "LoadAlignedVector128", SSE2, -1, 16, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_invalid, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_LoadFence, "LoadFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_LoadScalarVector128, "LoadScalarVector128", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_movd, INS_movq, INS_movq, INS_invalid, INS_movsdsse2}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
@@ -200,16 +202,16 @@ HARDWARE_INTRINSIC(SSE2_Min, "Min",
HARDWARE_INTRINSIC(SSE2_MinScalar, "MinScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2_MoveMask, "MoveMask", SSE2, -1, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskpd}, HW_Category_Special, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_MoveScalar, "MoveScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE2_Multiply, "Multiply", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuludq, INS_invalid, INS_invalid, INS_invalid, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_BaseTypeFromArg)
+HARDWARE_INTRINSIC(SSE2_Multiply, "Multiply", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuludq, INS_invalid, INS_invalid, INS_invalid, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(SSE2_MultiplyHigh, "MultiplyHigh", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_MultiplyHorizontalAdd, "MultiplyHorizontalAdd", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_BaseTypeFromArg)
+HARDWARE_INTRINSIC(SSE2_MultiplyHorizontalAdd, "MultiplyHorizontalAdd", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(SSE2_MultiplyLow, "MultiplyLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2_MultiplyScalar, "MultiplyScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2_Or, "Or", SSE2, -1, 16, 2, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_invalid, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_PackSignedSaturate, "PackSignedSaturate", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromArg)
-HARDWARE_INTRINSIC(SSE2_PackUnsignedSaturate, "PackUnsignedSaturate", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_packuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromArg)
+HARDWARE_INTRINSIC(SSE2_PackSignedSaturate, "PackSignedSaturate", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE2_PackUnsignedSaturate, "PackUnsignedSaturate", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_packuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(SSE2_SetZeroVector128, "SetZeroVector128", SSE2, -1, 16, 0, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_xorpd}, HW_Category_Helper, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_SumAbsoluteDifferences, "SumAbsoluteDifferences", SSE2, -1, 16, 2, {INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromArg)
+HARDWARE_INTRINSIC(SSE2_SumAbsoluteDifferences, "SumAbsoluteDifferences", SSE2, -1, 16, 2, {INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(SSE2_ShiftLeftLogical, "ShiftLeftLogical", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE2_ShiftLeftLogical128BitLane, "ShiftLeftLogical128BitLane", SSE2, -1, 16, 2, {INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE2_ShiftRightArithmetic, "ShiftRightArithmetic", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
@@ -273,12 +275,14 @@ HARDWARE_INTRINSIC(SSE41_BlendVariable, "BlendVaria
HARDWARE_INTRINSIC(SSE41_Ceiling, "Ceiling", SSE41, 10, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41_CeilingScalar, "CeilingScalar", SSE41, 10, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_Special, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE41_CompareEqual, "CompareEqual", SSE41, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int16, "ConvertToVector128Int16", SSE41, -1, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int32, "ConvertToVector128Int32", SSE41, -1, 16, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int64, "ConvertToVector128Int64", SSE41, -1, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int16, "ConvertToVector128Int16", SSE41, -1, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int32, "ConvertToVector128Int32", SSE41, -1, 16, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int64, "ConvertToVector128Int64", SSE41, -1, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41_DotProduct, "DotProduct", SSE41, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_dppd}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE41_Extract, "Extract", SSE41, -1, 16, 2, {INS_pextrb, INS_pextrb, INS_invalid, INS_invalid, INS_pextrd, INS_pextrd, INS_pextrq, INS_pextrq, INS_extractps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41_Floor, "Floor", SSE41, 9, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41_FloorScalar, "FloorScalar", SSE41, 9, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_Special, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE41_Insert, "Insert", SSE41, -1, 16, 3, {INS_pinsrb, INS_pinsrb, INS_invalid, INS_invalid, INS_pinsrd, INS_pinsrd, INS_pinsrq, INS_pinsrq, INS_insertps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SecondArgMaybe64Bit)
HARDWARE_INTRINSIC(SSE41_LoadAlignedVector128NonTemporal, "LoadAlignedVector128NonTemporal", SSE41, -1, 16, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41_Max, "Max", SSE41, -1, 16, 2, {INS_pmaxsb, INS_invalid, INS_invalid, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE41_Min, "Min", SSE41, -1, 16, 2, {INS_pminsb, INS_invalid, INS_invalid, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
@@ -297,12 +301,12 @@ HARDWARE_INTRINSIC(SSE41_RoundToPositiveInfinity, "RoundToPos
HARDWARE_INTRINSIC(SSE41_RoundToPositiveInfinityScalar, "RoundToPositiveInfinityScalar", SSE41, 10, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_Special, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE41_RoundToZero, "RoundToZero", SSE41, 11, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41_RoundToZeroScalar, "RoundToZeroScalar", SSE41, 11, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_Special, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_TestAllOnes, "TestAllOnes", SSE41, -1, 16, 1, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_TestAllZeros, "TestAllZeros", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE41_TestC, "TestC", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE41_TestMixOnesZeros, "TestMixOnesZeros", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE41_TestNotZAndNotC, "TestNotZAndNotC", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE41_TestZ, "TestZ", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE41_TestAllOnes, "TestAllOnes", SSE41, -1, 16, 1, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestAllZeros, "TestAllZeros", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestC, "TestC", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestMixOnesZeros, "TestMixOnesZeros", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestNotZAndNotC, "TestNotZAndNotC", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestZ, "TestZ", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
// SSE42 Intrinsics
HARDWARE_INTRINSIC(SSE42_IsSupported, "get_IsSupported", SSE42, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
@@ -352,9 +356,9 @@ HARDWARE_INTRINSIC(AVX_Store, "Store",
HARDWARE_INTRINSIC(AVX_StoreAligned, "StoreAligned", AVX, -1, 32, 2, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", AVX, -1, 32, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_Subtract, "Subtract", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_TestC, "TestC", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromArg)
-HARDWARE_INTRINSIC(AVX_TestNotZAndNotC, "TestNotZAndNotC", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromArg)
-HARDWARE_INTRINSIC(AVX_TestZ, "TestZ", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromArg)
+HARDWARE_INTRINSIC(AVX_TestC, "TestC", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX_TestNotZAndNotC, "TestNotZAndNotC", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX_TestZ, "TestZ", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX_UnpackHigh, "UnpackHigh", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX_UnpackLow, "UnpackLow", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX_Xor, "Xor", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)