diff options
author | Fei <fei.peng@intel.com> | 2018-03-20 02:07:59 -0800 |
---|---|---|
committer | Fei <fei.peng@intel.com> | 2018-03-20 02:07:59 -0800 |
commit | dd96fc5e1eec6b3d14fbf0537f4da36306a7c02a (patch) | |
tree | 9b3e3aabf55cdd1d108a6511cff5141bf02c3e2b | |
parent | e9e33773a86f2433008ccce2632d44f3d9bb7f00 (diff) | |
download | coreclr-dd96fc5e1eec6b3d14fbf0537f4da36306a7c02a.tar.gz coreclr-dd96fc5e1eec6b3d14fbf0537f4da36306a7c02a.tar.bz2 coreclr-dd96fc5e1eec6b3d14fbf0537f4da36306a7c02a.zip |
Implement more AVX/AVX2 intrinsics
-rw-r--r-- | src/jit/emitxarch.cpp | 5 | ||||
-rw-r--r-- | src/jit/hwintrinsiccodegenxarch.cpp | 10 | ||||
-rw-r--r-- | src/jit/hwintrinsiclistxarch.h | 16 | ||||
-rw-r--r-- | src/jit/instrsxarch.h | 12 |
4 files changed, 38 insertions, 5 deletions
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 4e1bec97fb..117bc21c2e 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -223,7 +223,12 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins) case INS_unpcklpd: case INS_vinsertf128: case INS_vinserti128: + case INS_vmaskmovps: + case INS_vmaskmovpd: case INS_vperm2i128: + case INS_vperm2f128: + case INS_vpermilpsvar: + case INS_vpermilpdvar: case INS_vpsrlvd: case INS_vpsrlvq: case INS_vpsravd: diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 1d2b51ec7e..1d6380ebe0 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -134,7 +134,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else if (category == HW_Category_MemoryLoad) { - emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2Reg); + if (intrinsicID == NI_AVX_MaskLoad) + { + emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op2Reg, op1Reg); + } + else + { + emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2Reg); + } } else if (Compiler::isImmHWIntrinsic(intrinsicID, op2)) { @@ -1331,7 +1338,6 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node) if (op1Reg != targetReg) { - instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType); emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD32), targetReg, op1Reg); } break; diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h index 7435c286fa..216284f935 100644 --- a/src/jit/hwintrinsiclistxarch.h +++ b/src/jit/hwintrinsiclistxarch.h @@ -333,6 +333,7 @@ HARDWARE_INTRINSIC(AVX_BlendVariable, "BlendVaria HARDWARE_INTRINSIC(AVX_Ceiling, "Ceiling", AVX, 10, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX_BroadcastScalarToVector128, "BroadcastScalarToVector128", AVX, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX_BroadcastScalarToVector256, "BroadcastScalarToVector256", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX_BroadcastVector128ToVector256, "BroadcastVector128ToVector256", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid,INS_vbroadcastf128,INS_vbroadcastf128}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX_Compare, "Compare", AVX, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_IMM, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX_CompareScalar, "CompareScalar", AVX, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_IMM, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(AVX_ConvertToSingle, "ConvertToSingle", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) @@ -359,9 +360,13 @@ HARDWARE_INTRINSIC(AVX_LoadDquVector256, "LoadDquVec HARDWARE_INTRINSIC(AVX_LoadVector256, "LoadVector256", AVX, -1, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX_Max, "Max", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX_Min, "Min", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX_MaskLoad, "MaskLoad", AVX, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps,INS_vmaskmovpd}, HW_Category_MemoryLoad, HW_Flag_UnfixedSIMDSize) +HARDWARE_INTRINSIC(AVX_MoveMask, "MoveMask", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX_Multiply, "Multiply", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX_Or, "Or", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX_Permute, "Permute", AVX, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_UnfixedSIMDSize) +HARDWARE_INTRINSIC(AVX_Permute2x128, "Permute2x128", AVX, -1, 32, 3, {INS_vperm2f128,INS_vperm2f128,INS_vperm2f128,INS_vperm2f128,INS_vperm2f128,INS_vperm2f128,INS_vperm2f128,INS_vperm2f128,INS_vperm2f128,INS_vperm2f128}, HW_Category_IMM, HW_Flag_OneTypeGeneric|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX_PermuteVar, "PermuteVar", AVX, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar,INS_vpermilpdvar}, HW_Category_SimpleSIMD, HW_Flag_UnfixedSIMDSize) HARDWARE_INTRINSIC(AVX_Reciprocal, "Reciprocal", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX_ReciprocalSqrt, "ReciprocalSqrt", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX_RoundCurrentDirection, "RoundCurrentDirection", AVX, 4, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) @@ -395,17 +400,28 @@ HARDWARE_INTRINSIC(AVX2_Average, "Average", HARDWARE_INTRINSIC(AVX2_BlendVariable, "BlendVariable", AVX2, -1, 32, 3, {INS_vpblendvb, INS_vpblendvb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_BroadcastScalarToVector128, "BroadcastScalarToVector128", AVX2, -1, 16, 1, {INS_vpbroadcastb,INS_vpbroadcastb,INS_vpbroadcastw,INS_vpbroadcastw,INS_vpbroadcastd,INS_vpbroadcastd,INS_vpbroadcastq,INS_vpbroadcastq,INS_vbroadcastss,INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric) HARDWARE_INTRINSIC(AVX2_BroadcastScalarToVector256, "BroadcastScalarToVector256", AVX2, -1, 32, 1, {INS_vpbroadcastb,INS_vpbroadcastb,INS_vpbroadcastw,INS_vpbroadcastw,INS_vpbroadcastd,INS_vpbroadcastd,INS_vpbroadcastq,INS_vpbroadcastq,INS_vbroadcastss,INS_vbroadcastsd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric) +HARDWARE_INTRINSIC(AVX2_BroadcastVector128ToVector256, "BroadcastVector128ToVector256", AVX2, -1, 32, 1, {INS_vbroadcasti128,INS_vbroadcasti128,INS_vbroadcasti128,INS_vbroadcasti128,INS_vbroadcasti128,INS_vbroadcasti128,INS_vbroadcasti128,INS_vbroadcasti128,INS_invalid,INS_invalid},HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_CompareEqual, "CompareEqual", AVX2, -1, 32, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2_CompareGreaterThan, "CompareGreaterThan", AVX2, -1, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_ExtractVector128, "ExtractVector128", AVX2, -1, 32, -1, {INS_vextracti128,INS_vextracti128,INS_vextracti128,INS_vextracti128,INS_vextracti128,INS_vextracti128,INS_vextracti128,INS_vextracti128,INS_invalid, INS_invalid},HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int16, "ConvertToVector256Int16", AVX2, -1, 32, 1, {INS_pmovsxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX2_ConvertToVector256UInt16, "ConvertToVector256UInt16", AVX2, -1, 32, 1, {INS_invalid, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int32, "ConvertToVector256Int32", AVX2, -1, 32, 1, {INS_pmovsxbd, INS_invalid, INS_pmovsxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX2_ConvertToVector256UInt32, "ConvertToVector256UInt32", AVX2, -1, 32, 1, {INS_invalid, INS_pmovzxbd, INS_invalid, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int64, "ConvertToVector256Int64", AVX2, -1, 32, 1, {INS_pmovsxbq, INS_invalid, INS_pmovsxwq, INS_invalid, INS_pmovsxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX2_ConvertToVector256UInt64, "ConvertToVector256UInt64", AVX2, -1, 32, 1, {INS_invalid, INS_pmovzxbq, INS_invalid, INS_pmovzxwq, INS_invalid, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2_HorizontalAdd, "HorizontalAdd", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_invalid, INS_phaddd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_HorizontalAddSaturate, "HorizontalAddSaturate", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_HorizontalSubtract, "HorizontalSubtract", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_HorizontalSubtractSaturate, "HorizontalSubtractSaturate", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_InsertVector128, "InsertVector128", AVX2, -1, 32, 3, {INS_vinserti128,INS_vinserti128,INS_vinserti128,INS_vinserti128,INS_vinserti128,INS_vinserti128,INS_vinserti128,INS_vinserti128,INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX2_LoadAlignedVector256NonTemporal, "LoadAlignedVector256NonTemporal", AVX2, -1, 32, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(AVX2_Max, "Max", AVX2, -1, 32, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2_Min, "Min", AVX2, -1, 32, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2_MoveMask, "MoveMask", AVX2, -1, 32, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2_Multiply, "Multiply", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2_Or, "Or", AVX2, -1, 32, 2, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2_Permute2x128, "Permute2x128", AVX2, -1, 32, 3, {INS_vperm2i128,INS_vperm2i128,INS_vperm2i128,INS_vperm2i128,INS_vperm2i128,INS_vperm2i128,INS_vperm2i128,INS_vperm2i128,INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_OneTypeGeneric|HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX2_ShiftLeftLogical, "ShiftLeftLogical", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX2_ShiftLeftLogical128BitLane, "ShiftLeftLogical128BitLane", AVX2, -1, 32, 2, {INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX2_ShiftLeftLogicalVariable, "ShiftLeftLogicalVariable", AVX2, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_UnfixedSIMDSize|HW_Flag_NoContainment) diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index c4caf9ae76..23af97a282 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -474,8 +474,8 @@ INST3( vpbroadcastb, "pbroadcastb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS INST3( vpbroadcastw, "pbroadcastw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x79)) // Broadcast int16 value from reg/memory to entire ymm register INST3( vpbroadcastd, "pbroadcastd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x58)) // Broadcast int32 value from reg/memory to entire ymm register INST3( vpbroadcastq, "pbroadcastq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x59)) // Broadcast int64 value from reg/memory to entire ymm register -INST3( vextractf128, "extractf128" , 0, IUM_WR, 0, 0, SSE3A(0x19), BAD_CODE, BAD_CODE) // Extract 128-bit packed floating point values -INST3( vextracti128, "extracti128" , 0, IUM_WR, 0, 0, SSE3A(0x39), BAD_CODE, BAD_CODE) // Extract 128-bit packed integer values +INST3( vextractf128, "extractf128" , 0, IUM_WR, 0, 0, SSE3A(0x19), BAD_CODE, SSE3A(0x19)) // Extract 128-bit packed floating point values +INST3( vextracti128, "extracti128" , 0, IUM_WR, 0, 0, SSE3A(0x39), BAD_CODE, SSE3A(0x39)) // Extract 128-bit packed integer values INST3( vinsertf128, "insertf128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x18)) // Insert 128-bit packed floating point values INST3( vinserti128, "inserti128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x38)) // Insert 128-bit packed integer values INST3( vzeroupper, "zeroupper" , 0, IUM_WR, 0, 0, 0xC577F8, BAD_CODE, BAD_CODE) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix) @@ -493,7 +493,13 @@ INST3( vpsllvd, "psllvd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS INST3( vpsllvq, "psllvq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x47)) // Variable Bit Shift Left Logical INST3( vpermilps, "permilps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x04)) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values INST3( vpermilpd, "permilpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x05)) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values - +INST3( vpermilpsvar, "permilpsvar" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0C)) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values +INST3( vpermilpdvar, "permilpdvar" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0D)) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values +INST3( vperm2f128, "perm2f128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x06)) // Permute Floating-Point Values +INST3(vbroadcastf128,"broadcastf128",0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1A)) // Broadcast packed float values read from memory to entire ymm register +INST3(vbroadcasti128,"broadcasti128",0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x5A)) // Broadcast packed integer values read from memory to entire ymm register +INST3(vmaskmovps, "maskmovps" ,0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2C)) // Conditional SIMD Packed Loads Float +INST3(vmaskmovpd, "maskmovpd" ,0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2D)) // Conditional SIMD Packed Loads Double INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE) // Scalar instructions in SSE4.2 |