author    Tanner Gooding <tagoo@outlook.com>  2018-08-27 20:32:09 -0700
committer Tanner Gooding <tagoo@outlook.com>  2018-08-28 15:12:43 -0700
commit    1cdced39f7eecf1f758837f7a2d9005770d45ae3 (patch)
tree      3f9e9951856a61cc9157009aca5d20e095df69e3 /src
parent    5e83757c500f9c26cac8da254e0fe9e3a7580390 (diff)
Replacing the IsDstDstSrcAVXInstruction jump table with a flag.
Diffstat (limited to 'src')
-rw-r--r--  src/jit/emitxarch.cpp | 246
-rw-r--r--  src/jit/instr.h       |   1
-rw-r--r--  src/jit/instrsxarch.h | 484
3 files changed, 244 insertions, 487 deletions
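
The mechanical change: IsDstDstSrcAVXInstruction previously walked a ~170-case switch (compiled to a jump table); it now reads a per-instruction flags byte built from the INST3 table. A minimal standalone sketch of the pattern — names and table contents here are illustrative; in the JIT the table is CodeGenInterface::instInfo, generated from instrsxarch.h, and the real check additionally requires IsAVXInstruction(ins):

    #include <cstdint>

    enum insFlags : uint8_t
    {
        INS_FLAGS_None                      = 0x00,
        INS_FLAGS_ReadsFlags                = 0x01,
        INS_FLAGS_WritesFlags               = 0x02,
        INS_Flags_IsDstDstSrcAVXInstruction = 0x08, // the bit this commit adds
    };

    enum instruction { INS_addps, INS_sqrtps, INS_COUNT }; // illustrative subset

    // One flags byte per instruction, populated by expanding the INST3(...)
    // entries of the instruction table. Contents here are illustrative.
    static const uint8_t instInfo[INS_COUNT] = {
        /* INS_addps  */ INS_Flags_IsDstDstSrcAVXInstruction,
        /* INS_sqrtps */ INS_FLAGS_None,
    };

    bool IsDstDstSrcAVXInstruction(instruction ins)
    {
        // A masked table load replaces the former per-instruction switch.
        return (instInfo[ins] & INS_Flags_IsDstDstSrcAVXInstruction) != 0;
    }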
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 1b1af430c6..8a8115c4f1 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -137,251 +137,7 @@ bool emitter::IsAVXInstruction(instruction ins)
// to indicate whether it is a 3-operand instruction.
bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
{
- switch (ins)
- {
- case INS_addpd:
- case INS_addps:
- case INS_addsd:
- case INS_addss:
- case INS_addsubpd:
- case INS_addsubps:
- case INS_aesdec:
- case INS_aesdeclast:
- case INS_aesenc:
- case INS_aesenclast:
- case INS_andn:
- case INS_andnpd:
- case INS_andnps:
- case INS_andpd:
- case INS_andps:
- case INS_blendpd:
- case INS_blendps:
- case INS_blsi:
- case INS_blsmsk:
- case INS_blsr:
- case INS_cmppd:
- case INS_cmpps:
- case INS_cmpsd:
- case INS_cmpss:
- case INS_cvtsi2sd:
- case INS_cvtsi2ss:
- case INS_cvtsd2ss:
- case INS_cvtss2sd:
- case INS_divpd:
- case INS_divps:
- case INS_divsd:
- case INS_divss:
- case INS_dppd:
- case INS_dpps:
- case INS_haddpd:
- case INS_haddps:
- case INS_hsubpd:
- case INS_hsubps:
- case INS_insertps:
- case INS_maxpd:
- case INS_maxps:
- case INS_maxsd:
- case INS_maxss:
- case INS_minpd:
- case INS_minps:
- case INS_minsd:
- case INS_minss:
- case INS_movhlps:
- case INS_movlhps:
- case INS_mpsadbw:
- case INS_mulpd:
- case INS_mulps:
- case INS_mulsd:
- case INS_mulss:
- case INS_orpd:
- case INS_orps:
- case INS_packssdw:
- case INS_packsswb:
- case INS_packusdw:
- case INS_packuswb:
- case INS_paddb:
- case INS_paddd:
- case INS_paddq:
- case INS_paddsb:
- case INS_paddsw:
- case INS_paddusb:
- case INS_paddusw:
- case INS_paddw:
- case INS_palignr:
- case INS_pand:
- case INS_pandn:
- case INS_pavgb:
- case INS_pavgw:
- case INS_pblendw:
- case INS_pcmpeqb:
- case INS_pcmpeqd:
- case INS_pcmpeqq:
- case INS_pcmpeqw:
- case INS_pcmpgtb:
- case INS_pcmpgtd:
- case INS_pcmpgtq:
- case INS_pcmpgtw:
- case INS_pdep:
- case INS_pext:
- case INS_phaddd:
- case INS_phaddsw:
- case INS_phaddw:
- case INS_phsubd:
- case INS_phsubsw:
- case INS_phsubw:
- case INS_pinsrb:
- case INS_pinsrw:
- case INS_pinsrd:
- case INS_pinsrq:
- case INS_pmaddubsw:
- case INS_pmaddwd:
- case INS_pmaxsb:
- case INS_pmaxsd:
- case INS_pmaxsw:
- case INS_pmaxub:
- case INS_pmaxud:
- case INS_pmaxuw:
- case INS_pminsb:
- case INS_pminsd:
- case INS_pminsw:
- case INS_pminub:
- case INS_pminud:
- case INS_pminuw:
- case INS_pmuldq:
- case INS_pmulhrsw:
- case INS_pmulhuw:
- case INS_pmulhw:
- case INS_pmulld:
- case INS_pmullw:
- case INS_pmuludq:
- case INS_por:
- case INS_psadbw:
- case INS_pshufb:
- case INS_psignb:
- case INS_psignd:
- case INS_psignw:
- case INS_psubb:
- case INS_psubd:
- case INS_psubq:
- case INS_psubsb:
- case INS_psubsw:
- case INS_psubusb:
- case INS_psubusw:
- case INS_psubw:
- case INS_pslld:
- case INS_pslldq:
- case INS_psllq:
- case INS_psllw:
- case INS_psrld:
- case INS_psrldq:
- case INS_psrlq:
- case INS_psrlw:
- case INS_psrad:
- case INS_psraw:
- case INS_punpckhbw:
- case INS_punpckhdq:
- case INS_punpckhqdq:
- case INS_punpckhwd:
- case INS_punpcklbw:
- case INS_punpckldq:
- case INS_punpcklqdq:
- case INS_punpcklwd:
- case INS_pxor:
- case INS_shufpd:
- case INS_shufps:
- case INS_subpd:
- case INS_subps:
- case INS_subsd:
- case INS_subss:
- case INS_unpckhps:
- case INS_unpcklps:
- case INS_unpckhpd:
- case INS_unpcklpd:
- case INS_vblendvps:
- case INS_vblendvpd:
- case INS_vpblendvb:
- case INS_vfmadd132pd:
- case INS_vfmadd213pd:
- case INS_vfmadd231pd:
- case INS_vfmadd132ps:
- case INS_vfmadd213ps:
- case INS_vfmadd231ps:
- case INS_vfmadd132sd:
- case INS_vfmadd213sd:
- case INS_vfmadd231sd:
- case INS_vfmadd132ss:
- case INS_vfmadd213ss:
- case INS_vfmadd231ss:
- case INS_vfmaddsub132pd:
- case INS_vfmaddsub213pd:
- case INS_vfmaddsub231pd:
- case INS_vfmaddsub132ps:
- case INS_vfmaddsub213ps:
- case INS_vfmaddsub231ps:
- case INS_vfmsubadd132pd:
- case INS_vfmsubadd213pd:
- case INS_vfmsubadd231pd:
- case INS_vfmsubadd132ps:
- case INS_vfmsubadd213ps:
- case INS_vfmsubadd231ps:
- case INS_vfmsub132pd:
- case INS_vfmsub213pd:
- case INS_vfmsub231pd:
- case INS_vfmsub132ps:
- case INS_vfmsub213ps:
- case INS_vfmsub231ps:
- case INS_vfmsub132sd:
- case INS_vfmsub213sd:
- case INS_vfmsub231sd:
- case INS_vfmsub132ss:
- case INS_vfmsub213ss:
- case INS_vfmsub231ss:
- case INS_vfnmadd132pd:
- case INS_vfnmadd213pd:
- case INS_vfnmadd231pd:
- case INS_vfnmadd132ps:
- case INS_vfnmadd213ps:
- case INS_vfnmadd231ps:
- case INS_vfnmadd132sd:
- case INS_vfnmadd213sd:
- case INS_vfnmadd231sd:
- case INS_vfnmadd132ss:
- case INS_vfnmadd213ss:
- case INS_vfnmadd231ss:
- case INS_vfnmsub132pd:
- case INS_vfnmsub213pd:
- case INS_vfnmsub231pd:
- case INS_vfnmsub132ps:
- case INS_vfnmsub213ps:
- case INS_vfnmsub231ps:
- case INS_vfnmsub132sd:
- case INS_vfnmsub213sd:
- case INS_vfnmsub231sd:
- case INS_vfnmsub132ss:
- case INS_vfnmsub213ss:
- case INS_vfnmsub231ss:
- case INS_vinsertf128:
- case INS_vinserti128:
- case INS_vmaskmovps:
- case INS_vmaskmovpd:
- case INS_vpmaskmovd:
- case INS_vpmaskmovq:
- case INS_vpblendd:
- case INS_vperm2i128:
- case INS_vperm2f128:
- case INS_vpermilpsvar:
- case INS_vpermilpdvar:
- case INS_vpsrlvd:
- case INS_vpsrlvq:
- case INS_vpsravd:
- case INS_vpsllvd:
- case INS_vpsllvq:
- case INS_xorpd:
- case INS_xorps:
- return IsAVXInstruction(ins);
- default:
- return false;
- }
+ return ((CodeGenInterface::instInfo[ins] & INS_Flags_IsDstDstSrcAVXInstruction) != 0) && IsAVXInstruction(ins);
}
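
What the flag means in emission: for a two-operand call site the emitter builds the three-operand VEX form by duplicating the destination as the first source, e.g. (registers illustrative):

    // addps  xmm1, xmm2        — legacy SSE two-operand form: xmm1 = xmm1 + xmm2
    // vaddps xmm1, xmm1, xmm2  — AVX form the emitter produces: dst, dst, src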
// Returns true if the AVX instruction requires 3 operands that duplicate the source
diff --git a/src/jit/instr.h b/src/jit/instr.h
index c00a61ceb4..ca09b51625 100644
--- a/src/jit/instr.h
+++ b/src/jit/instr.h
@@ -93,6 +93,7 @@ enum insFlags: uint8_t
INS_FLAGS_ReadsFlags = 0x01,
INS_FLAGS_WritesFlags = 0x02,
INS_FLAGS_x87Instr = 0x04,
+ INS_Flags_IsDstDstSrcAVXInstruction = 0x08,
// TODO-Cleanup: Remove this flag and its usage from _TARGET_XARCH_
INS_FLAGS_DONT_CARE = 0x00,
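
Because each flag is a distinct power-of-two bit, the new value (0x08, the next free bit) can be OR-combined with the existing ones in a table entry and masked out independently; a hypothetical entry:

    // (insFlags)(INS_FLAGS_WritesFlags | INS_Flags_IsDstDstSrcAVXInstruction)
    // still satisfies (flags & INS_Flags_IsDstDstSrcAVXInstruction) != 0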
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index d4278f7957..232cf27508 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -183,9 +183,9 @@ INST3(movd, "movd", IUM_WR, PCKDBL(0x7E), BAD_CODE,
INST3(movq, "movq", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_FLAGS_None)
INST3(movsdsse2, "movsd", IUM_WR, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10), INS_FLAGS_None)
-INST3(punpckldq, "punpckldq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x62), INS_FLAGS_None)
+INST3(punpckldq, "punpckldq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x62), INS_Flags_IsDstDstSrcAVXInstruction)
-INST3(xorps, "xorps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x57), INS_FLAGS_None) // XOR packed singles
+INST3(xorps, "xorps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x57), INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed singles
INST3(cvttsd2si, "cvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), INS_FLAGS_None) // cvt with trunc scalar double to signed DWORDs
@@ -204,17 +204,17 @@ INST3(movapd, "movapd", IUM_WR, PCKDBL(0x29), BAD_CODE,
INST3(movaps, "movaps", IUM_WR, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28), INS_FLAGS_None)
INST3(movupd, "movupd", IUM_WR, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10), INS_FLAGS_None)
INST3(movups, "movups", IUM_WR, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10), INS_FLAGS_None)
-INST3(movhlps, "movhlps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x12), INS_FLAGS_None)
-INST3(movlhps, "movlhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x16), INS_FLAGS_None)
+INST3(movhlps, "movhlps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x12), INS_Flags_IsDstDstSrcAVXInstruction)
+INST3(movlhps, "movlhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x16), INS_Flags_IsDstDstSrcAVXInstruction)
INST3(movmskps, "movmskps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x50), INS_FLAGS_None)
-INST3(unpckhps, "unpckhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x15), INS_FLAGS_None)
-INST3(unpcklps, "unpcklps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x14), INS_FLAGS_None)
+INST3(unpckhps, "unpckhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x15), INS_Flags_IsDstDstSrcAVXInstruction)
+INST3(unpcklps, "unpcklps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x14), INS_Flags_IsDstDstSrcAVXInstruction)
INST3(maskmovdqu, "maskmovdqu", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF7), INS_FLAGS_None)
-INST3(shufps, "shufps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC6), INS_FLAGS_None)
-INST3(shufpd, "shufpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC6), INS_FLAGS_None)
+INST3(shufps, "shufps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC6), INS_Flags_IsDstDstSrcAVXInstruction)
+INST3(shufpd, "shufpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC6), INS_Flags_IsDstDstSrcAVXInstruction)
-INST3(punpckhdq, "punpckhdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6A), INS_FLAGS_None)
+INST3(punpckhdq, "punpckhdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6A), INS_Flags_IsDstDstSrcAVXInstruction)
INST3(lfence, "lfence", IUM_RD, 0x000FE8AE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
INST3(mfence, "mfence", IUM_RD, 0x000FF0AE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
@@ -225,47 +225,47 @@ INST3(prefetcht2, "prefetcht2", IUM_RD, 0x000F1818, BAD_CODE,
INST3(sfence, "sfence", IUM_RD, 0x000FF8AE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
// SSE 2 arith
-INST3(addps, "addps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x58), INS_FLAGS_None) // Add packed singles
-INST3(addss, "addss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x58), INS_FLAGS_None) // Add scalar singles
-INST3(addpd, "addpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x58), INS_FLAGS_None) // Add packed doubles
-INST3(addsd, "addsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x58), INS_FLAGS_None) // Add scalar doubles
-INST3(mulps, "mulps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x59), INS_FLAGS_None) // Multiply packed singles
-INST3(mulss, "mulss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x59), INS_FLAGS_None) // Multiply scalar single
-INST3(mulpd, "mulpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x59), INS_FLAGS_None) // Multiply packed doubles
-INST3(mulsd, "mulsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x59), INS_FLAGS_None) // Multiply scalar doubles
-INST3(subps, "subps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5C), INS_FLAGS_None) // Subtract packed singles
-INST3(subss, "subss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5C), INS_FLAGS_None) // Subtract scalar singles
-INST3(subpd, "subpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5C), INS_FLAGS_None) // Subtract packed doubles
-INST3(subsd, "subsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5C), INS_FLAGS_None) // Subtract scalar doubles
-INST3(minps, "minps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5D), INS_FLAGS_None) // Return Minimum packed singles
-INST3(minss, "minss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5D), INS_FLAGS_None) // Return Minimum scalar single
-INST3(minpd, "minpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5D), INS_FLAGS_None) // Return Minimum packed doubles
-INST3(minsd, "minsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5D), INS_FLAGS_None) // Return Minimum scalar double
-INST3(divps, "divps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5E), INS_FLAGS_None) // Divide packed singles
-INST3(divss, "divss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5E), INS_FLAGS_None) // Divide scalar singles
-INST3(divpd, "divpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5E), INS_FLAGS_None) // Divide packed doubles
-INST3(divsd, "divsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5E), INS_FLAGS_None) // Divide scalar doubles
-INST3(maxps, "maxps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5F), INS_FLAGS_None) // Return Maximum packed singles
-INST3(maxss, "maxss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5F), INS_FLAGS_None) // Return Maximum scalar single
-INST3(maxpd, "maxpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5F), INS_FLAGS_None) // Return Maximum packed doubles
-INST3(maxsd, "maxsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5F), INS_FLAGS_None) // Return Maximum scalar double
-INST3(xorpd, "xorpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x57), INS_FLAGS_None) // XOR packed doubles
-INST3(andps, "andps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x54), INS_FLAGS_None) // AND packed singles
-INST3(andpd, "andpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x54), INS_FLAGS_None) // AND packed doubles
+INST3(addps, "addps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed singles
+INST3(addss, "addss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar singles
+INST3(addpd, "addpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed doubles
+INST3(addsd, "addsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar doubles
+INST3(mulps, "mulps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed singles
+INST3(mulss, "mulss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar single
+INST3(mulpd, "mulpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed doubles
+INST3(mulsd, "mulsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar doubles
+INST3(subps, "subps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed singles
+INST3(subss, "subss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar singles
+INST3(subpd, "subpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed doubles
+INST3(subsd, "subsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar doubles
+INST3(minps, "minps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum packed singles
+INST3(minss, "minss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar single
+INST3(minpd, "minpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum packed doubles
+INST3(minsd, "minsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar double
+INST3(divps, "divps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed singles
+INST3(divss, "divss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar singles
+INST3(divpd, "divpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed doubles
+INST3(divsd, "divsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar doubles
+INST3(maxps, "maxps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed singles
+INST3(maxss, "maxss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar single
+INST3(maxpd, "maxpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed doubles
+INST3(maxsd, "maxsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar double
+INST3(xorpd, "xorpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x57), INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed doubles
+INST3(andps, "andps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x54), INS_Flags_IsDstDstSrcAVXInstruction) // AND packed singles
+INST3(andpd, "andpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x54), INS_Flags_IsDstDstSrcAVXInstruction) // AND packed doubles
INST3(sqrtps, "sqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x51), INS_FLAGS_None) // Sqrt of packed singles
INST3(sqrtss, "sqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x51), INS_FLAGS_None) // Sqrt of scalar single
INST3(sqrtpd, "sqrtpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x51), INS_FLAGS_None) // Sqrt of packed doubles
INST3(sqrtsd, "sqrtsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x51), INS_FLAGS_None) // Sqrt of scalar double
-INST3(andnps, "andnps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x55), INS_FLAGS_None) // And-Not packed singles
-INST3(andnpd, "andnpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x55), INS_FLAGS_None) // And-Not packed doubles
-INST3(orps, "orps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x56), INS_FLAGS_None) // Or packed singles
-INST3(orpd, "orpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x56), INS_FLAGS_None) // Or packed doubles
-INST3(haddpd, "haddpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7C), INS_FLAGS_None) // Horizontal add packed doubles
-INST3(haddps, "haddps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7C), INS_FLAGS_None) // Horizontal add packed floats
-INST3(hsubpd, "hsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7D), INS_FLAGS_None) // Horizontal subtract packed doubles
-INST3(hsubps, "hsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7D), INS_FLAGS_None) // Horizontal subtract packed floats
-INST3(addsubps, "addsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xD0), INS_FLAGS_None) // Add/Subtract packed singles
-INST3(addsubpd, "addsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD0), INS_FLAGS_None) // Add/Subtract packed doubles
+INST3(andnps, "andnps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x55), INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed singles
+INST3(andnpd, "andnpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x55), INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed doubles
+INST3(orps, "orps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x56), INS_Flags_IsDstDstSrcAVXInstruction) // Or packed singles
+INST3(orpd, "orpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x56), INS_Flags_IsDstDstSrcAVXInstruction) // Or packed doubles
+INST3(haddpd, "haddpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7C), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed doubles
+INST3(haddps, "haddps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7C), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed floats
+INST3(hsubpd, "hsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7D), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed doubles
+INST3(hsubps, "hsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7D), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed floats
+INST3(addsubps, "addsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xD0), INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed singles
+INST3(addsubpd, "addsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD0), INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed doubles
// SSE 2 approx arith
INST3(rcpps, "rcpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x53), INS_FLAGS_None) // Reciprocal of packed singles
@@ -275,9 +275,9 @@ INST3(rsqrtss, "rsqrtss", IUM_WR, BAD_CODE, BAD_CODE,
// SSE2 conversions
INST3(cvtpi2ps, "cvtpi2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x2A), INS_FLAGS_None) // cvt packed DWORDs to singles
-INST3(cvtsi2ss, "cvtsi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), INS_FLAGS_None) // cvt DWORD to scalar single
+INST3(cvtsi2ss, "cvtsi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar single
INST3(cvtpi2pd, "cvtpi2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x2A), INS_FLAGS_None) // cvt packed DWORDs to doubles
-INST3(cvtsi2sd, "cvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), INS_FLAGS_None) // cvt DWORD to scalar double
+INST3(cvtsi2sd, "cvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar double
INST3(cvttps2pi, "cvttps2pi", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x2C), INS_FLAGS_None) // cvt with trunc packed singles to DWORDs
INST3(cvttss2si, "cvttss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), INS_FLAGS_None) // cvt with trunc scalar single to DWORD
INST3(cvttpd2pi, "cvttpd2pi", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x2C), INS_FLAGS_None) // cvt with trunc packed doubles to DWORDs
@@ -287,8 +287,8 @@ INST3(cvtpd2pi, "cvtpd2pi", IUM_WR, BAD_CODE, BAD_CODE,
INST3(cvtsd2si, "cvtsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2D), INS_FLAGS_None) // cvt scalar double to DWORD
INST3(cvtps2pd, "cvtps2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5A), INS_FLAGS_None) // cvt packed singles to doubles
INST3(cvtpd2ps, "cvtpd2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5A), INS_FLAGS_None) // cvt packed doubles to singles
-INST3(cvtss2sd, "cvtss2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5A), INS_FLAGS_None) // cvt scalar single to scalar doubles
-INST3(cvtsd2ss, "cvtsd2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5A), INS_FLAGS_None) // cvt scalar double to scalar singles
+INST3(cvtss2sd, "cvtss2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar single to scalar doubles
+INST3(cvtsd2ss, "cvtsd2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar double to scalar singles
INST3(cvtdq2ps, "cvtdq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), INS_FLAGS_None) // cvt packed DWORDs to singles
INST3(cvtps2dq, "cvtps2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5B), INS_FLAGS_None) // cvt packed singles to DWORDs
INST3(cvttps2dq, "cvttps2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5B), INS_FLAGS_None) // cvt with trunc packed singles to DWORDs
@@ -304,113 +304,113 @@ INST3(ucomisd, "ucomisd", IUM_RD, BAD_CODE, BAD_CODE,
// SSE2 packed single/double comparison operations.
// Note that these instructions not only compare but also overwrite the first source.
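
That overwrite is exactly why these get the new flag: the legacy encoding reuses the first source as the destination, while the VEX encoding names a separate destination, e.g. (operands illustrative):

    // cmpps  xmm1, xmm2, 0        — legacy SSE: the result mask overwrites xmm1
    // vcmpps xmm1, xmm2, xmm3, 0  — VEX: xmm2 and xmm3 are preserved; mask goes to xmm1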
-INST3(cmpps, "cmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), INS_FLAGS_None) // compare packed singles
-INST3(cmppd, "cmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), INS_FLAGS_None) // compare packed doubles
-INST3(cmpss, "cmpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), INS_FLAGS_None) // compare scalar singles
-INST3(cmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), INS_FLAGS_None) // compare scalar doubles
+INST3(cmpps, "cmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare packed singles
+INST3(cmppd, "cmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare packed doubles
+INST3(cmpss, "cmpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar singles
+INST3(cmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles
//SSE2 packed integer operations
-INST3(paddb, "paddb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFC), INS_FLAGS_None) // Add packed byte integers
-INST3(paddw, "paddw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFD), INS_FLAGS_None) // Add packed word (16-bit) integers
-INST3(paddd, "paddd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFE), INS_FLAGS_None) // Add packed double-word (32-bit) integers
-INST3(paddq, "paddq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD4), INS_FLAGS_None) // Add packed quad-word (64-bit) integers
-INST3(paddsb, "paddsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEC), INS_FLAGS_None) // Add packed signed byte integers and saturate the results
-INST3(paddsw, "paddsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xED), INS_FLAGS_None) // Add packed signed word integers and saturate the results
-INST3(paddusb, "paddusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDC), INS_FLAGS_None) // Add packed unsigned byte integers and saturate the results
-INST3(paddusw, "paddusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDD), INS_FLAGS_None) // Add packed unsigned word integers and saturate the results
-INST3(pavgb, "pavgb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE0), INS_FLAGS_None) // Average of packed byte integers
-INST3(pavgw, "pavgw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE3), INS_FLAGS_None) // Average of packed word integers
-INST3(psubb, "psubb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF8), INS_FLAGS_None) // Subtract packed byte (8-bit) integers
-INST3(psubw, "psubw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF9), INS_FLAGS_None) // Subtract packed word (16-bit) integers
-INST3(psubd, "psubd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFA), INS_FLAGS_None) // Subtract packed double-word (32-bit) integers
-INST3(psubq, "psubq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFB), INS_FLAGS_None) // subtract packed quad-word (64-bit) integers
-INST3(pmaddwd, "pmaddwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF5), INS_FLAGS_None) // Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst
-INST3(pmulhw, "pmulhw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE5), INS_FLAGS_None) // Multiply high the packed 16-bit signed integers
-INST3(pmulhuw, "pmulhuw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE4), INS_FLAGS_None) // Multiply high the packed 16-bit unsigned integers
-INST3(pmuludq, "pmuludq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF4), INS_FLAGS_None) // packed multiply 32-bit unsigned integers and store 64-bit result
-INST3(pmullw, "pmullw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD5), INS_FLAGS_None) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result
-INST3(pand, "pand", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_FLAGS_None) // Packed bit-wise AND of two xmm regs
-INST3(pandn, "pandn", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_FLAGS_None) // Packed bit-wise AND NOT of two xmm regs
-INST3(por, "por", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_FLAGS_None) // Packed bit-wise OR of two xmm regs
-INST3(pxor, "pxor", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_FLAGS_None) // Packed bit-wise XOR of two xmm regs
-INST3(psadbw, "psadbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF6), INS_FLAGS_None) // Compute the sum of absolute differences of packed unsigned 8-bit integers
-INST3(psubsb, "psubsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE8), INS_FLAGS_None) // Subtract packed 8-bit integers in b from packed 8-bit integers in a using saturation
-INST3(psubusb, "psubusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD8), INS_FLAGS_None) // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation
-INST3(psubsw, "psubsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE9), INS_FLAGS_None) // Subtract packed 16-bit integers in b from packed 16-bit integers in a using saturation
-INST3(psubusw, "psubusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD9), INS_FLAGS_None) // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation
+INST3(paddb, "paddb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFC), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed byte integers
+INST3(paddw, "paddw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFD), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed word (16-bit) integers
+INST3(paddd, "paddd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFE), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed double-word (32-bit) integers
+INST3(paddq, "paddq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD4), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed quad-word (64-bit) integers
+INST3(paddsb, "paddsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEC), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed byte integers and saturate the results
+INST3(paddsw, "paddsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xED), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed word integers and saturate the results
+INST3(paddusb, "paddusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDC), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned byte integers and saturate the results
+INST3(paddusw, "paddusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDD), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned word integers and saturate the results
+INST3(pavgb, "pavgb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE0), INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed byte integers
+INST3(pavgw, "pavgw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE3), INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed word integers
+INST3(psubb, "psubb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF8), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed byte (8-bit) integers
+INST3(psubw, "psubw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF9), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed word (16-bit) integers
+INST3(psubd, "psubd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFA), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed double-word (32-bit) integers
+INST3(psubq, "psubq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFB), INS_Flags_IsDstDstSrcAVXInstruction) // subtract packed quad-word (64-bit) integers
+INST3(pmaddwd, "pmaddwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst
+INST3(pmulhw, "pmulhw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE5), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit signed integers
+INST3(pmulhuw, "pmulhuw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE4), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit unsigned integers
+INST3(pmuludq, "pmuludq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF4), INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit unsigned integers and store 64-bit result
+INST3(pmullw, "pmullw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD5), INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result
+INST3(pand, "pand", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs
+INST3(pandn, "pandn", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs
+INST3(por, "por", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs
+INST3(pxor, "pxor", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs
+INST3(psadbw, "psadbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF6), INS_Flags_IsDstDstSrcAVXInstruction) // Compute the sum of absolute differences of packed unsigned 8-bit integers
+INST3(psubsb, "psubsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE8), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 8-bit integers in b from packed 8-bit integers in a using saturation
+INST3(psubusb, "psubusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD8), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation
+INST3(psubsw, "psubsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE9), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 16-bit integers in b from packed 16-bit integers in a using saturation
+INST3(psubusw, "psubusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD9), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation
// Note that the shift immediates share the same encoding between left and right-shift, and are distinguished by the Reg/Opcode,
// which is handled in emitxarch.cpp.
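
For reference, per the Intel SDM the shared immediate opcodes are distinguished by the reg field of the ModR/M byte (shown as /digit), which is why psrldq and pslldq below can both carry PCKDBL(0x73):

    // 0F 71: /2 = psrlw, /4 = psraw, /6 = psllw
    // 0F 72: /2 = psrld, /4 = psrad, /6 = pslld
    // 0F 73: /2 = psrlq, /3 = psrldq, /6 = psllq, /7 = pslldq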
-INST3(psrldq, "psrldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_FLAGS_None) // Shift right logical of xmm reg by given number of bytes
-INST3(pslldq, "pslldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_FLAGS_None) // Shift left logical of xmm reg by given number of bytes
-INST3(psllw, "psllw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xF1), INS_FLAGS_None) // Packed shift left logical of 16-bit integers
-INST3(pslld, "pslld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2), INS_FLAGS_None) // Packed shift left logical of 32-bit integers
-INST3(psllq, "psllq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3), INS_FLAGS_None) // Packed shift left logical of 64-bit integers
-INST3(psrlw, "psrlw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xD1), INS_FLAGS_None) // Packed shift right logical of 16-bit integers
-INST3(psrld, "psrld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xD2), INS_FLAGS_None) // Packed shift right logical of 32-bit integers
-INST3(psrlq, "psrlq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xD3), INS_FLAGS_None) // Packed shift right logical of 64-bit integers
-INST3(psraw, "psraw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xE1), INS_FLAGS_None) // Packed shift right arithmetic of 16-bit integers
-INST3(psrad, "psrad", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_FLAGS_None) // Packed shift right arithmetic of 32-bit integers
-
-INST3(pmaxub, "pmaxub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDE), INS_FLAGS_None) // packed maximum unsigned bytes
-INST3(pminub, "pminub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDA), INS_FLAGS_None) // packed minimum unsigned bytes
-INST3(pmaxsw, "pmaxsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEE), INS_FLAGS_None) // packed maximum signed words
-INST3(pminsw, "pminsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEA), INS_FLAGS_None) // packed minimum signed words
-INST3(pcmpeqd, "pcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), INS_FLAGS_None) // Packed compare 32-bit integers for equality
-INST3(pcmpgtd, "pcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), INS_FLAGS_None) // Packed compare 32-bit signed integers for greater than
-INST3(pcmpeqw, "pcmpeqw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), INS_FLAGS_None) // Packed compare 16-bit integers for equality
-INST3(pcmpgtw, "pcmpgtw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), INS_FLAGS_None) // Packed compare 16-bit signed integers for greater than
-INST3(pcmpeqb, "pcmpeqb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), INS_FLAGS_None) // Packed compare 8-bit integers for equality
-INST3(pcmpgtb, "pcmpgtb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), INS_FLAGS_None) // Packed compare 8-bit signed integers for greater than
+INST3(psrldq, "psrldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_Flags_IsDstDstSrcAVXInstruction) // Shift right logical of xmm reg by given number of bytes
+INST3(pslldq, "pslldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_Flags_IsDstDstSrcAVXInstruction) // Shift left logical of xmm reg by given number of bytes
+INST3(psllw, "psllw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xF1), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 16-bit integers
+INST3(pslld, "pslld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 32-bit integers
+INST3(psllq, "psllq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 64-bit integers
+INST3(psrlw, "psrlw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xD1), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 16-bit integers
+INST3(psrld, "psrld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xD2), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 32-bit integers
+INST3(psrlq, "psrlq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xD3), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 64-bit integers
+INST3(psraw, "psraw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xE1), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 16-bit integers
+INST3(psrad, "psrad", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 32-bit integers
+
+INST3(pmaxub, "pmaxub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDE), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum unsigned bytes
+INST3(pminub, "pminub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDA), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum unsigned bytes
+INST3(pmaxsw, "pmaxsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEE), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed words
+INST3(pminsw, "pminsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEA), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed words
+INST3(pcmpeqd, "pcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit integers for equality
+INST3(pcmpgtd, "pcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit signed integers for greater than
+INST3(pcmpeqw, "pcmpeqw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit integers for equality
+INST3(pcmpgtw, "pcmpgtw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit signed integers for greater than
+INST3(pcmpeqb, "pcmpeqb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit integers for equality
+INST3(pcmpgtb, "pcmpgtb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit signed integers for greater than
INST3(pshufd, "pshufd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x70), INS_FLAGS_None) // Packed shuffle of 32-bit integers
INST3(pshufhw, "pshufhw", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x70), INS_FLAGS_None) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
INST3(pshuflw, "pshuflw", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x70), INS_FLAGS_None) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
INST3(pextrw, "pextrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC5), INS_FLAGS_None) // Extract 16-bit value into a r32 with zero extended to 32-bits
-INST3(pinsrw, "pinsrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC4), INS_FLAGS_None) // Insert word at index
-
-INST3(punpckhbw, "punpckhbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x68), INS_FLAGS_None) // Packed logical (unsigned) widen ubyte to ushort (hi)
-INST3(punpcklbw, "punpcklbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x60), INS_FLAGS_None) // Packed logical (unsigned) widen ubyte to ushort (lo)
-INST3(punpckhqdq, "punpckhqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6D), INS_FLAGS_None) // Packed logical (unsigned) widen uint to ulong (hi)
-INST3(punpcklqdq, "punpcklqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6C), INS_FLAGS_None) // Packed logical (unsigned) widen uint to ulong (lo)
-INST3(punpckhwd, "punpckhwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x69), INS_FLAGS_None) // Packed logical (unsigned) widen ushort to uint (hi)
-INST3(punpcklwd, "punpcklwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x61), INS_FLAGS_None) // Packed logical (unsigned) widen ushort to uint (lo)
-INST3(unpckhpd, "unpckhpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x15), INS_FLAGS_None) // Interleave high packed doubles
-INST3(unpcklpd, "unpcklpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x14), INS_FLAGS_None) // Interleave low packed doubles
-
-INST3(packssdw, "packssdw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6B), INS_FLAGS_None) // Pack (narrow) int to short with saturation
-INST3(packsswb, "packsswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x63), INS_FLAGS_None) // Pack (narrow) short to byte with saturation
-INST3(packuswb, "packuswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x67), INS_FLAGS_None) // Pack (narrow) short to unsigned byte with saturation
+INST3(pinsrw, "pinsrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC4), INS_Flags_IsDstDstSrcAVXInstruction) // Insert word at index
+
+INST3(punpckhbw, "punpckhbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x68), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi)
+INST3(punpcklbw, "punpcklbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x60), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (lo)
+INST3(punpckhqdq, "punpckhqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6D), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (hi)
+INST3(punpcklqdq, "punpcklqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6C), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (lo)
+INST3(punpckhwd, "punpckhwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x69), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (hi)
+INST3(punpcklwd, "punpcklwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x61), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (lo)
+INST3(unpckhpd, "unpckhpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x15), INS_Flags_IsDstDstSrcAVXInstruction) // Interleave high packed doubles
+INST3(unpcklpd, "unpcklpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x14), INS_Flags_IsDstDstSrcAVXInstruction) // Interleave low packed doubles
+
+INST3(packssdw, "packssdw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6B), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to short with saturation
+INST3(packsswb, "packsswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x63), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to byte with saturation
+INST3(packuswb, "packuswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x67), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to unsigned byte with saturation
INST3(LAST_SSE2_INSTRUCTION, "LAST_SSE2_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
INST3(FIRST_SSE4_INSTRUCTION, "FIRST_SSE4_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
// id nm um mr mi rm flags
-INST3(dpps, "dpps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x40), INS_FLAGS_None) // Packed dot product of two float vector regs
-INST3(dppd, "dppd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x41), INS_FLAGS_None) // Packed dot product of two double vector regs
-INST3(insertps, "insertps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x21), INS_FLAGS_None) // Insert packed single precision float value
-INST3(pcmpeqq, "pcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), INS_FLAGS_None) // Packed compare 64-bit integers for equality
-INST3(pcmpgtq, "pcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), INS_FLAGS_None) // Packed compare 64-bit signed integers for greater than
-INST3(pmulld, "pmulld", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), INS_FLAGS_None) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result
+INST3(dpps, "dpps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x40), INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two float vector regs
+INST3(dppd, "dppd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x41), INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two double vector regs
+INST3(insertps, "insertps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x21), INS_Flags_IsDstDstSrcAVXInstruction) // Insert packed single precision float value
+INST3(pcmpeqq, "pcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality
+INST3(pcmpgtq, "pcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit signed integers for greater than
+INST3(pmulld, "pmulld", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result
INST3(ptest, "ptest", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x17), INS_FLAGS_None) // Packed logical compare
-INST3(phaddd, "phaddd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x02), INS_FLAGS_None) // Packed horizontal add
+INST3(phaddd, "phaddd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x02), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add
INST3(pabsb, "pabsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1C), INS_FLAGS_None) // Packed absolute value of bytes
INST3(pabsw, "pabsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1D), INS_FLAGS_None) // Packed absolute value of 16-bit integers
INST3(pabsd, "pabsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1E), INS_FLAGS_None) // Packed absolute value of 32-bit integers
-INST3(palignr, "palignr", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0F), INS_FLAGS_None) // Packed Align Right
-INST3(pmaddubsw, "pmaddubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x04), INS_FLAGS_None) // Multiply and Add Packed Signed and Unsigned Bytes
-INST3(pmulhrsw, "pmulhrsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0B), INS_FLAGS_None) // Packed Multiply High with Round and Scale
-INST3(pshufb, "pshufb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x00), INS_FLAGS_None) // Packed Shuffle Bytes
-INST3(psignb, "psignb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x08), INS_FLAGS_None) // Packed SIGN
-INST3(psignw, "psignw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x09), INS_FLAGS_None) // Packed SIGN
-INST3(psignd, "psignd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0A), INS_FLAGS_None) // Packed SIGN
-INST3(pminsb, "pminsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x38), INS_FLAGS_None) // packed minimum signed bytes
-INST3(pminsd, "pminsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), INS_FLAGS_None) // packed minimum 32-bit signed integers
-INST3(pminuw, "pminuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3A), INS_FLAGS_None) // packed minimum 16-bit unsigned integers
-INST3(pminud, "pminud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), INS_FLAGS_None) // packed minimum 32-bit unsigned integers
-INST3(pmaxsb, "pmaxsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3C), INS_FLAGS_None) // packed maximum signed bytes
-INST3(pmaxsd, "pmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), INS_FLAGS_None) // packed maximum 32-bit signed integers
-INST3(pmaxuw, "pmaxuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3E), INS_FLAGS_None) // packed maximum 16-bit unsigned integers
-INST3(pmaxud, "pmaxud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), INS_FLAGS_None) // packed maximum 32-bit unsigned integers
+INST3(palignr, "palignr", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0F), INS_Flags_IsDstDstSrcAVXInstruction) // Packed Align Right
+INST3(pmaddubsw, "pmaddubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x04), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Packed Signed and Unsigned Bytes
+INST3(pmulhrsw, "pmulhrsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0B), INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply High with Round and Scale
+INST3(pshufb, "pshufb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x00), INS_Flags_IsDstDstSrcAVXInstruction) // Packed Shuffle Bytes
+INST3(psignb, "psignb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x08), INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN
+INST3(psignw, "psignw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x09), INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN
+INST3(psignd, "psignd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0A), INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN
+INST3(pminsb, "pminsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x38), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed bytes
+INST3(pminsd, "pminsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit signed integers
+INST3(pminuw, "pminuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3A), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 16-bit unsigned integers
+INST3(pminud, "pminud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit unsigned integers
+INST3(pmaxsb, "pmaxsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3C), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed bytes
+INST3(pmaxsd, "pmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit signed integers
+INST3(pmaxuw, "pmaxuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3E), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 16-bit unsigned integers
+INST3(pmaxud, "pmaxud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit unsigned integers
INST3(pmovsxbw, "pmovsxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x20), INS_FLAGS_None) // Packed sign extend byte to short
INST3(pmovsxbd, "pmovsxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x21), INS_FLAGS_None) // Packed sign extend byte to int
INST3(pmovsxbq, "pmovsxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x22), INS_FLAGS_None) // Packed sign extend byte to long
@@ -423,33 +423,33 @@ INST3(pmovzxbq, "pmovzxbq", IUM_WR, BAD_CODE, BAD_CODE,
INST3(pmovzxwd, "pmovzxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x33), INS_FLAGS_None) // Packed zero extend short to int
INST3(pmovzxwq, "pmovzxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x34), INS_FLAGS_None) // Packed zero extend short to long
INST3(pmovzxdq, "pmovzxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x35), INS_FLAGS_None) // Packed zero extend int to long
-INST3(packusdw, "packusdw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2B), INS_FLAGS_None) // Pack (narrow) int to unsigned short with saturation
+INST3(packusdw, "packusdw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2B), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to unsigned short with saturation
INST3(roundps, "roundps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), INS_FLAGS_None) // Round packed single precision floating-point values
INST3(roundss, "roundss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), INS_FLAGS_None) // Round scalar single precision floating-point values
INST3(roundpd, "roundpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), INS_FLAGS_None) // Round packed double precision floating-point values
INST3(roundsd, "roundsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), INS_FLAGS_None) // Round scalar double precision floating-point values
-INST3(pmuldq, "pmuldq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x28), INS_FLAGS_None) // packed multiply 32-bit signed integers and store 64-bit result
-INST3(blendps, "blendps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0C), INS_FLAGS_None) // Blend Packed Single Precision Floating-Point Values
+INST3(pmuldq, "pmuldq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x28), INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit signed integers and store 64-bit result
+INST3(blendps, "blendps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0C), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Single Precision Floating-Point Values
INST3(blendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_FLAGS_None) // Variable Blend Packed Singles
-INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_FLAGS_None) // Blend Packed Double Precision Floating-Point Values
+INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values
INST3(blendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_FLAGS_None) // Variable Blend Packed Doubles
-INST3(pblendw, "pblendw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0E), INS_FLAGS_None) // Blend Packed Words
+INST3(pblendw, "pblendw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0E), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Words
INST3(pblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), INS_FLAGS_None) // Variable Blend Packed Bytes
-INST3(phaddw, "phaddw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x01), INS_FLAGS_None) // Packed horizontal add of 16-bit integers
-INST3(phsubw, "phsubw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x05), INS_FLAGS_None) // Packed horizontal subtract of 16-bit integers
-INST3(phsubd, "phsubd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x06), INS_FLAGS_None) // Packed horizontal subtract of 32-bit integers
-INST3(phaddsw, "phaddsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x03), INS_FLAGS_None) // Packed horizontal add of 16-bit integers with saturation
-INST3(phsubsw, "phsubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x07), INS_FLAGS_None) // Packed horizontal subtract of 16-bit integers with saturation
+INST3(phaddw, "phaddw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x01), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers
+INST3(phsubw, "phsubw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x05), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers
+INST3(phsubd, "phsubd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x06), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 32-bit integers
+INST3(phaddsw, "phaddsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x03), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers with saturation
+INST3(phsubsw, "phsubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x07), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers with saturation
INST3(lddqu, "lddqu", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xF0), INS_FLAGS_None) // Load Unaligned integer
INST3(movntdqa, "movntdqa", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2A), INS_FLAGS_None) // Load Double Quadword Non-Temporal Aligned Hint
INST3(movddup, "movddup", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x12), INS_FLAGS_None) // Replicate Double FP Values
INST3(movsldup, "movsldup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x12), INS_FLAGS_None) // Replicate even-indexed Single FP Values
INST3(movshdup, "movshdup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x16), INS_FLAGS_None) // Replicate odd-indexed Single FP Values
INST3(phminposuw, "phminposuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x41), INS_FLAGS_None) // Packed Horizontal Word Minimum
-INST3(mpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), INS_FLAGS_None) // Compute Multiple Packed Sums of Absolute Difference
-INST3(pinsrb, "pinsrb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x20), INS_FLAGS_None) // Insert Byte
-INST3(pinsrd, "pinsrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_FLAGS_None) // Insert Dword
-INST3(pinsrq, "pinsrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_FLAGS_None) // Insert Qword
+INST3(mpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference
+INST3(pinsrb, "pinsrb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x20), INS_Flags_IsDstDstSrcAVXInstruction) // Insert Byte
+INST3(pinsrd, "pinsrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_Flags_IsDstDstSrcAVXInstruction) // Insert Dword
+INST3(pinsrq, "pinsrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_Flags_IsDstDstSrcAVXInstruction) // Insert Qword
INST3(pextrb, "pextrb", IUM_WR, SSE3A(0x14), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Byte
INST3(pextrd, "pextrd", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Dword
INST3(pextrq, "pextrq", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Qword
@@ -457,10 +457,10 @@ INST3(pextrw_sse41, "pextrw", IUM_WR, SSE3A(0x15), BAD_CODE,
INST3(extractps, "extractps", IUM_WR, SSE3A(0x17), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Packed Floating-Point Values
//AES instructions
-INST3(aesdec, "aesdec", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDE), INS_FLAGS_None) // Perform one round of an AES decryption flow
-INST3(aesdeclast, "aesdeclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDF), INS_FLAGS_None) // Perform last round of an AES decryption flow
-INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDC), INS_FLAGS_None) // Perform one round of an AES encryption flow
-INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_FLAGS_None) // Perform last round of an AES encryption flow
+INST3(aesdec, "aesdec", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDE), INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES decryption flow
+INST3(aesdeclast, "aesdeclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDF), INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES decryption flow
+INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDC), INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES encryption flow
+INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow
INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_FLAGS_None) // Perform the AES InvMixColumn Transformation
INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_FLAGS_None) // AES Round Key Generation Assist
INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
@@ -475,108 +475,108 @@ INST3(vpbroadcastd, "pbroadcastd", IUM_WR, BAD_CODE, BAD_CODE,
INST3(vpbroadcastq, "pbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), INS_FLAGS_None) // Broadcast int64 value from reg/memory to entire ymm register
INST3(vextractf128, "extractf128", IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract 128-bit packed floating point values
INST3(vextracti128, "extracti128", IUM_WR, SSE3A(0x39), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract 128-bit packed integer values
-INST3(vinsertf128, "insertf128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), INS_FLAGS_None) // Insert 128-bit packed floating point values
-INST3(vinserti128, "inserti128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), INS_FLAGS_None) // Insert 128-bit packed integer values
+INST3(vinsertf128, "insertf128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed floating point values
+INST3(vinserti128, "inserti128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed integer values
INST3(vzeroupper, "zeroupper", IUM_WR, 0xC577F8, BAD_CODE, BAD_CODE, INS_FLAGS_None) // Zero upper 128 bits of all YMM regs (includes 2-byte fixed VEX prefix)
-INST3(vperm2i128, "perm2i128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x46), INS_FLAGS_None) // Permute 128-bit halves of input register
+INST3(vperm2i128, "perm2i128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x46), INS_Flags_IsDstDstSrcAVXInstruction) // Permute 128-bit halves of input register
INST3(vpermq, "permq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x00), INS_FLAGS_None) // Permute 64-bit elements of input register
-INST3(vpblendd, "pblendd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x02), INS_FLAGS_None) // Blend Packed DWORDs
-INST3(vblendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4A), INS_FLAGS_None) // Variable Blend Packed Singles
-INST3(vblendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4B), INS_FLAGS_None) // Variable Blend Packed Doubles
-INST3(vpblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4C), INS_FLAGS_None) // Variable Blend Packed Bytes
+INST3(vpblendd, "pblendd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x02), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed DWORDs
+INST3(vblendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4A), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Singles
+INST3(vblendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4B), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Doubles
+INST3(vpblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4C), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Bytes
INST3(vtestps, "testps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0E), INS_FLAGS_None) // Packed Bit Test
INST3(vtestpd, "testpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0F), INS_FLAGS_None) // Packed Bit Test
-INST3(vpsrlvd, "psrlvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_FLAGS_None) // Variable Bit Shift Right Logical
-INST3(vpsrlvq, "psrlvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_FLAGS_None) // Variable Bit Shift Right Logical
-INST3(vpsravd, "psravd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), INS_FLAGS_None) // Variable Bit Shift Right Arithmetic
-INST3(vpsllvd, "psllvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_FLAGS_None) // Variable Bit Shift Left Logical
-INST3(vpsllvq, "psllvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_FLAGS_None) // Variable Bit Shift Left Logical
+INST3(vpsrlvd, "psrlvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical
+INST3(vpsrlvq, "psrlvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical
+INST3(vpsravd, "psravd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic
+INST3(vpsllvd, "psllvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical
+INST3(vpsllvq, "psllvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical
INST3(vpermilps, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x04), INS_FLAGS_None) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
INST3(vpermilpd, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x05), INS_FLAGS_None) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
-INST3(vpermilpsvar, "permilpsvar", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0C), INS_FLAGS_None) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
-INST3(vpermilpdvar, "permilpdvar", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), INS_FLAGS_None) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
-INST3(vperm2f128, "perm2f128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x06), INS_FLAGS_None) // Permute Floating-Point Values
+INST3(vpermilpsvar, "permilpsvar", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0C), INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
+INST3(vpermilpdvar, "permilpdvar", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
+INST3(vperm2f128, "perm2f128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x06), INS_Flags_IsDstDstSrcAVXInstruction) // Permute Floating-Point Values
INST3(vbroadcastf128, "broadcastf128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), INS_FLAGS_None) // Broadcast packed float values read from memory to entire ymm register
INST3(vbroadcasti128, "broadcasti128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), INS_FLAGS_None) // Broadcast packed integer values read from memory to entire ymm register
-INST3(vmaskmovps, "maskmovps", IUM_WR, SSE38(0x2E), BAD_CODE, SSE38(0x2C), INS_FLAGS_None) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores
-INST3(vmaskmovpd, "maskmovpd", IUM_WR, SSE38(0x2F), BAD_CODE, SSE38(0x2D), INS_FLAGS_None) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores
-INST3(vpmaskmovd, "pmaskmovd", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_FLAGS_None) // Conditional SIMD Integer Packed Dword Loads and Stores
-INST3(vpmaskmovq, "pmaskmovq", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_FLAGS_None) // Conditional SIMD Integer Packed Qword Loads and Stores
+INST3(vmaskmovps, "maskmovps", IUM_WR, SSE38(0x2E), BAD_CODE, SSE38(0x2C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores
+INST3(vmaskmovpd, "maskmovpd", IUM_WR, SSE38(0x2F), BAD_CODE, SSE38(0x2D), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores
+INST3(vpmaskmovd, "pmaskmovd", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Dword Loads and Stores
+INST3(vpmaskmovq, "pmaskmovq", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Qword Loads and Stores
INST3(FIRST_FMA_INSTRUCTION, "FIRST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
// id nm um mr mi rm flags
-INST3(vfmadd132pd, "fmadd132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x98), INS_FLAGS_None) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values
-INST3(vfmadd213pd, "fmadd213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_FLAGS_None) //
-INST3(vfmadd231pd, "fmadd231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_FLAGS_None) //
-INST3(vfmadd132ps, "fmadd132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x98), INS_FLAGS_None) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values
-INST3(vfmadd213ps, "fmadd213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_FLAGS_None) //
-INST3(vfmadd231ps, "fmadd231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_FLAGS_None) //
-INST3(vfmadd132sd, "fmadd132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x99), INS_FLAGS_None) // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values
-INST3(vfmadd213sd, "fmadd213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_FLAGS_None) //
-INST3(vfmadd231sd, "fmadd231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_FLAGS_None) //
-INST3(vfmadd132ss, "fmadd132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x99), INS_FLAGS_None) // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values
-INST3(vfmadd213ss, "fmadd213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_FLAGS_None) //
-INST3(vfmadd231ss, "fmadd231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_FLAGS_None) //
-INST3(vfmaddsub132pd, "fmaddsub132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x96), INS_FLAGS_None) // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values
-INST3(vfmaddsub213pd, "fmaddsub213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_FLAGS_None) //
-INST3(vfmaddsub231pd, "fmaddsub231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_FLAGS_None) //
-INST3(vfmaddsub132ps, "fmaddsub132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x96), INS_FLAGS_None) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values
-INST3(vfmaddsub213ps, "fmaddsub213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_FLAGS_None) //
-INST3(vfmaddsub231ps, "fmaddsub231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_FLAGS_None) //
-INST3(vfmsubadd132pd, "fmsubadd132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x97), INS_FLAGS_None) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values
-INST3(vfmsubadd213pd, "fmsubadd213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_FLAGS_None) //
-INST3(vfmsubadd231pd, "fmsubadd231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_FLAGS_None) //
-INST3(vfmsubadd132ps, "fmsubadd132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x97), INS_FLAGS_None) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values
-INST3(vfmsubadd213ps, "fmsubadd213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_FLAGS_None) //
-INST3(vfmsubadd231ps, "fmsubadd231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_FLAGS_None) //
-INST3(vfmsub132pd, "fmsub132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_FLAGS_None) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values
-INST3(vfmsub213pd, "fmsub213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_FLAGS_None) //
-INST3(vfmsub231pd, "fmsub231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_FLAGS_None) //
-INST3(vfmsub132ps, "fmsub132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_FLAGS_None) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values
-INST3(vfmsub213ps, "fmsub213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_FLAGS_None) //
-INST3(vfmsub231ps, "fmsub231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_FLAGS_None) //
-INST3(vfmsub132sd, "fmsub132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_FLAGS_None) // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values
-INST3(vfmsub213sd, "fmsub213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_FLAGS_None) //
-INST3(vfmsub231sd, "fmsub231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_FLAGS_None) //
-INST3(vfmsub132ss, "fmsub132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_FLAGS_None) // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values
-INST3(vfmsub213ss, "fmsub213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_FLAGS_None) //
-INST3(vfmsub231ss, "fmsub231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_FLAGS_None) //
-INST3(vfnmadd132pd, "fnmadd132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_FLAGS_None) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values
-INST3(vfnmadd213pd, "fnmadd213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_FLAGS_None) //
-INST3(vfnmadd231pd, "fnmadd231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_FLAGS_None) //
-INST3(vfnmadd132ps, "fnmadd132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_FLAGS_None) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values
-INST3(vfnmadd213ps, "fnmadd213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_FLAGS_None) //
-INST3(vfnmadd231ps, "fnmadd231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_FLAGS_None) //
-INST3(vfnmadd132sd, "fnmadd132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_FLAGS_None) // Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values
-INST3(vfnmadd213sd, "fnmadd213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_FLAGS_None) //
-INST3(vfnmadd231sd, "fnmadd231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_FLAGS_None) //
-INST3(vfnmadd132ss, "fnmadd132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_FLAGS_None) // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values
-INST3(vfnmadd213ss, "fnmadd213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_FLAGS_None) //
-INST3(vfnmadd231ss, "fnmadd231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_FLAGS_None) //
-INST3(vfnmsub132pd, "fnmsub132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_FLAGS_None) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values
-INST3(vfnmsub213pd, "fnmsub213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_FLAGS_None) //
-INST3(vfnmsub231pd, "fnmsub231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_FLAGS_None) //
-INST3(vfnmsub132ps, "fnmsub132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_FLAGS_None) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values
-INST3(vfnmsub213ps, "fnmsub213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_FLAGS_None) //
-INST3(vfnmsub231ps, "fnmsub231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_FLAGS_None) //
-INST3(vfnmsub132sd, "fnmsub132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_FLAGS_None) // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values
-INST3(vfnmsub213sd, "fnmsub213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_FLAGS_None) //
-INST3(vfnmsub231sd, "fnmsub231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_FLAGS_None) //
-INST3(vfnmsub132ss, "fnmsub132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_FLAGS_None) // Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values
-INST3(vfnmsub213ss, "fnmsub213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_FLAGS_None) //
-INST3(vfnmsub231ss, "fnmsub231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_FLAGS_None) //
+INST3(vfmadd132pd, "fmadd132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x98), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values
+INST3(vfmadd213pd, "fmadd213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmadd231pd, "fmadd231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmadd132ps, "fmadd132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x98), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values
+INST3(vfmadd213ps, "fmadd213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmadd231ps, "fmadd231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmadd132sd, "fmadd132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x99), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values
+INST3(vfmadd213sd, "fmadd213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmadd231sd, "fmadd231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmadd132ss, "fmadd132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x99), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values
+INST3(vfmadd213ss, "fmadd213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmadd231ss, "fmadd231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmaddsub132pd, "fmaddsub132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x96), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values
+INST3(vfmaddsub213pd, "fmaddsub213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmaddsub231pd, "fmaddsub231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmaddsub132ps, "fmaddsub132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x96), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values
+INST3(vfmaddsub213ps, "fmaddsub213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmaddsub231ps, "fmaddsub231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmsubadd132pd, "fmsubadd132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x97), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values
+INST3(vfmsubadd213pd, "fmsubadd213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmsubadd231pd, "fmsubadd231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmsubadd132ps, "fmsubadd132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x97), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values
+INST3(vfmsubadd213ps, "fmsubadd213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmsubadd231ps, "fmsubadd231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmsub132pd, "fmsub132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values
+INST3(vfmsub213pd, "fmsub213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmsub231pd, "fmsub231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmsub132ps, "fmsub132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values
+INST3(vfmsub213ps, "fmsub213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmsub231ps, "fmsub231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmsub132sd, "fmsub132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values
+INST3(vfmsub213sd, "fmsub213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmsub231sd, "fmsub231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmsub132ss, "fmsub132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values
+INST3(vfmsub213ss, "fmsub213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfmsub231ss, "fmsub231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmadd132pd, "fnmadd132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values
+INST3(vfnmadd213pd, "fnmadd213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmadd231pd, "fnmadd231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmadd132ps, "fnmadd132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values
+INST3(vfnmadd213ps, "fnmadd213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmadd231ps, "fnmadd231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmadd132sd, "fnmadd132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values
+INST3(vfnmadd213sd, "fnmadd213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmadd231sd, "fnmadd231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmadd132ss, "fnmadd132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values
+INST3(vfnmadd213ss, "fnmadd213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmadd231ss, "fnmadd231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmsub132pd, "fnmsub132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values
+INST3(vfnmsub213pd, "fnmsub213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmsub231pd, "fnmsub231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmsub132ps, "fnmsub132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values
+INST3(vfnmsub213ps, "fnmsub213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmsub231ps, "fnmsub231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmsub132sd, "fnmsub132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values
+INST3(vfnmsub213sd, "fnmsub213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmsub231sd, "fnmsub231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmsub132ss, "fnmsub132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values
+INST3(vfnmsub213ss, "fnmsub213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_Flags_IsDstDstSrcAVXInstruction) //
+INST3(vfnmsub231ss, "fnmsub231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
// BMI1
INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
-INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_FLAGS_None) // Logical AND NOT
-INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_FLAGS_None) // Extract Lowest Set Isolated Bit
-INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_FLAGS_None) // Get Mask Up to Lowest Set Bit
-INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_FLAGS_None) // Reset Lowest Set Bit
+INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
+INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit
+INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Get Mask Up to Lowest Set Bit
+INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit
// BMI2
-INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_FLAGS_None) // Parallel Bits Deposit
-INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_FLAGS_None) // Parallel Bits Extract
+INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit
+INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract
INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
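
Taken together with the emitxarch.cpp hunk, the net effect is that the per-instruction switch becomes a bit test against the flags column of this table. Below is a minimal self-contained sketch of the mechanism; the table name, the flag's bit value, and the three inlined rows are illustrative assumptions (the real table is generated by including instrsxarch.h itself), not the exact coreclr definitions:

    #include <cstdint>

    // Stand-ins for the JIT's enums; names and values here are illustrative.
    enum instruction : uint32_t { INS_addps, INS_pextrb, INS_vfmadd132pd };

    enum insFlags : uint32_t
    {
        INS_FLAGS_None                      = 0x0,
        INS_Flags_IsDstDstSrcAVXInstruction = 0x1, // bit value assumed
    };

    // instrsxarch.h is an X-macro file: each consumer defines INST3 before
    // including it. Keeping only the final (flags) column yields a lookup
    // table; the unused macro arguments are discarded unevaluated.
    #define INST3(id, nm, um, mr, mi, rm, flags) flags,
    static const uint32_t insFlagsTable[] =
    {
        INST3(addps,       "addps",      IUM_WR, BAD, BAD, RM,  INS_Flags_IsDstDstSrcAVXInstruction)
        INST3(pextrb,      "pextrb",     IUM_WR, MR,  BAD, BAD, INS_FLAGS_None)
        INST3(vfmadd132pd, "fmadd132pd", IUM_WR, BAD, BAD, RM,  INS_Flags_IsDstDstSrcAVXInstruction)
    };
    #undef INST3

    // The long per-instruction switch reduces to one table load and bit test:
    static bool IsDstDstSrcAVXInstruction(instruction ins)
    {
        return (insFlagsTable[ins] & INS_Flags_IsDstDstSrcAVXInstruction) != 0;
    }

    int main()
    {
        // vfmadd132pd carries the flag; pextrb does not.
        return (IsDstDstSrcAVXInstruction(INS_vfmadd132pd) &&
                !IsDstDstSrcAVXInstruction(INS_pextrb)) ? 0 : 1;
    }

Keeping the property in the flags column of the instruction table, next to each definition, avoids a parallel switch that must be updated by hand whenever an instruction is added. The real check is presumably still gated on AVX support as well (cf. IsAVXInstruction earlier in emitxarch.cpp); this sketch omits that.
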