summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author	Tanner Gooding <tagoo@outlook.com>	2018-08-23 16:18:33 -0700
committer	Tanner Gooding <tagoo@outlook.com>	2018-08-27 19:53:53 -0700
commit	9e18844d8a257561189ffda7ebdf3a21ba061fcd (patch)
tree	b3f74217989915bd6ce51d200e396e18ce489f6a /src
parent	6975ff37ea866b4a9dc913f5d72d31b10fb40da9 (diff)
download	coreclr-9e18844d8a257561189ffda7ebdf3a21ba061fcd.tar.gz
	coreclr-9e18844d8a257561189ffda7ebdf3a21ba061fcd.tar.bz2
	coreclr-9e18844d8a257561189ffda7ebdf3a21ba061fcd.zip
Merging the instrsxarch fp, rf, and wf parameters into a single flags parameter
Diffstat (limited to 'src')
-rw-r--r--	src/jit/codegeninterface.h	|    6
-rw-r--r--	src/jit/emitxarch.cpp	|  103
-rw-r--r--	src/jit/instr.cpp	|   16
-rw-r--r--	src/jit/instr.h	|   35
-rw-r--r--	src/jit/instrsxarch.h	| 1166
5 files changed, 669 insertions, 657 deletions
diff --git a/src/jit/codegeninterface.h b/src/jit/codegeninterface.h
index 72beee4a76..9c941bf665 100644
--- a/src/jit/codegeninterface.h
+++ b/src/jit/codegeninterface.h
@@ -98,7 +98,13 @@ protected:
bool m_genAlignLoops;
private:
+#if defined(_TARGET_XARCH_)
+ static const insFlags instInfo[INS_count];
+#elif defined(_TARGET_ARM_) || defined(_TARGET_ARM64_)
static const BYTE instInfo[INS_count];
+#else
+#error Unsupported target architecture
+#endif
#define INST_FP 0x01 // is it a FP instruction?
public:
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index b879571ffe..1b1af430c6 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -1196,18 +1196,15 @@ inline ssize_t emitter::emitGetInsCIdisp(instrDesc* id)
* The following table is used by the instIsFP()/instUse/DefFlags() helpers.
*/
-#define INST_DEF_FL 0x20 // does the instruction set flags?
-#define INST_USE_FL 0x40 // does the instruction use flags?
-
// clang-format off
-const BYTE CodeGenInterface::instInfo[] =
-{
- #define INST0(id, nm, fp, um, rf, wf, mr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
- #define INST1(id, nm, fp, um, rf, wf, mr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
- #define INST2(id, nm, fp, um, rf, wf, mr, mi ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
- #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
- #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
- #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
+const insFlags CodeGenInterface::instInfo[] =
+{
+ #define INST0(id, nm, um, mr, flags) static_cast<insFlags>(flags),
+ #define INST1(id, nm, um, mr, flags) static_cast<insFlags>(flags),
+ #define INST2(id, nm, um, mr, mi, flags) static_cast<insFlags>(flags),
+ #define INST3(id, nm, um, mr, mi, rm, flags) static_cast<insFlags>(flags),
+ #define INST4(id, nm, um, mr, mi, rm, a4, flags) static_cast<insFlags>(flags),
+ #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) static_cast<insFlags>(flags),
#include "instrs.h"
#undef INST0
#undef INST1
@@ -1226,12 +1223,12 @@ const BYTE CodeGenInterface::instInfo[] =
// clang-format off
const BYTE emitter::emitInsModeFmtTab[] =
{
- #define INST0(id, nm, fp, um, rf, wf, mr ) um,
- #define INST1(id, nm, fp, um, rf, wf, mr ) um,
- #define INST2(id, nm, fp, um, rf, wf, mr, mi ) um,
- #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) um,
- #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) um,
- #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) um,
+ #define INST0(id, nm, um, mr, flags) um,
+ #define INST1(id, nm, um, mr, flags) um,
+ #define INST2(id, nm, um, mr, mi, flags) um,
+ #define INST3(id, nm, um, mr, mi, rm, flags) um,
+ #define INST4(id, nm, um, mr, mi, rm, a4, flags) um,
+ #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) um,
#include "instrs.h"
#undef INST0
#undef INST1
@@ -1317,12 +1314,12 @@ inline size_t insCode(instruction ins)
const static
size_t insCodes[] =
{
- #define INST0(id, nm, fp, um, rf, wf, mr ) mr,
- #define INST1(id, nm, fp, um, rf, wf, mr ) mr,
- #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mr,
- #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mr,
- #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mr,
- #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mr,
+ #define INST0(id, nm, um, mr, flags) mr,
+ #define INST1(id, nm, um, mr, flags) mr,
+ #define INST2(id, nm, um, mr, mi, flags) mr,
+ #define INST3(id, nm, um, mr, mi, rm, flags) mr,
+ #define INST4(id, nm, um, mr, mi, rm, a4, flags) mr,
+ #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mr,
#include "instrs.h"
#undef INST0
#undef INST1
@@ -1350,12 +1347,12 @@ inline size_t insCodeACC(instruction ins)
const static
size_t insCodesACC[] =
{
- #define INST0(id, nm, fp, um, rf, wf, mr )
- #define INST1(id, nm, fp, um, rf, wf, mr )
- #define INST2(id, nm, fp, um, rf, wf, mr, mi )
- #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm )
- #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) a4,
- #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) a4,
+ #define INST0(id, nm, um, mr, flags)
+ #define INST1(id, nm, um, mr, flags)
+ #define INST2(id, nm, um, mr, mi, flags)
+ #define INST3(id, nm, um, mr, mi, rm, flags)
+ #define INST4(id, nm, um, mr, mi, rm, a4, flags) a4,
+ #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) a4,
#include "instrs.h"
#undef INST0
#undef INST1
@@ -1383,12 +1380,12 @@ inline size_t insCodeRR(instruction ins)
const static
size_t insCodesRR[] =
{
- #define INST0(id, nm, fp, um, rf, wf, mr )
- #define INST1(id, nm, fp, um, rf, wf, mr )
- #define INST2(id, nm, fp, um, rf, wf, mr, mi )
- #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm )
- #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 )
- #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rr,
+ #define INST0(id, nm, um, mr, flags)
+ #define INST1(id, nm, um, mr, flags)
+ #define INST2(id, nm, um, mr, mi, flags)
+ #define INST3(id, nm, um, mr, mi, rm, flags)
+ #define INST4(id, nm, um, mr, mi, rm, a4, flags)
+ #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) rr,
#include "instrs.h"
#undef INST0
#undef INST1
@@ -1409,12 +1406,12 @@ inline size_t insCodeRR(instruction ins)
const static
size_t insCodesRM[] =
{
- #define INST0(id, nm, fp, um, rf, wf, mr )
- #define INST1(id, nm, fp, um, rf, wf, mr )
- #define INST2(id, nm, fp, um, rf, wf, mr, mi )
- #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) rm,
- #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) rm,
- #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rm,
+ #define INST0(id, nm, um, mr, flags)
+ #define INST1(id, nm, um, mr, flags)
+ #define INST2(id, nm, um, mr, mi, flags)
+ #define INST3(id, nm, um, mr, mi, rm, flags) rm,
+ #define INST4(id, nm, um, mr, mi, rm, a4, flags) rm,
+ #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) rm,
#include "instrs.h"
#undef INST0
#undef INST1
@@ -1449,12 +1446,12 @@ inline size_t insCodeRM(instruction ins)
const static
size_t insCodesMI[] =
{
- #define INST0(id, nm, fp, um, rf, wf, mr )
- #define INST1(id, nm, fp, um, rf, wf, mr )
- #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mi,
- #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mi,
- #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mi,
- #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mi,
+ #define INST0(id, nm, um, mr, flags)
+ #define INST1(id, nm, um, mr, flags)
+ #define INST2(id, nm, um, mr, mi, flags) mi,
+ #define INST3(id, nm, um, mr, mi, rm, flags) mi,
+ #define INST4(id, nm, um, mr, mi, rm, a4, flags) mi,
+ #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mi,
#include "instrs.h"
#undef INST0
#undef INST1
@@ -1489,12 +1486,12 @@ inline size_t insCodeMI(instruction ins)
const static
size_t insCodesMR[] =
{
- #define INST0(id, nm, fp, um, rf, wf, mr )
- #define INST1(id, nm, fp, um, rf, wf, mr ) mr,
- #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mr,
- #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mr,
- #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mr,
- #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mr,
+ #define INST0(id, nm, um, mr, flags)
+ #define INST1(id, nm, um, mr, flags) mr,
+ #define INST2(id, nm, um, mr, mi, flags) mr,
+ #define INST3(id, nm, um, mr, mi, rm, flags) mr,
+ #define INST4(id, nm, um, mr, mi, rm, a4, flags) mr,
+ #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mr,
#include "instrs.h"
#undef INST0
#undef INST1
diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp
index 763a4033f5..1724f82ef0 100644
--- a/src/jit/instr.cpp
+++ b/src/jit/instr.cpp
@@ -37,12 +37,12 @@ const char* CodeGen::genInsName(instruction ins)
const char * const insNames[] =
{
#if defined(_TARGET_XARCH_)
- #define INST0(id, nm, fp, um, rf, wf, mr ) nm,
- #define INST1(id, nm, fp, um, rf, wf, mr ) nm,
- #define INST2(id, nm, fp, um, rf, wf, mr, mi ) nm,
- #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) nm,
- #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) nm,
- #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr ) nm,
+ #define INST0(id, nm, um, mr, flags) nm,
+ #define INST1(id, nm, um, mr, flags) nm,
+ #define INST2(id, nm, um, mr, mi, flags) nm,
+ #define INST3(id, nm, um, mr, mi, rm, flags) nm,
+ #define INST4(id, nm, um, mr, mi, rm, a4, flags) nm,
+ #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) nm,
#include "instrs.h"
#elif defined(_TARGET_ARM_)
@@ -215,7 +215,11 @@ bool CodeGenInterface::instIsFP(instruction ins)
{
assert((unsigned)ins < _countof(instInfo));
+#ifdef _TARGET_XARCH_
+ return (instInfo[ins] & INS_FLAGS_x87Instr) != 0;
+#else
return (instInfo[ins] & INST_FP) != 0;
+#endif
}
#ifdef _TARGET_XARCH_
diff --git a/src/jit/instr.h b/src/jit/instr.h
index efa94123b0..c00a61ceb4 100644
--- a/src/jit/instr.h
+++ b/src/jit/instr.h
@@ -15,12 +15,12 @@
enum instruction : unsigned
{
#if defined(_TARGET_XARCH_)
- #define INST0(id, nm, fp, um, rf, wf, mr ) INS_##id,
- #define INST1(id, nm, fp, um, rf, wf, mr ) INS_##id,
- #define INST2(id, nm, fp, um, rf, wf, mr, mi ) INS_##id,
- #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) INS_##id,
- #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) INS_##id,
- #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) INS_##id,
+ #define INST0(id, nm, um, mr, flags) INS_##id,
+ #define INST1(id, nm, um, mr, flags) INS_##id,
+ #define INST2(id, nm, um, mr, mi, flags) INS_##id,
+ #define INST3(id, nm, um, mr, mi, rm, flags) INS_##id,
+ #define INST4(id, nm, um, mr, mi, rm, a4, flags) INS_##id,
+ #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) INS_##id,
#include "instrs.h"
#elif defined(_TARGET_ARM_)
@@ -86,13 +86,28 @@ enum GCtype : unsigned
GCT_BYREF
};
-// TODO-Cleanup: Move 'insFlags' under _TARGET_ARM_
+#if defined(_TARGET_XARCH_)
+enum insFlags: uint8_t
+{
+ INS_FLAGS_None = 0x00,
+ INS_FLAGS_ReadsFlags = 0x01,
+ INS_FLAGS_WritesFlags = 0x02,
+ INS_FLAGS_x87Instr = 0x04,
+
+ // TODO-Cleanup: Remove this flag and its usage from _TARGET_XARCH_
+ INS_FLAGS_DONT_CARE = 0x00,
+};
+#elif defined(_TARGET_ARM_) || defined(_TARGET_ARM64_)
+// TODO-Cleanup: Move 'insFlags' under _TARGET_ARM_
enum insFlags: unsigned
{
- INS_FLAGS_NOT_SET,
- INS_FLAGS_SET,
- INS_FLAGS_DONT_CARE
+ INS_FLAGS_NOT_SET = 0x00,
+ INS_FLAGS_SET = 0x01,
+ INS_FLAGS_DONT_CARE = 0x02,
};
+#else
+#error Unsupported target architecture
+#endif
#if defined(_TARGET_ARM_)
enum insOpts: unsigned
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index e46134ec25..d4278f7957 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -10,15 +10,13 @@
*
* id -- the enum name for the instruction
 * nm -- textual name (for assembly display)
- * fp -- 1 = floating point instruction, 0 = not floating point instruction
* um -- update mode, see IUM_xx enum (rd, wr, or rw)
- * rf -- 1 = reads flags, 0 = doesn't read flags
- * wf -- 1 = writes flags, 0 = doesn't write flags
* mr -- base encoding for R/M[reg] addressing mode
* mi -- base encoding for R/M,icon addressing mode
* rm -- base encoding for reg,R/M addressing mode
* a4 -- base encoding for eax,i32 addressing mode
* rr -- base encoding for register addressing mode
+ * flags -- flags, see INS_FLAGS_* enum
*
******************************************************************************/
@@ -32,111 +30,110 @@
#endif
/*****************************************************************************/
#ifndef INST0
-#define INST0(id, nm, fp, um, rf, wf, mr )
+#define INST0(id, nm, um, mr, flags)
#endif
#ifndef INST2
-#define INST2(id, nm, fp, um, rf, wf, mr, mi )
+#define INST2(id, nm, um, mr, mi, flags)
#endif
#ifndef INST3
-#define INST3(id, nm, fp, um, rf, wf, mr, mi, rm )
+#define INST3(id, nm, um, mr, mi, rm, flags)
#endif
#ifndef INST4
-#define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 )
+#define INST4(id, nm, um, mr, mi, rm, a4, flags)
#endif
#ifndef INST5
-#define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr)
+#define INST5(id, nm, um, mr, mi, rm, a4, rr, flags)
#endif
/*****************************************************************************/
/* The following is x86-specific */
/*****************************************************************************/
-// enum name FP updmode rf wf R/M[reg] R/M,icon reg,R/M eax,i32 register
-INST5(invalid, "INVALID" , 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE)
+// id nm um mr mi rm a4 rr flags
+INST5(invalid, "INVALID", IUM_RD, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
-INST5(push , "push" , 0, IUM_RD, 0, 0, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050)
-INST5(pop , "pop" , 0, IUM_WR, 0, 0, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058)
+INST5(push, "push", IUM_RD, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050, INS_FLAGS_None)
+INST5(pop, "pop", IUM_WR, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058, INS_FLAGS_None)
// Does not affect the stack tracking in the emitter
-INST5(push_hide, "push" , 0, IUM_RD, 0, 0, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050)
-INST5(pop_hide, "pop" , 0, IUM_WR, 0, 0, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058)
-
-INST5(inc , "inc" , 0, IUM_RW, 0, 1, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000040)
-INST5(inc_l , "inc" , 0, IUM_RW, 0, 1, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C0FE)
-INST5(dec , "dec" , 0, IUM_RW, 0, 1, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000048)
-INST5(dec_l , "dec" , 0, IUM_RW, 0, 1, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C8FE)
-
-// enum name FP updmode rf wf R/M,R/M[reg] R/M,icon reg,R/M eax,i32
-
-INST4(add , "add" , 0, IUM_RW, 0, 1, 0x000000, 0x000080, 0x000002, 0x000004)
-INST4(or , "or" , 0, IUM_RW, 0, 1, 0x000008, 0x000880, 0x00000A, 0x00000C)
-INST4(adc , "adc" , 0, IUM_RW, 1, 1, 0x000010, 0x001080, 0x000012, 0x000014)
-INST4(sbb , "sbb" , 0, IUM_RW, 1, 1, 0x000018, 0x001880, 0x00001A, 0x00001C)
-INST4(and , "and" , 0, IUM_RW, 0, 1, 0x000020, 0x002080, 0x000022, 0x000024)
-INST4(sub , "sub" , 0, IUM_RW, 0, 1, 0x000028, 0x002880, 0x00002A, 0x00002C)
-INST4(xor , "xor" , 0, IUM_RW, 0, 1, 0x000030, 0x003080, 0x000032, 0x000034)
-INST4(cmp , "cmp" , 0, IUM_RD, 0, 1, 0x000038, 0x003880, 0x00003A, 0x00003C)
-INST4(test , "test" , 0, IUM_RD, 0, 1, 0x000084, 0x0000F6, 0x000084, 0x0000A8)
-INST4(mov , "mov" , 0, IUM_WR, 0, 0, 0x000088, 0x0000C6, 0x00008A, 0x0000B0)
-
-INST4(lea , "lea" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x00008D, BAD_CODE)
-
-// enum name FP updmode rf wf R/M,R/M[reg] R/M,icon reg,R/M
+INST5(push_hide, "push", IUM_RD, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050, INS_FLAGS_None)
+INST5(pop_hide, "pop", IUM_WR, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058, INS_FLAGS_None)
+
+INST5(inc, "inc", IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000040, INS_FLAGS_WritesFlags)
+INST5(inc_l, "inc", IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C0FE, INS_FLAGS_WritesFlags)
+INST5(dec, "dec", IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000048, INS_FLAGS_WritesFlags)
+INST5(dec_l, "dec", IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C8FE, INS_FLAGS_WritesFlags)
+
+// id nm um mr mi rm a4 flags
+INST4(add, "add", IUM_RW, 0x000000, 0x000080, 0x000002, 0x000004, INS_FLAGS_WritesFlags)
+INST4(or, "or", IUM_RW, 0x000008, 0x000880, 0x00000A, 0x00000C, INS_FLAGS_WritesFlags)
+INST4(adc, "adc", IUM_RW, 0x000010, 0x001080, 0x000012, 0x000014, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
+INST4(sbb, "sbb", IUM_RW, 0x000018, 0x001880, 0x00001A, 0x00001C, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
+INST4(and, "and", IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, INS_FLAGS_WritesFlags)
+INST4(sub, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, INS_FLAGS_WritesFlags)
+INST4(xor, "xor", IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, INS_FLAGS_WritesFlags)
+INST4(cmp, "cmp", IUM_RD, 0x000038, 0x003880, 0x00003A, 0x00003C, INS_FLAGS_WritesFlags)
+INST4(test, "test", IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, INS_FLAGS_WritesFlags)
+INST4(mov, "mov", IUM_WR, 0x000088, 0x0000C6, 0x00008A, 0x0000B0, INS_FLAGS_None)
+
+INST4(lea, "lea", IUM_WR, BAD_CODE, BAD_CODE, 0x00008D, BAD_CODE, INS_FLAGS_None)
+
+// id nm um mr mi rm flags
// Note that emitter has only partial support for BT. It can only emit the reg,reg form
// and the registers need to be reversed to get the correct encoding.
-INST3(bt , "bt" , 0, IUM_RD, 0, 1, 0x0F00A3, BAD_CODE, 0x0F00A3)
+INST3(bt, "bt", IUM_RD, 0x0F00A3, BAD_CODE, 0x0F00A3, INS_FLAGS_WritesFlags)
-INST3(movsx , "movsx" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x0F00BE)
+INST3(movsx, "movsx", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00BE, INS_FLAGS_None)
#ifdef _TARGET_AMD64_
-INST3(movsxd , "movsxd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x4800000063LL )
+INST3(movsxd, "movsxd", IUM_WR, BAD_CODE, BAD_CODE, 0x4800000063, INS_FLAGS_None)
#endif
-INST3(movzx , "movzx" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x0F00B6)
-
-INST3(cmovo , "cmovo" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0040)
-INST3(cmovno , "cmovno" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0041)
-INST3(cmovb , "cmovb" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0042)
-INST3(cmovae , "cmovae" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0043)
-INST3(cmove , "cmove" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0044)
-INST3(cmovne , "cmovne" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0045)
-INST3(cmovbe , "cmovbe" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0046)
-INST3(cmova , "cmova" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0047)
-INST3(cmovs , "cmovs" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0048)
-INST3(cmovns , "cmovns" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0049)
-INST3(cmovpe , "cmovpe" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004A)
-INST3(cmovpo , "cmovpo" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004B)
-INST3(cmovl , "cmovl" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004C)
-INST3(cmovge , "cmovge" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004D)
-INST3(cmovle , "cmovle" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004E)
-INST3(cmovg , "cmovg" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004F)
-
-INST3(xchg , "xchg" , 0, IUM_RW, 0, 0, 0x000086, BAD_CODE, 0x000086)
-INST3(imul , "imul" , 0, IUM_RW, 0, 1, 0x0F00AC, BAD_CODE, 0x0F00AF) // op1 *= op2
-
-// enum name FP updmode rf wf R/M,R/M[reg] R/M,icon reg,R/M
+INST3(movzx, "movzx", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00B6, INS_FLAGS_None)
+
+INST3(cmovo, "cmovo", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0040, INS_FLAGS_ReadsFlags)
+INST3(cmovno, "cmovno", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0041, INS_FLAGS_ReadsFlags)
+INST3(cmovb, "cmovb", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0042, INS_FLAGS_ReadsFlags)
+INST3(cmovae, "cmovae", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, INS_FLAGS_ReadsFlags)
+INST3(cmove, "cmove", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, INS_FLAGS_ReadsFlags)
+INST3(cmovne, "cmovne", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, INS_FLAGS_ReadsFlags)
+INST3(cmovbe, "cmovbe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, INS_FLAGS_ReadsFlags)
+INST3(cmova, "cmova", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, INS_FLAGS_ReadsFlags)
+INST3(cmovs, "cmovs", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, INS_FLAGS_ReadsFlags)
+INST3(cmovns, "cmovns", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, INS_FLAGS_ReadsFlags)
+INST3(cmovpe, "cmovpe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, INS_FLAGS_ReadsFlags)
+INST3(cmovpo, "cmovpo", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B, INS_FLAGS_ReadsFlags)
+INST3(cmovl, "cmovl", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, INS_FLAGS_ReadsFlags)
+INST3(cmovge, "cmovge", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, INS_FLAGS_ReadsFlags)
+INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, INS_FLAGS_ReadsFlags)
+INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, INS_FLAGS_ReadsFlags)
+
+INST3(xchg, "xchg", IUM_RW, 0x000086, BAD_CODE, 0x000086, INS_FLAGS_None)
+INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE, 0x0F00AF, INS_FLAGS_WritesFlags) // op1 *= op2
+
+// id nm um mr mi rm flags
// Instead of encoding these as 3-operand instructions, we encode them
// as 2-operand instructions with the target register being implicit
// implicit_reg = op1*op2_icon
#define INSTMUL INST3
-INSTMUL(imul_AX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x000068, BAD_CODE)
-INSTMUL(imul_CX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x000868, BAD_CODE)
-INSTMUL(imul_DX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x001068, BAD_CODE)
-INSTMUL(imul_BX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x001868, BAD_CODE)
-INSTMUL(imul_SP, "imul", 0, IUM_RD, 0, 1, BAD_CODE, BAD_CODE, BAD_CODE)
-INSTMUL(imul_BP, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x002868, BAD_CODE)
-INSTMUL(imul_SI, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x003068, BAD_CODE)
-INSTMUL(imul_DI, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x003868, BAD_CODE)
+INSTMUL(imul_AX, "imul", IUM_RD, BAD_CODE, 0x000068, BAD_CODE, INS_FLAGS_WritesFlags)
+INSTMUL(imul_CX, "imul", IUM_RD, BAD_CODE, 0x000868, BAD_CODE, INS_FLAGS_WritesFlags)
+INSTMUL(imul_DX, "imul", IUM_RD, BAD_CODE, 0x001068, BAD_CODE, INS_FLAGS_WritesFlags)
+INSTMUL(imul_BX, "imul", IUM_RD, BAD_CODE, 0x001868, BAD_CODE, INS_FLAGS_WritesFlags)
+INSTMUL(imul_SP, "imul", IUM_RD, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_WritesFlags)
+INSTMUL(imul_BP, "imul", IUM_RD, BAD_CODE, 0x002868, BAD_CODE, INS_FLAGS_WritesFlags)
+INSTMUL(imul_SI, "imul", IUM_RD, BAD_CODE, 0x003068, BAD_CODE, INS_FLAGS_WritesFlags)
+INSTMUL(imul_DI, "imul", IUM_RD, BAD_CODE, 0x003868, BAD_CODE, INS_FLAGS_WritesFlags)
#ifdef _TARGET_AMD64_
-INSTMUL(imul_08, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400000068, BAD_CODE)
-INSTMUL(imul_09, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400000868, BAD_CODE)
-INSTMUL(imul_10, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400001068, BAD_CODE)
-INSTMUL(imul_11, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400001868, BAD_CODE)
-INSTMUL(imul_12, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400002068, BAD_CODE)
-INSTMUL(imul_13, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400002868, BAD_CODE)
-INSTMUL(imul_14, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400003068, BAD_CODE)
-INSTMUL(imul_15, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400003868, BAD_CODE)
+INSTMUL(imul_08, "imul", IUM_RD, BAD_CODE, 0x4400000068, BAD_CODE, INS_FLAGS_WritesFlags)
+INSTMUL(imul_09, "imul", IUM_RD, BAD_CODE, 0x4400000868, BAD_CODE, INS_FLAGS_WritesFlags)
+INSTMUL(imul_10, "imul", IUM_RD, BAD_CODE, 0x4400001068, BAD_CODE, INS_FLAGS_WritesFlags)
+INSTMUL(imul_11, "imul", IUM_RD, BAD_CODE, 0x4400001868, BAD_CODE, INS_FLAGS_WritesFlags)
+INSTMUL(imul_12, "imul", IUM_RD, BAD_CODE, 0x4400002068, BAD_CODE, INS_FLAGS_WritesFlags)
+INSTMUL(imul_13, "imul", IUM_RD, BAD_CODE, 0x4400002868, BAD_CODE, INS_FLAGS_WritesFlags)
+INSTMUL(imul_14, "imul", IUM_RD, BAD_CODE, 0x4400003068, BAD_CODE, INS_FLAGS_WritesFlags)
+INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868, BAD_CODE, INS_FLAGS_WritesFlags)
#endif // _TARGET_AMD64_
@@ -176,578 +173,571 @@ INSTMUL(imul_15, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400003868, BAD_CODE)
#define VEX3INT(c1,c2) PACK4(c1, 0xc5, 0x02, c2)
#define VEX3FLT(c1,c2) PACK4(c1, 0xc5, 0x02, c2)
-// Please insert any SSE2 instructions between FIRST_SSE2_INSTRUCTION and LAST_SSE2_INSTRUCTION
-INST3(FIRST_SSE2_INSTRUCTION, "FIRST_SSE2_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
-
+INST3(FIRST_SSE2_INSTRUCTION, "FIRST_SSE2_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
// These are the SSE instructions used on x86
-INST3( mov_i2xmm, "movd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6E)) // Move int reg to a xmm reg. reg1=xmm reg, reg2=int reg
-INST3( mov_xmm2i, "movd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x7E)) // Move xmm reg to an int reg. reg1=xmm reg, reg2=int reg
-INST3( pmovmskb, "pmovmskb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD7)) // Move the MSB bits of all bytes in a xmm reg to an int reg
-INST3( movmskpd, "movmskpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x50)) // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros.
-INST3( movd, "movd" , 0, IUM_WR, 0, 0, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E))
-INST3( movq, "movq" , 0, IUM_WR, 0, 0, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E))
-INST3( movsdsse2, "movsd" , 0, IUM_WR, 0, 0, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10))
-
-INST3( punpckldq, "punpckldq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x62))
-
-INST3( xorps, "xorps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x57)) // XOR packed singles
-
-INST3( cvttsd2si, "cvttsd2si" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x2C)) // cvt with trunc scalar double to signed DWORDs
-
-INST3( movntdq, "movntdq" , 0, IUM_WR, 0, 0, PCKDBL(0xE7), BAD_CODE, BAD_CODE)
-INST3( movnti, "movnti" , 0, IUM_WR, 0, 0, PCKFLT(0xC3), BAD_CODE, BAD_CODE)
-INST3( movntpd, "movntpd" , 0, IUM_WR, 0, 0, PCKDBL(0x2B), BAD_CODE, BAD_CODE)
-INST3( movntps, "movntps" , 0, IUM_WR, 0, 0, PCKFLT(0x2B), BAD_CODE, BAD_CODE)
-INST3( movdqu, "movdqu" , 0, IUM_WR, 0, 0, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F))
-INST3( movdqa, "movdqa" , 0, IUM_WR, 0, 0, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F))
-INST3( movlpd, "movlpd" , 0, IUM_WR, 0, 0, PCKDBL(0x13), BAD_CODE, PCKDBL(0x12))
-INST3( movlps, "movlps" , 0, IUM_WR, 0, 0, PCKFLT(0x13), BAD_CODE, PCKFLT(0x12))
-INST3( movhpd, "movhpd" , 0, IUM_WR, 0, 0, PCKDBL(0x17), BAD_CODE, PCKDBL(0x16))
-INST3( movhps, "movhps" , 0, IUM_WR, 0, 0, PCKFLT(0x17), BAD_CODE, PCKFLT(0x16))
-INST3( movss, "movss" , 0, IUM_WR, 0, 0, SSEFLT(0x11), BAD_CODE, SSEFLT(0x10))
-INST3( movapd, "movapd" , 0, IUM_WR, 0, 0, PCKDBL(0x29), BAD_CODE, PCKDBL(0x28))
-INST3( movaps, "movaps" , 0, IUM_WR, 0, 0, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28))
-INST3( movupd, "movupd" , 0, IUM_WR, 0, 0, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10))
-INST3( movups, "movups" , 0, IUM_WR, 0, 0, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10))
-INST3( movhlps, "movhlps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x12))
-INST3( movlhps, "movlhps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x16))
-INST3( movmskps, "movmskps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x50))
-INST3( unpckhps, "unpckhps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x15))
-INST3( unpcklps, "unpcklps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x14))
-INST3( maskmovdqu, "maskmovdqu" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF7))
-
-INST3( shufps, "shufps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0xC6))
-INST3( shufpd, "shufpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC6))
-
-INST3( punpckhdq, "punpckhdq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6A))
-
-INST3( lfence, "lfence" , 0, IUM_RD, 0, 0, 0x000FE8AE, BAD_CODE, BAD_CODE)
-INST3( mfence, "mfence" , 0, IUM_RD, 0, 0, 0x000FF0AE, BAD_CODE, BAD_CODE)
-INST3( prefetchnta, "prefetchnta" , 0, IUM_RD, 0, 0, 0x000F0018, BAD_CODE, BAD_CODE)
-INST3( prefetcht0, "prefetcht0" , 0, IUM_RD, 0, 0, 0x000F0818, BAD_CODE, BAD_CODE)
-INST3( prefetcht1, "prefetcht1" , 0, IUM_RD, 0, 0, 0x000F1018, BAD_CODE, BAD_CODE)
-INST3( prefetcht2, "prefetcht2" , 0, IUM_RD, 0, 0, 0x000F1818, BAD_CODE, BAD_CODE)
-INST3( sfence, "sfence" , 0, IUM_RD, 0, 0, 0x000FF8AE, BAD_CODE, BAD_CODE)
+INST3(mov_i2xmm, "movd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None) // Move int reg to a xmm reg. reg1=xmm reg, reg2=int reg
+INST3(mov_xmm2i, "movd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7E), INS_FLAGS_None) // Move xmm reg to an int reg. reg1=xmm reg, reg2=int reg
+INST3(pmovmskb, "pmovmskb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD7), INS_FLAGS_None) // Move the MSB bits of all bytes in a xmm reg to an int reg
+INST3(movmskpd, "movmskpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x50), INS_FLAGS_None) // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros.
+INST3(movd, "movd", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None)
+INST3(movq, "movq", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_FLAGS_None)
+INST3(movsdsse2, "movsd", IUM_WR, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10), INS_FLAGS_None)
+
+INST3(punpckldq, "punpckldq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x62), INS_FLAGS_None)
+
+INST3(xorps, "xorps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x57), INS_FLAGS_None) // XOR packed singles
+
+INST3(cvttsd2si, "cvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), INS_FLAGS_None) // cvt with trunc scalar double to signed DWORDs
+
+INST3(movntdq, "movntdq", IUM_WR, PCKDBL(0xE7), BAD_CODE, BAD_CODE, INS_FLAGS_None)
+INST3(movnti, "movnti", IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, INS_FLAGS_None)
+INST3(movntpd, "movntpd", IUM_WR, PCKDBL(0x2B), BAD_CODE, BAD_CODE, INS_FLAGS_None)
+INST3(movntps, "movntps", IUM_WR, PCKFLT(0x2B), BAD_CODE, BAD_CODE, INS_FLAGS_None)
+INST3(movdqu, "movdqu", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_FLAGS_None)
+INST3(movdqa, "movdqa", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_FLAGS_None)
+INST3(movlpd, "movlpd", IUM_WR, PCKDBL(0x13), BAD_CODE, PCKDBL(0x12), INS_FLAGS_None)
+INST3(movlps, "movlps", IUM_WR, PCKFLT(0x13), BAD_CODE, PCKFLT(0x12), INS_FLAGS_None)
+INST3(movhpd, "movhpd", IUM_WR, PCKDBL(0x17), BAD_CODE, PCKDBL(0x16), INS_FLAGS_None)
+INST3(movhps, "movhps", IUM_WR, PCKFLT(0x17), BAD_CODE, PCKFLT(0x16), INS_FLAGS_None)
+INST3(movss, "movss", IUM_WR, SSEFLT(0x11), BAD_CODE, SSEFLT(0x10), INS_FLAGS_None)
+INST3(movapd, "movapd", IUM_WR, PCKDBL(0x29), BAD_CODE, PCKDBL(0x28), INS_FLAGS_None)
+INST3(movaps, "movaps", IUM_WR, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28), INS_FLAGS_None)
+INST3(movupd, "movupd", IUM_WR, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10), INS_FLAGS_None)
+INST3(movups, "movups", IUM_WR, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10), INS_FLAGS_None)
+INST3(movhlps, "movhlps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x12), INS_FLAGS_None)
+INST3(movlhps, "movlhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x16), INS_FLAGS_None)
+INST3(movmskps, "movmskps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x50), INS_FLAGS_None)
+INST3(unpckhps, "unpckhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x15), INS_FLAGS_None)
+INST3(unpcklps, "unpcklps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x14), INS_FLAGS_None)
+INST3(maskmovdqu, "maskmovdqu", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF7), INS_FLAGS_None)
+
+INST3(shufps, "shufps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC6), INS_FLAGS_None)
+INST3(shufpd, "shufpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC6), INS_FLAGS_None)
+
+INST3(punpckhdq, "punpckhdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6A), INS_FLAGS_None)
+
+INST3(lfence, "lfence", IUM_RD, 0x000FE8AE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+INST3(mfence, "mfence", IUM_RD, 0x000FF0AE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+INST3(prefetchnta, "prefetchnta", IUM_RD, 0x000F0018, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+INST3(prefetcht0, "prefetcht0", IUM_RD, 0x000F0818, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+INST3(prefetcht1, "prefetcht1", IUM_RD, 0x000F1018, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+INST3(prefetcht2, "prefetcht2", IUM_RD, 0x000F1818, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+INST3(sfence, "sfence", IUM_RD, 0x000FF8AE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
// SSE 2 arith
-INST3( addps, "addps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x58)) // Add packed singles
-INST3( addss, "addss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x58)) // Add scalar singles
-INST3( addpd, "addpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x58)) // Add packed doubles
-INST3( addsd, "addsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x58)) // Add scalar doubles
-INST3( mulps, "mulps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x59)) // Multiply packed singles
-INST3( mulss, "mulss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x59)) // Multiply scalar single
-INST3( mulpd, "mulpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x59)) // Multiply packed doubles
-INST3( mulsd, "mulsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x59)) // Multiply scalar doubles
-INST3( subps, "subps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5C)) // Subtract packed singles
-INST3( subss, "subss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5C)) // Subtract scalar singles
-INST3( subpd, "subpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5C)) // Subtract packed doubles
-INST3( subsd, "subsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5C)) // Subtract scalar doubles
-INST3( minps, "minps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5D)) // Return Minimum packed singles
-INST3( minss, "minss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5D)) // Return Minimum scalar single
-INST3( minpd, "minpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5D)) // Return Minimum packed doubles
-INST3( minsd, "minsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5D)) // Return Minimum scalar double
-INST3( divps, "divps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5E)) // Divide packed singles
-INST3( divss, "divss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5E)) // Divide scalar singles
-INST3( divpd, "divpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5E)) // Divide packed doubles
-INST3( divsd, "divsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5E)) // Divide scalar doubles
-INST3( maxps, "maxps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5F)) // Return Maximum packed singles
-INST3( maxss, "maxss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5F)) // Return Maximum scalar single
-INST3( maxpd, "maxpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5F)) // Return Maximum packed doubles
-INST3( maxsd, "maxsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5F)) // Return Maximum scalar double
-INST3( xorpd, "xorpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x57)) // XOR packed doubles
-INST3( andps, "andps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x54)) // AND packed singles
-INST3( andpd, "andpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x54)) // AND packed doubles
-INST3( sqrtps, "sqrtps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x51)) // Sqrt of packed singles
-INST3( sqrtss, "sqrtss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x51)) // Sqrt of scalar single
-INST3( sqrtpd, "sqrtpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x51)) // Sqrt of packed doubles
-INST3( sqrtsd, "sqrtsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x51)) // Sqrt of scalar double
-INST3( andnps, "andnps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x55)) // And-Not packed singles
-INST3( andnpd, "andnpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x55)) // And-Not packed doubles
-INST3( orps, "orps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x56)) // Or packed singles
-INST3( orpd, "orpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x56)) // Or packed doubles
-INST3( haddpd, "haddpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x7C)) // Horizontal add packed doubles
-INST3( haddps, "haddps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x7C)) // Horizontal add packed floats
-INST3( hsubpd, "hsubpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x7D)) // Horizontal subtract packed doubles
-INST3( hsubps, "hsubps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x7D)) // Horizontal subtract packed floats
-INST3( addsubps, "addsubps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xD0)) // Add/Subtract packed singles
-INST3( addsubpd, "addsubpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD0)) // Add/Subtract packed doubles
+INST3(addps, "addps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x58), INS_FLAGS_None) // Add packed singles
+INST3(addss, "addss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x58), INS_FLAGS_None) // Add scalar singles
+INST3(addpd, "addpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x58), INS_FLAGS_None) // Add packed doubles
+INST3(addsd, "addsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x58), INS_FLAGS_None) // Add scalar doubles
+INST3(mulps, "mulps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x59), INS_FLAGS_None) // Multiply packed singles
+INST3(mulss, "mulss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x59), INS_FLAGS_None) // Multiply scalar single
+INST3(mulpd, "mulpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x59), INS_FLAGS_None) // Multiply packed doubles
+INST3(mulsd, "mulsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x59), INS_FLAGS_None) // Multiply scalar doubles
+INST3(subps, "subps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5C), INS_FLAGS_None) // Subtract packed singles
+INST3(subss, "subss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5C), INS_FLAGS_None) // Subtract scalar singles
+INST3(subpd, "subpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5C), INS_FLAGS_None) // Subtract packed doubles
+INST3(subsd, "subsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5C), INS_FLAGS_None) // Subtract scalar doubles
+INST3(minps, "minps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5D), INS_FLAGS_None) // Return Minimum packed singles
+INST3(minss, "minss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5D), INS_FLAGS_None) // Return Minimum scalar single
+INST3(minpd, "minpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5D), INS_FLAGS_None) // Return Minimum packed doubles
+INST3(minsd, "minsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5D), INS_FLAGS_None) // Return Minimum scalar double
+INST3(divps, "divps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5E), INS_FLAGS_None) // Divide packed singles
+INST3(divss, "divss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5E), INS_FLAGS_None) // Divide scalar singles
+INST3(divpd, "divpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5E), INS_FLAGS_None) // Divide packed doubles
+INST3(divsd, "divsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5E), INS_FLAGS_None) // Divide scalar doubles
+INST3(maxps, "maxps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5F), INS_FLAGS_None) // Return Maximum packed singles
+INST3(maxss, "maxss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5F), INS_FLAGS_None) // Return Maximum scalar single
+INST3(maxpd, "maxpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5F), INS_FLAGS_None) // Return Maximum packed doubles
+INST3(maxsd, "maxsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5F), INS_FLAGS_None) // Return Maximum scalar double
+INST3(xorpd, "xorpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x57), INS_FLAGS_None) // XOR packed doubles
+INST3(andps, "andps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x54), INS_FLAGS_None) // AND packed singles
+INST3(andpd, "andpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x54), INS_FLAGS_None) // AND packed doubles
+INST3(sqrtps, "sqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x51), INS_FLAGS_None) // Sqrt of packed singles
+INST3(sqrtss, "sqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x51), INS_FLAGS_None) // Sqrt of scalar single
+INST3(sqrtpd, "sqrtpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x51), INS_FLAGS_None) // Sqrt of packed doubles
+INST3(sqrtsd, "sqrtsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x51), INS_FLAGS_None) // Sqrt of scalar double
+INST3(andnps, "andnps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x55), INS_FLAGS_None) // And-Not packed singles
+INST3(andnpd, "andnpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x55), INS_FLAGS_None) // And-Not packed doubles
+INST3(orps, "orps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x56), INS_FLAGS_None) // Or packed singles
+INST3(orpd, "orpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x56), INS_FLAGS_None) // Or packed doubles
+INST3(haddpd, "haddpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7C), INS_FLAGS_None) // Horizontal add packed doubles
+INST3(haddps, "haddps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7C), INS_FLAGS_None) // Horizontal add packed floats
+INST3(hsubpd, "hsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7D), INS_FLAGS_None) // Horizontal subtract packed doubles
+INST3(hsubps, "hsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7D), INS_FLAGS_None) // Horizontal subtract packed floats
+INST3(addsubps, "addsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xD0), INS_FLAGS_None) // Add/Subtract packed singles
+INST3(addsubpd, "addsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD0), INS_FLAGS_None) // Add/Subtract packed doubles
// SSE 2 approx arith
-INST3( rcpps, "rcpps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x53)) // Reciprocal of packed singles
-INST3( rcpss, "rcpss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x53)) // Reciprocal of scalar single
-INST3( rsqrtps, "rsqrtps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x52)) // Reciprocal Sqrt of packed singles
-INST3( rsqrtss, "rsqrtss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x52)) // Reciprocal Sqrt of scalar single
+INST3(rcpps, "rcpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x53), INS_FLAGS_None) // Reciprocal of packed singles
+INST3(rcpss, "rcpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x53), INS_FLAGS_None) // Reciprocal of scalar single
+INST3(rsqrtps, "rsqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x52), INS_FLAGS_None) // Reciprocal Sqrt of packed singles
+INST3(rsqrtss, "rsqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x52), INS_FLAGS_None) // Reciprocal Sqrt of scalar single
// SSE2 conversions
-INST3( cvtpi2ps, "cvtpi2ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2A)) // cvt packed DWORDs to singles
-INST3( cvtsi2ss, "cvtsi2ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x2A)) // cvt DWORD to scalar single
-INST3( cvtpi2pd, "cvtpi2pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2A)) // cvt packed DWORDs to doubles
-INST3( cvtsi2sd, "cvtsi2sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x2A)) // cvt DWORD to scalar double
-INST3( cvttps2pi, "cvttps2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2C)) // cvt with trunc packed singles to DWORDs
-INST3( cvttss2si, "cvttss2si", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x2C)) // cvt with trunc scalar single to DWORD
-INST3( cvttpd2pi, "cvttpd2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2C)) // cvt with trunc packed doubles to DWORDs
-INST3( cvtps2pi, "cvtps2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2D)) // cvt packed singles to DWORDs
-INST3( cvtss2si, "cvtss2si", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x2D)) // cvt scalar single to DWORD
-INST3( cvtpd2pi, "cvtpd2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2D)) // cvt packed doubles to DWORDs
-INST3( cvtsd2si, "cvtsd2si", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x2D)) // cvt scalar double to DWORD
-INST3( cvtps2pd, "cvtps2pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5A)) // cvt packed singles to doubles
-INST3( cvtpd2ps, "cvtpd2ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5A)) // cvt packed doubles to singles
-INST3( cvtss2sd, "cvtss2sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5A)) // cvt scalar single to scalar doubles
-INST3( cvtsd2ss, "cvtsd2ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5A)) // cvt scalar double to scalar singles
-INST3( cvtdq2ps, "cvtdq2ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5B)) // cvt packed DWORDs to singles
-INST3( cvtps2dq, "cvtps2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5B)) // cvt packed singles to DWORDs
-INST3( cvttps2dq, "cvttps2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5B)) // cvt with trunc packed singles to DWORDs
-INST3( cvtpd2dq, "cvtpd2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xE6)) // cvt packed doubles to DWORDs
-INST3( cvttpd2dq, "cvttpd2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE6)) // cvt with trunc packed doubles to DWORDs
-INST3( cvtdq2pd, "cvtdq2pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0xE6)) // cvt packed DWORDs to doubles
+INST3(cvtpi2ps, "cvtpi2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x2A), INS_FLAGS_None) // cvt packed DWORDs to singles
+INST3(cvtsi2ss, "cvtsi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), INS_FLAGS_None) // cvt DWORD to scalar single
+INST3(cvtpi2pd, "cvtpi2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x2A), INS_FLAGS_None) // cvt packed DWORDs to doubles
+INST3(cvtsi2sd, "cvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), INS_FLAGS_None) // cvt DWORD to scalar double
+INST3(cvttps2pi, "cvttps2pi", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x2C), INS_FLAGS_None) // cvt with trunc packed singles to DWORDs
+INST3(cvttss2si, "cvttss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), INS_FLAGS_None) // cvt with trunc scalar single to DWORD
+INST3(cvttpd2pi, "cvttpd2pi", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x2C), INS_FLAGS_None) // cvt with trunc packed doubles to DWORDs
+INST3(cvtps2pi, "cvtps2pi", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x2D), INS_FLAGS_None) // cvt packed singles to DWORDs
+INST3(cvtss2si, "cvtss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2D), INS_FLAGS_None) // cvt scalar single to DWORD
+INST3(cvtpd2pi, "cvtpd2pi", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x2D), INS_FLAGS_None) // cvt packed doubles to DWORDs
+INST3(cvtsd2si, "cvtsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2D), INS_FLAGS_None) // cvt scalar double to DWORD
+INST3(cvtps2pd, "cvtps2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5A), INS_FLAGS_None) // cvt packed singles to doubles
+INST3(cvtpd2ps, "cvtpd2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5A), INS_FLAGS_None) // cvt packed doubles to singles
+INST3(cvtss2sd, "cvtss2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5A), INS_FLAGS_None) // cvt scalar single to scalar doubles
+INST3(cvtsd2ss, "cvtsd2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5A), INS_FLAGS_None) // cvt scalar double to scalar singles
+INST3(cvtdq2ps, "cvtdq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), INS_FLAGS_None) // cvt packed DWORDs to singles
+INST3(cvtps2dq, "cvtps2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5B), INS_FLAGS_None) // cvt packed singles to DWORDs
+INST3(cvttps2dq, "cvttps2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5B), INS_FLAGS_None) // cvt with trunc packed singles to DWORDs
+INST3(cvtpd2dq, "cvtpd2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xE6), INS_FLAGS_None) // cvt packed doubles to DWORDs
+INST3(cvttpd2dq, "cvttpd2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE6), INS_FLAGS_None) // cvt with trunc packed doubles to DWORDs
+INST3(cvtdq2pd, "cvtdq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_FLAGS_None) // cvt packed DWORDs to doubles
// SSE2 comparison instructions
-INST3( comiss, "comiss", 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2F)) // ordered compare singles
-INST3( comisd, "comisd", 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2F)) // ordered compare doubles
-INST3( ucomiss, "ucomiss", 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2E)) // unordered compare singles
-INST3( ucomisd, "ucomisd", 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2E)) // unordered compare doubles
+INST3(comiss, "comiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2F), INS_FLAGS_None) // ordered compare singles
+INST3(comisd, "comisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2F), INS_FLAGS_None) // ordered compare doubles
+INST3(ucomiss, "ucomiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2E), INS_FLAGS_None) // unordered compare singles
+INST3(ucomisd, "ucomisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2E), INS_FLAGS_None) // unordered compare doubles
// SSE2 packed single/double comparison operations.
// Note that these instructions not only compare but also overwrite the first source.
-INST3( cmpps, "cmpps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0xC2)) // compare packed singles
-INST3( cmppd, "cmppd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC2)) // compare packed doubles
-INST3( cmpss, "cmpss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0xC2)) // compare scalar singles
-INST3( cmpsd, "cmpsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xC2)) // compare scalar doubles
+INST3(cmpps, "cmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), INS_FLAGS_None) // compare packed singles
+INST3(cmppd, "cmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), INS_FLAGS_None) // compare packed doubles
+INST3(cmpss, "cmpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), INS_FLAGS_None) // compare scalar singles
+INST3(cmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), INS_FLAGS_None) // compare scalar doubles
//SSE2 packed integer operations
-INST3( paddb, "paddb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFC)) // Add packed byte integers
-INST3( paddw, "paddw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFD)) // Add packed word (16-bit) integers
-INST3( paddd, "paddd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFE)) // Add packed double-word (32-bit) integers
-INST3( paddq, "paddq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD4)) // Add packed quad-word (64-bit) integers
-INST3( paddsb, "paddsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEC)) // Add packed signed byte integers and saturate the results
-INST3( paddsw, "paddsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xED)) // Add packed signed word integers and saturate the results
-INST3( paddusb, "paddusb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDC)) // Add packed unsigned byte integers and saturate the results
-INST3( paddusw, "paddusw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDD)) // Add packed unsigned word integers and saturate the results
-INST3( pavgb, "pavgb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE0)) // Average of packed byte integers
-INST3( pavgw, "pavgw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE3)) // Average of packed word integers
-INST3( psubb, "psubb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF8)) // Subtract packed word (16-bit) integers
-INST3( psubw, "psubw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF9)) // Subtract packed word (16-bit) integers
-INST3( psubd, "psubd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFA)) // Subtract packed double-word (32-bit) integers
-INST3( psubq, "psubq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFB)) // subtract packed quad-word (64-bit) integers
-INST3( pmaddwd, "pmaddwd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF5)) // Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst
-INST3( pmulhw, "pmulhw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE5)) // Multiply high the packed 16-bit signed integers
-INST3( pmulhuw, "pmulhuw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE4)) // Multiply high the packed 16-bit unsigned integers
-INST3( pmuludq, "pmuludq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF4)) // packed multiply 32-bit unsigned integers and store 64-bit result
-INST3( pmullw, "pmullw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD5)) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result
-INST3( pand, "pand" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDB)) // Packed bit-wise AND of two xmm regs
-INST3( pandn, "pandn" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDF)) // Packed bit-wise AND NOT of two xmm regs
-INST3( por, "por" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEB)) // Packed bit-wise OR of two xmm regs
-INST3( pxor, "pxor" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEF)) // Packed bit-wise XOR of two xmm regs
-INST3( psadbw, "psadbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF6)) // Compute the sum of absolute differences of packed unsigned 8-bit integers
-INST3( psubsb, "psubsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE8)) // Subtract packed 8-bit integers in b from packed 8-bit integers in a using saturation
-INST3( psubusb, "psubusb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD8)) // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation
-INST3( psubsw, "psubsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE9)) // Subtract packed 16-bit integers in b from packed 16-bit integers in a using saturation
-INST3( psubusw, "psubusw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD9)) // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation
+INST3(paddb, "paddb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFC), INS_FLAGS_None) // Add packed byte integers
+INST3(paddw, "paddw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFD), INS_FLAGS_None) // Add packed word (16-bit) integers
+INST3(paddd, "paddd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFE), INS_FLAGS_None) // Add packed double-word (32-bit) integers
+INST3(paddq, "paddq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD4), INS_FLAGS_None) // Add packed quad-word (64-bit) integers
+INST3(paddsb, "paddsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEC), INS_FLAGS_None) // Add packed signed byte integers and saturate the results
+INST3(paddsw, "paddsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xED), INS_FLAGS_None) // Add packed signed word integers and saturate the results
+INST3(paddusb, "paddusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDC), INS_FLAGS_None) // Add packed unsigned byte integers and saturate the results
+INST3(paddusw, "paddusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDD), INS_FLAGS_None) // Add packed unsigned word integers and saturate the results
+INST3(pavgb, "pavgb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE0), INS_FLAGS_None) // Average of packed byte integers
+INST3(pavgw, "pavgw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE3), INS_FLAGS_None) // Average of packed word integers
+INST3(psubb,          "psubb",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xF8),  INS_FLAGS_None)    // Subtract packed byte (8-bit) integers
+INST3(psubw, "psubw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF9), INS_FLAGS_None) // Subtract packed word (16-bit) integers
+INST3(psubd, "psubd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFA), INS_FLAGS_None) // Subtract packed double-word (32-bit) integers
+INST3(psubq,          "psubq",          IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xFB),  INS_FLAGS_None)    // Subtract packed quad-word (64-bit) integers
+INST3(pmaddwd, "pmaddwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF5), INS_FLAGS_None) // Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst
+INST3(pmulhw, "pmulhw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE5), INS_FLAGS_None) // Multiply high the packed 16-bit signed integers
+INST3(pmulhuw, "pmulhuw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE4), INS_FLAGS_None) // Multiply high the packed 16-bit unsigned integers
+INST3(pmuludq, "pmuludq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF4), INS_FLAGS_None) // packed multiply 32-bit unsigned integers and store 64-bit result
+INST3(pmullw,         "pmullw",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xD5),  INS_FLAGS_None)    // Packed multiply 16-bit integers and store lower 16 bits of each result
+INST3(pand, "pand", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_FLAGS_None) // Packed bit-wise AND of two xmm regs
+INST3(pandn, "pandn", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_FLAGS_None) // Packed bit-wise AND NOT of two xmm regs
+INST3(por, "por", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_FLAGS_None) // Packed bit-wise OR of two xmm regs
+INST3(pxor, "pxor", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_FLAGS_None) // Packed bit-wise XOR of two xmm regs
+INST3(psadbw, "psadbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF6), INS_FLAGS_None) // Compute the sum of absolute differences of packed unsigned 8-bit integers
+INST3(psubsb, "psubsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE8), INS_FLAGS_None) // Subtract packed 8-bit integers in b from packed 8-bit integers in a using saturation
+INST3(psubusb, "psubusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD8), INS_FLAGS_None) // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation
+INST3(psubsw, "psubsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE9), INS_FLAGS_None) // Subtract packed 16-bit integers in b from packed 16-bit integers in a using saturation
+INST3(psubusw, "psubusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD9), INS_FLAGS_None) // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation
// Note that the shift immediates share the same encoding between left and right-shift, and are distinguished by the Reg/Opcode,
// which is handled in emitxarch.cpp.
-INST3( psrldq, "psrldq" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x73), BAD_CODE ) // Shift right logical of xmm reg by given number of bytes
-INST3( pslldq, "pslldq" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x73), BAD_CODE ) // Shift left logical of xmm reg by given number of bytes
-INST3( psllw, "psllw" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x71), PCKDBL(0xF1)) // Packed shift left logical of 16-bit integers
-INST3( pslld, "pslld" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2)) // Packed shift left logical of 32-bit integers
-INST3( psllq, "psllq" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3)) // Packed shift left logical of 64-bit integers
-INST3( psrlw, "psrlw" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x71), PCKDBL(0xD1)) // Packed shift right logical of 16-bit integers
-INST3( psrld, "psrld" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x72), PCKDBL(0xD2)) // Packed shift right logical of 32-bit integers
-INST3( psrlq, "psrlq" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x73), PCKDBL(0xD3)) // Packed shift right logical of 64-bit integers
-INST3( psraw, "psraw" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x71), PCKDBL(0xE1)) // Packed shift right arithmetic of 16-bit integers
-INST3( psrad, "psrad" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2)) // Packed shift right arithmetic of 32-bit integers
-
-INST3( pmaxub, "pmaxub" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDE)) // packed maximum unsigned bytes
-INST3( pminub, "pminub" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDA)) // packed minimum unsigned bytes
-INST3( pmaxsw, "pmaxsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEE)) // packed maximum signed words
-INST3( pminsw, "pminsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEA)) // packed minimum signed words
-INST3( pcmpeqd, "pcmpeqd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x76)) // Packed compare 32-bit integers for equality
-INST3( pcmpgtd, "pcmpgtd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x66)) // Packed compare 32-bit signed integers for greater than
-INST3( pcmpeqw, "pcmpeqw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x75)) // Packed compare 16-bit integers for equality
-INST3( pcmpgtw, "pcmpgtw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x65)) // Packed compare 16-bit signed integers for greater than
-INST3( pcmpeqb, "pcmpeqb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x74)) // Packed compare 8-bit integers for equality
-INST3( pcmpgtb, "pcmpgtb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x64)) // Packed compare 8-bit signed integers for greater than
-
-INST3( pshufd, "pshufd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x70)) // Packed shuffle of 32-bit integers
-INST3( pshufhw, "pshufhw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x70)) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
-INST3( pshuflw, "pshuflw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x70)) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
-INST3( pextrw, "pextrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC5)) // Extract 16-bit value into a r32 with zero extended to 32-bits
-INST3( pinsrw, "pinsrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC4)) // Insert word at index
-
-INST3( punpckhbw, "punpckhbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x68)) // Packed logical (unsigned) widen ubyte to ushort (hi)
-INST3( punpcklbw, "punpcklbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x60)) // Packed logical (unsigned) widen ubyte to ushort (lo)
-INST3( punpckhqdq, "punpckhqdq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6D)) // Packed logical (unsigned) widen uint to ulong (hi)
-INST3( punpcklqdq, "punpcklqdq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6C)) // Packed logical (unsigned) widen uint to ulong (lo)
-INST3( punpckhwd, "punpckhwd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x69)) // Packed logical (unsigned) widen ushort to uint (hi)
-INST3( punpcklwd, "punpcklwd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x61)) // Packed logical (unsigned) widen ushort to uint (lo)
-INST3( unpckhpd, "unpckhpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x15)) // Packed logical (unsigned) widen ubyte to ushort (hi)
-INST3( unpcklpd, "unpcklpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x14)) // Packed logical (unsigned) widen ubyte to ushort (hi)
-
-INST3( packssdw, "packssdw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6B)) // Pack (narrow) int to short with saturation
-INST3( packsswb, "packsswb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x63)) // Pack (narrow) short to byte with saturation
-INST3( packuswb, "packuswb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x67)) // Pack (narrow) short to unsigned byte with saturation
-
-INST3(LAST_SSE2_INSTRUCTION, "LAST_SSE2_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
-
-INST3(FIRST_SSE4_INSTRUCTION, "FIRST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
-
-// enum name FP updmode rf wf MR MI RM
-INST3( dpps, "dpps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x40)) // Packed dot product of two float vector regs
-INST3( dppd, "dppd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x41)) // Packed dot product of two double vector regs
-INST3( insertps, "insertps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x21)) // Insert packed single precision float value
-INST3( pcmpeqq, "pcmpeqq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x29)) // Packed compare 64-bit integers for equality
-INST3( pcmpgtq, "pcmpgtq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x37)) // Packed compare 64-bit integers for equality
-INST3( pmulld, "pmulld" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x40)) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result
-INST3( ptest, "ptest" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x17)) // Packed logical compare
-INST3( phaddd, "phaddd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x02)) // Packed horizontal add
-INST3( pabsb, "pabsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1C)) // Packed absolute value of bytes
-INST3( pabsw, "pabsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1D)) // Packed absolute value of 16-bit integers
-INST3( pabsd, "pabsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1E)) // Packed absolute value of 32-bit integers
-INST3( palignr, "palignr" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x0F)) // Packed Align Right
-INST3( pmaddubsw, "pmaddubsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x04)) // Multiply and Add Packed Signed and Unsigned Bytes
-INST3( pmulhrsw, "pmulhrsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0B)) // Packed Multiply High with Round and Scale
-INST3( pshufb, "pshufb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x00)) // Packed Shuffle Bytes
-INST3( psignb, "psignb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x08)) // Packed SIGN
-INST3( psignw, "psignw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x09)) // Packed SIGN
-INST3( psignd, "psignd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0A)) // Packed SIGN
-INST3( pminsb, "pminsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x38)) // packed minimum signed bytes
-INST3( pminsd, "pminsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x39)) // packed minimum 32-bit signed integers
-INST3( pminuw, "pminuw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3A)) // packed minimum 16-bit unsigned integers
-INST3( pminud, "pminud" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3B)) // packed minimum 32-bit unsigned integers
-INST3( pmaxsb, "pmaxsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3C)) // packed maximum signed bytes
-INST3( pmaxsd, "pmaxsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3D)) // packed maximum 32-bit signed integers
-INST3( pmaxuw, "pmaxuw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3E)) // packed maximum 16-bit unsigned integers
-INST3( pmaxud, "pmaxud" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3F)) // packed maximum 32-bit unsigned integers
-INST3( pmovsxbw, "pmovsxbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x20)) // Packed sign extend byte to short
-INST3( pmovsxbd, "pmovsxbd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x21)) // Packed sign extend byte to int
-INST3( pmovsxbq, "pmovsxbq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x22)) // Packed sign extend byte to long
-INST3( pmovsxwd, "pmovsxwd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x23)) // Packed sign extend short to int
-INST3( pmovsxwq, "pmovsxwq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x24)) // Packed sign extend short to long
-INST3( pmovsxdq, "pmovsxdq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x25)) // Packed sign extend int to long
-INST3( pmovzxbw, "pmovzxbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x30)) // Packed zero extend byte to short
-INST3( pmovzxbd, "pmovzxbd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x31)) // Packed zero extend byte to intg
-INST3( pmovzxbq, "pmovzxbq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x32)) // Packed zero extend byte to lon
-INST3( pmovzxwd, "pmovzxwd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x33)) // Packed zero extend short to int
-INST3( pmovzxwq, "pmovzxwq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x34)) // Packed zero extend short to long
-INST3( pmovzxdq, "pmovzxdq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x35)) // Packed zero extend int to long
-INST3( packusdw, "packusdw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2B)) // Pack (narrow) int to unsigned short with saturation
-INST3( roundps, "roundps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x08)) // Round packed single precision floating-point values
-INST3( roundss, "roundss" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x0A)) // Round scalar single precision floating-point values
-INST3( roundpd, "roundpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x09)) // Round packed double precision floating-point values
-INST3( roundsd, "roundsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x0B)) // Round scalar double precision floating-point values
-INST3( pmuldq, "pmuldq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x28)) // packed multiply 32-bit signed integers and store 64-bit result
-INST3( blendps, "blendps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x0C)) // Blend Packed Single Precision Floating-Point Values
-INST3( blendvps, "blendvps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x14)) // Variable Blend Packed Singles
-INST3( blendpd, "blendpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x0D)) // Blend Packed Double Precision Floating-Point Values
-INST3( blendvpd, "blendvpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x15)) // Variable Blend Packed Doubles
-INST3( pblendw, "pblendw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x0E)) // Blend Packed Words
-INST3( pblendvb, "pblendvb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x10)) // Variable Blend Packed Bytes
-INST3( phaddw, "phaddw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x01)) // Packed horizontal add of 16-bit integers
-INST3( phsubw, "phsubw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x05)) // Packed horizontal subtract of 16-bit integers
-INST3( phsubd, "phsubd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x06)) // Packed horizontal subtract of 32-bit integers
-INST3( phaddsw, "phaddsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x03)) // Packed horizontal add of 16-bit integers with saturation
-INST3( phsubsw, "phsubsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x07)) // Packed horizontal subtract of 16-bit integers with saturation
-INST3( lddqu, "lddqu" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xF0)) // Load Unaligned integer
-INST3( movntdqa, "movntdqa" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2A)) // Load Double Quadword Non-Temporal Aligned Hint
-INST3( movddup, "movddup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x12)) // Replicate Double FP Values
-INST3( movsldup, "movsldup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x12)) // Replicate even-indexed Single FP Values
-INST3( movshdup, "movshdup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x16)) // Replicate odd-indexed Single FP Values
-INST3( phminposuw, "phminposuw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x41)) // Packed Horizontal Word Minimum
-INST3( mpsadbw, "mpsadbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x42)) // Compute Multiple Packed Sums of Absolute Difference
-INST3( pinsrb, "pinsrb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x20)) // Insert Byte
-INST3( pinsrd, "pinsrd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x22)) // Insert Dword
-INST3( pinsrq, "pinsrq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x22)) // Insert Qword
-INST3( pextrb, "pextrb" , 0, IUM_WR, 0, 0, SSE3A(0x14), BAD_CODE, BAD_CODE) // Extract Byte
-INST3( pextrd, "pextrd" , 0, IUM_WR, 0, 0, SSE3A(0x16), BAD_CODE, BAD_CODE) // Extract Dword
-INST3( pextrq, "pextrq" , 0, IUM_WR, 0, 0, SSE3A(0x16), BAD_CODE, BAD_CODE) // Extract Qword
-INST3( pextrw_sse41, "pextrw" , 0, IUM_WR, 0, 0, SSE3A(0x15), BAD_CODE, BAD_CODE) // Extract Word
-INST3( extractps, "extractps" , 0, IUM_WR, 0, 0, SSE3A(0x17), BAD_CODE, BAD_CODE) // Extract Packed Floating-Point Values
+INST3(psrldq, "psrldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_FLAGS_None) // Shift right logical of xmm reg by given number of bytes
+INST3(pslldq, "pslldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_FLAGS_None) // Shift left logical of xmm reg by given number of bytes
+INST3(psllw, "psllw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xF1), INS_FLAGS_None) // Packed shift left logical of 16-bit integers
+INST3(pslld, "pslld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2), INS_FLAGS_None) // Packed shift left logical of 32-bit integers
+INST3(psllq, "psllq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3), INS_FLAGS_None) // Packed shift left logical of 64-bit integers
+INST3(psrlw, "psrlw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xD1), INS_FLAGS_None) // Packed shift right logical of 16-bit integers
+INST3(psrld, "psrld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xD2), INS_FLAGS_None) // Packed shift right logical of 32-bit integers
+INST3(psrlq, "psrlq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xD3), INS_FLAGS_None) // Packed shift right logical of 64-bit integers
+INST3(psraw, "psraw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xE1), INS_FLAGS_None) // Packed shift right arithmetic of 16-bit integers
+INST3(psrad, "psrad", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_FLAGS_None) // Packed shift right arithmetic of 32-bit integers
+
+INST3(pmaxub, "pmaxub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDE), INS_FLAGS_None) // packed maximum unsigned bytes
+INST3(pminub, "pminub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDA), INS_FLAGS_None) // packed minimum unsigned bytes
+INST3(pmaxsw, "pmaxsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEE), INS_FLAGS_None) // packed maximum signed words
+INST3(pminsw, "pminsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEA), INS_FLAGS_None) // packed minimum signed words
+INST3(pcmpeqd, "pcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), INS_FLAGS_None) // Packed compare 32-bit integers for equality
+INST3(pcmpgtd, "pcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), INS_FLAGS_None) // Packed compare 32-bit signed integers for greater than
+INST3(pcmpeqw, "pcmpeqw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), INS_FLAGS_None) // Packed compare 16-bit integers for equality
+INST3(pcmpgtw, "pcmpgtw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), INS_FLAGS_None) // Packed compare 16-bit signed integers for greater than
+INST3(pcmpeqb, "pcmpeqb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), INS_FLAGS_None) // Packed compare 8-bit integers for equality
+INST3(pcmpgtb, "pcmpgtb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), INS_FLAGS_None) // Packed compare 8-bit signed integers for greater than
+
+INST3(pshufd, "pshufd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x70), INS_FLAGS_None) // Packed shuffle of 32-bit integers
+INST3(pshufhw, "pshufhw", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x70), INS_FLAGS_None) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
+INST3(pshuflw, "pshuflw", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x70), INS_FLAGS_None) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
+INST3(pextrw, "pextrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC5), INS_FLAGS_None) // Extract 16-bit value into a r32 with zero extended to 32-bits
+INST3(pinsrw, "pinsrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC4), INS_FLAGS_None) // Insert word at index
+
+INST3(punpckhbw, "punpckhbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x68), INS_FLAGS_None) // Packed logical (unsigned) widen ubyte to ushort (hi)
+INST3(punpcklbw, "punpcklbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x60), INS_FLAGS_None) // Packed logical (unsigned) widen ubyte to ushort (lo)
+INST3(punpckhqdq, "punpckhqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6D), INS_FLAGS_None) // Packed logical (unsigned) widen uint to ulong (hi)
+INST3(punpcklqdq, "punpcklqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6C), INS_FLAGS_None) // Packed logical (unsigned) widen uint to ulong (lo)
+INST3(punpckhwd, "punpckhwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x69), INS_FLAGS_None) // Packed logical (unsigned) widen ushort to uint (hi)
+INST3(punpcklwd, "punpcklwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x61), INS_FLAGS_None) // Packed logical (unsigned) widen ushort to uint (lo)
+INST3(unpckhpd,        "unpckhpd",        IUM_WR, BAD_CODE,    BAD_CODE,    PCKDBL(0x15), INS_FLAGS_None)    // Unpack and interleave the high packed double-precision FP values
+INST3(unpcklpd,        "unpcklpd",        IUM_WR, BAD_CODE,    BAD_CODE,    PCKDBL(0x14), INS_FLAGS_None)    // Unpack and interleave the low packed double-precision FP values
+
+INST3(packssdw, "packssdw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6B), INS_FLAGS_None) // Pack (narrow) int to short with saturation
+INST3(packsswb, "packsswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x63), INS_FLAGS_None) // Pack (narrow) short to byte with saturation
+INST3(packuswb, "packuswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x67), INS_FLAGS_None) // Pack (narrow) short to unsigned byte with saturation
+INST3(LAST_SSE2_INSTRUCTION, "LAST_SSE2_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+
+INST3(FIRST_SSE4_INSTRUCTION, "FIRST_SSE4_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+// id nm um mr mi rm flags
+INST3(dpps, "dpps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x40), INS_FLAGS_None) // Packed dot product of two float vector regs
+INST3(dppd, "dppd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x41), INS_FLAGS_None) // Packed dot product of two double vector regs
+INST3(insertps, "insertps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x21), INS_FLAGS_None) // Insert packed single precision float value
+INST3(pcmpeqq, "pcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), INS_FLAGS_None) // Packed compare 64-bit integers for equality
+INST3(pcmpgtq,         "pcmpgtq",         IUM_WR, BAD_CODE,    BAD_CODE,    SSE38(0x37),  INS_FLAGS_None)    // Packed compare 64-bit signed integers for greater than
+INST3(pmulld, "pmulld", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), INS_FLAGS_None) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result
+INST3(ptest, "ptest", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x17), INS_FLAGS_None) // Packed logical compare
+INST3(phaddd, "phaddd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x02), INS_FLAGS_None) // Packed horizontal add
+INST3(pabsb, "pabsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1C), INS_FLAGS_None) // Packed absolute value of bytes
+INST3(pabsw, "pabsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1D), INS_FLAGS_None) // Packed absolute value of 16-bit integers
+INST3(pabsd, "pabsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1E), INS_FLAGS_None) // Packed absolute value of 32-bit integers
+INST3(palignr, "palignr", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0F), INS_FLAGS_None) // Packed Align Right
+INST3(pmaddubsw, "pmaddubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x04), INS_FLAGS_None) // Multiply and Add Packed Signed and Unsigned Bytes
+INST3(pmulhrsw, "pmulhrsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0B), INS_FLAGS_None) // Packed Multiply High with Round and Scale
+INST3(pshufb, "pshufb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x00), INS_FLAGS_None) // Packed Shuffle Bytes
+INST3(psignb, "psignb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x08), INS_FLAGS_None) // Packed SIGN
+INST3(psignw, "psignw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x09), INS_FLAGS_None) // Packed SIGN
+INST3(psignd, "psignd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0A), INS_FLAGS_None) // Packed SIGN
+INST3(pminsb, "pminsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x38), INS_FLAGS_None) // packed minimum signed bytes
+INST3(pminsd, "pminsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), INS_FLAGS_None) // packed minimum 32-bit signed integers
+INST3(pminuw, "pminuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3A), INS_FLAGS_None) // packed minimum 16-bit unsigned integers
+INST3(pminud, "pminud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), INS_FLAGS_None) // packed minimum 32-bit unsigned integers
+INST3(pmaxsb, "pmaxsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3C), INS_FLAGS_None) // packed maximum signed bytes
+INST3(pmaxsd, "pmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), INS_FLAGS_None) // packed maximum 32-bit signed integers
+INST3(pmaxuw, "pmaxuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3E), INS_FLAGS_None) // packed maximum 16-bit unsigned integers
+INST3(pmaxud, "pmaxud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), INS_FLAGS_None) // packed maximum 32-bit unsigned integers
+INST3(pmovsxbw, "pmovsxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x20), INS_FLAGS_None) // Packed sign extend byte to short
+INST3(pmovsxbd, "pmovsxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x21), INS_FLAGS_None) // Packed sign extend byte to int
+INST3(pmovsxbq, "pmovsxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x22), INS_FLAGS_None) // Packed sign extend byte to long
+INST3(pmovsxwd, "pmovsxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x23), INS_FLAGS_None) // Packed sign extend short to int
+INST3(pmovsxwq, "pmovsxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x24), INS_FLAGS_None) // Packed sign extend short to long
+INST3(pmovsxdq, "pmovsxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x25), INS_FLAGS_None) // Packed sign extend int to long
+INST3(pmovzxbw, "pmovzxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x30), INS_FLAGS_None) // Packed zero extend byte to short
+INST3(pmovzxbd,        "pmovzxbd",        IUM_WR, BAD_CODE,    BAD_CODE,    SSE38(0x31),  INS_FLAGS_None)    // Packed zero extend byte to int
+INST3(pmovzxbq,        "pmovzxbq",        IUM_WR, BAD_CODE,    BAD_CODE,    SSE38(0x32),  INS_FLAGS_None)    // Packed zero extend byte to long
+INST3(pmovzxwd, "pmovzxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x33), INS_FLAGS_None) // Packed zero extend short to int
+INST3(pmovzxwq, "pmovzxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x34), INS_FLAGS_None) // Packed zero extend short to long
+INST3(pmovzxdq, "pmovzxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x35), INS_FLAGS_None) // Packed zero extend int to long
+INST3(packusdw, "packusdw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2B), INS_FLAGS_None) // Pack (narrow) int to unsigned short with saturation
+INST3(roundps, "roundps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), INS_FLAGS_None) // Round packed single precision floating-point values
+INST3(roundss, "roundss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), INS_FLAGS_None) // Round scalar single precision floating-point values
+INST3(roundpd, "roundpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), INS_FLAGS_None) // Round packed double precision floating-point values
+INST3(roundsd, "roundsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), INS_FLAGS_None) // Round scalar double precision floating-point values
+INST3(pmuldq, "pmuldq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x28), INS_FLAGS_None) // packed multiply 32-bit signed integers and store 64-bit result
+INST3(blendps, "blendps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0C), INS_FLAGS_None) // Blend Packed Single Precision Floating-Point Values
+INST3(blendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_FLAGS_None) // Variable Blend Packed Singles
+INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_FLAGS_None) // Blend Packed Double Precision Floating-Point Values
+INST3(blendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_FLAGS_None) // Variable Blend Packed Doubles
+INST3(pblendw, "pblendw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0E), INS_FLAGS_None) // Blend Packed Words
+INST3(pblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), INS_FLAGS_None) // Variable Blend Packed Bytes
+INST3(phaddw, "phaddw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x01), INS_FLAGS_None) // Packed horizontal add of 16-bit integers
+INST3(phsubw, "phsubw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x05), INS_FLAGS_None) // Packed horizontal subtract of 16-bit integers
+INST3(phsubd, "phsubd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x06), INS_FLAGS_None) // Packed horizontal subtract of 32-bit integers
+INST3(phaddsw, "phaddsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x03), INS_FLAGS_None) // Packed horizontal add of 16-bit integers with saturation
+INST3(phsubsw, "phsubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x07), INS_FLAGS_None) // Packed horizontal subtract of 16-bit integers with saturation
+INST3(lddqu, "lddqu", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xF0), INS_FLAGS_None) // Load Unaligned integer
+INST3(movntdqa, "movntdqa", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2A), INS_FLAGS_None) // Load Double Quadword Non-Temporal Aligned Hint
+INST3(movddup, "movddup", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x12), INS_FLAGS_None) // Replicate Double FP Values
+INST3(movsldup, "movsldup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x12), INS_FLAGS_None) // Replicate even-indexed Single FP Values
+INST3(movshdup, "movshdup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x16), INS_FLAGS_None) // Replicate odd-indexed Single FP Values
+INST3(phminposuw, "phminposuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x41), INS_FLAGS_None) // Packed Horizontal Word Minimum
+INST3(mpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), INS_FLAGS_None) // Compute Multiple Packed Sums of Absolute Difference
+INST3(pinsrb, "pinsrb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x20), INS_FLAGS_None) // Insert Byte
+INST3(pinsrd, "pinsrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_FLAGS_None) // Insert Dword
+INST3(pinsrq, "pinsrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_FLAGS_None) // Insert Qword
+INST3(pextrb, "pextrb", IUM_WR, SSE3A(0x14), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Byte
+INST3(pextrd, "pextrd", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Dword
+INST3(pextrq, "pextrq", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Qword
+INST3(pextrw_sse41, "pextrw", IUM_WR, SSE3A(0x15), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Word
+INST3(extractps, "extractps", IUM_WR, SSE3A(0x17), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Packed Floating-Point Values
//AES instructions
-INST3(aesdec, "aesdec" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xDE)) // Perform one round of an AES decryption flow
-INST3(aesdeclast, "aesdeclast" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xDF)) // Perform last round of an AES decryption flow
-INST3(aesenc, "aesenc" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xDC)) // Perform one round of an AES encryption flow
-INST3(aesenclast, "aesenclast" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xDD)) // Perform last round of an AES encryption flow
-INST3(aesimc, "aesimc" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xDB)) // Perform the AES InvMixColumn Transformation
-INST3(aeskeygenassist,"aeskeygenassist" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0xDF)) // AES Round Key Generation Assist
-
-INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
-
-INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+INST3(aesdec, "aesdec", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDE), INS_FLAGS_None) // Perform one round of an AES decryption flow
+INST3(aesdeclast, "aesdeclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDF), INS_FLAGS_None) // Perform last round of an AES decryption flow
+INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDC), INS_FLAGS_None) // Perform one round of an AES encryption flow
+INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_FLAGS_None) // Perform last round of an AES encryption flow
+INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_FLAGS_None) // Perform the AES InvMixColumn Transformation
+INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_FLAGS_None) // AES Round Key Generation Assist
+INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+
+INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
// AVX only instructions
-INST3( vbroadcastss, "broadcastss" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x18)) // Broadcast float value read from memory to entire ymm register
-INST3( vbroadcastsd, "broadcastsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x19)) // Broadcast float value read from memory to entire ymm register
-INST3( vpbroadcastb, "pbroadcastb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x78)) // Broadcast int8 value from reg/memory to entire ymm register
-INST3( vpbroadcastw, "pbroadcastw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x79)) // Broadcast int16 value from reg/memory to entire ymm register
-INST3( vpbroadcastd, "pbroadcastd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x58)) // Broadcast int32 value from reg/memory to entire ymm register
-INST3( vpbroadcastq, "pbroadcastq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x59)) // Broadcast int64 value from reg/memory to entire ymm register
-INST3( vextractf128, "extractf128" , 0, IUM_WR, 0, 0, SSE3A(0x19), BAD_CODE, BAD_CODE) // Extract 128-bit packed floating point values
-INST3( vextracti128, "extracti128" , 0, IUM_WR, 0, 0, SSE3A(0x39), BAD_CODE, BAD_CODE) // Extract 128-bit packed integer values
-INST3( vinsertf128, "insertf128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x18)) // Insert 128-bit packed floating point values
-INST3( vinserti128, "inserti128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x38)) // Insert 128-bit packed integer values
-INST3( vzeroupper, "zeroupper" , 0, IUM_WR, 0, 0, 0xC577F8, BAD_CODE, BAD_CODE) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix)
-INST3( vperm2i128, "perm2i128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x46)) // Permute 128-bit halves of input register
-INST3( vpermq, "permq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x00)) // Permute 64-bit of input register
-INST3( vpblendd, "pblendd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x02)) // Blend Packed DWORDs
-INST3( vblendvps, "blendvps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x4A)) // Variable Blend Packed Singles
-INST3( vblendvpd, "blendvpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x4B)) // Variable Blend Packed Doubles
-INST3( vpblendvb, "pblendvb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x4C)) // Variable Blend Packed Bytes
-INST3( vtestps, "testps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0E)) // Packed Bit Test
-INST3( vtestpd, "testpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0F)) // Packed Bit Test
-INST3( vpsrlvd, "psrlvd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x45)) // Variable Bit Shift Right Logical
-INST3( vpsrlvq, "psrlvq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x45)) // Variable Bit Shift Right Logical
-INST3( vpsravd, "psravd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x46)) // Variable Bit Shift Right Arithmetic
-INST3( vpsllvd, "psllvd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x47)) // Variable Bit Shift Left Logical
-INST3( vpsllvq, "psllvq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x47)) // Variable Bit Shift Left Logical
-INST3( vpermilps, "permilps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x04)) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
-INST3( vpermilpd, "permilpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x05)) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
-INST3( vpermilpsvar, "permilpsvar" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0C)) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
-INST3( vpermilpdvar, "permilpdvar" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0D)) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
-INST3( vperm2f128, "perm2f128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x06)) // Permute Floating-Point Values
-INST3(vbroadcastf128,"broadcastf128",0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1A)) // Broadcast packed float values read from memory to entire ymm register
-INST3(vbroadcasti128,"broadcasti128",0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x5A)) // Broadcast packed integer values read from memory to entire ymm register
-INST3(vmaskmovps, "maskmovps" ,0, IUM_WR, 0, 0, SSE38(0x2E), BAD_CODE, SSE38(0x2C)) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores
-INST3(vmaskmovpd, "maskmovpd" ,0, IUM_WR, 0, 0, SSE38(0x2F), BAD_CODE, SSE38(0x2D)) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores
-INST3(vpmaskmovd, "pmaskmovd" ,0, IUM_WR, 0, 0, SSE38(0x8E), BAD_CODE, SSE38(0x8C)) // Conditional SIMD Integer Packed Dword Loads and Stores
-INST3(vpmaskmovq, "pmaskmovq" ,0, IUM_WR, 0, 0, SSE38(0x8E), BAD_CODE, SSE38(0x8C)) // Conditional SIMD Integer Packed Qword Loads and Stores
-
-INST3(FIRST_FMA_INSTRUCTION, "FIRST_FMA_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
-// enum name FP updmode rf wf MR MI RM
-INST3(vfmadd132pd, "fmadd132pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x98)) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values
-INST3(vfmadd213pd, "fmadd213pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xA8)) //
-INST3(vfmadd231pd, "fmadd231pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xB8)) //
-INST3(vfmadd132ps, "fmadd132ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x98)) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values
-INST3(vfmadd213ps, "fmadd213ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xA8)) //
-INST3(vfmadd231ps, "fmadd231ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xB8)) //
-INST3(vfmadd132sd, "fmadd132sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x99)) // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values
-INST3(vfmadd213sd, "fmadd213sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xA9)) //
-INST3(vfmadd231sd, "fmadd231sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xB9)) //
-INST3(vfmadd132ss, "fmadd132ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x99)) // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values
-INST3(vfmadd213ss, "fmadd213ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xA9)) //
-INST3(vfmadd231ss, "fmadd231ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xB9)) //
-INST3(vfmaddsub132pd, "fmaddsub132pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x96)) // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values
-INST3(vfmaddsub213pd, "fmaddsub213pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xA6)) //
-INST3(vfmaddsub231pd, "fmaddsub231pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xB6)) //
-INST3(vfmaddsub132ps, "fmaddsub132ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x96)) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values
-INST3(vfmaddsub213ps, "fmaddsub213ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xA6)) //
-INST3(vfmaddsub231ps, "fmaddsub231ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xB6)) //
-INST3(vfmsubadd132pd, "fmsubadd132pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x97)) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values
-INST3(vfmsubadd213pd, "fmsubadd213pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xA7)) //
-INST3(vfmsubadd231pd, "fmsubadd231pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xB7)) //
-INST3(vfmsubadd132ps, "fmsubadd132ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x97)) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values
-INST3(vfmsubadd213ps, "fmsubadd213ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xA7)) //
-INST3(vfmsubadd231ps, "fmsubadd231ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xB7)) //
-INST3(vfmsub132pd, "fmsub132pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x9A)) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values
-INST3(vfmsub213pd, "fmsub213pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xAA)) //
-INST3(vfmsub231pd, "fmsub231pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xBA)) //
-INST3(vfmsub132ps, "fmsub132ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x9A)) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values
-INST3(vfmsub213ps, "fmsub213ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xAA)) //
-INST3(vfmsub231ps, "fmsub231ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xBA)) //
-INST3(vfmsub132sd, "fmsub132sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x9B)) // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values
-INST3(vfmsub213sd, "fmsub213sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xAB)) //
-INST3(vfmsub231sd, "fmsub231sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xBB)) //
-INST3(vfmsub132ss, "fmsub132ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x9B)) // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values
-INST3(vfmsub213ss, "fmsub213ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xAB)) //
-INST3(vfmsub231ss, "fmsub231ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xBB)) //
-INST3(vfnmadd132pd, "fmnadd132pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x9C)) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values
-INST3(vfnmadd213pd, "fmnadd213pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xAC)) //
-INST3(vfnmadd231pd, "fmnadd231pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xBC)) //
-INST3(vfnmadd132ps, "fmnadd132ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x9C)) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values
-INST3(vfnmadd213ps, "fmnadd213ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xAC)) //
-INST3(vfnmadd231ps, "fmnadd231ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xBC)) //
-INST3(vfnmadd132sd, "fmnadd132sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x9D)) // Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values
-INST3(vfnmadd213sd, "fmnadd213sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xAD)) //
-INST3(vfnmadd231sd, "fmnadd231sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xBD)) //
-INST3(vfnmadd132ss, "fmnadd132ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x9D)) // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values
-INST3(vfnmadd213ss, "fmnadd213ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xAD)) //
-INST3(vfnmadd231ss, "fmnadd231ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xBD)) //
-INST3(vfnmsub132pd, "fmnsub132pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x9E)) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values
-INST3(vfnmsub213pd, "fmnsub213pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xAE)) //
-INST3(vfnmsub231pd, "fmnsub231pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xBE)) //
-INST3(vfnmsub132ps, "fmnsub132ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x9E)) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values
-INST3(vfnmsub213ps, "fmnsub213ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xAE)) //
-INST3(vfnmsub231ps, "fmnsub231ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xBE)) //
-INST3(vfnmsub132sd, "fmnsub132sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x9F)) // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values
-INST3(vfnmsub213sd, "fmnsub213sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xAF)) //
-INST3(vfnmsub231sd, "fmnsub231sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xBF)) //
-INST3(vfnmsub132ss, "fmnsub132ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x9F)) // Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values
-INST3(vfnmsub213ss, "fmnsub213ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xAF)) //
-INST3(vfnmsub231ss, "fmnsub231ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xBF)) //
-INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+INST3(vbroadcastss, "broadcastss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x18), INS_FLAGS_None) // Broadcast float value read from memory to entire ymm register
+INST3(vbroadcastsd, "broadcastsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), INS_FLAGS_None) // Broadcast double value read from memory to entire ymm register
+INST3(vpbroadcastb, "pbroadcastb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x78), INS_FLAGS_None) // Broadcast int8 value from reg/memory to entire ymm register
+INST3(vpbroadcastw, "pbroadcastw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x79), INS_FLAGS_None) // Broadcast int16 value from reg/memory to entire ymm register
+INST3(vpbroadcastd, "pbroadcastd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x58), INS_FLAGS_None) // Broadcast int32 value from reg/memory to entire ymm register
+INST3(vpbroadcastq, "pbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), INS_FLAGS_None) // Broadcast int64 value from reg/memory to entire ymm register
+INST3(vextractf128, "extractf128", IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract 128-bit packed floating point values
+INST3(vextracti128, "extracti128", IUM_WR, SSE3A(0x39), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract 128-bit packed integer values
+INST3(vinsertf128, "insertf128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), INS_FLAGS_None) // Insert 128-bit packed floating point values
+INST3(vinserti128, "inserti128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), INS_FLAGS_None) // Insert 128-bit packed integer values
+INST3(vzeroupper, "zeroupper", IUM_WR, 0xC577F8, BAD_CODE, BAD_CODE, INS_FLAGS_None) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix)
+INST3(vperm2i128, "perm2i128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x46), INS_FLAGS_None) // Permute 128-bit halves of input register
+INST3(vpermq, "permq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x00), INS_FLAGS_None) // Permute 64-bit elements of input register
+INST3(vpblendd, "pblendd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x02), INS_FLAGS_None) // Blend Packed DWORDs
+INST3(vblendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4A), INS_FLAGS_None) // Variable Blend Packed Singles
+INST3(vblendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4B), INS_FLAGS_None) // Variable Blend Packed Doubles
+INST3(vpblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4C), INS_FLAGS_None) // Variable Blend Packed Bytes
+INST3(vtestps, "testps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0E), INS_FLAGS_None) // Packed Bit Test
+INST3(vtestpd, "testpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0F), INS_FLAGS_None) // Packed Bit Test
+INST3(vpsrlvd, "psrlvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_FLAGS_None) // Variable Bit Shift Right Logical
+INST3(vpsrlvq, "psrlvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_FLAGS_None) // Variable Bit Shift Right Logical
+INST3(vpsravd, "psravd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), INS_FLAGS_None) // Variable Bit Shift Right Arithmetic
+INST3(vpsllvd, "psllvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_FLAGS_None) // Variable Bit Shift Left Logical
+INST3(vpsllvq, "psllvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_FLAGS_None) // Variable Bit Shift Left Logical
+INST3(vpermilps, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x04), INS_FLAGS_None) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
+INST3(vpermilpd, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x05), INS_FLAGS_None) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
+INST3(vpermilpsvar, "permilpsvar", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0C), INS_FLAGS_None) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
+INST3(vpermilpdvar, "permilpdvar", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), INS_FLAGS_None) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
+INST3(vperm2f128, "perm2f128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x06), INS_FLAGS_None) // Permute Floating-Point Values
+INST3(vbroadcastf128, "broadcastf128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), INS_FLAGS_None) // Broadcast packed float values read from memory to entire ymm register
+INST3(vbroadcasti128, "broadcasti128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), INS_FLAGS_None) // Broadcast packed integer values read from memory to entire ymm register
+INST3(vmaskmovps, "maskmovps", IUM_WR, SSE38(0x2E), BAD_CODE, SSE38(0x2C), INS_FLAGS_None) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores
+INST3(vmaskmovpd, "maskmovpd", IUM_WR, SSE38(0x2F), BAD_CODE, SSE38(0x2D), INS_FLAGS_None) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores
+INST3(vpmaskmovd, "pmaskmovd", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_FLAGS_None) // Conditional SIMD Integer Packed Dword Loads and Stores
+INST3(vpmaskmovq, "pmaskmovq", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_FLAGS_None) // Conditional SIMD Integer Packed Qword Loads and Stores
+
+INST3(FIRST_FMA_INSTRUCTION, "FIRST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+// id nm um mr mi rm flags
+INST3(vfmadd132pd, "fmadd132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x98), INS_FLAGS_None) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values
+INST3(vfmadd213pd, "fmadd213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_FLAGS_None) //
+INST3(vfmadd231pd, "fmadd231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_FLAGS_None) //
+INST3(vfmadd132ps, "fmadd132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x98), INS_FLAGS_None) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values
+INST3(vfmadd213ps, "fmadd213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_FLAGS_None) //
+INST3(vfmadd231ps, "fmadd231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_FLAGS_None) //
+INST3(vfmadd132sd, "fmadd132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x99), INS_FLAGS_None) // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values
+INST3(vfmadd213sd, "fmadd213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_FLAGS_None) //
+INST3(vfmadd231sd, "fmadd231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_FLAGS_None) //
+INST3(vfmadd132ss, "fmadd132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x99), INS_FLAGS_None) // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values
+INST3(vfmadd213ss, "fmadd213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_FLAGS_None) //
+INST3(vfmadd231ss, "fmadd231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_FLAGS_None) //
+INST3(vfmaddsub132pd, "fmaddsub132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x96), INS_FLAGS_None) // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values
+INST3(vfmaddsub213pd, "fmaddsub213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_FLAGS_None) //
+INST3(vfmaddsub231pd, "fmaddsub231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_FLAGS_None) //
+INST3(vfmaddsub132ps, "fmaddsub132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x96), INS_FLAGS_None) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values
+INST3(vfmaddsub213ps, "fmaddsub213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_FLAGS_None) //
+INST3(vfmaddsub231ps, "fmaddsub231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_FLAGS_None) //
+INST3(vfmsubadd132pd, "fmsubadd132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x97), INS_FLAGS_None) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values
+INST3(vfmsubadd213pd, "fmsubadd213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_FLAGS_None) //
+INST3(vfmsubadd231pd, "fmsubadd231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_FLAGS_None) //
+INST3(vfmsubadd132ps, "fmsubadd132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x97), INS_FLAGS_None) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values
+INST3(vfmsubadd213ps, "fmsubadd213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_FLAGS_None) //
+INST3(vfmsubadd231ps, "fmsubadd231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_FLAGS_None) //
+INST3(vfmsub132pd, "fmsub132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_FLAGS_None) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values
+INST3(vfmsub213pd, "fmsub213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_FLAGS_None) //
+INST3(vfmsub231pd, "fmsub231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_FLAGS_None) //
+INST3(vfmsub132ps, "fmsub132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_FLAGS_None) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values
+INST3(vfmsub213ps, "fmsub213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_FLAGS_None) //
+INST3(vfmsub231ps, "fmsub231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_FLAGS_None) //
+INST3(vfmsub132sd, "fmsub132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_FLAGS_None) // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values
+INST3(vfmsub213sd, "fmsub213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_FLAGS_None) //
+INST3(vfmsub231sd, "fmsub231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_FLAGS_None) //
+INST3(vfmsub132ss, "fmsub132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_FLAGS_None) // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values
+INST3(vfmsub213ss, "fmsub213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_FLAGS_None) //
+INST3(vfmsub231ss, "fmsub231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_FLAGS_None) //
+INST3(vfnmadd132pd, "fnmadd132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_FLAGS_None) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values
+INST3(vfnmadd213pd, "fnmadd213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_FLAGS_None) //
+INST3(vfnmadd231pd, "fnmadd231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_FLAGS_None) //
+INST3(vfnmadd132ps, "fnmadd132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_FLAGS_None) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values
+INST3(vfnmadd213ps, "fnmadd213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_FLAGS_None) //
+INST3(vfnmadd231ps, "fnmadd231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_FLAGS_None) //
+INST3(vfnmadd132sd, "fnmadd132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_FLAGS_None) // Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values
+INST3(vfnmadd213sd, "fnmadd213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_FLAGS_None) //
+INST3(vfnmadd231sd, "fnmadd231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_FLAGS_None) //
+INST3(vfnmadd132ss, "fnmadd132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_FLAGS_None) // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values
+INST3(vfnmadd213ss, "fnmadd213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_FLAGS_None) //
+INST3(vfnmadd231ss, "fnmadd231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_FLAGS_None) //
+INST3(vfnmsub132pd, "fnmsub132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_FLAGS_None) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values
+INST3(vfnmsub213pd, "fnmsub213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_FLAGS_None) //
+INST3(vfnmsub231pd, "fnmsub231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_FLAGS_None) //
+INST3(vfnmsub132ps, "fnmsub132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_FLAGS_None) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values
+INST3(vfnmsub213ps, "fnmsub213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_FLAGS_None) //
+INST3(vfnmsub231ps, "fnmsub231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_FLAGS_None) //
+INST3(vfnmsub132sd, "fnmsub132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_FLAGS_None) // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values
+INST3(vfnmsub213sd, "fnmsub213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_FLAGS_None) //
+INST3(vfnmsub231sd, "fnmsub231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_FLAGS_None) //
+INST3(vfnmsub132ss, "fnmsub132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_FLAGS_None) // Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values
+INST3(vfnmsub213ss, "fnmsub213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_FLAGS_None) //
+INST3(vfnmsub231ss, "fnmsub231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_FLAGS_None) //
+INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
// BMI1
-INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
-INST3(andn, "andn", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xF2)) // Logical AND NOT
-INST3(blsi, "blsi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xF3)) // Extract Lowest Set Isolated Bit
-INST3(blsmsk, "blsmsk", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xF3)) // Get Mask Up to Lowest Set Bit
-INST3(blsr, "blsr", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xF3)) // Reset Lowest Set Bit
+INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_FLAGS_None) // Logical AND NOT
+INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_FLAGS_None) // Extract Lowest Set Isolated Bit
+INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_FLAGS_None) // Get Mask Up to Lowest Set Bit
+INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_FLAGS_None) // Reset Lowest Set Bit
// BMI2
-INST3(pdep, "pdep", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xF5)) // Parallel Bits Deposit
-INST3(pext, "pext", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xF5)) // Parallel Bits Extract
-INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_FLAGS_None) // Parallel Bits Deposit
+INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_FLAGS_None) // Parallel Bits Extract
+INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
-INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
// Scalar instructions in SSE4.2
-INST3( crc32, "crc32" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PACK4(0xF2, 0x0F, 0x38, 0xF0))
+INST3(crc32, "crc32", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF2, 0x0F, 0x38, 0xF0), INS_FLAGS_None)
// BMI1
-INST3( tzcnt, "tzcnt" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0xBC)) // Count the Number of Trailing Zero Bits
+INST3(tzcnt, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBC), INS_FLAGS_None) // Count the Number of Trailing Zero Bits
// LZCNT
-INST3( lzcnt, "lzcnt" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0xBD))
+INST3(lzcnt, "lzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBD), INS_FLAGS_None)
// POPCNT
-INST3( popcnt, "popcnt" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0xB8))
-
-// enum name FP updmode rf wf R/M,R/M[reg] R/M,icon
-
-INST2(ret , "ret" , 0, IUM_RD, 0, 0, 0x0000C3, 0x0000C2)
-INST2(loop , "loop" , 0, IUM_RD, 0, 0, BAD_CODE, 0x0000E2)
-INST2(call , "call" , 0, IUM_RD, 0, 1, 0x0010FF, 0x0000E8)
-
-INST2(rol , "rol" , 0, IUM_RW, 0, 1, 0x0000D2, BAD_CODE)
-INST2(rol_1 , "rol" , 0, IUM_RW, 0, 1, 0x0000D0, 0x0000D0)
-INST2(rol_N , "rol" , 0, IUM_RW, 0, 1, 0x0000C0, 0x0000C0)
-INST2(ror , "ror" , 0, IUM_RW, 0, 1, 0x0008D2, BAD_CODE)
-INST2(ror_1 , "ror" , 0, IUM_RW, 0, 1, 0x0008D0, 0x0008D0)
-INST2(ror_N , "ror" , 0, IUM_RW, 0, 1, 0x0008C0, 0x0008C0)
-
-INST2(rcl , "rcl" , 0, IUM_RW, 1, 1, 0x0010D2, BAD_CODE)
-INST2(rcl_1 , "rcl" , 0, IUM_RW, 1, 1, 0x0010D0, 0x0010D0)
-INST2(rcl_N , "rcl" , 0, IUM_RW, 1, 1, 0x0010C0, 0x0010C0)
-INST2(rcr , "rcr" , 0, IUM_RW, 1, 1, 0x0018D2, BAD_CODE)
-INST2(rcr_1 , "rcr" , 0, IUM_RW, 1, 1, 0x0018D0, 0x0018D0)
-INST2(rcr_N , "rcr" , 0, IUM_RW, 1, 1, 0x0018C0, 0x0018C0)
-INST2(shl , "shl" , 0, IUM_RW, 0, 1, 0x0020D2, BAD_CODE)
-INST2(shl_1 , "shl" , 0, IUM_RW, 0, 1, 0x0020D0, 0x0020D0)
-INST2(shl_N , "shl" , 0, IUM_RW, 0, 1, 0x0020C0, 0x0020C0)
-INST2(shr , "shr" , 0, IUM_RW, 0, 1, 0x0028D2, BAD_CODE)
-INST2(shr_1 , "shr" , 0, IUM_RW, 0, 1, 0x0028D0, 0x0028D0)
-INST2(shr_N , "shr" , 0, IUM_RW, 0, 1, 0x0028C0, 0x0028C0)
-INST2(sar , "sar" , 0, IUM_RW, 0, 1, 0x0038D2, BAD_CODE)
-INST2(sar_1 , "sar" , 0, IUM_RW, 0, 1, 0x0038D0, 0x0038D0)
-INST2(sar_N , "sar" , 0, IUM_RW, 0, 1, 0x0038C0, 0x0038C0)
-
-
-// enum name FP updmode rf wf R/M,R/M[reg]
-
-INST1(r_movsb, "rep movsb" , 0, IUM_RD, 0, 0, 0x00A4F3)
-INST1(r_movsd, "rep movsd" , 0, IUM_RD, 0, 0, 0x00A5F3)
+INST3(popcnt, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xB8), INS_FLAGS_None)
+
+// id nm um mr mi flags
+INST2(ret, "ret", IUM_RD, 0x0000C3, 0x0000C2, INS_FLAGS_None)
+INST2(loop, "loop", IUM_RD, BAD_CODE, 0x0000E2, INS_FLAGS_None)
+INST2(call, "call", IUM_RD, 0x0010FF, 0x0000E8, INS_FLAGS_WritesFlags)
+
+INST2(rol, "rol", IUM_RW, 0x0000D2, BAD_CODE, INS_FLAGS_WritesFlags)
+INST2(rol_1, "rol", IUM_RW, 0x0000D0, 0x0000D0, INS_FLAGS_WritesFlags)
+INST2(rol_N, "rol", IUM_RW, 0x0000C0, 0x0000C0, INS_FLAGS_WritesFlags)
+INST2(ror, "ror", IUM_RW, 0x0008D2, BAD_CODE, INS_FLAGS_WritesFlags)
+INST2(ror_1, "ror", IUM_RW, 0x0008D0, 0x0008D0, INS_FLAGS_WritesFlags)
+INST2(ror_N, "ror", IUM_RW, 0x0008C0, 0x0008C0, INS_FLAGS_WritesFlags)
+
+INST2(rcl, "rcl", IUM_RW, 0x0010D2, BAD_CODE, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
+INST2(rcl_1, "rcl", IUM_RW, 0x0010D0, 0x0010D0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
+INST2(rcl_N, "rcl", IUM_RW, 0x0010C0, 0x0010C0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
+INST2(rcr, "rcr", IUM_RW, 0x0018D2, BAD_CODE, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
+INST2(rcr_1, "rcr", IUM_RW, 0x0018D0, 0x0018D0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
+INST2(rcr_N, "rcr", IUM_RW, 0x0018C0, 0x0018C0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
+INST2(shl, "shl", IUM_RW, 0x0020D2, BAD_CODE, INS_FLAGS_WritesFlags)
+INST2(shl_1, "shl", IUM_RW, 0x0020D0, 0x0020D0, INS_FLAGS_WritesFlags)
+INST2(shl_N, "shl", IUM_RW, 0x0020C0, 0x0020C0, INS_FLAGS_WritesFlags)
+INST2(shr, "shr", IUM_RW, 0x0028D2, BAD_CODE, INS_FLAGS_WritesFlags)
+INST2(shr_1, "shr", IUM_RW, 0x0028D0, 0x0028D0, INS_FLAGS_WritesFlags)
+INST2(shr_N, "shr", IUM_RW, 0x0028C0, 0x0028C0, INS_FLAGS_WritesFlags)
+INST2(sar, "sar", IUM_RW, 0x0038D2, BAD_CODE, INS_FLAGS_WritesFlags)
+INST2(sar_1, "sar", IUM_RW, 0x0038D0, 0x0038D0, INS_FLAGS_WritesFlags)
+INST2(sar_N, "sar", IUM_RW, 0x0038C0, 0x0038C0, INS_FLAGS_WritesFlags)
+
+
+// id nm um mr flags
+INST1(r_movsb, "rep movsb", IUM_RD, 0x00A4F3, INS_FLAGS_None)
+INST1(r_movsd, "rep movsd", IUM_RD, 0x00A5F3, INS_FLAGS_None)
#if defined(_TARGET_AMD64_)
-INST1(r_movsq, "rep movsq" , 0, IUM_RD, 0, 0, 0xF3A548)
+INST1(r_movsq, "rep movsq", IUM_RD, 0xF3A548, INS_FLAGS_None)
#endif // defined(_TARGET_AMD64_)
-INST1(movsb , "movsb" , 0, IUM_RD, 0, 0, 0x0000A4)
-INST1(movsd , "movsd" , 0, IUM_RD, 0, 0, 0x0000A5)
+INST1(movsb, "movsb", IUM_RD, 0x0000A4, INS_FLAGS_None)
+INST1(movsd, "movsd", IUM_RD, 0x0000A5, INS_FLAGS_None)
#if defined(_TARGET_AMD64_)
-INST1(movsq, "movsq" , 0, IUM_RD, 0, 0, 0x00A548)
+INST1(movsq, "movsq", IUM_RD, 0x00A548, INS_FLAGS_None)
#endif // defined(_TARGET_AMD64_)
-INST1(r_stosb, "rep stosb" , 0, IUM_RD, 0, 0, 0x00AAF3)
-INST1(r_stosd, "rep stosd" , 0, IUM_RD, 0, 0, 0x00ABF3)
+INST1(r_stosb, "rep stosb", IUM_RD, 0x00AAF3, INS_FLAGS_None)
+INST1(r_stosd, "rep stosd", IUM_RD, 0x00ABF3, INS_FLAGS_None)
#if defined(_TARGET_AMD64_)
-INST1(r_stosq, "rep stosq" , 0, IUM_RD, 0, 0, 0xF3AB48)
+INST1(r_stosq, "rep stosq", IUM_RD, 0xF3AB48, INS_FLAGS_None)
#endif // defined(_TARGET_AMD64_)
-INST1(stosb, "stosb" , 0, IUM_RD, 0, 0, 0x0000AA)
-INST1(stosd, "stosd" , 0, IUM_RD, 0, 0, 0x0000AB)
+INST1(stosb, "stosb", IUM_RD, 0x0000AA, INS_FLAGS_None)
+INST1(stosd, "stosd", IUM_RD, 0x0000AB, INS_FLAGS_None)
#if defined(_TARGET_AMD64_)
-INST1(stosq, "stosq" , 0, IUM_RD, 0, 0, 0x00AB48)
+INST1(stosq, "stosq", IUM_RD, 0x00AB48, INS_FLAGS_None)
#endif // defined(_TARGET_AMD64_)
-INST1(int3 , "int3" , 0, IUM_RD, 0, 0, 0x0000CC)
-INST1(nop , "nop" , 0, IUM_RD, 0, 0, 0x000090)
-INST1(lock , "lock" , 0, IUM_RD, 0, 0, 0x0000F0)
-INST1(leave , "leave" , 0, IUM_RD, 0, 0, 0x0000C9)
+INST1(int3, "int3", IUM_RD, 0x0000CC, INS_FLAGS_None)
+INST1(nop, "nop", IUM_RD, 0x000090, INS_FLAGS_None)
+INST1(lock, "lock", IUM_RD, 0x0000F0, INS_FLAGS_None)
+INST1(leave, "leave", IUM_RD, 0x0000C9, INS_FLAGS_None)
-INST1(neg , "neg" , 0, IUM_RW, 0, 1, 0x0018F6)
-INST1(not , "not" , 0, IUM_RW, 0, 1, 0x0010F6)
+INST1(neg, "neg", IUM_RW, 0x0018F6, INS_FLAGS_WritesFlags)
+INST1(not, "not", IUM_RW, 0x0010F6, INS_FLAGS_WritesFlags)
-INST1(cdq , "cdq" , 0, IUM_RD, 0, 1, 0x000099)
-INST1(idiv , "idiv" , 0, IUM_RD, 0, 1, 0x0038F6)
-INST1(imulEAX, "imul" , 0, IUM_RD, 0, 1, 0x0028F6) // edx:eax = eax*op1
-INST1(div , "div" , 0, IUM_RD, 0, 1, 0x0030F6)
-INST1(mulEAX , "mul" , 0, IUM_RD, 0, 1, 0x0020F6)
+INST1(cdq, "cdq", IUM_RD, 0x000099, INS_FLAGS_WritesFlags)
+INST1(idiv, "idiv", IUM_RD, 0x0038F6, INS_FLAGS_WritesFlags)
+INST1(imulEAX, "imul", IUM_RD, 0x0028F6, INS_FLAGS_WritesFlags) // edx:eax = eax*op1
+INST1(div, "div", IUM_RD, 0x0030F6, INS_FLAGS_WritesFlags)
+INST1(mulEAX, "mul", IUM_RD, 0x0020F6, INS_FLAGS_WritesFlags)
-INST1(sahf , "sahf" , 0, IUM_RD, 0, 1, 0x00009E)
+INST1(sahf, "sahf", IUM_RD, 0x00009E, INS_FLAGS_WritesFlags)
-INST1(xadd , "xadd" , 0, IUM_RW, 0, 1, 0x0F00C0)
-INST1(cmpxchg, "cmpxchg" , 0, IUM_RW, 0, 1, 0x0F00B0)
+INST1(xadd, "xadd", IUM_RW, 0x0F00C0, INS_FLAGS_WritesFlags)
+INST1(cmpxchg, "cmpxchg", IUM_RW, 0x0F00B0, INS_FLAGS_WritesFlags)
-INST1(shld , "shld" , 0, IUM_RW, 0, 1, 0x0F00A4)
-INST1(shrd , "shrd" , 0, IUM_RW, 0, 1, 0x0F00AC)
+INST1(shld, "shld", IUM_RW, 0x0F00A4, INS_FLAGS_WritesFlags)
+INST1(shrd, "shrd", IUM_RW, 0x0F00AC, INS_FLAGS_WritesFlags)
// For RyuJIT/x86, we follow the x86 calling convention that requires
// us to return floating point value on the x87 FP stack, so we need
// these instructions regardless of whether we're using full stack fp.
#ifdef _TARGET_X86_
-INST1(fld , "fld" , 1, IUM_WR, 0, 0, 0x0000D9)
-INST1(fstp , "fstp" , 1, IUM_WR, 0, 0, 0x0018D9)
+INST1(fld, "fld", IUM_WR, 0x0000D9, INS_FLAGS_x87Instr)
+INST1(fstp, "fstp", IUM_WR, 0x0018D9, INS_FLAGS_x87Instr)
#endif // _TARGET_X86
-INST1(seto , "seto" , 0, IUM_WR, 1, 0, 0x0F0090)
-INST1(setno , "setno" , 0, IUM_WR, 1, 0, 0x0F0091)
-INST1(setb , "setb" , 0, IUM_WR, 1, 0, 0x0F0092)
-INST1(setae , "setae" , 0, IUM_WR, 1, 0, 0x0F0093)
-INST1(sete , "sete" , 0, IUM_WR, 1, 0, 0x0F0094)
-INST1(setne , "setne" , 0, IUM_WR, 1, 0, 0x0F0095)
-INST1(setbe , "setbe" , 0, IUM_WR, 1, 0, 0x0F0096)
-INST1(seta , "seta" , 0, IUM_WR, 1, 0, 0x0F0097)
-INST1(sets , "sets" , 0, IUM_WR, 1, 0, 0x0F0098)
-INST1(setns , "setns" , 0, IUM_WR, 1, 0, 0x0F0099)
-INST1(setpe , "setpe" , 0, IUM_WR, 1, 0, 0x0F009A)
-INST1(setpo , "setpo" , 0, IUM_WR, 1, 0, 0x0F009B)
-INST1(setl , "setl" , 0, IUM_WR, 1, 0, 0x0F009C)
-INST1(setge , "setge" , 0, IUM_WR, 1, 0, 0x0F009D)
-INST1(setle , "setle" , 0, IUM_WR, 1, 0, 0x0F009E)
-INST1(setg , "setg" , 0, IUM_WR, 1, 0, 0x0F009F)
+INST1(seto, "seto", IUM_WR, 0x0F0090, INS_FLAGS_ReadsFlags)
+INST1(setno, "setno", IUM_WR, 0x0F0091, INS_FLAGS_ReadsFlags)
+INST1(setb, "setb", IUM_WR, 0x0F0092, INS_FLAGS_ReadsFlags)
+INST1(setae, "setae", IUM_WR, 0x0F0093, INS_FLAGS_ReadsFlags)
+INST1(sete, "sete", IUM_WR, 0x0F0094, INS_FLAGS_ReadsFlags)
+INST1(setne, "setne", IUM_WR, 0x0F0095, INS_FLAGS_ReadsFlags)
+INST1(setbe, "setbe", IUM_WR, 0x0F0096, INS_FLAGS_ReadsFlags)
+INST1(seta, "seta", IUM_WR, 0x0F0097, INS_FLAGS_ReadsFlags)
+INST1(sets, "sets", IUM_WR, 0x0F0098, INS_FLAGS_ReadsFlags)
+INST1(setns, "setns", IUM_WR, 0x0F0099, INS_FLAGS_ReadsFlags)
+INST1(setpe, "setpe", IUM_WR, 0x0F009A, INS_FLAGS_ReadsFlags)
+INST1(setpo, "setpo", IUM_WR, 0x0F009B, INS_FLAGS_ReadsFlags)
+INST1(setl, "setl", IUM_WR, 0x0F009C, INS_FLAGS_ReadsFlags)
+INST1(setge, "setge", IUM_WR, 0x0F009D, INS_FLAGS_ReadsFlags)
+INST1(setle, "setle", IUM_WR, 0x0F009E, INS_FLAGS_ReadsFlags)
+INST1(setg, "setg", IUM_WR, 0x0F009F, INS_FLAGS_ReadsFlags)
#ifdef _TARGET_AMD64_
// A jump with rex prefix. This is used for register indirect
// tail calls.
-INST1(rex_jmp, "rex.jmp" , 0, IUM_RD, 0, 0, 0x0020FE)
+INST1(rex_jmp, "rex.jmp", IUM_RD, 0x0020FE, INS_FLAGS_None)
#endif
-INST1(i_jmp , "jmp" , 0, IUM_RD, 0, 0, 0x0020FE)
-
-INST0(jmp , "jmp" , 0, IUM_RD, 0, 0, 0x0000EB)
-INST0(jo , "jo" , 0, IUM_RD, 1, 0, 0x000070)
-INST0(jno , "jno" , 0, IUM_RD, 1, 0, 0x000071)
-INST0(jb , "jb" , 0, IUM_RD, 1, 0, 0x000072)
-INST0(jae , "jae" , 0, IUM_RD, 1, 0, 0x000073)
-INST0(je , "je" , 0, IUM_RD, 1, 0, 0x000074)
-INST0(jne , "jne" , 0, IUM_RD, 1, 0, 0x000075)
-INST0(jbe , "jbe" , 0, IUM_RD, 1, 0, 0x000076)
-INST0(ja , "ja" , 0, IUM_RD, 1, 0, 0x000077)
-INST0(js , "js" , 0, IUM_RD, 1, 0, 0x000078)
-INST0(jns , "jns" , 0, IUM_RD, 1, 0, 0x000079)
-INST0(jpe , "jpe" , 0, IUM_RD, 1, 0, 0x00007A)
-INST0(jpo , "jpo" , 0, IUM_RD, 1, 0, 0x00007B)
-INST0(jl , "jl" , 0, IUM_RD, 1, 0, 0x00007C)
-INST0(jge , "jge" , 0, IUM_RD, 1, 0, 0x00007D)
-INST0(jle , "jle" , 0, IUM_RD, 1, 0, 0x00007E)
-INST0(jg , "jg" , 0, IUM_RD, 1, 0, 0x00007F)
-
-INST0(l_jmp , "jmp" , 0, IUM_RD, 0, 0, 0x0000E9)
-INST0(l_jo , "jo" , 0, IUM_RD, 1, 0, 0x00800F)
-INST0(l_jno , "jno" , 0, IUM_RD, 1, 0, 0x00810F)
-INST0(l_jb , "jb" , 0, IUM_RD, 1, 0, 0x00820F)
-INST0(l_jae , "jae" , 0, IUM_RD, 1, 0, 0x00830F)
-INST0(l_je , "je" , 0, IUM_RD, 1, 0, 0x00840F)
-INST0(l_jne , "jne" , 0, IUM_RD, 1, 0, 0x00850F)
-INST0(l_jbe , "jbe" , 0, IUM_RD, 1, 0, 0x00860F)
-INST0(l_ja , "ja" , 0, IUM_RD, 1, 0, 0x00870F)
-INST0(l_js , "js" , 0, IUM_RD, 1, 0, 0x00880F)
-INST0(l_jns , "jns" , 0, IUM_RD, 1, 0, 0x00890F)
-INST0(l_jpe , "jpe" , 0, IUM_RD, 1, 0, 0x008A0F)
-INST0(l_jpo , "jpo" , 0, IUM_RD, 1, 0, 0x008B0F)
-INST0(l_jl , "jl" , 0, IUM_RD, 1, 0, 0x008C0F)
-INST0(l_jge , "jge" , 0, IUM_RD, 1, 0, 0x008D0F)
-INST0(l_jle , "jle" , 0, IUM_RD, 1, 0, 0x008E0F)
-INST0(l_jg , "jg" , 0, IUM_RD, 1, 0, 0x008F0F)
-
-INST0(align , "align" , 0, IUM_RD, 0, 0, BAD_CODE)
+INST1(i_jmp, "jmp", IUM_RD, 0x0020FE, INS_FLAGS_None)
+
+INST0(jmp, "jmp", IUM_RD, 0x0000EB, INS_FLAGS_None)
+INST0(jo, "jo", IUM_RD, 0x000070, INS_FLAGS_ReadsFlags)
+INST0(jno, "jno", IUM_RD, 0x000071, INS_FLAGS_ReadsFlags)
+INST0(jb, "jb", IUM_RD, 0x000072, INS_FLAGS_ReadsFlags)
+INST0(jae, "jae", IUM_RD, 0x000073, INS_FLAGS_ReadsFlags)
+INST0(je, "je", IUM_RD, 0x000074, INS_FLAGS_ReadsFlags)
+INST0(jne, "jne", IUM_RD, 0x000075, INS_FLAGS_ReadsFlags)
+INST0(jbe, "jbe", IUM_RD, 0x000076, INS_FLAGS_ReadsFlags)
+INST0(ja, "ja", IUM_RD, 0x000077, INS_FLAGS_ReadsFlags)
+INST0(js, "js", IUM_RD, 0x000078, INS_FLAGS_ReadsFlags)
+INST0(jns, "jns", IUM_RD, 0x000079, INS_FLAGS_ReadsFlags)
+INST0(jpe, "jpe", IUM_RD, 0x00007A, INS_FLAGS_ReadsFlags)
+INST0(jpo, "jpo", IUM_RD, 0x00007B, INS_FLAGS_ReadsFlags)
+INST0(jl, "jl", IUM_RD, 0x00007C, INS_FLAGS_ReadsFlags)
+INST0(jge, "jge", IUM_RD, 0x00007D, INS_FLAGS_ReadsFlags)
+INST0(jle, "jle", IUM_RD, 0x00007E, INS_FLAGS_ReadsFlags)
+INST0(jg, "jg", IUM_RD, 0x00007F, INS_FLAGS_ReadsFlags)
+
+INST0(l_jmp, "jmp", IUM_RD, 0x0000E9, INS_FLAGS_None)
+INST0(l_jo, "jo", IUM_RD, 0x00800F, INS_FLAGS_ReadsFlags)
+INST0(l_jno, "jno", IUM_RD, 0x00810F, INS_FLAGS_ReadsFlags)
+INST0(l_jb, "jb", IUM_RD, 0x00820F, INS_FLAGS_ReadsFlags)
+INST0(l_jae, "jae", IUM_RD, 0x00830F, INS_FLAGS_ReadsFlags)
+INST0(l_je, "je", IUM_RD, 0x00840F, INS_FLAGS_ReadsFlags)
+INST0(l_jne, "jne", IUM_RD, 0x00850F, INS_FLAGS_ReadsFlags)
+INST0(l_jbe, "jbe", IUM_RD, 0x00860F, INS_FLAGS_ReadsFlags)
+INST0(l_ja, "ja", IUM_RD, 0x00870F, INS_FLAGS_ReadsFlags)
+INST0(l_js, "js", IUM_RD, 0x00880F, INS_FLAGS_ReadsFlags)
+INST0(l_jns, "jns", IUM_RD, 0x00890F, INS_FLAGS_ReadsFlags)
+INST0(l_jpe, "jpe", IUM_RD, 0x008A0F, INS_FLAGS_ReadsFlags)
+INST0(l_jpo, "jpo", IUM_RD, 0x008B0F, INS_FLAGS_ReadsFlags)
+INST0(l_jl, "jl", IUM_RD, 0x008C0F, INS_FLAGS_ReadsFlags)
+INST0(l_jge, "jge", IUM_RD, 0x008D0F, INS_FLAGS_ReadsFlags)
+INST0(l_jle, "jle", IUM_RD, 0x008E0F, INS_FLAGS_ReadsFlags)
+INST0(l_jg, "jg", IUM_RD, 0x008F0F, INS_FLAGS_ReadsFlags)
+
+INST0(align, "align", IUM_RD, BAD_CODE, INS_FLAGS_None)
/*****************************************************************************/
#undef INST0