diff options
author | Jin Kyu Song <jin.kyu.song@intel.com> | 2013-09-13 14:12:57 -0700 |
---|---|---|
committer | Cyrill Gorcunov <gorcunov@gmail.com> | 2013-09-14 01:27:10 +0400 |
commit | eb595942b2c9421548d110e511d12823f38cffbf (patch) | |
tree | 61c470c56634bcfe31b4519eb9870d20c9c8d157 | |
parent | dd1c0c13c80aa9b034dc3755e2ccc451c63ec6a4 (diff) | |
download | nasm-eb595942b2c9421548d110e511d12823f38cffbf.tar.gz nasm-eb595942b2c9421548d110e511d12823f38cffbf.tar.bz2 nasm-eb595942b2c9421548d110e511d12823f38cffbf.zip |
AVX-512: Added AVX-512PF instructions
Added Prefetch (AVX-512PF) instructions.
These instructions are supported
if CPUID.(EAX=07H, ECX=0):EBX.AVX512PF[bit 26] = 1.
CPUID feature flag for PREFETCHWT1 is TBD
but PREFETCHWT1 is included in this commit.
Signed-off-by: Jin Kyu Song <jin.kyu.song@intel.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
-rw-r--r-- | insns.dat | 18 | ||||
-rw-r--r-- | insns.h | 1 | ||||
-rw-r--r-- | test/avx512pf.asm | 87 |
3 files changed, 106 insertions, 0 deletions
@@ -4074,6 +4074,24 @@ VRSQRT28PD zmmreg|mask|z,zmmrm512|b64|sae [rm:fv: evex.512.66.0f38 VRSQRT28PS zmmreg|mask|z,zmmrm512|b32|sae [rm:fv: evex.512.66.0f38.w0 cc /r ] AVX512ER,FUTURE VRSQRT28SD xmmreg|mask|z,xmmreg,xmmrm64|sae [rvm:t1s: evex.nds.lig.66.0f38.w1 cd /r ] AVX512ER,FUTURE VRSQRT28SS xmmreg|mask|z,xmmreg,xmmrm32|sae [rvm:t1s: evex.nds.lig.66.0f38.w0 cd /r ] AVX512ER,FUTURE +; AVX-512PF (Prefetch) instructions +VGATHERPF0DPD ymem64|mask [m:t1s: vsiby evex.512.66.0f38.w1 c6 /1 ] AVX512PF,FUTURE +VGATHERPF0DPS zmem32|mask [m:t1s: vsibz evex.512.66.0f38.w0 c6 /1 ] AVX512PF,FUTURE +VGATHERPF0QPD zmem64|mask [m:t1s: vsibz evex.512.66.0f38.w1 c7 /1 ] AVX512PF,FUTURE +VGATHERPF0QPS zmem32|mask [m:t1s: vsibz evex.512.66.0f38.w0 c7 /1 ] AVX512PF,FUTURE +VGATHERPF1DPD ymem64|mask [m:t1s: vsiby evex.512.66.0f38.w1 c6 /2 ] AVX512PF,FUTURE +VGATHERPF1DPS zmem32|mask [m:t1s: vsibz evex.512.66.0f38.w0 c6 /2 ] AVX512PF,FUTURE +VGATHERPF1QPD zmem64|mask [m:t1s: vsibz evex.512.66.0f38.w1 c7 /2 ] AVX512PF,FUTURE +VGATHERPF1QPS zmem32|mask [m:t1s: vsibz evex.512.66.0f38.w0 c7 /2 ] AVX512PF,FUTURE +VSCATTERPF0DPD ymem64|mask [m:t1s: vsiby evex.512.66.0f38.w1 c6 /5 ] AVX512PF,FUTURE +VSCATTERPF0DPS zmem32|mask [m:t1s: vsibz evex.512.66.0f38.w0 c6 /5 ] AVX512PF,FUTURE +VSCATTERPF0QPD zmem64|mask [m:t1s: vsibz evex.512.66.0f38.w1 c7 /5 ] AVX512PF,FUTURE +VSCATTERPF0QPS zmem32|mask [m:t1s: vsibz evex.512.66.0f38.w0 c7 /5 ] AVX512PF,FUTURE +VSCATTERPF1DPD ymem64|mask [m:t1s: vsiby evex.512.66.0f38.w1 c6 /6 ] AVX512PF,FUTURE +VSCATTERPF1DPS zmem32|mask [m:t1s: vsibz evex.512.66.0f38.w0 c6 /6 ] AVX512PF,FUTURE +VSCATTERPF1QPD zmem64|mask [m:t1s: vsibz evex.512.66.0f38.w1 c7 /6 ] AVX512PF,FUTURE +VSCATTERPF1QPS zmem32|mask [m:t1s: vsibz evex.512.66.0f38.w0 c7 /6 ] AVX512PF,FUTURE +PREFETCHWT1 mem8 [m: 0f 0d /2 ] FUTURE ;# Systematic names for the hinting nop instructions @@ -130,6 +130,7 @@ extern const uint8_t nasm_bytecodes[]; #define IF_INVPCID 0x1500000000UL /* HACK NEED TO REORGANIZE THESE BITS */ #define IF_AVX512CD (0x1600000000UL|IF_AVX512) /* AVX-512 Conflict Detection insns */ #define IF_AVX512ER (0x1700000000UL|IF_AVX512) /* AVX-512 Exponential and Reciprocal */ +#define IF_AVX512PF (0x1800000000UL|IF_AVX512) /* AVX-512 Prefetch instructions */ #define IF_INSMASK 0xFF00000000UL /* the mask for instruction set types */ #define IF_PMASK 0xFF000000UL /* the mask for processor types */ #define IF_PLEVEL 0x0F000000UL /* the mask for processor instr. level */ diff --git a/test/avx512pf.asm b/test/avx512pf.asm new file mode 100644 index 0000000..5227123 --- /dev/null +++ b/test/avx512pf.asm @@ -0,0 +1,87 @@ +; AVX-512PF testcases from gas +;------------------------ +; +; This file is taken from there +; https://gnu.googlesource.com/binutils/+/master/gas/testsuite/gas/i386/x86-64-avx512pf-intel.d +; So the original author is "H.J. Lu" <hongjiu dot lu at intel dot com> +; +; Jin Kyu Song converted it for the nasm testing suite using gas2nasm.py + +%macro testcase 2 + %ifdef BIN + db %1 + %endif + %ifdef SRC + %2 + %endif +%endmacro + + +bits 64 + +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc6, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf0dpd [r14+ymm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc6, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf0dpd [r14+ymm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc6, 0x4c, 0x39, 0x20 }, { vgatherpf0dpd [r9+ymm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0xfd, 0x41, 0xc6, 0x8c, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vgatherpf0dpd [rcx+ymm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc6, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf0dps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc6, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf0dps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc6, 0x4c, 0x39, 0x40 }, { vgatherpf0dps [r9+zmm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0x7d, 0x41, 0xc6, 0x8c, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vgatherpf0dps [rcx+zmm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc7, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf0qpd [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc7, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf0qpd [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc7, 0x4c, 0x39, 0x20 }, { vgatherpf0qpd [r9+zmm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0xfd, 0x41, 0xc7, 0x8c, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vgatherpf0qpd [rcx+zmm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc7, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf0qps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc7, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf0qps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc7, 0x4c, 0x39, 0x40 }, { vgatherpf0qps [r9+zmm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0x7d, 0x41, 0xc7, 0x8c, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vgatherpf0qps [rcx+zmm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc6, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf1dpd [r14+ymm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc6, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf1dpd [r14+ymm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc6, 0x54, 0x39, 0x20 }, { vgatherpf1dpd [r9+ymm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0xfd, 0x41, 0xc6, 0x94, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vgatherpf1dpd [rcx+ymm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc6, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf1dps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc6, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf1dps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc6, 0x54, 0x39, 0x40 }, { vgatherpf1dps [r9+zmm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0x7d, 0x41, 0xc6, 0x94, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vgatherpf1dps [rcx+zmm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc7, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf1qpd [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc7, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf1qpd [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc7, 0x54, 0x39, 0x20 }, { vgatherpf1qpd [r9+zmm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0xfd, 0x41, 0xc7, 0x94, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vgatherpf1qpd [rcx+zmm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc7, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf1qps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc7, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vgatherpf1qps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc7, 0x54, 0x39, 0x40 }, { vgatherpf1qps [r9+zmm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0x7d, 0x41, 0xc7, 0x94, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vgatherpf1qps [rcx+zmm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc6, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf0dpd [r14+ymm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc6, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf0dpd [r14+ymm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc6, 0x6c, 0x39, 0x20 }, { vscatterpf0dpd [r9+ymm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0xfd, 0x41, 0xc6, 0xac, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vscatterpf0dpd [rcx+ymm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc6, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf0dps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc6, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf0dps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc6, 0x6c, 0x39, 0x40 }, { vscatterpf0dps [r9+zmm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0x7d, 0x41, 0xc6, 0xac, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vscatterpf0dps [rcx+zmm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc7, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf0qpd [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc7, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf0qpd [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc7, 0x6c, 0x39, 0x20 }, { vscatterpf0qpd [r9+zmm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0xfd, 0x41, 0xc7, 0xac, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vscatterpf0qpd [rcx+zmm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc7, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf0qps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc7, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf0qps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc7, 0x6c, 0x39, 0x40 }, { vscatterpf0qps [r9+zmm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0x7d, 0x41, 0xc7, 0xac, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vscatterpf0qps [rcx+zmm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc6, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf1dpd [r14+ymm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc6, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf1dpd [r14+ymm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc6, 0x74, 0x39, 0x20 }, { vscatterpf1dpd [r9+ymm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0xfd, 0x41, 0xc6, 0xb4, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vscatterpf1dpd [rcx+ymm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc6, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf1dps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc6, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf1dps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc6, 0x74, 0x39, 0x40 }, { vscatterpf1dps [r9+zmm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0x7d, 0x41, 0xc6, 0xb4, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vscatterpf1dps [rcx+zmm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc7, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf1qpd [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc7, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf1qpd [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0xfd, 0x41, 0xc7, 0x74, 0x39, 0x20 }, { vscatterpf1qpd [r9+zmm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0xfd, 0x41, 0xc7, 0xb4, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vscatterpf1qpd [rcx+zmm31*4+0x400]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc7, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf1qps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc7, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00 }, { vscatterpf1qps [r14+zmm31*8+0x7b]\{k1\} } +testcase { 0x62, 0x92, 0x7d, 0x41, 0xc7, 0x74, 0x39, 0x40 }, { vscatterpf1qps [r9+zmm31*1+0x100]\{k1\} } +testcase { 0x62, 0xb2, 0x7d, 0x41, 0xc7, 0xb4, 0xb9, 0x00, 0x04, 0x00, 0x00 }, { vscatterpf1qps [rcx+zmm31*4+0x400]\{k1\} } +testcase { 0x0f, 0x0d, 0x11 }, { prefetchwt1 BYTE [rcx] } +testcase { 0x42, 0x0f, 0x0d, 0x94, 0xf0, 0x23, 0x01, 0x00, 0x00 }, { prefetchwt1 BYTE [rax+r14*8+0x123] } |