diff options
author | H. Peter Anvin <hpa@zytor.com> | 2007-09-17 17:25:27 -0700 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2007-09-17 17:25:27 -0700 |
commit | cf5180a9553e43bbaa46fd1a77c75dc8b7f6da42 (patch) | |
tree | e49a78cca72852670b210bce53f4c5698fc4c7bf | |
parent | 401c07e20d14130a2d147468a408fce9edd1faff (diff) | |
download | nasm-cf5180a9553e43bbaa46fd1a77c75dc8b7f6da42.tar.gz nasm-cf5180a9553e43bbaa46fd1a77c75dc8b7f6da42.tar.bz2 nasm-cf5180a9553e43bbaa46fd1a77c75dc8b7f6da42.zip |
Actually generate SSE5 instructions
This checkin completes what is required to actually generate SSE5
instructions. No support in the disassembler yet.
This checkin covers:
- Support for actually generating DREX prefixes.
- Support for operand-matching constraints of the form "operand X must match operand Y".
-rw-r--r-- | assemble.c | 19 | ||||
-rw-r--r-- | insns.dat | 18 | ||||
-rw-r--r-- | insns.h | 3 | ||||
-rw-r--r-- | insns.pl | 1 | ||||
-rw-r--r-- | nasm.h | 12 | ||||
-rw-r--r-- | test/fmsub.asm | 16 |
6 files changed, 63 insertions, 6 deletions
@@ -856,6 +856,7 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, case 0163: length++; ins->rex |= REX_D; + ins->drexdst = regval(&ins->oprs[c & 3]); break; case 0164: case 0165: @@ -863,6 +864,7 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, case 0167: length++; ins->rex |= REX_D|REX_OC; + ins->drexdst = regval(&ins->oprs[c & 3]); break; case 0170: length++; @@ -974,6 +976,11 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, errfunc(ERR_NONFATAL, "cannot use high register in drex instruction"); return -1; } + if (bits != 64 && ((ins->rex & (REX_W|REX_X|REX_B)) || + ins->drexdst > 7)) { + errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode"); + return -1; + } length++; } else if (ins->rex & REX_REAL) { if (ins->rex & REX_H) { @@ -985,8 +992,8 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, cpu >= IF_X86_64)) { length++; } else { - errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode"); - return -1; + errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode"); + return -1; } } @@ -1358,7 +1365,6 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 0165: case 0166: case 0167: - ins->drexdst = regval(&ins->oprs[c & 3]); break; case 0170: @@ -1663,7 +1669,12 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits) * Check that the operand flags all match up */ for (i = 0; i < itemp->operands; i++) { - if (itemp->opd[i] & ~instruction->oprs[i].type || + if (itemp->opd[i] & SAME_AS) { + int j = itemp->opd[i] & ~SAME_AS; + if (instruction->oprs[i].type != instruction->oprs[j].type || + instruction->oprs[i].basereg != instruction->oprs[j].basereg) + return 0; + } else if (itemp->opd[i] & ~instruction->oprs[i].type || ((itemp->opd[i] & SIZE_MASK) && ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) { if ((itemp->opd[i] & ~instruction->oprs[i].type & ~SIZE_MASK) || @@ -2020,3 +2020,21 @@ PCMPGTQ xmmreg,xmmrm 
\366\3\x0F\x38\x37\110 SSE42 POPCNT reg16,rm16 \320\333\2\x0F\xB8\110 NEHALEM POPCNT reg32,rm32 \321\333\2\x0F\xB8\110 NEHALEM POPCNT reg64,rm32 \324\333\2\x0F\xB8\110 NEHALEM,X64 + +; AMD SSE5 instructions +FMSUBPS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x08\132 SSE5 +FMSUBPS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x08\123 SSE5 +FMSUBPS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0C\121 SSE5 +FMSUBPS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0C\112 SSE5 +FMSUBPD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x09\132 SSE5 +FMSUBPD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x09\123 SSE5 +FMSUBPD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0D\121 SSE5 +FMSUBPD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0D\112 SSE5 +FMSUBSS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x0A\132 SSE5 +FMSUBSS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x0A\123 SSE5 +FMSUBSS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0E\121 SSE5 +FMSUBSS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0E\112 SSE5 +FMSUBSD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x0B\132 SSE5 +FMSUBSD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x0B\123 SSE5 +FMSUBSD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0F\121 SSE5 +FMSUBSD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0F\112 SSE5 @@ -21,7 +21,7 @@ struct itemplate { enum opcode opcode; /* the token, passed from "parser.c" */ int operands; /* number of operands */ - int32_t opd[MAX_OPERANDS]; /* bit flags for operand types */ + opflags_t opd[MAX_OPERANDS]; /* bit flags for operand types */ const char *code; /* the code it assembles to */ uint32_t flags; /* some flags */ }; @@ -90,6 +90,7 @@ extern const struct itemplate * const * const itable[]; #define IF_SSSE3 0x00200000UL /* it's an SSSE3 instruction */ #define IF_SSE41 0x00400000UL /* it's an SSE4.1 instruction */ #define IF_SSE42 0x00800000UL /* it's an SSE4.2 instruction */ +#define IF_SSE5 0x00800000UL /* HACK NEED TO REORGANIZE THESE BITS */ #define IF_PMASK 0xFF000000UL /* the mask for processor types */ #define IF_PLEVEL 0x0F000000UL /* the mask for processor instr. 
level */ /* also the highest possible processor */ @@ -218,6 +218,7 @@ sub format { $operands =~ s/rm(\d+)/rm_gpr|bits$1/g; $operands =~ s/mmxrm/rm_mmx/g; $operands =~ s/xmmrm/rm_xmm/g; + $operands =~ s/\=([0-9]+)/same_as|$1/g; if ($operands eq 'void') { @ops = (); } else { @@ -438,9 +438,16 @@ enum { * 25: RM_MMX (MMXREG) * 26: RM_XMM (XMMREG) * - * Bits 27-31 are currently unallocated. + * Bits 27-29 & 31 are currently unallocated. + * + * 30: SAME_AS + * Special flag only used in instruction patterns; means this operand + * has to be identical to another operand. Currently only supported + * for registers. */ +typedef uint32_t opflags_t; + /* Size, and other attributes, of the operand */ #define BITS8 0x00000001L #define BITS16 0x00000002L @@ -527,6 +534,9 @@ enum { #define UNITY 0x00012000L /* for shift/rotate instructions */ #define SBYTE 0x00022000L /* for op r16/32,immediate instrs. */ +/* special flags */ +#define SAME_AS 0x40000000L + /* Register names automatically generated from regs.dat */ #include "regs.h" diff --git a/test/fmsub.asm b/test/fmsub.asm new file mode 100644 index 0000000..7f087cd --- /dev/null +++ b/test/fmsub.asm @@ -0,0 +1,16 @@ + bits 64 + + fmsubps xmm0,xmm0,xmm1,xmm2 + fmsubps xmm0,xmm0,xmm1,[rax] + fmsubps xmm0,xmm0,xmm1,[rax+0x77] + fmsubps xmm0,xmm0,xmm1,[rax+0x7777] + fmsubps xmm1,xmm2,xmm3,xmm1 + fmsubps xmm1,xmm2,[rax],xmm1 + fmsubps xmm1,xmm2,[rax+0x77],xmm1 + fmsubps xmm1,xmm2,[rax+0x7777],xmm1 + fmsubps xmm0,[rax],xmm2,xmm0 + fmsubps xmm0,[rax+0x77],xmm2,xmm0 + fmsubps xmm0,[rax+0x7777],xmm2,xmm0 + fmsubps xmm14,[rax],xmm2,xmm14 + fmsubps xmm14,[rax+0x77],xmm2,xmm14 + fmsubps xmm14,[rax+0x7777],xmm2,xmm14 |