/* ----------------------------------------------------------------------- * * * Copyright 1996-2013 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following * conditions are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ----------------------------------------------------------------------- */ /* * assemble.c code generation for the Netwide Assembler * * the actual codes (C syntax, i.e. octal): * \0 - terminates the code. (Unless it's a literal of course.) * \1..\4 - that many literal bytes follow in the code stream * \5 - add 4 to the primary operand number (b, low octdigit) * \6 - add 4 to the secondary operand number (a, middle octdigit) * \7 - add 4 to both the primary and the secondary operand number * \10..\13 - a literal byte follows in the code stream, to be added * to the register value of operand 0..3 * \20..\23 - a byte immediate operand, from operand 0..3 * \24..\27 - a zero-extended byte immediate operand, from operand 0..3 * \30..\33 - a word immediate operand, from operand 0..3 * \34..\37 - select between \3[0-3] and \4[0-3] depending on 16/32 bit * assembly mode or the operand-size override on the operand * \40..\43 - a long immediate operand, from operand 0..3 * \44..\47 - select between \3[0-3], \4[0-3] and \5[4-7] * depending on the address size of the instruction. * \50..\53 - a byte relative operand, from operand 0..3 * \54..\57 - a qword immediate operand, from operand 0..3 * \60..\63 - a word relative operand, from operand 0..3 * \64..\67 - select between \6[0-3] and \7[0-3] depending on 16/32 bit * assembly mode or the operand-size override on the operand * \70..\73 - a long relative operand, from operand 0..3 * \74..\77 - a word constant, from the _segment_ part of operand 0..3 * \1ab - a ModRM, calculated on EA in operand a, with the spare * field the register value of operand b. * \172\ab - the register number from operand a in bits 7..4, with * the 4-bit immediate from operand b in bits 3..0. * \173\xab - the register number from operand a in bits 7..4, with * the value b in bits 3..0. * \174..\177 - the register number from operand 0..3 in bits 7..4, and * an arbitrary value in bits 3..0 (assembled as zero.) * \2ab - a ModRM, calculated on EA in operand a, with the spare * field equal to digit b. * * \240..\243 - this instruction uses EVEX rather than REX or VEX/XOP, with the * V field taken from operand 0..3. * \250 - this instruction uses EVEX rather than REX or VEX/XOP, with the * V field set to 1111b. * EVEX prefixes are followed by the sequence: * \cm\wlp\tup where cm is: * cc 000 0mm * c = 2 for EVEX and m is the legacy escape (0f, 0f38, 0f3a) * and wlp is: * 00 wwl lpp * [l0] ll = 0 (.128, .lz) * [l1] ll = 1 (.256) * [l2] ll = 2 (.512) * [lig] ll = 3 for EVEX.L'L don't care (always assembled as 0) * * [w0] ww = 0 for W = 0 * [w1] ww = 1 for W = 1 * [wig] ww = 2 for W don't care (always assembled as 0) * [ww] ww = 3 for W used as REX.W * * [p0] pp = 0 for no prefix * [60] pp = 1 for legacy prefix 60 * [f3] pp = 2 * [f2] pp = 3 * * tup is tuple type for Disp8*N from %tuple_codes in insns.pl * (compressed displacement encoding) * * \254..\257 - a signed 32-bit operand to be extended to 64 bits. * \260..\263 - this instruction uses VEX/XOP rather than REX, with the * V field taken from operand 0..3. * \270 - this instruction uses VEX/XOP rather than REX, with the * V field set to 1111b. * * VEX/XOP prefixes are followed by the sequence: * \tmm\wlp where mm is the M field; and wlp is: * 00 wwl lpp * [l0] ll = 0 for L = 0 (.128, .lz) * [l1] ll = 1 for L = 1 (.256) * [lig] ll = 2 for L don't care (always assembled as 0) * * [w0] ww = 0 for W = 0 * [w1 ] ww = 1 for W = 1 * [wig] ww = 2 for W don't care (always assembled as 0) * [ww] ww = 3 for W used as REX.W * * t = 0 for VEX (C4/C5), t = 1 for XOP (8F). * * \271 - instruction takes XRELEASE (F3) with or without lock * \272 - instruction takes XACQUIRE/XRELEASE with or without lock * \273 - instruction takes XACQUIRE/XRELEASE with lock only * \274..\277 - a byte immediate operand, from operand 0..3, sign-extended * to the operand size (if o16/o32/o64 present) or the bit size * \310 - indicates fixed 16-bit address size, i.e. optional 0x67. * \311 - indicates fixed 32-bit address size, i.e. optional 0x67. * \312 - (disassembler only) invalid with non-default address size. * \313 - indicates fixed 64-bit address size, 0x67 invalid. * \314 - (disassembler only) invalid with REX.B * \315 - (disassembler only) invalid with REX.X * \316 - (disassembler only) invalid with REX.R * \317 - (disassembler only) invalid with REX.W * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66. * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66. * \322 - indicates that this instruction is only valid when the * operand size is the default (instruction to disassembler, * generates no code in the assembler) * \323 - indicates fixed 64-bit operand size, REX on extensions only. * \324 - indicates 64-bit operand size requiring REX prefix. * \325 - instruction which always uses spl/bpl/sil/dil * \326 - instruction not valid with 0xF3 REP prefix. Hint for disassembler only; for SSE instructions. * \330 - a literal byte follows in the code stream, to be added * to the condition code value of the instruction. * \331 - instruction not valid with REP prefix. Hint for * disassembler only; for SSE instructions. * \332 - REP prefix (0xF2 byte) used as opcode extension. * \333 - REP prefix (0xF3 byte) used as opcode extension. * \334 - LOCK prefix used as REX.R (used in non-64-bit mode) * \335 - disassemble a rep (0xF3 byte) prefix as repe not rep. * \336 - force a REP(E) prefix (0xF3) even if not specified. * \337 - force a REPNE prefix (0xF2) even if not specified. * \336-\337 are still listed as prefixes in the disassembler. * \340 - reserve bytes of uninitialized storage. * Operand 0 had better be a segmentless constant. * \341 - this instruction needs a WAIT "prefix" * \360 - no SSE prefix (== \364\331) * \361 - 66 SSE prefix (== \366\331) * \364 - operand-size prefix (0x66) not permitted * \365 - address-size prefix (0x67) not permitted * \366 - operand-size prefix (0x66) used as opcode extension * \367 - address-size prefix (0x67) used as opcode extension * \370,\371 - match only if operand 0 meets byte jump criteria. * 370 is used for Jcc, 371 is used for JMP. * \373 - assemble 0x03 if bits==16, 0x05 if bits==32; * used for conditional jump over longer jump * \374 - this instruction takes an XMM VSIB memory EA * \375 - this instruction takes an YMM VSIB memory EA * \376 - this instruction takes an ZMM VSIB memory EA */ #include "compiler.h" #include #include #include #include "nasm.h" #include "nasmlib.h" #include "assemble.h" #include "insns.h" #include "tables.h" enum match_result { /* * Matching errors. These should be sorted so that more specific * errors come later in the sequence. */ MERR_INVALOP, MERR_OPSIZEMISSING, MERR_OPSIZEMISMATCH, MERR_BADCPU, MERR_BADMODE, MERR_BADHLE, MERR_ENCMISMATCH, /* * Matching success; the conditional ones first */ MOK_JUMP, /* Matching OK but needs jmp_match() */ MOK_GOOD /* Matching unconditionally OK */ }; typedef struct { enum ea_type type; /* what kind of EA is this? */ int sib_present; /* is a SIB byte necessary? */ int bytes; /* # of bytes of offset needed */ int size; /* lazy - this is sib+bytes+1 */ uint8_t modrm, sib, rex, rip; /* the bytes themselves */ int8_t disp8; /* compressed displacement for EVEX */ } ea; #define GEN_SIB(scale, index, base) \ (((scale) << 6) | ((index) << 3) | ((base))) #define GEN_MODRM(mod, reg, rm) \ (((mod) << 6) | (((reg) & 7) << 3) | ((rm) & 7)) static iflags_t cpu; /* cpu level received from nasm.c */ static efunc errfunc; static struct ofmt *outfmt; static ListGen *list; static int64_t calcsize(int32_t, int64_t, int, insn *, const struct itemplate *); static void gencode(int32_t segment, int64_t offset, int bits, insn * ins, const struct itemplate *temp, int64_t insn_end); static enum match_result find_match(const struct itemplate **tempp, insn *instruction, int32_t segment, int64_t offset, int bits); static enum match_result matches(const struct itemplate *, insn *, int bits); static opflags_t regflag(const operand *); static int32_t regval(const operand *); static int rexflags(int, opflags_t, int); static int op_rexflags(const operand *, int); static int op_evexflags(const operand *, int, uint8_t); static void add_asp(insn *, int); static enum ea_type process_ea(operand *, ea *, int, int, opflags_t, insn *); static int has_prefix(insn * ins, enum prefix_pos pos, int prefix) { return ins->prefixes[pos] == prefix; } static void assert_no_prefix(insn * ins, enum prefix_pos pos) { if (ins->prefixes[pos]) errfunc(ERR_NONFATAL, "invalid %s prefix", prefix_name(ins->prefixes[pos])); } static const char *size_name(int size) { switch (size) { case 1: return "byte"; case 2: return "word"; case 4: return "dword"; case 8: return "qword"; case 10: return "tword"; case 16: return "oword"; case 32: return "yword"; case 64: return "zword"; default: return "???"; } } static void warn_overflow(int pass, int size) { errfunc(ERR_WARNING | pass | ERR_WARN_NOV, "%s data exceeds bounds", size_name(size)); } static void warn_overflow_const(int64_t data, int size) { if (overflow_general(data, size)) warn_overflow(ERR_PASS1, size); } static void warn_overflow_opd(const struct operand *o, int size) { if (o->wrt == NO_SEG && o->segment == NO_SEG) { if (overflow_general(o->offset, size)) warn_overflow(ERR_PASS2, size); } } /* * This routine wrappers the real output format's output routine, * in order to pass a copy of the data off to the listing file * generator at the same time. */ static void out(int64_t offset, int32_t segto, const void *data, enum out_type type, uint64_t size, int32_t segment, int32_t wrt) { static int32_t lineno = 0; /* static!!! */ static char *lnfname = NULL; uint8_t p[8]; if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) { /* * This is a non-relocated address, and we're going to * convert it into RAWDATA format. */ uint8_t *q = p; if (size > 8) { errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8"); return; } WRITEADDR(q, *(int64_t *)data, size); data = p; type = OUT_RAWDATA; } list->output(offset, data, type, size); /* * this call to src_get determines when we call the * debug-format-specific "linenum" function * it updates lineno and lnfname to the current values * returning 0 if "same as last time", -2 if lnfname * changed, and the amount by which lineno changed, * if it did. thus, these variables must be static */ if (src_get(&lineno, &lnfname)) outfmt->current_dfmt->linenum(lnfname, lineno, segto); outfmt->output(segto, data, type, size, segment, wrt); } static void out_imm8(int64_t offset, int32_t segment, struct operand *opx) { if (opx->segment != NO_SEG) { uint64_t data = opx->offset; out(offset, segment, &data, OUT_ADDRESS, 1, opx->segment, opx->wrt); } else { uint8_t byte = opx->offset; out(offset, segment, &byte, OUT_RAWDATA, 1, NO_SEG, NO_SEG); } } static bool jmp_match(int32_t segment, int64_t offset, int bits, insn * ins, const struct itemplate *temp) { int64_t isize; const uint8_t *code = temp->code; uint8_t c = code[0]; if (((c & ~1) != 0370) || (ins->oprs[0].type & STRICT)) return false; if (!optimizing) return false; if (optimizing < 0 && c == 0371) return false; isize = calcsize(segment, offset, bits, ins, temp); if (ins->oprs[0].opflags & OPFLAG_UNKNOWN) /* Be optimistic in pass 1 */ return true; if (ins->oprs[0].segment != segment) return false; isize = ins->oprs[0].offset - offset - isize; /* isize is delta */ return (isize >= -128 && isize <= 127); /* is it byte size? */ } int64_t assemble(int32_t segment, int64_t offset, int bits, iflags_t cp, insn * instruction, struct ofmt *output, efunc error, ListGen * listgen) { const struct itemplate *temp; int j; enum match_result m; int64_t insn_end; int32_t itimes; int64_t start = offset; int64_t wsize; /* size for DB etc. */ errfunc = error; /* to pass to other functions */ cpu = cp; outfmt = output; /* likewise */ list = listgen; /* and again */ wsize = idata_bytes(instruction->opcode); if (wsize == -1) return 0; if (wsize) { extop *e; int32_t t = instruction->times; if (t < 0) errfunc(ERR_PANIC, "instruction->times < 0 (%ld) in assemble()", t); while (t--) { /* repeat TIMES times */ list_for_each(e, instruction->eops) { if (e->type == EOT_DB_NUMBER) { if (wsize > 8) { errfunc(ERR_NONFATAL, "integer supplied to a DT, DO or DY" " instruction"); } else { out(offset, segment, &e->offset, OUT_ADDRESS, wsize, e->segment, e->wrt); offset += wsize; } } else if (e->type == EOT_DB_STRING || e->type == EOT_DB_STRING_FREE) { int align; out(offset, segment, e->stringval, OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG); align = e->stringlen % wsize; if (align) { align = wsize - align; out(offset, segment, zero_buffer, OUT_RAWDATA, align, NO_SEG, NO_SEG); } offset += e->stringlen + align; } } if (t > 0 && t == instruction->times - 1) { /* * Dummy call to list->output to give the offset to the * listing module. */ list->output(offset, NULL, OUT_RAWDATA, 0); list->uplevel(LIST_TIMES); } } if (instruction->times > 1) list->downlevel(LIST_TIMES); return offset - start; } if (instruction->opcode == I_INCBIN) { const char *fname = instruction->eops->stringval; FILE *fp; fp = fopen(fname, "rb"); if (!fp) { error(ERR_NONFATAL, "`incbin': unable to open file `%s'", fname); } else if (fseek(fp, 0L, SEEK_END) < 0) { error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'", fname); fclose(fp); } else { static char buf[4096]; size_t t = instruction->times; size_t base = 0; size_t len; len = ftell(fp); if (instruction->eops->next) { base = instruction->eops->next->offset; len -= base; if (instruction->eops->next->next && len > (size_t)instruction->eops->next->next->offset) len = (size_t)instruction->eops->next->next->offset; } /* * Dummy call to list->output to give the offset to the * listing module. */ list->output(offset, NULL, OUT_RAWDATA, 0); list->uplevel(LIST_INCBIN); while (t--) { size_t l; fseek(fp, base, SEEK_SET); l = len; while (l > 0) { int32_t m; m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp); if (!m) { /* * This shouldn't happen unless the file * actually changes while we are reading * it. */ error(ERR_NONFATAL, "`incbin': unexpected EOF while" " reading file `%s'", fname); t = 0; /* Try to exit cleanly */ break; } out(offset, segment, buf, OUT_RAWDATA, m, NO_SEG, NO_SEG); l -= m; } } list->downlevel(LIST_INCBIN); if (instruction->times > 1) { /* * Dummy call to list->output to give the offset to the * listing module. */ list->output(offset, NULL, OUT_RAWDATA, 0); list->uplevel(LIST_TIMES); list->downlevel(LIST_TIMES); } fclose(fp); return instruction->times * len; } return 0; /* if we're here, there's an error */ } /* Check to see if we need an address-size prefix */ add_asp(instruction, bits); m = find_match(&temp, instruction, segment, offset, bits); if (m == MOK_GOOD) { /* Matches! */ int64_t insn_size = calcsize(segment, offset, bits, instruction, temp); itimes = instruction->times; if (insn_size < 0) /* shouldn't be, on pass two */ error(ERR_PANIC, "errors made it through from pass one"); else while (itimes--) { for (j = 0; j < MAXPREFIX; j++) { uint8_t c = 0; switch (instruction->prefixes[j]) { case P_WAIT: c = 0x9B; break; case P_LOCK: c = 0xF0; break; case P_REPNE: case P_REPNZ: case P_XACQUIRE: c = 0xF2; break; case P_REPE: case P_REPZ: case P_REP: case P_XRELEASE: c = 0xF3; break; case R_CS: if (bits == 64) { error(ERR_WARNING | ERR_PASS2, "cs segment base generated, but will be ignored in 64-bit mode"); } c = 0x2E; break; case R_DS: if (bits == 64) { error(ERR_WARNING | ERR_PASS2, "ds segment base generated, but will be ignored in 64-bit mode"); } c = 0x3E; break; case R_ES: if (bits == 64) { error(ERR_WARNING | ERR_PASS2, "es segment base generated, but will be ignored in 64-bit mode"); } c = 0x26; break; case R_FS: c = 0x64; break; case R_GS: c = 0x65; break; case R_SS: if (bits == 64) { error(ERR_WARNING | ERR_PASS2, "ss segment base generated, but will be ignored in 64-bit mode"); } c = 0x36; break; case R_SEGR6: case R_SEGR7: error(ERR_NONFATAL, "segr6 and segr7 cannot be used as prefixes"); break; case P_A16: if (bits == 64) { error(ERR_NONFATAL, "16-bit addressing is not supported " "in 64-bit mode"); } else if (bits != 16) c = 0x67; break; case P_A32: if (bits != 32) c = 0x67; break; case P_A64: if (bits != 64) { error(ERR_NONFATAL, "64-bit addressing is only supported " "in 64-bit mode"); } break; case P_ASP: c = 0x67; break; case P_O16: if (bits != 16) c = 0x66; break; case P_O32: if (bits == 16) c = 0x66; break; case P_O64: /* REX.W */ break; case P_OSP: c = 0x66; break; case P_none: break; default: error(ERR_PANIC, "invalid instruction prefix"); } if (c != 0) { out(offset, segment, &c, OUT_RAWDATA, 1, NO_SEG, NO_SEG); offset++; } } insn_end = offset + insn_size; gencode(segment, offset, bits, instruction, temp, insn_end); offset += insn_size; if (itimes > 0 && itimes == instruction->times - 1) { /* * Dummy call to list->output to give the offset to the * listing module. */ list->output(offset, NULL, OUT_RAWDATA, 0); list->uplevel(LIST_TIMES); } } if (instruction->times > 1) list->downlevel(LIST_TIMES); return offset - start; } else { /* No match */ switch (m) { case MERR_OPSIZEMISSING: error(ERR_NONFATAL, "operation size not specified"); break; case MERR_OPSIZEMISMATCH: error(ERR_NONFATAL, "mismatch in operand sizes"); break; case MERR_BADCPU: error(ERR_NONFATAL, "no instruction for this cpu level"); break; case MERR_BADMODE: error(ERR_NONFATAL, "instruction not supported in %d-bit mode", bits); break; default: error(ERR_NONFATAL, "invalid combination of opcode and operands"); break; } } return 0; } int64_t insn_size(int32_t segment, int64_t offset, int bits, iflags_t cp, insn * instruction, efunc error) { const struct itemplate *temp; enum match_result m; errfunc = error; /* to pass to other functions */ cpu = cp; if (instruction->opcode == I_none) return 0; if (instruction->opcode == I_DB || instruction->opcode == I_DW || instruction->opcode == I_DD || instruction->opcode == I_DQ || instruction->opcode == I_DT || instruction->opcode == I_DO || instruction->opcode == I_DY) { extop *e; int32_t isize, osize, wsize; isize = 0; wsize = idata_bytes(instruction->opcode); list_for_each(e, instruction->eops) { int32_t align; osize = 0; if (e->type == EOT_DB_NUMBER) { osize = 1; warn_overflow_const(e->offset, wsize); } else if (e->type == EOT_DB_STRING || e->type == EOT_DB_STRING_FREE) osize = e->stringlen; align = (-osize) % wsize; if (align < 0) align += wsize; isize += osize + align; } return isize * instruction->times; } if (instruction->opcode == I_INCBIN) { const char *fname = instruction->eops->stringval; FILE *fp; int64_t val = 0; size_t len; fp = fopen(fname, "rb"); if (!fp) error(ERR_NONFATAL, "`incbin': unable to open file `%s'", fname); else if (fseek(fp, 0L, SEEK_END) < 0) error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'", fname); else { len = ftell(fp); if (instruction->eops->next) { len -= instruction->eops->next->offset; if (instruction->eops->next->next && len > (size_t)instruction->eops->next->next->offset) { len = (size_t)instruction->eops->next->next->offset; } } val = instruction->times * len; } if (fp) fclose(fp); return val; } /* Check to see if we need an address-size prefix */ add_asp(instruction, bits); m = find_match(&temp, instruction, segment, offset, bits); if (m == MOK_GOOD) { /* we've matched an instruction. */ int64_t isize; int j; isize = calcsize(segment, offset, bits, instruction, temp); if (isize < 0) return -1; for (j = 0; j < MAXPREFIX; j++) { switch (instruction->prefixes[j]) { case P_A16: if (bits != 16) isize++; break; case P_A32: if (bits != 32) isize++; break; case P_O16: if (bits != 16) isize++; break; case P_O32: if (bits == 16) isize++; break; case P_A64: case P_O64: case P_none: break; default: isize++; break; } } return isize * instruction->times; } else { return -1; /* didn't match any instruction */ } } static void bad_hle_warn(const insn * ins, uint8_t hleok) { enum prefixes rep_pfx = ins->prefixes[PPS_REP]; enum whatwarn { w_none, w_lock, w_inval } ww; static const enum whatwarn warn[2][4] = { { w_inval, w_inval, w_none, w_lock }, /* XACQUIRE */ { w_inval, w_none, w_none, w_lock }, /* XRELEASE */ }; unsigned int n; n = (unsigned int)rep_pfx - P_XACQUIRE; if (n > 1) return; /* Not XACQUIRE/XRELEASE */ ww = warn[n][hleok]; if (!is_class(MEMORY, ins->oprs[0].type)) ww = w_inval; /* HLE requires operand 0 to be memory */ switch (ww) { case w_none: break; case w_lock: if (ins->prefixes[PPS_LOCK] != P_LOCK) { errfunc(ERR_WARNING | ERR_WARN_HLE | ERR_PASS2, "%s with this instruction requires lock", prefix_name(rep_pfx)); } break; case w_inval: errfunc(ERR_WARNING | ERR_WARN_HLE | ERR_PASS2, "%s invalid with this instruction", prefix_name(rep_pfx)); break; } } /* Common construct */ #define case3(x) case (x): case (x)+1: case (x)+2 #define case4(x) case3(x): case (x)+3 static int64_t calcsize(int32_t segment, int64_t offset, int bits, insn * ins, const struct itemplate *temp) { const uint8_t *codes = temp->code; int64_t length = 0; uint8_t c; int rex_mask = ~0; int op1, op2; struct operand *opx; uint8_t opex = 0; enum ea_type eat; uint8_t hleok = 0; bool lockcheck = true; ins->rex = 0; /* Ensure REX is reset */ eat = EA_SCALAR; /* Expect a scalar EA */ memset(ins->evex_p, 0, 3); /* Ensure EVEX is reset */ if (ins->prefixes[PPS_OSIZE] == P_O64) ins->rex |= REX_W; (void)segment; /* Don't warn that this parameter is unused */ (void)offset; /* Don't warn that this parameter is unused */ while (*codes) { c = *codes++; op1 = (c & 3) + ((opex & 1) << 2); op2 = ((c >> 3) & 3) + ((opex & 2) << 1); opx = &ins->oprs[op1]; opex = 0; /* For the next iteration */ switch (c) { case4(01): codes += c, length += c; break; case3(05): opex = c; break; case4(010): ins->rex |= op_rexflags(opx, REX_B|REX_H|REX_P|REX_W); codes++, length++; break; case4(020): case4(024): length++; break; case4(030): length += 2; break; case4(034): if (opx->type & (BITS16 | BITS32 | BITS64)) length += (opx->type & BITS16) ? 2 : 4; else length += (bits == 16) ? 2 : 4; break; case4(040): length += 4; break; case4(044): length += ins->addr_size >> 3; break; case4(050): length++; break; case4(054): length += 8; /* MOV reg64/imm */ break; case4(060): length += 2; break; case4(064): if (opx->type & (BITS16 | BITS32 | BITS64)) length += (opx->type & BITS16) ? 2 : 4; else length += (bits == 16) ? 2 : 4; break; case4(070): length += 4; break; case4(074): length += 2; break; case 0172: case 0173: codes++; length++; break; case4(0174): length++; break; case4(0240): ins->rex |= REX_EV; ins->vexreg = regval(opx); ins->evex_p[2] |= op_evexflags(opx, EVEX_P2VP, 2); /* High-16 NDS */ ins->vex_cm = *codes++; ins->vex_wlp = *codes++; ins->evex_tuple = (*codes++ - 0300); break; case 0250: ins->rex |= REX_EV; ins->vexreg = 0; ins->vex_cm = *codes++; ins->vex_wlp = *codes++; ins->evex_tuple = (*codes++ - 0300); break; case4(0254): length += 4; break; case4(0260): ins->rex |= REX_V; ins->vexreg = regval(opx); ins->vex_cm = *codes++; ins->vex_wlp = *codes++; break; case 0270: ins->rex |= REX_V; ins->vexreg = 0; ins->vex_cm = *codes++; ins->vex_wlp = *codes++; break; case3(0271): hleok = c & 3; break; case4(0274): length++; break; case4(0300): break; case 0310: if (bits == 64) return -1; length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16); break; case 0311: length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32); break; case 0312: break; case 0313: if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) || has_prefix(ins, PPS_ASIZE, P_A32)) return -1; break; case4(0314): break; case 0320: { enum prefixes pfx = ins->prefixes[PPS_OSIZE]; if (pfx == P_O16) break; if (pfx != P_none) errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix"); else ins->prefixes[PPS_OSIZE] = P_O16; break; } case 0321: { enum prefixes pfx = ins->prefixes[PPS_OSIZE]; if (pfx == P_O32) break; if (pfx != P_none) errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix"); else ins->prefixes[PPS_OSIZE] = P_O32; break; } case 0322: break; case 0323: rex_mask &= ~REX_W; break; case 0324: ins->rex |= REX_W; break; case 0325: ins->rex |= REX_NH; break; case 0326: break; case 0330: codes++, length++; break; case 0331: break; case 0332: case 0333: length++; break; case 0334: ins->rex |= REX_L; break; case 0335: break; case 0336: if (!ins->prefixes[PPS_REP]) ins->prefixes[PPS_REP] = P_REP; break; case 0337: if (!ins->prefixes[PPS_REP]) ins->prefixes[PPS_REP] = P_REPNE; break; case 0340: if (ins->oprs[0].segment != NO_SEG) errfunc(ERR_NONFATAL, "attempt to reserve non-constant" " quantity of BSS space"); else length += ins->oprs[0].offset; break; case 0341: if (!ins->prefixes[PPS_WAIT]) ins->prefixes[PPS_WAIT] = P_WAIT; break; case 0360: break; case 0361: length++; break; case 0364: case 0365: break; case 0366: case 0367: length++; break; case3(0370): break; case 0373: length++; break; case 0374: eat = EA_XMMVSIB; break; case 0375: eat = EA_YMMVSIB; break; case 0376: eat = EA_ZMMVSIB; break; case4(0100): case4(0110): case4(0120): case4(0130): case4(0200): case4(0204): case4(0210): case4(0214): case4(0220): case4(0224): case4(0230): case4(0234): { ea ea_data; int rfield; opflags_t rflags; struct operand *opy = &ins->oprs[op2]; struct operand *op_er_sae; ea_data.rex = 0; /* Ensure ea.REX is initially 0 */ if (c <= 0177) { /* pick rfield from operand b (opx) */ rflags = regflag(opx); rfield = nasm_regvals[opx->basereg]; } else { rflags = 0; rfield = c & 7; } /* EVEX.b1 : evex_brerop contains the operand position */ op_er_sae = (ins->evex_brerop >= 0 ? &ins->oprs[ins->evex_brerop] : NULL); if (op_er_sae && (op_er_sae->decoflags & (ER | SAE))) { /* set EVEX.b */ ins->evex_p[2] |= EVEX_P2B; if (op_er_sae->decoflags & ER) { /* set EVEX.RC (rounding control) */ ins->evex_p[2] |= ((ins->evex_rm - BRC_RN) << 5) & EVEX_P2RC; } } else { /* set EVEX.L'L (vector length) */ ins->evex_p[2] |= ((ins->vex_wlp << (5 - 2)) & EVEX_P2LL); if (opy->decoflags & BRDCAST_MASK) { /* set EVEX.b */ ins->evex_p[2] |= EVEX_P2B; } } if (process_ea(opy, &ea_data, bits, rfield, rflags, ins) != eat) { errfunc(ERR_NONFATAL, "invalid effective address"); return -1; } else { ins->rex |= ea_data.rex; length += ea_data.size; } } break; default: errfunc(ERR_PANIC, "internal instruction table corrupt" ": instruction code \\%o (0x%02X) given", c, c); break; } } ins->rex &= rex_mask; if (ins->rex & REX_NH) { if (ins->rex & REX_H) { errfunc(ERR_NONFATAL, "instruction cannot use high registers"); return -1; } ins->rex &= ~REX_P; /* Don't force REX prefix due to high reg */ } if (ins->rex & (REX_V | REX_EV)) { int bad32 = REX_R|REX_W|REX_X|REX_B; if (ins->rex & REX_H) { errfunc(ERR_NONFATAL, "cannot use high register in AVX instruction"); return -1; } switch (ins->vex_wlp & 060) { case 000: case 040: ins->rex &= ~REX_W; break; case 020: ins->rex |= REX_W; bad32 &= ~REX_W; break; case 060: /* Follow REX_W */ break; } if (bits != 64 && ((ins->rex & bad32) || ins->vexreg > 7)) { errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode"); return -1; } else if (!(ins->rex & REX_EV) && ((ins->vexreg > 15) || (ins->evex_p[0] & 0xf0))) { errfunc(ERR_NONFATAL, "invalid high-16 register in non-AVX-512"); return -1; } if (ins->rex & REX_EV) length += 4; else if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) length += 3; else length += 2; } else if (ins->rex & REX_REAL) { if (ins->rex & REX_H) { errfunc(ERR_NONFATAL, "cannot use high register in rex instruction"); return -1; } else if (bits == 64) { length++; } else if ((ins->rex & REX_L) && !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) && cpu >= IF_X86_64) { /* LOCK-as-REX.R */ assert_no_prefix(ins, PPS_LOCK); lockcheck = false; /* Already errored, no need for warning */ length++; } else { errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode"); return -1; } } if (has_prefix(ins, PPS_LOCK, P_LOCK) && lockcheck && (!(temp->flags & IF_LOCK) || !is_class(MEMORY, ins->oprs[0].type))) { errfunc(ERR_WARNING | ERR_WARN_LOCK | ERR_PASS2 , "instruction is not lockable"); } bad_hle_warn(ins, hleok); return length; } static inline unsigned int emit_rex(insn *ins, int32_t segment, int64_t offset, int bits) { if (bits == 64) { if ((ins->rex & REX_REAL) && !(ins->rex & (REX_V | REX_EV))) { ins->rex = (ins->rex & REX_REAL) | REX_P; out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); ins->rex = 0; return 1; } } return 0; } static void gencode(int32_t segment, int64_t offset, int bits, insn * ins, const struct itemplate *temp, int64_t insn_end) { uint8_t c; uint8_t bytes[4]; int64_t size; int64_t data; int op1, op2; struct operand *opx; const uint8_t *codes = temp->code; uint8_t opex = 0; enum ea_type eat = EA_SCALAR; while (*codes) { c = *codes++; op1 = (c & 3) + ((opex & 1) << 2); op2 = ((c >> 3) & 3) + ((opex & 2) << 1); opx = &ins->oprs[op1]; opex = 0; /* For the next iteration */ switch (c) { case 01: case 02: case 03: case 04: offset += emit_rex(ins, segment, offset, bits); out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG); codes += c; offset += c; break; case 05: case 06: case 07: opex = c; break; case4(010): offset += emit_rex(ins, segment, offset, bits); bytes[0] = *codes++ + (regval(opx) & 7); out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG); offset += 1; break; case4(020): if (opx->offset < -256 || opx->offset > 255) { errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV, "byte value exceeds bounds"); } out_imm8(offset, segment, opx); offset += 1; break; case4(024): if (opx->offset < 0 || opx->offset > 255) errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV, "unsigned byte value exceeds bounds"); out_imm8(offset, segment, opx); offset += 1; break; case4(030): warn_overflow_opd(opx, 2); data = opx->offset; out(offset, segment, &data, OUT_ADDRESS, 2, opx->segment, opx->wrt); offset += 2; break; case4(034): if (opx->type & (BITS16 | BITS32)) size = (opx->type & BITS16) ? 2 : 4; else size = (bits == 16) ? 2 : 4; warn_overflow_opd(opx, size); data = opx->offset; out(offset, segment, &data, OUT_ADDRESS, size, opx->segment, opx->wrt); offset += size; break; case4(040): warn_overflow_opd(opx, 4); data = opx->offset; out(offset, segment, &data, OUT_ADDRESS, 4, opx->segment, opx->wrt); offset += 4; break; case4(044): data = opx->offset; size = ins->addr_size >> 3; warn_overflow_opd(opx, size); out(offset, segment, &data, OUT_ADDRESS, size, opx->segment, opx->wrt); offset += size; break; case4(050): if (opx->segment != segment) { data = opx->offset; out(offset, segment, &data, OUT_REL1ADR, insn_end - offset, opx->segment, opx->wrt); } else { data = opx->offset - insn_end; if (data > 127 || data < -128) errfunc(ERR_NONFATAL, "short jump is out of range"); out(offset, segment, &data, OUT_ADDRESS, 1, NO_SEG, NO_SEG); } offset += 1; break; case4(054): data = (int64_t)opx->offset; out(offset, segment, &data, OUT_ADDRESS, 8, opx->segment, opx->wrt); offset += 8; break; case4(060): if (opx->segment != segment) { data = opx->offset; out(offset, segment, &data, OUT_REL2ADR, insn_end - offset, opx->segment, opx->wrt); } else { data = opx->offset - insn_end; out(offset, segment, &data, OUT_ADDRESS, 2, NO_SEG, NO_SEG); } offset += 2; break; case4(064): if (opx->type & (BITS16 | BITS32 | BITS64)) size = (opx->type & BITS16) ? 2 : 4; else size = (bits == 16) ? 2 : 4; if (opx->segment != segment) { data = opx->offset; out(offset, segment, &data, size == 2 ? OUT_REL2ADR : OUT_REL4ADR, insn_end - offset, opx->segment, opx->wrt); } else { data = opx->offset - insn_end; out(offset, segment, &data, OUT_ADDRESS, size, NO_SEG, NO_SEG); } offset += size; break; case4(070): if (opx->segment != segment) { data = opx->offset; out(offset, segment, &data, OUT_REL4ADR, insn_end - offset, opx->segment, opx->wrt); } else { data = opx->offset - insn_end; out(offset, segment, &data, OUT_ADDRESS, 4, NO_SEG, NO_SEG); } offset += 4; break; case4(074): if (opx->segment == NO_SEG) errfunc(ERR_NONFATAL, "value referenced by FAR is not" " relocatable"); data = 0; out(offset, segment, &data, OUT_ADDRESS, 2, outfmt->segbase(1 + opx->segment), opx->wrt); offset += 2; break; case 0172: c = *codes++; opx = &ins->oprs[c >> 3]; bytes[0] = nasm_regvals[opx->basereg] << 4; opx = &ins->oprs[c & 7]; if (opx->segment != NO_SEG || opx->wrt != NO_SEG) { errfunc(ERR_NONFATAL, "non-absolute expression not permitted as argument %d", c & 7); } else { if (opx->offset & ~15) { errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV, "four-bit argument exceeds bounds"); } bytes[0] |= opx->offset & 15; } out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG); offset++; break; case 0173: c = *codes++; opx = &ins->oprs[c >> 4]; bytes[0] = nasm_regvals[opx->basereg] << 4; bytes[0] |= c & 15; out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG); offset++; break; case4(0174): bytes[0] = nasm_regvals[opx->basereg] << 4; out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG); offset++; break; case4(0254): data = opx->offset; if (opx->wrt == NO_SEG && opx->segment == NO_SEG && (int32_t)data != (int64_t)data) { errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV, "signed dword immediate exceeds bounds"); } out(offset, segment, &data, OUT_ADDRESS, 4, opx->segment, opx->wrt); offset += 4; break; case4(0240): case 0250: codes += 3; ins->evex_p[2] |= op_evexflags(&ins->oprs[0], EVEX_P2Z | EVEX_P2AAA, 2); ins->evex_p[2] ^= EVEX_P2VP; /* 1's complement */ bytes[0] = 0x62; /* EVEX.X can be set by either REX or EVEX for different reasons */ bytes[1] = (~(((ins->rex & 7) << 5) | (ins->evex_p[0] & (EVEX_P0X | EVEX_P0RP))) & 0xf0) | (ins->vex_cm & 3); bytes[2] = ((ins->rex & REX_W) << (7 - 3)) | ((~ins->vexreg & 15) << 3) | (1 << 2) | (ins->vex_wlp & 3); bytes[3] = ins->evex_p[2]; out(offset, segment, &bytes, OUT_RAWDATA, 4, NO_SEG, NO_SEG); offset += 4; break; case4(0260): case 0270: codes += 2; if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) { bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4; bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5); bytes[2] = ((ins->rex & REX_W) << (7-3)) | ((~ins->vexreg & 15)<< 3) | (ins->vex_wlp & 07); out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG); offset += 3; } else { bytes[0] = 0xc5; bytes[1] = ((~ins->rex & REX_R) << (7-2)) | ((~ins->vexreg & 15) << 3) | (ins->vex_wlp & 07); out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG); offset += 2; } break; case 0271: case 0272: case 0273: break; case4(0274): { uint64_t uv, um; int s; if (ins->rex & REX_W) s = 64; else if (ins->prefixes[PPS_OSIZE] == P_O16) s = 16; else if (ins->prefixes[PPS_OSIZE] == P_O32) s = 32; else s = bits; um = (uint64_t)2 << (s-1); uv = opx->offset; if (uv > 127 && uv < (uint64_t)-128 && (uv < um-128 || uv > um-1)) { /* If this wasn't explicitly byte-sized, warn as though we * had fallen through to the imm16/32/64 case. */ errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV, "%s value exceeds bounds", (opx->type & BITS8) ? "signed byte" : s == 16 ? "word" : s == 32 ? "dword" : "signed dword"); } if (opx->segment != NO_SEG) { data = uv; out(offset, segment, &data, OUT_ADDRESS, 1, opx->segment, opx->wrt); } else { bytes[0] = uv; out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG); } offset += 1; break; } case4(0300): break; case 0310: if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) { *bytes = 0x67; out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG); offset += 1; } else offset += 0; break; case 0311: if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) { *bytes = 0x67; out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG); offset += 1; } else offset += 0; break; case 0312: break; case 0313: ins->rex = 0; break; case4(0314): break; case 0320: case 0321: break; case 0322: case 0323: break; case 0324: ins->rex |= REX_W; break; case 0325: break; case 0326: break; case 0330: *bytes = *codes++ ^ get_cond_opcode(ins->condition); out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG); offset += 1; break; case 0331: break; case 0332: case 0333: *bytes = c - 0332 + 0xF2; out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG); offset += 1; break; case 0334: if (ins->rex & REX_R) { *bytes = 0xF0; out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG); offset += 1; } ins->rex &= ~(REX_L|REX_R); break; case 0335: break; case 0336: case 0337: break; case 0340: if (ins->oprs[0].segment != NO_SEG) errfunc(ERR_PANIC, "non-constant BSS size in pass two"); else { int64_t size = ins->oprs[0].offset; if (size > 0) out(offset, segment, NULL, OUT_RESERVE, size, NO_SEG, NO_SEG); offset += size; } break; case 0341: break; case 0360: break; case 0361: bytes[0] = 0x66; out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG); offset += 1; break; case 0364: case 0365: break; case 0366: case 0367: *bytes = c - 0366 + 0x66; out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG); offset += 1; break; case 0370: case 0371: break; case 0373: *bytes = bits == 16 ? 3 : 5; out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG); offset += 1; break; case 0374: eat = EA_XMMVSIB; break; case 0375: eat = EA_YMMVSIB; break; case 0376: eat = EA_ZMMVSIB; break; case4(0100): case4(0110): case4(0120): case4(0130): case4(0200): case4(0204): case4(0210): case4(0214): case4(0220): case4(0224): case4(0230): case4(0234): { ea ea_data; int rfield; opflags_t rflags; uint8_t *p; int32_t s; struct operand *opy = &ins->oprs[op2]; if (c <= 0177) { /* pick rfield from operand b (opx) */ rflags = regflag(opx); rfield = nasm_regvals[opx->basereg]; } else { /* rfield is constant */ rflags = 0; rfield = c & 7; } if (process_ea(opy, &ea_data, bits, rfield, rflags, ins) != eat) errfunc(ERR_NONFATAL, "invalid effective address"); p = bytes; *p++ = ea_data.modrm; if (ea_data.sib_present) *p++ = ea_data.sib; s = p - bytes; out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG); /* * Make sure the address gets the right offset in case * the line breaks in the .lst file (BR 1197827) */ offset += s; s = 0; switch (ea_data.bytes) { case 0: break; case 1: case 2: case 4: case 8: /* use compressed displacement, if available */ data = ea_data.disp8 ? ea_data.disp8 : opy->offset; s += ea_data.bytes; if (ea_data.rip) { if (opy->segment == segment) { data -= insn_end; if (overflow_signed(data, ea_data.bytes)) warn_overflow(ERR_PASS2, ea_data.bytes); out(offset, segment, &data, OUT_ADDRESS, ea_data.bytes, NO_SEG, NO_SEG); } else { /* overflow check in output/linker? */ out(offset, segment, &data, OUT_REL4ADR, insn_end - offset, opy->segment, opy->wrt); } } else { if (overflow_general(data, ins->addr_size >> 3) || signed_bits(data, ins->addr_size) != signed_bits(data, ea_data.bytes * 8)) warn_overflow(ERR_PASS2, ea_data.bytes); out(offset, segment, &data, OUT_ADDRESS, ea_data.bytes, opy->segment, opy->wrt); } break; default: /* Impossible! */ errfunc(ERR_PANIC, "Invalid amount of bytes (%d) for offset?!", ea_data.bytes); break; } offset += s; } break; default: errfunc(ERR_PANIC, "internal instruction table corrupt" ": instruction code \\%o (0x%02X) given", c, c); break; } } } static opflags_t regflag(const operand * o) { if (!is_register(o->basereg)) errfunc(ERR_PANIC, "invalid operand passed to regflag()"); return nasm_reg_flags[o->basereg]; } static int32_t regval(const operand * o) { if (!is_register(o->basereg)) errfunc(ERR_PANIC, "invalid operand passed to regval()"); return nasm_regvals[o->basereg]; } static int op_rexflags(const operand * o, int mask) { opflags_t flags; int val; if (!is_register(o->basereg)) errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()"); flags = nasm_reg_flags[o->basereg]; val = nasm_regvals[o->basereg]; return rexflags(val, flags, mask); } static int rexflags(int val, opflags_t flags, int mask) { int rex = 0; if (val >= 8) rex |= REX_B|REX_X|REX_R; if (flags & BITS64) rex |= REX_W; if (!(REG_HIGH & ~flags)) /* AH, CH, DH, BH */ rex |= REX_H; else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */ rex |= REX_P; return rex & mask; } static int evexflags(int val, decoflags_t deco, int mask, uint8_t byte) { int evex = 0; switch(byte) { case 0: if (val >= 16) evex |= (EVEX_P0RP | EVEX_P0X); break; case 2: if (val >= 16) evex |= EVEX_P2VP; if (deco & Z) evex |= EVEX_P2Z; if (deco & OPMASK_MASK) evex |= deco & EVEX_P2AAA; break; } return evex & mask; } static int op_evexflags(const operand * o, int mask, uint8_t byte) { int val; if (!is_register(o->basereg)) errfunc(ERR_PANIC, "invalid operand passed to op_evexflags()"); val = nasm_regvals[o->basereg]; return evexflags(val, o->decoflags, mask, byte); } static enum match_result find_match(const struct itemplate **tempp, insn *instruction, int32_t segment, int64_t offset, int bits) { const struct itemplate *temp; enum match_result m, merr; opflags_t xsizeflags[MAX_OPERANDS]; bool opsizemissing = false; int8_t broadcast = instruction->evex_brerop; int i; /* broadcasting uses a different data element size */ for (i = 0; i < instruction->operands; i++) if (i == broadcast) xsizeflags[i] = instruction->oprs[i].decoflags & BRSIZE_MASK; else xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK; merr = MERR_INVALOP; for (temp = nasm_instructions[instruction->opcode]; temp->opcode != I_none; temp++) { m = matches(temp, instruction, bits); if (m == MOK_JUMP) { if (jmp_match(segment, offset, bits, instruction, temp)) m = MOK_GOOD; else m = MERR_INVALOP; } else if (m == MERR_OPSIZEMISSING && (temp->flags & IF_SMASK) != IF_SX) { /* * Missing operand size and a candidate for fuzzy matching... */ for (i = 0; i < temp->operands; i++) if (i == broadcast) xsizeflags[i] |= temp->deco[i] & BRSIZE_MASK; else xsizeflags[i] |= temp->opd[i] & SIZE_MASK; opsizemissing = true; } if (m > merr) merr = m; if (merr == MOK_GOOD) goto done; } /* No match, but see if we can get a fuzzy operand size match... */ if (!opsizemissing) goto done; for (i = 0; i < instruction->operands; i++) { /* * We ignore extrinsic operand sizes on registers, so we should * never try to fuzzy-match on them. This also resolves the case * when we have e.g. "xmmrm128" in two different positions. */ if (is_class(REGISTER, instruction->oprs[i].type)) continue; /* This tests if xsizeflags[i] has more than one bit set */ if ((xsizeflags[i] & (xsizeflags[i]-1))) goto done; /* No luck */ if (i == broadcast) instruction->oprs[i].decoflags |= xsizeflags[i]; else instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */ } /* Try matching again... */ for (temp = nasm_instructions[instruction->opcode]; temp->opcode != I_none; temp++) { m = matches(temp, instruction, bits); if (m == MOK_JUMP) { if (jmp_match(segment, offset, bits, instruction, temp)) m = MOK_GOOD; else m = MERR_INVALOP; } if (m > merr) merr = m; if (merr == MOK_GOOD) goto done; } done: *tempp = temp; return merr; } static enum match_result matches(const struct itemplate *itemp, insn *instruction, int bits) { opflags_t size[MAX_OPERANDS], asize; bool opsizemissing = false; int i, oprs; /* * Check the opcode */ if (itemp->opcode != instruction->opcode) return MERR_INVALOP; /* * Count the operands */ if (itemp->operands != instruction->operands) return MERR_INVALOP; /* * Is it legal? */ if (!(optimizing > 0) && (itemp->flags & IF_OPT)) return MERR_INVALOP; /* * Check that no spurious colons or TOs are present */ for (i = 0; i < itemp->operands; i++) if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO)) return MERR_INVALOP; /* * Process size flags */ switch (itemp->flags & IF_SMASK) { case IF_SB: asize = BITS8; break; case IF_SW: asize = BITS16; break; case IF_SD: asize = BITS32; break; case IF_SQ: asize = BITS64; break; case IF_SO: asize = BITS128; break; case IF_SY: asize = BITS256; break; case IF_SZ: asize = BITS512; break; case IF_SIZE: switch (bits) { case 16: asize = BITS16; break; case 32: asize = BITS32; break; case 64: asize = BITS64; break; default: asize = 0; break; } break; default: asize = 0; break; } if (itemp->flags & IF_ARMASK) { /* S- flags only apply to a specific operand */ i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1; memset(size, 0, sizeof size); size[i] = asize; } else { /* S- flags apply to all operands */ for (i = 0; i < MAX_OPERANDS; i++) size[i] = asize; } /* * Check that the operand flags all match up, * it's a bit tricky so lets be verbose: * * 1) Find out the size of operand. If instruction * doesn't have one specified -- we're trying to * guess it either from template (IF_S* flag) or * from code bits. * * 2) If template operand do not match the instruction OR * template has an operand size specified AND this size differ * from which instruction has (perhaps we got it from code bits) * we are: * a) Check that only size of instruction and operand is differ * other characteristics do match * b) Perhaps it's a register specified in instruction so * for such a case we just mark that operand as "size * missing" and this will turn on fuzzy operand size * logic facility (handled by a caller) */ for (i = 0; i < itemp->operands; i++) { opflags_t type = instruction->oprs[i].type; decoflags_t deco = instruction->oprs[i].decoflags; if (!(type & SIZE_MASK)) type |= size[i]; if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (itemp->deco[i] & deco) != deco) { return MERR_INVALOP; } else if ((itemp->opd[i] & SIZE_MASK) && (itemp->opd[i] & SIZE_MASK) != (type & SIZE_MASK)) { if (type & SIZE_MASK) { /* * when broadcasting, the element size depends on * the instruction type. decorator flag should match. */ #define MATCH_BRSZ(bits) (((type & SIZE_MASK) == BITS##bits) && \ ((itemp->deco[i] & BRSIZE_MASK) == BR_BITS##bits)) if (!((deco & BRDCAST_MASK) && (MATCH_BRSZ(32) || MATCH_BRSZ(64)))) { return MERR_INVALOP; } } else if (!is_class(REGISTER, type)) { /* * Note: we don't honor extrinsic operand sizes for registers, * so "missing operand size" for a register should be * considered a wildcard match rather than an error. */ opsizemissing = true; } } else if (is_register(instruction->oprs[i].basereg) && nasm_regvals[instruction->oprs[i].basereg] >= 16 && !(itemp->flags & IF_AVX512)) { return MERR_ENCMISMATCH; } } if (opsizemissing) return MERR_OPSIZEMISSING; /* * Check operand sizes */ if (itemp->flags & (IF_SM | IF_SM2)) { oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands); for (i = 0; i < oprs; i++) { asize = itemp->opd[i] & SIZE_MASK; if (asize) { for (i = 0; i < oprs; i++) size[i] = asize; break; } } } else { oprs = itemp->operands; } for (i = 0; i < itemp->operands; i++) { if (!(itemp->opd[i] & SIZE_MASK) && (instruction->oprs[i].type & SIZE_MASK & ~size[i])) return MERR_OPSIZEMISMATCH; } /* * Check template is okay at the set cpu level */ if (((itemp->flags & IF_PLEVEL) > cpu)) return MERR_BADCPU; /* * Verify the appropriate long mode flag. */ if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG))) return MERR_BADMODE; /* * If we have a HLE prefix, look for the NOHLE flag */ if ((itemp->flags & IF_NOHLE) && (has_prefix(instruction, PPS_REP, P_XACQUIRE) || has_prefix(instruction, PPS_REP, P_XRELEASE))) return MERR_BADHLE; /* * Check if special handling needed for Jumps */ if ((itemp->code[0] & ~1) == 0370) return MOK_JUMP; return MOK_GOOD; } /* * Check if offset is a multiple of N with corresponding tuple type * if Disp8*N is available, compressed displacement is stored in compdisp */ static bool is_disp8n(operand *input, insn *ins, int8_t *compdisp) { const uint8_t fv_n[2][2][VLMAX] = {{{16, 32, 64}, {4, 4, 4}}, {{16, 32, 64}, {8, 8, 8}}}; const uint8_t hv_n[2][VLMAX] = {{8, 16, 32}, {4, 4, 4}}; const uint8_t dup_n[VLMAX] = {8, 32, 64}; bool evex_b = input->decoflags & BRDCAST_MASK; enum ttypes tuple = ins->evex_tuple; /* vex_wlp composed as [wwllpp] */ enum vectlens vectlen = (ins->vex_wlp & 0x0c) >> 2; /* wig(=2) is treated as w0(=0) */ bool evex_w = (ins->vex_wlp & 0x10) >> 4; int32_t off = input->offset; uint8_t n = 0; int32_t disp8; switch(tuple) { case FV: n = fv_n[evex_w][evex_b][vectlen]; break; case HV: n = hv_n[evex_b][vectlen]; break; case FVM: /* 16, 32, 64 for VL 128, 256, 512 respectively*/ n = 1 << (vectlen + 4); break; case T1S8: /* N = 1 */ case T1S16: /* N = 2 */ n = tuple - T1S8 + 1; break; case T1S: /* N = 4 for 32bit, 8 for 64bit */ n = evex_w ? 8 : 4; break; case T1F32: case T1F64: /* N = 4 for 32bit, 8 for 64bit */ n = (tuple == T1F32 ? 4 : 8); break; case T2: case T4: case T8: if (vectlen + 7 <= (evex_w + 5) + (tuple - T2 + 1)) n = 0; else n = 1 << (tuple - T2 + evex_w + 3); break; case HVM: case QVM: case OVM: n = 1 << (OVM - tuple + vectlen + 1); break; case M128: n = 16; break; case DUP: n = dup_n[vectlen]; break; default: break; } if (n && !(off & (n - 1))) { disp8 = off / n; /* if it fits in Disp8 */ if (disp8 >= -128 && disp8 <= 127) { *compdisp = disp8; return true; } } *compdisp = 0; return false; } /* * Check if ModR/M.mod should/can be 01. * - EAF_BYTEOFFS is set * - offset can fit in a byte when EVEX is not used * - offset can be compressed when EVEX is used */ #define IS_MOD_01() (input->eaflags & EAF_BYTEOFFS || \ (o >= -128 && o <= 127 && \ seg == NO_SEG && !forw_ref && \ !(input->eaflags & EAF_WORDOFFS) && \ !(ins->rex & REX_EV)) || \ (ins->rex & REX_EV && \ is_disp8n(input, ins, &output->disp8))) static enum ea_type process_ea(operand *input, ea *output, int bits, int rfield, opflags_t rflags, insn *ins) { bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN); int addrbits = ins->addr_size; output->type = EA_SCALAR; output->rip = false; output->disp8 = 0; /* REX flags for the rfield operand */ output->rex |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H); /* EVEX.R' flag for the REG operand */ ins->evex_p[0] |= evexflags(rfield, 0, EVEX_P0RP, 0); if (is_class(REGISTER, input->type)) { /* * It's a direct register. */ if (!is_register(input->basereg)) goto err; if (!is_reg_class(REG_EA, input->basereg)) goto err; /* broadcasting is not available with a direct register operand. */ if (input->decoflags & BRDCAST_MASK) { nasm_error(ERR_NONFATAL, "Broadcasting not allowed from a register"); goto err; } output->rex |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H); ins->evex_p[0] |= op_evexflags(input, EVEX_P0X, 0); output->sib_present = false; /* no SIB necessary */ output->bytes = 0; /* no offset necessary either */ output->modrm = GEN_MODRM(3, rfield, nasm_regvals[input->basereg]); } else { /* * It's a memory reference. */ /* Embedded rounding or SAE is not available with a mem ref operand. */ if (input->decoflags & (ER | SAE)) { nasm_error(ERR_NONFATAL, "Embedded rounding is available only with reg-reg op."); return -1; } if (input->basereg == -1 && (input->indexreg == -1 || input->scale == 0)) { /* * It's a pure offset. */ if (bits == 64 && ((input->type & IP_REL) == IP_REL) && input->segment == NO_SEG) { nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative"); input->type &= ~IP_REL; input->type |= MEMORY; } if (input->eaflags & EAF_BYTEOFFS || (input->eaflags & EAF_WORDOFFS && input->disp_size != (addrbits != 16 ? 32 : 16))) { nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address"); } if (bits == 64 && (~input->type & IP_REL)) { output->sib_present = true; output->sib = GEN_SIB(0, 4, 5); output->bytes = 4; output->modrm = GEN_MODRM(0, rfield, 4); output->rip = false; } else { output->sib_present = false; output->bytes = (addrbits != 16 ? 4 : 2); output->modrm = GEN_MODRM(0, rfield, (addrbits != 16 ? 5 : 6)); output->rip = bits == 64; } } else { /* * It's an indirection. */ int i = input->indexreg, b = input->basereg, s = input->scale; int32_t seg = input->segment; int hb = input->hintbase, ht = input->hinttype; int t, it, bt; /* register numbers */ opflags_t x, ix, bx; /* register flags */ if (s == 0) i = -1; /* make this easy, at least */ if (is_register(i)) { it = nasm_regvals[i]; ix = nasm_reg_flags[i]; } else { it = -1; ix = 0; } if (is_register(b)) { bt = nasm_regvals[b]; bx = nasm_reg_flags[b]; } else { bt = -1; bx = 0; } /* if either one are a vector register... */ if ((ix|bx) & (XMMREG|YMMREG|ZMMREG) & ~REG_EA) { opflags_t sok = BITS32 | BITS64; int32_t o = input->offset; int mod, scale, index, base; /* * For a vector SIB, one has to be a vector and the other, * if present, a GPR. The vector must be the index operand. */ if (it == -1 || (bx & (XMMREG|YMMREG|ZMMREG) & ~REG_EA)) { if (s == 0) s = 1; else if (s != 1) goto err; t = bt, bt = it, it = t; x = bx, bx = ix, ix = x; } if (bt != -1) { if (REG_GPR & ~bx) goto err; if (!(REG64 & ~bx) || !(REG32 & ~bx)) sok &= bx; else goto err; } /* * While we're here, ensure the user didn't specify * WORD or QWORD */ if (input->disp_size == 16 || input->disp_size == 64) goto err; if (addrbits == 16 || (addrbits == 32 && !(sok & BITS32)) || (addrbits == 64 && !(sok & BITS64))) goto err; output->type = ((ix & ZMMREG & ~REG_EA) ? EA_ZMMVSIB : ((ix & YMMREG & ~REG_EA) ? EA_YMMVSIB : EA_XMMVSIB)); output->rex |= rexflags(it, ix, REX_X); output->rex |= rexflags(bt, bx, REX_B); ins->evex_p[2] |= evexflags(it, 0, EVEX_P2VP, 2); index = it & 7; /* it is known to be != -1 */ switch (s) { case 1: scale = 0; break; case 2: scale = 1; break; case 4: scale = 2; break; case 8: scale = 3; break; default: /* then what the smeg is it? */ goto err; /* panic */ } if (bt == -1) { base = 5; mod = 0; } else { base = (bt & 7); if (base != REG_NUM_EBP && o == 0 && seg == NO_SEG && !forw_ref && !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS))) mod = 0; else if (IS_MOD_01()) mod = 1; else mod = 2; } output->sib_present = true; output->bytes = (bt == -1 || mod == 2 ? 4 : mod); output->modrm = GEN_MODRM(mod, rfield, 4); output->sib = GEN_SIB(scale, index, base); } else if ((ix|bx) & (BITS32|BITS64)) { /* * it must be a 32/64-bit memory reference. Firstly we have * to check that all registers involved are type E/Rxx. */ opflags_t sok = BITS32 | BITS64; int32_t o = input->offset; if (it != -1) { if (!(REG64 & ~ix) || !(REG32 & ~ix)) sok &= ix; else goto err; } if (bt != -1) { if (REG_GPR & ~bx) goto err; /* Invalid register */ if (~sok & bx & SIZE_MASK) goto err; /* Invalid size */ sok &= bx; } /* * While we're here, ensure the user didn't specify * WORD or QWORD */ if (input->disp_size == 16 || input->disp_size == 64) goto err; if (addrbits == 16 || (addrbits == 32 && !(sok & BITS32)) || (addrbits == 64 && !(sok & BITS64))) goto err; /* now reorganize base/index */ if (s == 1 && bt != it && bt != -1 && it != -1 && ((hb == b && ht == EAH_NOTBASE) || (hb == i && ht == EAH_MAKEBASE))) { /* swap if hints say so */ t = bt, bt = it, it = t; x = bx, bx = ix, ix = x; } if (bt == it) /* convert EAX+2*EAX to 3*EAX */ bt = -1, bx = 0, s++; if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) { /* make single reg base, unless hint */ bt = it, bx = ix, it = -1, ix = 0; } if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) || s == 3 || s == 5 || s == 9) && bt == -1) bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */ if (it == -1 && (bt & 7) != REG_NUM_ESP && (input->eaflags & EAF_TIMESTWO)) it = bt, ix = bx, bt = -1, bx = 0, s = 1; /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */ if (s == 1 && it == REG_NUM_ESP) { /* swap ESP into base if scale is 1 */ t = it, it = bt, bt = t; x = ix, ix = bx, bx = x; } if (it == REG_NUM_ESP || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1)) goto err; /* wrong, for various reasons */ output->rex |= rexflags(it, ix, REX_X); output->rex |= rexflags(bt, bx, REX_B); if (it == -1 && (bt & 7) != REG_NUM_ESP) { /* no SIB needed */ int mod, rm; if (bt == -1) { rm = 5; mod = 0; } else { rm = (bt & 7); if (rm != REG_NUM_EBP && o == 0 && seg == NO_SEG && !forw_ref && !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS))) mod = 0; else if (IS_MOD_01()) mod = 1; else mod = 2; } output->sib_present = false; output->bytes = (bt == -1 || mod == 2 ? 4 : mod); output->modrm = GEN_MODRM(mod, rfield, rm); } else { /* we need a SIB */ int mod, scale, index, base; if (it == -1) index = 4, s = 1; else index = (it & 7); switch (s) { case 1: scale = 0; break; case 2: scale = 1; break; case 4: scale = 2; break; case 8: scale = 3; break; default: /* then what the smeg is it? */ goto err; /* panic */ } if (bt == -1) { base = 5; mod = 0; } else { base = (bt & 7); if (base != REG_NUM_EBP && o == 0 && seg == NO_SEG && !forw_ref && !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS))) mod = 0; else if (IS_MOD_01()) mod = 1; else mod = 2; } output->sib_present = true; output->bytes = (bt == -1 || mod == 2 ? 4 : mod); output->modrm = GEN_MODRM(mod, rfield, 4); output->sib = GEN_SIB(scale, index, base); } } else { /* it's 16-bit */ int mod, rm; int16_t o = input->offset; /* check for 64-bit long mode */ if (addrbits == 64) goto err; /* check all registers are BX, BP, SI or DI */ if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) || (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI)) goto err; /* ensure the user didn't specify DWORD/QWORD */ if (input->disp_size == 32 || input->disp_size == 64) goto err; if (s != 1 && i != -1) goto err; /* no can do, in 16-bit EA */ if (b == -1 && i != -1) { int tmp = b; b = i; i = tmp; } /* swap */ if ((b == R_SI || b == R_DI) && i != -1) { int tmp = b; b = i; i = tmp; } /* have BX/BP as base, SI/DI index */ if (b == i) goto err; /* shouldn't ever happen, in theory */ if (i != -1 && b != -1 && (i == R_BP || i == R_BX || b == R_SI || b == R_DI)) goto err; /* invalid combinations */ if (b == -1) /* pure offset: handled above */ goto err; /* so if it gets to here, panic! */ rm = -1; if (i != -1) switch (i * 256 + b) { case R_SI * 256 + R_BX: rm = 0; break; case R_DI * 256 + R_BX: rm = 1; break; case R_SI * 256 + R_BP: rm = 2; break; case R_DI * 256 + R_BP: rm = 3; break; } else switch (b) { case R_SI: rm = 4; break; case R_DI: rm = 5; break; case R_BP: rm = 6; break; case R_BX: rm = 7; break; } if (rm == -1) /* can't happen, in theory */ goto err; /* so panic if it does */ if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 && !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS))) mod = 0; else if (IS_MOD_01()) mod = 1; else mod = 2; output->sib_present = false; /* no SIB - it's 16-bit */ output->bytes = mod; /* bytes of offset needed */ output->modrm = GEN_MODRM(mod, rfield, rm); } } } output->size = 1 + output->sib_present + output->bytes; return output->type; err: return output->type = EA_INVALID; } static void add_asp(insn *ins, int addrbits) { int j, valid; int defdisp; valid = (addrbits == 64) ? 64|32 : 32|16; switch (ins->prefixes[PPS_ASIZE]) { case P_A16: valid &= 16; break; case P_A32: valid &= 32; break; case P_A64: valid &= 64; break; case P_ASP: valid &= (addrbits == 32) ? 16 : 32; break; default: break; } for (j = 0; j < ins->operands; j++) { if (is_class(MEMORY, ins->oprs[j].type)) { opflags_t i, b; /* Verify as Register */ if (!is_register(ins->oprs[j].indexreg)) i = 0; else i = nasm_reg_flags[ins->oprs[j].indexreg]; /* Verify as Register */ if (!is_register(ins->oprs[j].basereg)) b = 0; else b = nasm_reg_flags[ins->oprs[j].basereg]; if (ins->oprs[j].scale == 0) i = 0; if (!i && !b) { int ds = ins->oprs[j].disp_size; if ((addrbits != 64 && ds > 8) || (addrbits == 64 && ds == 16)) valid &= ds; } else { if (!(REG16 & ~b)) valid &= 16; if (!(REG32 & ~b)) valid &= 32; if (!(REG64 & ~b)) valid &= 64; if (!(REG16 & ~i)) valid &= 16; if (!(REG32 & ~i)) valid &= 32; if (!(REG64 & ~i)) valid &= 64; } } } if (valid & addrbits) { ins->addr_size = addrbits; } else if (valid & ((addrbits == 32) ? 16 : 32)) { /* Add an address size prefix */ ins->prefixes[PPS_ASIZE] = (addrbits == 32) ? P_A16 : P_A32;; ins->addr_size = (addrbits == 32) ? 16 : 32; } else { /* Impossible... */ errfunc(ERR_NONFATAL, "impossible combination of address sizes"); ins->addr_size = addrbits; /* Error recovery */ } defdisp = ins->addr_size == 16 ? 16 : 32; for (j = 0; j < ins->operands; j++) { if (!(MEM_OFFS & ~ins->oprs[j].type) && (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) { /* * mem_offs sizes must match the address size; if not, * strip the MEM_OFFS bit and match only EA instructions */ ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY); } } }