summaryrefslogtreecommitdiff
path: root/parser.c
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2002-04-30 20:51:32 +0000
committerH. Peter Anvin <hpa@zytor.com>2002-04-30 20:51:32 +0000
commitea6e34db64c7da7cb885197316c6b5e7d048bdb9 (patch)
tree78e728348f8fe09e394a51c3617e6261de0f4001 /parser.c
downloadnasm-ea6e34db64c7da7cb885197316c6b5e7d048bdb9.tar.gz
nasm-ea6e34db64c7da7cb885197316c6b5e7d048bdb9.tar.bz2
nasm-ea6e34db64c7da7cb885197316c6b5e7d048bdb9.zip
NASM 0.91
Diffstat (limited to 'parser.c')
-rw-r--r--parser.c1306
1 files changed, 1306 insertions, 0 deletions
diff --git a/parser.c b/parser.c
new file mode 100644
index 0000000..14c7a5b
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,1306 @@
+/* parser.c source line parser for the Netwide Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ *
+ * initial version 27/iii/95 by Simon Tatham
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "parser.h"
+#include "float.h"
+
+#include "names.c"
+
+
+static long reg_flags[] = { /* sizes and special flags, one entry per
+ * register. Indexed by the register number that nexttoken() leaves in
+ * tokval.t_integer, i.e. the position bsi() found in reg_names[].
+ * NOTE(review): the order therefore presumably has to track
+ * reg_names[] (not visible here; likely names.c) -- confirm before
+ * adding or reordering entries. */
+ 0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL,
+ REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8,
+ REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG,
+ REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX,
+ REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS,
+ MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG,
+ REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG,
+ FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG,
+ REG_TREG
+};
+
+enum { /* special tokens: nexttoken() returns the bsi() index into
+ * special_names[] as the token value, so these constants must stay
+ * in the same order as that table */
+ S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_QWORD, S_SHORT, S_TO,
+ S_TWORD, S_WORD
+};
+
+static char *special_names[] = { /* and the actual text; searched with
+ * bsi(), a strcmp()-based binary search, so keep it sorted */
+ "byte", "dword", "far", "long", "near", "qword", "short", "to",
+ "tword", "word"
+};
+
+static char *prefix_names[] = { /* prefix keywords, sorted for bsi();
+ * nexttoken() adds PREFIX_ENUM_START to the index found here to form
+ * the value of a TOKEN_PREFIX token */
+ "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne",
+ "repnz", "repz", "times"
+};
+
+/*
+ * Evaluator datatype. Expressions, within the evaluator, are
+ * stored as an array of these beasts, terminated by a record with
+ * type==0. Mostly, it's a vector type: each type denotes some kind
+ * of a component, and the value denotes the multiple of that
+ * component present in the expression. The exception is the WRT
+ * type, whose `value' field denotes the segment to which the
+ * expression is relative. These segments will be segment-base
+ * types, i.e. either odd segment values or SEG_ABS types. So it is
+ * still valid to assume that anything with a `value' field of zero
+ * is insignificant.
+ *
+ * add_vectors() merges two of these arrays in ascending order of
+ * `type', so it relies on each array already being ordered that way.
+ */
+typedef struct {
+ long type; /* a register, or EXPR_xxx */
+ long value; /* must be >= 32 bits */
+} expr;
+
+static void eval_reset(void); /* rewind the expression scratch area */
+static expr *evaluate(int); /* parse an expression; NULL on error */
+
+/*
+ * ASSUMPTION MADE HERE. The number of distinct register names
+ * (i.e. possible "type" fields for an expr structure) does not
+ * exceed 126.
+ *
+ * Types below EXPR_SIMPLE are register numbers. A segment-base term
+ * is encoded as EXPR_SEGBASE plus the segment number.
+ */
+#define EXPR_SIMPLE 126 /* plain scalar term */
+#define EXPR_WRT 127 /* `value' holds the WRT segment */
+#define EXPR_SEGBASE 128 /* first of the segment-base types */
+
+static int is_reloc(expr *);
+static int is_simple(expr *);
+static int is_really_simple (expr *);
+static long reloc_value(expr *);
+static long reloc_seg(expr *);
+static long reloc_wrt(expr *);
+
+enum { /* token types, other than chars: these start at 256 because a
+ * single-character token is returned by nexttoken() as its own
+ * unsigned char value */
+ TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, TOKEN_ERRNUM,
+ TOKEN_HERE, TOKEN_BASE, TOKEN_SPECIAL, TOKEN_PREFIX, TOKEN_SHL,
+ TOKEN_SHR, TOKEN_SDIV, TOKEN_SMOD, TOKEN_SEG, TOKEN_WRT,
+ TOKEN_FLOAT
+};
+
+struct tokenval { /* extra data nexttoken() attaches to a token */
+ long t_integer, t_inttwo; /* t_integer: number value or table index;
+ * t_inttwo: condition-code index of a conditional instruction, or
+ * the length of a quoted string */
+ char *t_charptr; /* text of an identifier, float or quoted string;
+ * NULL for a plain numeric constant */
+};
+
+static char tempstorage[1024], *q; /* scratch text for the current line;
+ * q is the next free byte and is reset by parse_line() */
+static int bsi (char *string, char **array, int size);/* binary search */
+
+static int nexttoken (void);
+static int is_comma_next (void);
+
+static char *bufptr; /* read position in the line being parsed */
+static int i; /* the current token, as returned by nexttoken() */
+static struct tokenval tokval; /* details of the current token */
+static lfunc labelfunc; /* label lookup callback given to parse_line() */
+static efunc error; /* error callback given to parse_line() */
+static char *label; /* label defined on this line, or "" if none yet */
+static struct ofmt *outfmt; /* output format given to parse_line() */
+
+static long seg, ofs; /* segment and offset of the current line */
+
+/*
+ * Parse one source line into `result' and return `result'.
+ *
+ * segment/offset give the location of the line (used for `$', `$$'
+ * and for a label defined on this very line); lookup_label resolves
+ * other symbols; pass is handed to the evaluator as the criticality
+ * level for TIMES, RESx and EQU (other instructions are critical
+ * only on pass two); buffer is the NUL-terminated line text; output
+ * supplies segbase() for the SEG operator; errfunc reports
+ * diagnostics.
+ *
+ * On return result->opcode is -1 if the line holds no instruction or
+ * could not be parsed. result->eops is always initialised: it is the
+ * operand list of DB/DW/DD/DQ/DT (release it with cleanup_insn()) and
+ * NULL otherwise. result->label and floating-point operands point
+ * into this file's scratch storage and string operands point into
+ * `buffer', so they are only valid until the next call or until the
+ * buffer changes.
+ */
+insn *parse_line (long segment, long offset, lfunc lookup_label, int pass,
+ char *buffer, insn *result, struct ofmt *output,
+ efunc errfunc) {
+ int operand;
+ int critical;
+
+ q = tempstorage;
+ bufptr = buffer;
+ labelfunc = lookup_label;
+ outfmt = output;
+ error = errfunc;
+ seg = segment;
+ ofs = offset;
+ label = "";
+
+ i = nexttoken();
+
+ result->eops = NULL; /* must do this, whatever happens */
+
+ if (i==0) { /* blank line - ignore */
+ result->label = NULL; /* so, no label on it */
+ result->opcode = -1; /* and no instruction either */
+ return result;
+ }
+ if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX &&
+ (i!=TOKEN_REG || (REG_SREG & ~reg_flags[tokval.t_integer]))) {
+ error (ERR_NONFATAL, "label or instruction expected"
+ " at start of line");
+ result->label = NULL;
+ result->opcode = -1;
+ return result;
+ }
+
+ if (i == TOKEN_ID) { /* there's a label here */
+ label = result->label = tokval.t_charptr;
+ i = nexttoken();
+ if (i == ':') { /* skip over the optional colon */
+ i = nexttoken();
+ }
+ } else /* no label; so, moving swiftly on */
+ result->label = NULL;
+
+ if (i==0) {
+ result->opcode = -1; /* this line contains just a label */
+ return result;
+ }
+
+ result->nprefix = 0;
+ result->times = 1;
+
+ /* prefixes: prefix keywords, or a segment register used as one */
+ while (i == TOKEN_PREFIX ||
+ (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) {
+ /*
+ * Handle special case: the TIMES prefix.
+ */
+ if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
+ expr *value;
+
+ i = nexttoken();
+ eval_reset();
+ value = evaluate (pass);
+ if (!value) { /* but, error in evaluator */
+ result->opcode = -1; /* unrecoverable parse error: */
+ return result; /* ignore this instruction */
+ }
+ if (!is_simple (value)) {
+ error (ERR_NONFATAL,
+ "non-constant argument supplied to TIMES");
+ result->times = 1;
+ } else
+ result->times = value->value;
+ } else {
+ if (result->nprefix == MAXPREFIX)
+ error (ERR_NONFATAL,
+ "instruction has more than %d prefixes", MAXPREFIX);
+ else
+ result->prefixes[result->nprefix++] = tokval.t_integer;
+ i = nexttoken();
+ }
+ }
+
+ if (i != TOKEN_INSN) {
+ error (ERR_NONFATAL, "parser: instruction expected");
+ result->opcode = -1;
+ return result;
+ }
+
+ result->opcode = tokval.t_integer;
+ result->condition = tokval.t_inttwo;
+
+ /*
+ * RESB, RESW and RESD cannot be satisfied with incorrectly
+ * evaluated operands, since the correct values _must_ be known
+ * on the first pass. Hence, even in pass one, we set the
+ * `critical' flag on calling evaluate(), so that it will bomb
+ * out on undefined symbols. Nasty, but there's nothing we can
+ * do about it.
+ *
+ * For the moment, EQU has the same difficulty, so we'll
+ * include that.
+ */
+ if (result->opcode == I_RESB ||
+ result->opcode == I_RESW ||
+ result->opcode == I_RESD ||
+ result->opcode == I_RESQ ||
+ result->opcode == I_REST ||
+ result->opcode == I_EQU)
+ critical = pass;
+ else
+ critical = (pass==2 ? 2 : 0);
+
+ if (result->opcode == I_DB ||
+ result->opcode == I_DW ||
+ result->opcode == I_DD ||
+ result->opcode == I_DQ ||
+ result->opcode == I_DT) {
+ extop *eop, **tail = &result->eops;
+ int oper_num = 0;
+
+ /*
+ * Begin to read the DB/DW/DD/DQ/DT operands.
+ */
+ while (1) {
+ i = nexttoken();
+ if (i == 0)
+ break;
+ eop = *tail = nasm_malloc(sizeof(extop));
+ tail = &eop->next;
+ eop->next = NULL;
+ eop->type = EOT_NOTHING;
+ oper_num++;
+
+ /* a quoted string standing alone as an operand is emitted as is */
+ if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) {
+ eop->type = EOT_DB_STRING;
+ eop->stringval = tokval.t_charptr;
+ eop->stringlen = tokval.t_inttwo;
+ i = nexttoken(); /* eat the comma */
+ continue;
+ }
+
+ if (i == TOKEN_FLOAT || i == '-') {
+ long sign = +1L;
+
+ if (i == '-') {
+ /* look ahead: only a `-' directly before a float is
+ * handled here, otherwise rewind and let the
+ * evaluator see the minus sign */
+ char *save = bufptr;
+ i = nexttoken();
+ sign = -1L;
+ if (i != TOKEN_FLOAT) {
+ bufptr = save;
+ i = '-';
+ }
+ }
+
+ if (i == TOKEN_FLOAT) {
+ eop->type = EOT_DB_STRING;
+ eop->stringval = q;
+ if (result->opcode == I_DD)
+ eop->stringlen = 4;
+ else if (result->opcode == I_DQ)
+ eop->stringlen = 8;
+ else if (result->opcode == I_DT)
+ eop->stringlen = 10;
+ else {
+ error(ERR_NONFATAL, "floating-point constant"
+ " encountered in `D%c' instruction",
+ result->opcode == I_DW ? 'W' : 'B');
+ eop->type = EOT_NOTHING;
+ /* no storage is reserved for a rejected constant;
+ * the length must not be left uninitialised */
+ eop->stringlen = 0;
+ }
+ if (eop->stringlen > 0) {
+ q += eop->stringlen;
+ if (!float_const (tokval.t_charptr, sign,
+ (unsigned char *)eop->stringval,
+ eop->stringlen, error))
+ eop->type = EOT_NOTHING;
+ }
+ i = nexttoken(); /* eat the comma */
+ continue;
+ }
+ }
+
+ /* anything else */ {
+ expr *value;
+ eval_reset();
+ value = evaluate (critical);
+ if (!value) { /* but, error in evaluator */
+ result->opcode = -1;/* unrecoverable parse error: */
+ return result; /* ignore this instruction */
+ }
+ if (is_reloc(value)) {
+ eop->type = EOT_DB_NUMBER;
+ eop->offset = reloc_value(value);
+ eop->segment = reloc_seg(value);
+ eop->wrt = reloc_wrt(value);
+ } else {
+ error (ERR_NONFATAL,
+ "`%s' operand %d: expression is not simple"
+ " or relocatable",
+ insn_names[result->opcode], oper_num);
+ }
+ }
+ }
+ return result;
+ }
+
+ /* right. Now we begin to parse the operands. There may be up to three
+ * of these, separated by commas, and terminated by a zero token. */
+
+ for (operand = 0; operand < 3; operand++) {
+ expr *seg, *value; /* used most of the time */
+ int mref; /* is this going to be a memory ref? */
+
+ result->oprs[operand].addr_size = 0;/* have to zero this whatever */
+ i = nexttoken();
+ if (i == 0) break; /* end of operands: get out of here */
+ result->oprs[operand].type = 0; /* so far, no override */
+ while (i == TOKEN_SPECIAL) {/* size specifiers */
+ switch ((int)tokval.t_integer) {
+ case S_BYTE:
+ result->oprs[operand].type |= BITS8;
+ break;
+ case S_WORD:
+ result->oprs[operand].type |= BITS16;
+ break;
+ case S_DWORD:
+ case S_LONG:
+ result->oprs[operand].type |= BITS32;
+ break;
+ case S_QWORD:
+ result->oprs[operand].type |= BITS64;
+ break;
+ case S_TWORD:
+ result->oprs[operand].type |= BITS80;
+ break;
+ case S_TO:
+ result->oprs[operand].type |= TO;
+ break;
+ case S_FAR:
+ result->oprs[operand].type |= FAR;
+ break;
+ case S_NEAR:
+ result->oprs[operand].type |= NEAR;
+ break;
+ case S_SHORT:
+ result->oprs[operand].type |= SHORT;
+ break;
+ }
+ i = nexttoken();
+ }
+
+ if (i == '[') { /* memory reference */
+ i = nexttoken();
+ mref = TRUE;
+ if (i == TOKEN_SPECIAL) { /* check for address size override */
+ switch ((int)tokval.t_integer) {
+ case S_WORD:
+ result->oprs[operand].addr_size = 16;
+ break;
+ case S_DWORD:
+ case S_LONG:
+ result->oprs[operand].addr_size = 32;
+ break;
+ default:
+ error (ERR_NONFATAL, "invalid size specification in"
+ " effective address");
+ }
+ i = nexttoken();
+ }
+ } else /* immediate operand, or register */
+ mref = FALSE;
+
+ eval_reset();
+
+ value = evaluate (critical);
+ if (!value) { /* error in evaluator */
+ result->opcode = -1; /* unrecoverable parse error: */
+ return result; /* ignore this instruction */
+ }
+ if (i == ':' && mref) { /* it was seg:offset */
+ seg = value; /* so shift this into the segment */
+ i = nexttoken(); /* then skip the colon */
+ if (i == TOKEN_SPECIAL) { /* another check for size override */
+ switch ((int)tokval.t_integer) {
+ case S_WORD:
+ result->oprs[operand].addr_size = 16;
+ break;
+ case S_DWORD:
+ case S_LONG:
+ result->oprs[operand].addr_size = 32;
+ break;
+ default:
+ error (ERR_NONFATAL, "invalid size specification in"
+ " effective address");
+ }
+ i = nexttoken();
+ }
+ value = evaluate (critical);
+ /* and get the offset */
+ if (!value) { /* but, error in evaluator */
+ result->opcode = -1; /* unrecoverable parse error: */
+ return result; /* ignore this instruction */
+ }
+ } else seg = NULL;
+ if (mref) { /* find ] at the end */
+ if (i != ']') {
+ error (ERR_NONFATAL, "parser: expecting ]");
+ do { /* error recovery again */
+ i = nexttoken();
+ } while (i != 0 && i != ',');
+ } else /* we got the required ] */
+ i = nexttoken();
+ } else { /* immediate operand */
+ if (i != 0 && i != ',' && i != ':') {
+ error (ERR_NONFATAL, "comma or end of line expected");
+ do { /* error recovery */
+ i = nexttoken();
+ } while (i != 0 && i != ',');
+ } else if (i == ':') {
+ result->oprs[operand].type |= COLON;
+ }
+ }
+
+ /* now convert the exprs returned from evaluate() into operand
+ * descriptions... */
+
+ if (mref) { /* it's a memory reference */
+ expr *e = value;
+ /* note: this `i' hides the global current-token variable
+ * for the rest of the block */
+ int b, i, s; /* basereg, indexreg, scale */
+ long o; /* offset */
+
+ if (seg) { /* segment override */
+ /* the type is range-checked first: only register types
+ * may be used to index reg_flags[] */
+ if (seg[1].type!=0 || seg->value!=1 ||
+ seg->type >= EXPR_SIMPLE ||
+ REG_SREG & ~reg_flags[seg->type])
+ error (ERR_NONFATAL, "invalid segment override");
+ else if (result->nprefix == MAXPREFIX)
+ error (ERR_NONFATAL,
+ "instruction has more than %d prefixes",
+ MAXPREFIX);
+ else
+ result->prefixes[result->nprefix++] = seg->type;
+ }
+
+ b = i = -1, o = s = 0;
+
+ if (e->type < EXPR_SIMPLE) { /* this bit's a register */
+ if (e->value == 1) /* in fact it can be basereg */
+ b = e->type;
+ else /* no, it has to be indexreg */
+ i = e->type, s = e->value;
+ e++;
+ }
+ if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */
+ if (e->value != 1) { /* it has to be indexreg */
+ if (i != -1) { /* but it can't be */
+ error(ERR_NONFATAL, "invalid effective address");
+ result->opcode = -1;
+ return result;
+ } else
+ i = e->type, s = e->value;
+ } else { /* it can be basereg */
+ if (b != -1) /* or can it? */
+ i = e->type, s = 1;
+ else
+ b = e->type;
+ }
+ e++;
+ }
+ if (e->type != 0) { /* is there an offset? */
+ if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */
+ error (ERR_NONFATAL, "invalid effective address");
+ result->opcode = -1;
+ return result;
+ } else {
+ if (e->type == EXPR_SIMPLE) {
+ o = e->value;
+ e++;
+ }
+ if (e->type == EXPR_WRT) {
+ result->oprs[operand].wrt = e->value;
+ e++;
+ } else
+ result->oprs[operand].wrt = NO_SEG;
+ if (e->type != 0) { /* is there a segment id? */
+ if (e->type < EXPR_SEGBASE) {
+ error (ERR_NONFATAL,
+ "invalid effective address");
+ result->opcode = -1;
+ return result;
+ } else
+ result->oprs[operand].segment = (e->type -
+ EXPR_SEGBASE);
+ e++;
+ } else
+ result->oprs[operand].segment = NO_SEG;
+ }
+ } else {
+ o = 0;
+ result->oprs[operand].wrt = NO_SEG;
+ result->oprs[operand].segment = NO_SEG;
+ }
+
+ if (e->type != 0) { /* there'd better be nothing left! */
+ error (ERR_NONFATAL, "invalid effective address");
+ result->opcode = -1;
+ return result;
+ }
+
+ result->oprs[operand].type |= MEMORY;
+ if (b==-1 && (i==-1 || s==0))
+ result->oprs[operand].type |= MEM_OFFS;
+ result->oprs[operand].basereg = b;
+ result->oprs[operand].indexreg = i;
+ result->oprs[operand].scale = s;
+ result->oprs[operand].offset = o;
+ } else { /* it's not a memory reference */
+ if (is_reloc(value)) { /* it's immediate */
+ result->oprs[operand].type |= IMMEDIATE;
+ result->oprs[operand].offset = reloc_value(value);
+ result->oprs[operand].segment = reloc_seg(value);
+ result->oprs[operand].wrt = reloc_wrt(value);
+ if (is_simple(value) && reloc_value(value)==1)
+ result->oprs[operand].type |= UNITY;
+ } else { /* it's a register */
+ if (value->type>=EXPR_SIMPLE || value->value!=1) {
+ error (ERR_NONFATAL, "invalid operand type");
+ result->opcode = -1;
+ return result;
+ }
+ /* clear overrides, except TO which applies to FPU regs */
+ result->oprs[operand].type &= TO;
+ result->oprs[operand].type |= REGISTER;
+ result->oprs[operand].type |= reg_flags[value->type];
+ result->oprs[operand].basereg = value->type;
+ }
+ }
+ }
+
+ result->operands = operand; /* set operand count */
+
+ while (operand<3) /* clear remaining operands */
+ result->oprs[operand++].type = 0;
+
+ /*
+ * Transform RESW, RESD, RESQ, REST into RESB.
+ */
+ switch (result->opcode) {
+ case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break;
+ case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break;
+ case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break;
+ case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break;
+ }
+
+ return result;
+}
+
+/*
+ * Look ahead, without consuming anything, and report whether the next
+ * non-blank character at `bufptr' ends an operand: a comma, a `;'
+ * comment, or the end of the line. Returns non-zero if so.
+ */
+static int is_comma_next (void) {
+    char *p = bufptr;
+
+    /* <ctype.h> functions are only defined for unsigned char values */
+    while (isspace((unsigned char)*p))
+        p++;
+    return (*p == ',' || *p == ';' || !*p);
+}
+
+/* isidstart matches any character that may start an identifier: a letter,
+ * `_', `.' or `?'. isidchar matches any character that may follow the first
+ * one: everything isidstart accepts, plus digits, `$', `#', `@' and `~'.
+ *
+ * The arguments handed to the <ctype.h> functions are converted to
+ * unsigned char first, since those functions are undefined for negative
+ * values and plain char may be signed. Each macro evaluates its argument
+ * more than once, so do not pass an expression with side effects. */
+
+#define isidstart(c) ( isalpha((unsigned char)(c)) || (c)=='_' || (c)=='.' || (c)=='?' )
+#define isidchar(c) ( isidstart(c) || isdigit((unsigned char)(c)) || (c)=='$' || (c)=='#' || (c)=='@' || (c)=='~' )
+
+/* Ditto for numeric constants. */
+
+#define isnumstart(c) ( isdigit((unsigned char)(c)) || (c)=='$' )
+#define isnumchar(c) ( isalnum((unsigned char)(c)) )
+
+/* This returns the numeric value of a given 'digit'. */
+
+#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')
+
+/*
+ * This tokeniser routine reads from `bufptr' and advances it past the
+ * token it returns, so lookahead may be performed by saving and
+ * restoring `bufptr'. Details of the token are left in `tokval', and
+ * the text of identifiers and numbers is appended at `q'.
+ *
+ * Returns 0 at the end of the line or at a `;' comment (without
+ * advancing), one of the TOKEN_xxx values, or the character itself
+ * (as an unsigned char) for any other single character.
+ */
+
+static int nexttoken (void) {
+    char ourcopy[256], *r, *s;
+
+    /* <ctype.h> functions are only defined for unsigned char values */
+    while (isspace((unsigned char)*bufptr)) bufptr++;
+    if (!*bufptr) return 0;
+
+    /* we have a token; either an id, a number or a char */
+    if (isidstart(*bufptr) ||
+        (*bufptr == '$' && isidstart(bufptr[1]))) {
+        /* now we've got an identifier */
+        int n;
+        int is_sym = FALSE;
+        char *limit = ourcopy + sizeof(ourcopy) - 1;
+
+        if (*bufptr == '$') {          /* `$name' forces a plain symbol */
+            is_sym = TRUE;
+            bufptr++;
+        }
+
+        tokval.t_charptr = q;
+        *q++ = *bufptr++;
+        while (isidchar(*bufptr)) *q++ = *bufptr++;
+        *q++ = '\0';
+        /*
+         * Make a lower-case copy for the keyword lookups. It is cut
+         * off at the size of ourcopy: an identifier that long cannot
+         * match any keyword anyway, and must not overrun the buffer.
+         */
+        for (s=tokval.t_charptr, r=ourcopy; *s && r < limit; s++)
+            *r++ = tolower ((unsigned char)*s);
+        *r = '\0';
+        if (is_sym)
+            return TOKEN_ID;           /* bypass all other checks */
+        /* right, so we have an identifier sitting in temp storage. now,
+         * is it actually a register or instruction name, or what? */
+        if ((tokval.t_integer=bsi(ourcopy, reg_names,
+                                  elements(reg_names)))>=0)
+            return TOKEN_REG;
+        if ((tokval.t_integer=bsi(ourcopy, insn_names,
+                                  elements(insn_names)))>=0)
+            return TOKEN_INSN;
+        /* conditional instructions: a stem from icn[] followed by a
+         * condition code */
+        for (n=0; n<elements(icn); n++)
+            if (!strncmp(ourcopy, icn[n], strlen(icn[n]))) {
+                char *p = ourcopy + strlen(icn[n]);
+                tokval.t_integer = ico[n];
+                if ((tokval.t_inttwo=bsi(p, conditions,
+                                         elements(conditions)))>=0)
+                    return TOKEN_INSN;
+            }
+        if ((tokval.t_integer=bsi(ourcopy, prefix_names,
+                                  elements(prefix_names)))>=0) {
+            tokval.t_integer += PREFIX_ENUM_START;
+            return TOKEN_PREFIX;
+        }
+        if ((tokval.t_integer=bsi(ourcopy, special_names,
+                                  elements(special_names)))>=0)
+            return TOKEN_SPECIAL;
+        if (!strcmp(ourcopy, "seg"))
+            return TOKEN_SEG;
+        if (!strcmp(ourcopy, "wrt"))
+            return TOKEN_WRT;
+        return TOKEN_ID;
+    } else if (*bufptr == '$' && !isnumchar(bufptr[1])) {
+        /*
+         * It's a $ sign with no following hex number; this must
+         * mean it's a Here token ($), evaluating to the current
+         * assembly location, or a Base token ($$), evaluating to
+         * the base of the current segment.
+         */
+        bufptr++;
+        if (*bufptr == '$') {
+            bufptr++;
+            return TOKEN_BASE;
+        }
+        return TOKEN_HERE;
+    } else if (isnumstart(*bufptr)) {  /* now we've got a number */
+        char *start = q;
+        int rn_error;
+
+        *q++ = *bufptr++;
+        while (isnumchar(*bufptr)) {
+            *q++ = *bufptr++;
+        }
+        if (*bufptr == '.') {
+            /*
+             * a floating point constant
+             */
+            *q++ = *bufptr++;
+            while (isnumchar(*bufptr)) {
+                *q++ = *bufptr++;
+            }
+            *q++ = '\0';
+            tokval.t_charptr = start;
+            return TOKEN_FLOAT;
+        }
+        *q++ = '\0';
+        tokval.t_integer = readnum(start, &rn_error);
+        if (rn_error)
+            return TOKEN_ERRNUM;       /* some malformation occurred */
+        tokval.t_charptr = NULL;
+        return TOKEN_NUM;
+    } else if (*bufptr == '\'' || *bufptr == '"') {/* a char constant */
+        char quote = *bufptr++, *r;
+        r = tokval.t_charptr = bufptr;
+        while (*bufptr && *bufptr != quote) bufptr++;
+        tokval.t_inttwo = bufptr - r;  /* store full version */
+        if (!*bufptr)
+            return TOKEN_ERRNUM;       /* unmatched quotes */
+        tokval.t_integer = 0;
+        r = bufptr++;                  /* skip over final quote */
+        /* walk backwards to the opening quote, so that the first
+         * character ends up in the least significant byte */
+        while (quote != *--r) {
+            tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r;
+        }
+        return TOKEN_NUM;
+    } else if (*bufptr == ';') {       /* a comment has happened - stay */
+        return 0;
+    } else if ((*bufptr == '>' || *bufptr == '<' ||
+                *bufptr == '/' || *bufptr == '%') && bufptr[1] == *bufptr) {
+        /* doubled operator characters: >> << // %% */
+        bufptr += 2;
+        return (bufptr[-2] == '>' ? TOKEN_SHR :
+                bufptr[-2] == '<' ? TOKEN_SHL :
+                bufptr[-2] == '/' ? TOKEN_SDIV :
+                TOKEN_SMOD);
+    } else                             /* just an ordinary char */
+        return (unsigned char) (*bufptr++);
+}
+
+/*
+ * Binary search: look `string' up in `array', which holds `size'
+ * entries in strcmp() order. Returns the index of the matching
+ * entry, or -1 if there is none.
+ */
+static int bsi (char *string, char **array, int size) {
+    int lo = -1;                       /* everything up to lo sorts too low */
+    int hi = size;                     /* everything from hi on, too high */
+
+    while (hi - lo > 1) {
+        int mid = (lo + hi) / 2;
+        int cmp = strcmp(string, array[mid]);
+
+        if (cmp == 0)
+            return mid;                /* found it */
+        if (cmp < 0)
+            hi = mid;                  /* look in the lower half */
+        else
+            lo = mid;                  /* look in the upper half */
+    }
+    return -1;                         /* nothing left to look at */
+}
+
+/*
+ * Release the extended-operand list hanging off an instruction,
+ * leaving i->eops NULL.
+ */
+void cleanup_insn (insn *i) {
+    extop *doomed;
+
+    while ((doomed = i->eops) != NULL) {
+        i->eops = doomed->next;        /* unlink before freeing */
+        nasm_free (doomed);
+    }
+}
+
+/* ------------- Evaluator begins here ------------------ */
+
+static expr exprtempstorage[1024], *tempptr; /* store exprs in here:
+ * tempptr is the next free slot; it is rewound by eval_reset() and
+ * only ever moves forward in between */
+
+/*
+ * Add two vector datatypes. We have some bizarre behaviour on far-
+ * absolute segment types: we preserve them during addition _only_
+ * if one of the segments is a truly pure scalar.
+ *
+ * Both inputs are walked in step and merged in ascending order of
+ * `type'; terms of equal type have their values summed (and are
+ * kept even when the sum is zero). The result is built at `tempptr',
+ * which is left pointing past its terminator, and a pointer to its
+ * first term is returned. Note that the parameter `q' hides the
+ * file-level text pointer of the same name.
+ */
+static expr *add_vectors(expr *p, expr *q) {
+ expr *r = tempptr;
+ int preserve;
+
+ preserve = is_really_simple(p) || is_really_simple(q);
+
+ while (p->type && q->type && /* merge until one side runs out... */
+ p->type < EXPR_SEGBASE+SEG_ABS && /* ...or reaches a far-absolute term */
+ q->type < EXPR_SEGBASE+SEG_ABS)
+ if (p->type > q->type) {
+ tempptr->type = q->type;
+ tempptr->value = q->value;
+ tempptr++, q++;
+ } else if (p->type < q->type) {
+ tempptr->type = p->type;
+ tempptr->value = p->value;
+ tempptr++, p++;
+ } else { /* *p and *q have same type */
+ tempptr->type = p->type;
+ tempptr->value = p->value + q->value;
+ tempptr++, p++, q++;
+ }
+ while (p->type && /* copy what is left of p */
+ (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) {
+ tempptr->type = p->type;
+ tempptr->value = p->value;
+ tempptr++, p++;
+ }
+ while (q->type && /* copy what is left of q */
+ (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) {
+ tempptr->type = q->type;
+ tempptr->value = q->value;
+ tempptr++, q++;
+ }
+ (tempptr++)->type = 0; /* terminate the result */
+
+ return r;
+}
+
+/*
+ * Multiply every term of a vector by `scalar', in place, and return
+ * the vector. The vector is cut short at the first far-absolute
+ * segment term, if there is one, which strips that part off.
+ */
+static expr *scalar_mult(expr *vect, long scalar) {
+    expr *term;
+
+    for (term = vect;
+         term->type != 0 && term->type < EXPR_SEGBASE+SEG_ABS;
+         term++)
+        term->value = scalar * (term->value);
+    term->type = 0;                    /* (re)terminate the vector here */
+
+    return vect;
+}
+
+/*
+ * Build a one-term vector holding the plain number `scalar' in the
+ * expression scratch area, and return it.
+ */
+static expr *scalarvect (long scalar) {
+    expr *result = tempptr;
+
+    result[0].type = EXPR_SIMPLE;
+    result[0].value = scalar;
+    result[1].type = 0;                /* terminator */
+    tempptr += 2;
+    return result;
+}
+
+/*
+ * Return TRUE if the argument is a simple scalar: nothing but a
+ * plain number, once zero-valued terms are disregarded. A trailing
+ * far-absolute segment term is tolerated.
+ */
+static int is_simple (expr *vect) {
+    /* step over leading terms that have cancelled out */
+    for (; vect->type != 0 && vect->value == 0; vect++)
+        ;
+    if (vect->type == 0)
+        return 1;                      /* nothing there at all */
+    if (vect->type != EXPR_SIMPLE)
+        return 0;
+    /* step over the number, and any cancelled terms behind it */
+    for (vect++; vect->type != 0 && vect->value == 0; vect++)
+        ;
+    return vect->type == 0 || vect->type >= EXPR_SEGBASE+SEG_ABS;
+}
+
+/*
+ * Return TRUE if the argument is a simple scalar and nothing else:
+ * unlike is_simple(), a far-absolute segment term disqualifies it.
+ */
+static int is_really_simple (expr *vect) {
+    /* step over leading terms that have cancelled out */
+    for (; vect->type != 0 && vect->value == 0; vect++)
+        ;
+    if (vect->type == 0)
+        return 1;                      /* nothing there at all */
+    if (vect->type != EXPR_SIMPLE)
+        return 0;
+    /* step over the number, and any cancelled terms behind it */
+    for (vect++; vect->type != 0 && vect->value == 0; vect++)
+        ;
+    return vect->type == 0;
+}
+
+/*
+ * Return TRUE if the argument is relocatable (i.e. a simple
+ * scalar, plus at most one segment-base, plus possibly a WRT).
+ *
+ * NOTE(review): as written, both exits after the final loop return
+ * 1, so the only vectors rejected are those whose first non-zero
+ * term is a register; a second segment-base term is not detected.
+ * That does not match the description above -- confirm whether
+ * callers depend on the lenient result before tightening it.
+ */
+static int is_reloc (expr *vect) {
+ while (vect->type && !vect->value) /* skip cancelled terms */
+ vect++;
+ if (!vect->type) /* nothing left: trivially relocatable */
+ return 1;
+ if (vect->type < EXPR_SIMPLE) /* a register is present */
+ return 0;
+ if (vect->type == EXPR_SIMPLE) { /* step over the plain number */
+ do {
+ vect++;
+ } while (vect->type && !vect->value);
+ if (!vect->type)
+ return 1;
+ }
+ do { /* step over one more term, and any WRT or cancelled terms */
+ vect++;
+ } while (vect->type && (vect->type == EXPR_WRT || !vect->value));
+ if (!vect->type)
+ return 1;
+ return 1; /* see NOTE(review) above: leftover terms are accepted */
+}
+
+/*
+ * Return the scalar part of a relocatable vector, i.e. the value of
+ * its plain-number term, or 0 if the first significant term is not
+ * a plain number. Simple scalar vectors qualify as relocatable.
+ */
+static long reloc_value (expr *vect) {
+    /* step over leading terms that have cancelled out */
+    for (; vect->type != 0 && vect->value == 0; vect++)
+        ;
+    if (vect->type != EXPR_SIMPLE)
+        return 0;
+    return vect->value;
+}
+
+/*
+ * Return the segment number of a relocatable vector, or NO_SEG for
+ * simple scalars. WRT terms and zero-valued terms are passed over
+ * on the way to the segment-base term.
+ */
+static long reloc_seg (expr *vect) {
+    for (; vect->type != 0 && (vect->type == EXPR_WRT || vect->value == 0);
+         vect++)
+        ;
+    if (vect->type == EXPR_SIMPLE) {
+        /* step over the plain number, then skip as before */
+        for (vect++;
+             vect->type != 0 && (vect->type == EXPR_WRT || vect->value == 0);
+             vect++)
+            ;
+    }
+    if (vect->type == 0)
+        return NO_SEG;
+    return vect->type - EXPR_SEGBASE;
+}
+
+/*
+ * Return the WRT segment number of a relocatable vector, or NO_SEG
+ * if no WRT part is present.
+ */
+static long reloc_wrt (expr *vect) {
+    /* everything of a lower type comes before the WRT term */
+    for (; vect->type != 0 && vect->type < EXPR_WRT; vect++)
+        ;
+    if (vect->type != EXPR_WRT)
+        return NO_SEG;
+    return vect->value;
+}
+
+/*
+ * Start a fresh evaluation: hand the whole expression scratch area
+ * back by rewinding `tempptr' to its first slot.
+ */
+static void eval_reset(void) {
+    tempptr = &exprtempstorage[0];
+}
+
+/*
+ * The SEG operator: work out the segment part of a relocatable
+ * value. An absolute segment gives a plain number; any other
+ * segment gives a one-term vector naming the segment base that the
+ * output format reports for it. On error the problem is reported
+ * and NULL is returned.
+ */
+static expr *segment_part (expr *e) {
+    long segnum;
+    expr *out;
+
+    if (!is_reloc(e)) {
+        error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
+        return NULL;
+    }
+
+    segnum = reloc_seg(e);
+    if (segnum == NO_SEG) {
+        error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
+        return NULL;
+    }
+    if (segnum & SEG_ABS)
+        return scalarvect(segnum & ~SEG_ABS);
+
+    /* claim two slots: the term itself and its terminator */
+    out = tempptr;
+    tempptr += 2;
+    out[1].type = 0;
+    out->type = EXPR_SEGBASE+outfmt->segbase(segnum+1);
+    out->value = 1;
+    return out;
+}
+
+/*
+ * Recursive-descent parser. Called with a single integer operand,
+ * the criticality level: 0 means an unresolved symbol is tolerated
+ * (it is given the current segment and offset), while 1 and 2 make
+ * it an error, reported as "not defined before use" and "undefined"
+ * respectively. Must update the global `i' to reflect the token
+ * after the parsed string. May return NULL.
+ *
+ * evaluate() should report its own errors: on return it is assumed
+ * that if NULL has been returned, the error has already been
+ * reported.
+ */
+
+/*
+ * Grammar parsed is:
+ *
+ * expr : expr0 [ WRT expr6 ]
+ * expr0 : expr1 [ {|} expr1...]
+ * expr1 : expr2 [ {^} expr2...]
+ * expr2 : expr3 [ {&} expr3...]
+ * expr3 : expr4 [ {<<,>>} expr4...]
+ * expr4 : expr5 [ {+,-} expr5...]
+ * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
+ * expr6 : { ~,+,-,SEG } expr6
+ * | (expr0)
+ * | symbol
+ * | $
+ * | number
+ *
+ * Levels 0 to 5 are each implemented as a loop, so operators of
+ * equal precedence are applied from left to right. expr6 also
+ * accepts `$$' and register names.
+ */
+
+static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int);
+static expr *expr4(int), *expr5(int), *expr6(int);
+
+/*
+ * expr0: bitwise OR of expr1 operands. A non-scalar operand is
+ * reported, but evaluation carries on with the scalar parts.
+ * Returns NULL if an operand could not be parsed.
+ */
+static expr *expr0(int critical) {
+    expr *lhs = expr1(critical);
+
+    while (lhs != NULL && i == '|') {
+        expr *rhs;
+
+        i = nexttoken();
+        rhs = expr1(critical);
+        if (rhs == NULL)
+            return NULL;
+        if (!(is_simple(lhs) && is_simple(rhs)))
+            error(ERR_NONFATAL, "`|' operator may only be applied to"
+                  " scalar values");
+        lhs = scalarvect (reloc_value(lhs) | reloc_value(rhs));
+    }
+    return lhs;
+}
+
+/*
+ * expr1: bitwise XOR of expr2 operands. A non-scalar operand is
+ * reported, but evaluation carries on with the scalar parts.
+ * Returns NULL if an operand could not be parsed.
+ */
+static expr *expr1(int critical) {
+    expr *lhs = expr2(critical);
+
+    while (lhs != NULL && i == '^') {
+        expr *rhs;
+
+        i = nexttoken();
+        rhs = expr2(critical);
+        if (rhs == NULL)
+            return NULL;
+        if (!(is_simple(lhs) && is_simple(rhs)))
+            error(ERR_NONFATAL, "`^' operator may only be applied to"
+                  " scalar values");
+        lhs = scalarvect (reloc_value(lhs) ^ reloc_value(rhs));
+    }
+    return lhs;
+}
+
+/*
+ * expr2: bitwise AND of expr3 operands. A non-scalar operand is
+ * reported, but evaluation carries on with the scalar parts.
+ * Returns NULL if an operand could not be parsed.
+ */
+static expr *expr2(int critical) {
+    expr *lhs = expr3(critical);
+
+    while (lhs != NULL && i == '&') {
+        expr *rhs;
+
+        i = nexttoken();
+        rhs = expr3(critical);
+        if (rhs == NULL)
+            return NULL;
+        if (!(is_simple(lhs) && is_simple(rhs)))
+            error(ERR_NONFATAL, "`&' operator may only be applied to"
+                  " scalar values");
+        lhs = scalarvect (reloc_value(lhs) & reloc_value(rhs));
+    }
+    return lhs;
+}
+
+/*
+ * expr3: shifts of expr4 operands. `<<' shifts the signed value,
+ * `>>' shifts it as an unsigned long. A non-scalar operand is
+ * reported, but evaluation carries on with the scalar parts.
+ * Returns NULL if an operand could not be parsed.
+ */
+static expr *expr3(int critical) {
+    expr *lhs = expr4(critical);
+
+    while (lhs != NULL && (i == TOKEN_SHL || i == TOKEN_SHR)) {
+        int op = i;                    /* remember which shift it was */
+        expr *rhs;
+
+        i = nexttoken();
+        rhs = expr4(critical);
+        if (rhs == NULL)
+            return NULL;
+        if (!(is_simple(lhs) && is_simple(rhs)))
+            error(ERR_NONFATAL, "shift operator may only be applied to"
+                  " scalar values");
+        if (op == TOKEN_SHL)
+            lhs = scalarvect (reloc_value(lhs) << reloc_value(rhs));
+        else
+            lhs = scalarvect (((unsigned long)reloc_value(lhs)) >>
+                              reloc_value(rhs));
+    }
+    return lhs;
+}
+
+/*
+ * expr4: sums and differences of expr5 operands. These work on
+ * whole vectors; subtraction negates the right-hand side in place
+ * and then adds. Returns NULL if an operand could not be parsed.
+ */
+static expr *expr4(int critical) {
+    expr *lhs = expr5(critical);
+
+    while (lhs != NULL && (i == '+' || i == '-')) {
+        int op = i;                    /* remember which operator it was */
+        expr *rhs;
+
+        i = nexttoken();
+        rhs = expr5(critical);
+        if (rhs == NULL)
+            return NULL;
+        if (op == '-')
+            rhs = scalar_mult(rhs, -1L);
+        lhs = add_vectors (lhs, rhs);
+    }
+    return lhs;
+}
+
+/*
+ * expr5: products, quotients and remainders of expr6 operands.
+ *
+ * `*' may scale a vector by a scalar (either way round); `/' and
+ * `%' are unsigned division and modulo, `//' and `%%' their signed
+ * counterparts, and all four need scalar operands and a non-zero
+ * divisor. Errors are reported here; NULL is returned for them and
+ * when an operand could not be parsed.
+ */
+static expr *expr5(int critical) {
+    expr *e, *f;
+
+    e = expr6(critical);
+    if (!e)
+        return NULL;
+    /* `%' has to be listed here, or the unsigned modulo case below
+     * can never be reached */
+    while (i == '*' || i == '/' || i == '%' ||
+           i == TOKEN_SDIV || i == TOKEN_SMOD) {
+        int j = i;
+        i = nexttoken();
+        f = expr6(critical);
+        if (!f)
+            return NULL;
+        if (j != '*' && (!is_simple(e) || !is_simple(f))) {
+            error(ERR_NONFATAL, "division operator may only be applied to"
+                  " scalar values");
+            return NULL;
+        }
+        if (j != '*' && reloc_value(f) == 0) {
+            error(ERR_NONFATAL, "division by zero");
+            return NULL;
+        }
+        switch (j) {
+          case '*':
+            if (is_simple(e))
+                e = scalar_mult (f, reloc_value(e));
+            else if (is_simple(f))
+                e = scalar_mult (e, reloc_value(f));
+            else {
+                error(ERR_NONFATAL, "unable to multiply two "
+                      "non-scalar objects");
+                return NULL;
+            }
+            break;
+          case '/':
+            e = scalarvect (((unsigned long)reloc_value(e)) /
+                            ((unsigned long)reloc_value(f)));
+            break;
+          case '%':
+            e = scalarvect (((unsigned long)reloc_value(e)) %
+                            ((unsigned long)reloc_value(f)));
+            break;
+          case TOKEN_SDIV:
+            e = scalarvect (((signed long)reloc_value(e)) /
+                            ((signed long)reloc_value(f)));
+            break;
+          case TOKEN_SMOD:
+            e = scalarvect (((signed long)reloc_value(e)) %
+                            ((signed long)reloc_value(f)));
+            break;
+        }
+    }
+    return e;
+}
+
+static expr *expr6(int critical) { /* innermost grammar level: unary
+ * operators, parenthesised expressions and the operands themselves
+ * (numbers, registers, symbols, `$' and `$$'). Builds its result in
+ * the expression scratch area, leaves the following token in the
+ * global `i', and returns NULL after reporting an error. */
+ expr *e;
+ long label_seg, label_ofs;
+
+ if (i == '-') {
+ i = nexttoken();
+ e = expr6(critical);
+ if (!e)
+ return NULL;
+ return scalar_mult (e, -1L);
+ } else if (i == '+') {
+ i = nexttoken();
+ return expr6(critical);
+ } else if (i == '~') {
+ i = nexttoken();
+ e = expr6(critical);
+ if (!e)
+ return NULL;
+ if (!is_simple(e)) {
+ error(ERR_NONFATAL, "`~' operator may only be applied to"
+ " scalar values");
+ return NULL;
+ }
+ return scalarvect(~reloc_value(e));
+ } else if (i == TOKEN_SEG) {
+ i = nexttoken();
+ e = expr6(critical);
+ if (!e)
+ return NULL;
+ return segment_part(e);
+ } else if (i == '(') {
+ i = nexttoken();
+ e = expr0(critical);
+ if (!e)
+ return NULL;
+ if (i != ')') {
+ error(ERR_NONFATAL, "expecting `)'");
+ return NULL;
+ }
+ i = nexttoken();
+ return e;
+ } else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID ||
+ i == TOKEN_HERE || i == TOKEN_BASE) {
+ e = tempptr; /* the first term goes in the next free slot */
+ switch (i) {
+ case TOKEN_NUM:
+ e->type = EXPR_SIMPLE;
+ e->value = tokval.t_integer;
+ break;
+ case TOKEN_REG:
+ e->type = tokval.t_integer; /* register number is the type */
+ e->value = 1;
+ break;
+ case TOKEN_ID:
+ case TOKEN_HERE:
+ case TOKEN_BASE:
+ /*
+ * Since the whole line is parsed before the label it
+ * defines is given to the label manager, we have
+ * problems with lines such as
+ *
+ * end: TIMES 512-(end-start) DB 0
+ *
+ * where `end' is not known on pass one, despite not
+ * really being a forward reference, and due to
+ * criticality it is _needed_. Hence we check our label
+ * against the currently defined one, and do our own
+ * resolution of it if we have to.
+ */
+ if (i == TOKEN_BASE) {
+ label_seg = seg;
+ label_ofs = 0;
+ } else if (i == TOKEN_HERE || !strcmp(tokval.t_charptr, label)) {
+ label_seg = seg;
+ label_ofs = ofs;
+ } else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) {
+ if (critical == 2) {
+ error (ERR_NONFATAL, "symbol `%s' undefined",
+ tokval.t_charptr);
+ return NULL;
+ } else if (critical == 1) {
+ error (ERR_NONFATAL, "symbol `%s' not defined before use",
+ tokval.t_charptr);
+ return NULL;
+ } else { /* not critical: stand in the current location */
+ label_seg = seg;
+ label_ofs = ofs;
+ }
+ }
+ e->type = EXPR_SIMPLE;
+ e->value = label_ofs;
+ if (label_seg!=NO_SEG) { /* add a segment-base term after it */
+ tempptr++;
+ tempptr->type = EXPR_SEGBASE + label_seg;
+ tempptr->value = 1;
+ }
+ break;
+ }
+ tempptr++; /* step past the last term written... */
+ tempptr->type = 0; /* ...terminate the vector... */
+ tempptr++; /* ...and step past the terminator */
+ i = nexttoken();
+ return e;
+ } else {
+ error(ERR_NONFATAL, "expression syntax error");
+ return NULL;
+ }
+}
+
+static expr *evaluate (int critical) { /* top grammar level: an expr0,
+ * optionally followed by WRT and an expr6. Returns the resulting
+ * vector with any far-absolute segment part stripped and, if WRT was
+ * given, an EXPR_WRT term merged in. Returns NULL after reporting an
+ * error. The caller is expected to have called eval_reset(). */
+ expr *e;
+ expr *f = NULL;
+
+ e = expr0 (critical);
+ if (!e)
+ return NULL;
+
+ if (i == TOKEN_WRT) {
+ if (!is_reloc(e)) {
+ error(ERR_NONFATAL, "invalid left-hand operand to WRT");
+ return NULL;
+ }
+ i = nexttoken(); /* eat the WRT */
+ f = expr6 (critical);
+ if (!f)
+ return NULL;
+ }
+ e = scalar_mult (e, 1L); /* strip far-absolute segment part */
+ if (f) {
+ expr *g = tempptr++; /* one-term vector for the WRT part */
+ tempptr++->type = 0;
+ g->type = EXPR_WRT;
+ if (!is_reloc(f)) {
+ error(ERR_NONFATAL, "invalid right-hand operand to WRT");
+ return NULL;
+ }
+ g->value = reloc_seg(f);
+ if (g->value == NO_SEG) /* plain number: an absolute segment */
+ g->value = reloc_value(f) | SEG_ABS;
+ else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) {
+ error(ERR_NONFATAL, "invalid right-hand operand to WRT");
+ return NULL;
+ }
+ e = add_vectors (e, g);
+ }
+ return e;
+}