diff options
Diffstat (limited to 'parser.c')
-rw-r--r-- | parser.c | 1306 |
1 files changed, 1306 insertions, 0 deletions
/* parser.c    source line parser for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * initial version 27/iii/95 by Simon Tatham
 */

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <ctype.h>

#include "nasm.h"
#include "nasmlib.h"
#include "parser.h"
#include "float.h"

#include "names.c"


/*
 * Operand-type flags for every register. The table is indexed by the
 * register number that nexttoken() leaves in tokval.t_integer for a
 * TOKEN_REG (the position of the name in reg_names[]), which is also
 * the `type' of a register term in an expr vector. Entries therefore
 * have to stay in the same order as reg_names[].
 * NOTE(review): reg_names[] lives in names.c, which is not visible
 * here - confirm the two tables agree whenever either is edited.
 */
static long reg_flags[] = {            /* sizes and special flags */
    0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL,
    REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8,
    REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG,
    REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX,
    REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS,
    MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG,
    REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG,
    FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG,
    REG_TREG
};

/*
 * Codes for the `special' keywords (size and distance specifiers).
 * The value of each enumerator is the index of its spelling in
 * special_names[], so the two lists must be kept in step.
 */
enum {                                 /* special tokens */
    S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_QWORD, S_SHORT, S_TO,
    S_TWORD, S_WORD
};

/*
 * Spellings of the special keywords. Looked up with bsi(), a binary
 * search using strcmp(), so the strings must remain in ascending
 * strcmp() order.
 */
static char *special_names[] = {       /* and the actual text */
    "byte", "dword", "far", "long", "near", "qword", "short", "to",
    "tword", "word"
};

/*
 * Spellings of the instruction prefixes. Also searched with bsi(),
 * so the same ordering rule applies; nexttoken() adds
 * PREFIX_ENUM_START to the index found here to form the prefix code.
 */
static char *prefix_names[] = {
    "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne",
    "repnz", "repz", "times"
};

/*
 * Evaluator datatype. Expressions, within the evaluator, are
 * stored as an array of these beasts, terminated by a record with
 * type==0. Mostly, it's a vector type: each type denotes some kind
 * of a component, and the value denotes the multiple of that
 * component present in the expression.
The exception is the WRT + * type, whose `value' field denotes the segment to which the + * expression is relative. These segments will be segment-base + * types, i.e. either odd segment values or SEG_ABS types. So it is + * still valid to assume that anything with a `value' field of zero + * is insignificant. + */ +typedef struct { + long type; /* a register, or EXPR_xxx */ + long value; /* must be >= 32 bits */ +} expr; + +static void eval_reset(void); +static expr *evaluate(int); + +/* + * ASSUMPTION MADE HERE. The number of distinct register names + * (i.e. possible "type" fields for an expr structure) does not + * exceed 126. + */ +#define EXPR_SIMPLE 126 +#define EXPR_WRT 127 +#define EXPR_SEGBASE 128 + +static int is_reloc(expr *); +static int is_simple(expr *); +static int is_really_simple (expr *); +static long reloc_value(expr *); +static long reloc_seg(expr *); +static long reloc_wrt(expr *); + +enum { /* token types, other than chars */ + TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, TOKEN_ERRNUM, + TOKEN_HERE, TOKEN_BASE, TOKEN_SPECIAL, TOKEN_PREFIX, TOKEN_SHL, + TOKEN_SHR, TOKEN_SDIV, TOKEN_SMOD, TOKEN_SEG, TOKEN_WRT, + TOKEN_FLOAT +}; + +struct tokenval { + long t_integer, t_inttwo; + char *t_charptr; +}; + +static char tempstorage[1024], *q; +static int bsi (char *string, char **array, int size);/* binary search */ + +static int nexttoken (void); +static int is_comma_next (void); + +static char *bufptr; +static int i; +static struct tokenval tokval; +static lfunc labelfunc; +static efunc error; +static char *label; +static struct ofmt *outfmt; + +static long seg, ofs; + +insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, + char *buffer, insn *result, struct ofmt *output, + efunc errfunc) { + int operand; + int critical; + + q = tempstorage; + bufptr = buffer; + labelfunc = lookup_label; + outfmt = output; + error = errfunc; + seg = segment; + ofs = offset; + label = ""; + + i = nexttoken(); + + result->eops = NULL; /* 
must do this, whatever happens */ + + if (i==0) { /* blank line - ignore */ + result->label = NULL; /* so, no label on it */ + result->opcode = -1; /* and no instruction either */ + return result; + } + if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX && + (i!=TOKEN_REG || (REG_SREG & ~reg_flags[tokval.t_integer]))) { + error (ERR_NONFATAL, "label or instruction expected" + " at start of line"); + result->label = NULL; + result->opcode = -1; + return result; + } + + if (i == TOKEN_ID) { /* there's a label here */ + label = result->label = tokval.t_charptr; + i = nexttoken(); + if (i == ':') { /* skip over the optional colon */ + i = nexttoken(); + } + } else /* no label; so, moving swiftly on */ + result->label = NULL; + + if (i==0) { + result->opcode = -1; /* this line contains just a label */ + return result; + } + + result->nprefix = 0; + result->times = 1; + + while (i == TOKEN_PREFIX || + (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) { + /* + * Handle special case: the TIMES prefix. 
+ */ + if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) { + expr *value; + + i = nexttoken(); + eval_reset(); + value = evaluate (pass); + if (!value) { /* but, error in evaluator */ + result->opcode = -1; /* unrecoverable parse error: */ + return result; /* ignore this instruction */ + } + if (!is_simple (value)) { + error (ERR_NONFATAL, + "non-constant argument supplied to TIMES"); + result->times = 1; + } else + result->times = value->value; + } else { + if (result->nprefix == MAXPREFIX) + error (ERR_NONFATAL, + "instruction has more than %d prefixes", MAXPREFIX); + else + result->prefixes[result->nprefix++] = tokval.t_integer; + i = nexttoken(); + } + } + + if (i != TOKEN_INSN) { + error (ERR_NONFATAL, "parser: instruction expected"); + result->opcode = -1; + return result; + } + + result->opcode = tokval.t_integer; + result->condition = tokval.t_inttwo; + + /* + * RESB, RESW and RESD cannot be satisfied with incorrectly + * evaluated operands, since the correct values _must_ be known + * on the first pass. Hence, even in pass one, we set the + * `critical' flag on calling evaluate(), so that it will bomb + * out on undefined symbols. Nasty, but there's nothing we can + * do about it. + * + * For the moment, EQU has the same difficulty, so we'll + * include that. + */ + if (result->opcode == I_RESB || + result->opcode == I_RESW || + result->opcode == I_RESD || + result->opcode == I_RESQ || + result->opcode == I_REST || + result->opcode == I_EQU) + critical = pass; + else + critical = (pass==2 ? 2 : 0); + + if (result->opcode == I_DB || + result->opcode == I_DW || + result->opcode == I_DD || + result->opcode == I_DQ || + result->opcode == I_DT) { + extop *eop, **tail = &result->eops; + int oper_num = 0; + + /* + * Begin to read the DB/DW/DD/DQ/DT operands. 
+ */ + while (1) { + i = nexttoken(); + if (i == 0) + break; + eop = *tail = nasm_malloc(sizeof(extop)); + tail = &eop->next; + eop->next = NULL; + eop->type = EOT_NOTHING; + oper_num++; + + if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) { + eop->type = EOT_DB_STRING; + eop->stringval = tokval.t_charptr; + eop->stringlen = tokval.t_inttwo; + i = nexttoken(); /* eat the comma */ + continue; + } + + if (i == TOKEN_FLOAT || i == '-') { + long sign = +1L; + + if (i == '-') { + char *save = bufptr; + i = nexttoken(); + sign = -1L; + if (i != TOKEN_FLOAT) { + bufptr = save; + i = '-'; + } + } + + if (i == TOKEN_FLOAT) { + eop->type = EOT_DB_STRING; + eop->stringval = q; + if (result->opcode == I_DD) + eop->stringlen = 4; + else if (result->opcode == I_DQ) + eop->stringlen = 8; + else if (result->opcode == I_DT) + eop->stringlen = 10; + else { + error(ERR_NONFATAL, "floating-point constant" + " encountered in `D%c' instruction", + result->opcode == I_DW ? 'W' : 'B'); + eop->type = EOT_NOTHING; + } + q += eop->stringlen; + if (!float_const (tokval.t_charptr, sign, + (unsigned char *)eop->stringval, + eop->stringlen, error)) + eop->type = EOT_NOTHING; + i = nexttoken(); /* eat the comma */ + continue; + } + } + + /* anything else */ { + expr *value; + eval_reset(); + value = evaluate (critical); + if (!value) { /* but, error in evaluator */ + result->opcode = -1;/* unrecoverable parse error: */ + return result; /* ignore this instruction */ + } + if (is_reloc(value)) { + eop->type = EOT_DB_NUMBER; + eop->offset = reloc_value(value); + eop->segment = reloc_seg(value); + eop->wrt = reloc_wrt(value); + } else { + error (ERR_NONFATAL, + "`%s' operand %d: expression is not simple" + " or relocatable", + insn_names[result->opcode], oper_num); + } + } + } + return result; + } + + /* right. Now we begin to parse the operands. There may be up to three + * of these, separated by commas, and terminated by a zero token. 
*/ + + for (operand = 0; operand < 3; operand++) { + expr *seg, *value; /* used most of the time */ + int mref; /* is this going to be a memory ref? */ + + result->oprs[operand].addr_size = 0;/* have to zero this whatever */ + i = nexttoken(); + if (i == 0) break; /* end of operands: get out of here */ + result->oprs[operand].type = 0; /* so far, no override */ + while (i == TOKEN_SPECIAL) {/* size specifiers */ + switch ((int)tokval.t_integer) { + case S_BYTE: + result->oprs[operand].type |= BITS8; + break; + case S_WORD: + result->oprs[operand].type |= BITS16; + break; + case S_DWORD: + case S_LONG: + result->oprs[operand].type |= BITS32; + break; + case S_QWORD: + result->oprs[operand].type |= BITS64; + break; + case S_TWORD: + result->oprs[operand].type |= BITS80; + break; + case S_TO: + result->oprs[operand].type |= TO; + break; + case S_FAR: + result->oprs[operand].type |= FAR; + break; + case S_NEAR: + result->oprs[operand].type |= NEAR; + break; + case S_SHORT: + result->oprs[operand].type |= SHORT; + break; + } + i = nexttoken(); + } + + if (i == '[') { /* memory reference */ + i = nexttoken(); + mref = TRUE; + if (i == TOKEN_SPECIAL) { /* check for address size override */ + switch ((int)tokval.t_integer) { + case S_WORD: + result->oprs[operand].addr_size = 16; + break; + case S_DWORD: + case S_LONG: + result->oprs[operand].addr_size = 32; + break; + default: + error (ERR_NONFATAL, "invalid size specification in" + " effective address"); + } + i = nexttoken(); + } + } else /* immediate operand, or register */ + mref = FALSE; + + eval_reset(); + + value = evaluate (critical); + if (!value) { /* error in evaluator */ + result->opcode = -1; /* unrecoverable parse error: */ + return result; /* ignore this instruction */ + } + if (i == ':' && mref) { /* it was seg:offset */ + seg = value; /* so shift this into the segment */ + i = nexttoken(); /* then skip the colon */ + if (i == TOKEN_SPECIAL) { /* another check for size override */ + switch 
((int)tokval.t_integer) { + case S_WORD: + result->oprs[operand].addr_size = 16; + break; + case S_DWORD: + case S_LONG: + result->oprs[operand].addr_size = 32; + break; + default: + error (ERR_NONFATAL, "invalid size specification in" + " effective address"); + } + i = nexttoken(); + } + value = evaluate (critical); + /* and get the offset */ + if (!value) { /* but, error in evaluator */ + result->opcode = -1; /* unrecoverable parse error: */ + return result; /* ignore this instruction */ + } + } else seg = NULL; + if (mref) { /* find ] at the end */ + if (i != ']') { + error (ERR_NONFATAL, "parser: expecting ]"); + do { /* error recovery again */ + i = nexttoken(); + } while (i != 0 && i != ','); + } else /* we got the required ] */ + i = nexttoken(); + } else { /* immediate operand */ + if (i != 0 && i != ',' && i != ':') { + error (ERR_NONFATAL, "comma or end of line expected"); + do { /* error recovery */ + i = nexttoken(); + } while (i != 0 && i != ','); + } else if (i == ':') { + result->oprs[operand].type |= COLON; + } + } + + /* now convert the exprs returned from evaluate() into operand + * descriptions... 
*/ + + if (mref) { /* it's a memory reference */ + expr *e = value; + int b, i, s; /* basereg, indexreg, scale */ + long o; /* offset */ + + if (seg) { /* segment override */ + if (seg[1].type!=0 || seg->value!=1 || + REG_SREG & ~reg_flags[seg->type]) + error (ERR_NONFATAL, "invalid segment override"); + else if (result->nprefix == MAXPREFIX) + error (ERR_NONFATAL, + "instruction has more than %d prefixes", + MAXPREFIX); + else + result->prefixes[result->nprefix++] = seg->type; + } + + b = i = -1, o = s = 0; + + if (e->type < EXPR_SIMPLE) { /* this bit's a register */ + if (e->value == 1) /* in fact it can be basereg */ + b = e->type; + else /* no, it has to be indexreg */ + i = e->type, s = e->value; + e++; + } + if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */ + if (e->value != 1) { /* it has to be indexreg */ + if (i != -1) { /* but it can't be */ + error(ERR_NONFATAL, "invalid effective address"); + result->opcode = -1; + return result; + } else + i = e->type, s = e->value; + } else { /* it can be basereg */ + if (b != -1) /* or can it? */ + i = e->type, s = 1; + else + b = e->type; + } + e++; + } + if (e->type != 0) { /* is there an offset? */ + if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */ + error (ERR_NONFATAL, "invalid effective address"); + result->opcode = -1; + return result; + } else { + if (e->type == EXPR_SIMPLE) { + o = e->value; + e++; + } + if (e->type == EXPR_WRT) { + result->oprs[operand].wrt = e->value; + e++; + } else + result->oprs[operand].wrt = NO_SEG; + if (e->type != 0) { /* is there a segment id? 
*/ + if (e->type < EXPR_SEGBASE) { + error (ERR_NONFATAL, + "invalid effective address"); + result->opcode = -1; + return result; + } else + result->oprs[operand].segment = (e->type - + EXPR_SEGBASE); + e++; + } else + result->oprs[operand].segment = NO_SEG; + } + } else { + o = 0; + result->oprs[operand].wrt = NO_SEG; + result->oprs[operand].segment = NO_SEG; + } + + if (e->type != 0) { /* there'd better be nothing left! */ + error (ERR_NONFATAL, "invalid effective address"); + result->opcode = -1; + return result; + } + + result->oprs[operand].type |= MEMORY; + if (b==-1 && (i==-1 || s==0)) + result->oprs[operand].type |= MEM_OFFS; + result->oprs[operand].basereg = b; + result->oprs[operand].indexreg = i; + result->oprs[operand].scale = s; + result->oprs[operand].offset = o; + } else { /* it's not a memory reference */ + if (is_reloc(value)) { /* it's immediate */ + result->oprs[operand].type |= IMMEDIATE; + result->oprs[operand].offset = reloc_value(value); + result->oprs[operand].segment = reloc_seg(value); + result->oprs[operand].wrt = reloc_wrt(value); + if (is_simple(value) && reloc_value(value)==1) + result->oprs[operand].type |= UNITY; + } else { /* it's a register */ + if (value->type>=EXPR_SIMPLE || value->value!=1) { + error (ERR_NONFATAL, "invalid operand type"); + result->opcode = -1; + return result; + } + /* clear overrides, except TO which applies to FPU regs */ + result->oprs[operand].type &= TO; + result->oprs[operand].type |= REGISTER; + result->oprs[operand].type |= reg_flags[value->type]; + result->oprs[operand].basereg = value->type; + } + } + } + + result->operands = operand; /* set operand count */ + + while (operand<3) /* clear remaining operands */ + result->oprs[operand++].type = 0; + + /* + * Transform RESW, RESD, RESQ, REST into RESB. 
+ */ + switch (result->opcode) { + case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break; + case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break; + case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break; + case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break; + } + + return result; +} + +static int is_comma_next (void) { + char *p; + + p = bufptr; + while (isspace(*p)) p++; + return (*p == ',' || *p == ';' || !*p); +} + +/* isidstart matches any character that may start an identifier, and isidchar + * matches any character that may appear at places other than the start of an + * identifier. E.g. a period may only appear at the start of an identifier + * (for local labels), whereas a number may appear anywhere *but* at the + * start. */ + +#define isidstart(c) ( isalpha(c) || (c)=='_' || (c)=='.' || (c)=='?' ) +#define isidchar(c) ( isidstart(c) || isdigit(c) || (c)=='$' || (c)=='#' \ + || (c)=='@' || (c)=='~' ) + +/* Ditto for numeric constants. */ + +#define isnumstart(c) ( isdigit(c) || (c)=='$' ) +#define isnumchar(c) ( isalnum(c) ) + +/* This returns the numeric value of a given 'digit'. */ + +#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0') + +/* + * This tokeniser routine has only one side effect, that of + * updating `bufptr'. Hence by saving `bufptr', lookahead may be + * performed. 
+ */ + +static int nexttoken (void) { + char ourcopy[256], *r, *s; + + while (isspace(*bufptr)) bufptr++; + if (!*bufptr) return 0; + + /* we have a token; either an id, a number or a char */ + if (isidstart(*bufptr) || + (*bufptr == '$' && isidstart(bufptr[1]))) { + /* now we've got an identifier */ + int i; + int is_sym = FALSE; + + if (*bufptr == '$') { + is_sym = TRUE; + bufptr++; + } + + tokval.t_charptr = q; + *q++ = *bufptr++; + while (isidchar(*bufptr)) *q++ = *bufptr++; + *q++ = '\0'; + for (s=tokval.t_charptr, r=ourcopy; *s; s++) + *r++ = tolower (*s); + *r = '\0'; + if (is_sym) + return TOKEN_ID; /* bypass all other checks */ + /* right, so we have an identifier sitting in temp storage. now, + * is it actually a register or instruction name, or what? */ + if ((tokval.t_integer=bsi(ourcopy, reg_names, + elements(reg_names)))>=0) + return TOKEN_REG; + if ((tokval.t_integer=bsi(ourcopy, insn_names, + elements(insn_names)))>=0) + return TOKEN_INSN; + for (i=0; i<elements(icn); i++) + if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) { + char *p = ourcopy + strlen(icn[i]); + tokval.t_integer = ico[i]; + if ((tokval.t_inttwo=bsi(p, conditions, + elements(conditions)))>=0) + return TOKEN_INSN; + } + if ((tokval.t_integer=bsi(ourcopy, prefix_names, + elements(prefix_names)))>=0) { + tokval.t_integer += PREFIX_ENUM_START; + return TOKEN_PREFIX; + } + if ((tokval.t_integer=bsi(ourcopy, special_names, + elements(special_names)))>=0) + return TOKEN_SPECIAL; + if (!strcmp(ourcopy, "seg")) + return TOKEN_SEG; + if (!strcmp(ourcopy, "wrt")) + return TOKEN_WRT; + return TOKEN_ID; + } else if (*bufptr == '$' && !isnumchar(bufptr[1])) { + /* + * It's a $ sign with no following hex number; this must + * mean it's a Here token ($), evaluating to the current + * assembly location, or a Base token ($$), evaluating to + * the base of the current segment. 
+ */ + bufptr++; + if (*bufptr == '$') { + bufptr++; + return TOKEN_BASE; + } + return TOKEN_HERE; + } else if (isnumstart(*bufptr)) { /* now we've got a number */ + char *r = q; + int rn_error; + + *q++ = *bufptr++; + while (isnumchar(*bufptr)) { + *q++ = *bufptr++; + } + if (*bufptr == '.') { + /* + * a floating point constant + */ + *q++ = *bufptr++; + while (isnumchar(*bufptr)) { + *q++ = *bufptr++; + } + *q++ = '\0'; + tokval.t_charptr = r; + return TOKEN_FLOAT; + } + *q++ = '\0'; + tokval.t_integer = readnum(r, &rn_error); + if (rn_error) + return TOKEN_ERRNUM; /* some malformation occurred */ + tokval.t_charptr = NULL; + return TOKEN_NUM; + } else if (*bufptr == '\'' || *bufptr == '"') {/* a char constant */ + char quote = *bufptr++, *r; + r = tokval.t_charptr = bufptr; + while (*bufptr && *bufptr != quote) bufptr++; + tokval.t_inttwo = bufptr - r; /* store full version */ + if (!*bufptr) + return TOKEN_ERRNUM; /* unmatched quotes */ + tokval.t_integer = 0; + r = bufptr++; /* skip over final quote */ + while (quote != *--r) { + tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r; + } + return TOKEN_NUM; + } else if (*bufptr == ';') { /* a comment has happened - stay */ + return 0; + } else if ((*bufptr == '>' || *bufptr == '<' || + *bufptr == '/' || *bufptr == '%') && bufptr[1] == *bufptr) { + bufptr += 2; + return (bufptr[-2] == '>' ? TOKEN_SHR : + bufptr[-2] == '<' ? TOKEN_SHL : + bufptr[-2] == '/' ? TOKEN_SDIV : + TOKEN_SMOD); + } else /* just an ordinary char */ + return (unsigned char) (*bufptr++); +} + +/* return index of "string" in "array", or -1 if no match. 
*/ +static int bsi (char *string, char **array, int size) { + int i = -1, j = size; /* always, i < index < j */ + while (j-i >= 2) { + int k = (i+j)/2; + int l = strcmp(string, array[k]); + if (l<0) /* it's in the first half */ + j = k; + else if (l>0) /* it's in the second half */ + i = k; + else /* we've got it :) */ + return k; + } + return -1; /* we haven't got it :( */ +} + +void cleanup_insn (insn *i) { + extop *e; + + while (i->eops) { + e = i->eops; + i->eops = i->eops->next; + nasm_free (e); + } +} + +/* ------------- Evaluator begins here ------------------ */ + +static expr exprtempstorage[1024], *tempptr; /* store exprs in here */ + +/* + * Add two vector datatypes. We have some bizarre behaviour on far- + * absolute segment types: we preserve them during addition _only_ + * if one of the segments is a truly pure scalar. + */ +static expr *add_vectors(expr *p, expr *q) { + expr *r = tempptr; + int preserve; + + preserve = is_really_simple(p) || is_really_simple(q); + + while (p->type && q->type && + p->type < EXPR_SEGBASE+SEG_ABS && + q->type < EXPR_SEGBASE+SEG_ABS) + if (p->type > q->type) { + tempptr->type = q->type; + tempptr->value = q->value; + tempptr++, q++; + } else if (p->type < q->type) { + tempptr->type = p->type; + tempptr->value = p->value; + tempptr++, p++; + } else { /* *p and *q have same type */ + tempptr->type = p->type; + tempptr->value = p->value + q->value; + tempptr++, p++, q++; + } + while (p->type && + (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) { + tempptr->type = p->type; + tempptr->value = p->value; + tempptr++, p++; + } + while (q->type && + (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) { + tempptr->type = q->type; + tempptr->value = q->value; + tempptr++, q++; + } + (tempptr++)->type = 0; + + return r; +} + +/* + * Multiply a vector by a scalar. Strip far-absolute segment part + * if present. 
+ */ +static expr *scalar_mult(expr *vect, long scalar) { + expr *p = vect; + + while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) { + p->value = scalar * (p->value); + p++; + } + p->type = 0; + + return vect; +} + +static expr *scalarvect (long scalar) { + expr *p = tempptr; + tempptr->type = EXPR_SIMPLE; + tempptr->value = scalar; + tempptr++; + tempptr->type = 0; + tempptr++; + return p; +} + +/* + * Return TRUE if the argument is a simple scalar. (Or a far- + * absolute, which counts.) + */ +static int is_simple (expr *vect) { + while (vect->type && !vect->value) + vect++; + if (!vect->type) + return 1; + if (vect->type != EXPR_SIMPLE) + return 0; + do { + vect++; + } while (vect->type && !vect->value); + if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0; + return 1; +} + +/* + * Return TRUE if the argument is a simple scalar, _NOT_ a far- + * absolute. + */ +static int is_really_simple (expr *vect) { + while (vect->type && !vect->value) + vect++; + if (!vect->type) + return 1; + if (vect->type != EXPR_SIMPLE) + return 0; + do { + vect++; + } while (vect->type && !vect->value); + if (vect->type) return 0; + return 1; +} + +/* + * Return TRUE if the argument is relocatable (i.e. a simple + * scalar, plus at most one segment-base, plus possibly a WRT). + */ +static int is_reloc (expr *vect) { + while (vect->type && !vect->value) + vect++; + if (!vect->type) + return 1; + if (vect->type < EXPR_SIMPLE) + return 0; + if (vect->type == EXPR_SIMPLE) { + do { + vect++; + } while (vect->type && !vect->value); + if (!vect->type) + return 1; + } + do { + vect++; + } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); + if (!vect->type) + return 1; + return 1; +} + +/* + * Return the scalar part of a relocatable vector. (Including + * simple scalar vectors - those qualify as relocatable.) 
+ */ +static long reloc_value (expr *vect) { + while (vect->type && !vect->value) + vect++; + if (!vect->type) return 0; + if (vect->type == EXPR_SIMPLE) + return vect->value; + else + return 0; +} + +/* + * Return the segment number of a relocatable vector, or NO_SEG for + * simple scalars. + */ +static long reloc_seg (expr *vect) { + while (vect->type && (vect->type == EXPR_WRT || !vect->value)) + vect++; + if (vect->type == EXPR_SIMPLE) { + do { + vect++; + } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); + } + if (!vect->type) + return NO_SEG; + else + return vect->type - EXPR_SEGBASE; +} + +/* + * Return the WRT segment number of a relocatable vector, or NO_SEG + * if no WRT part is present. + */ +static long reloc_wrt (expr *vect) { + while (vect->type && vect->type < EXPR_WRT) + vect++; + if (vect->type == EXPR_WRT) { + return vect->value; + } else + return NO_SEG; +} + +static void eval_reset(void) { + tempptr = exprtempstorage; /* initialise temporary storage */ +} + +/* + * The SEG operator: calculate the segment part of a relocatable + * value. Return NULL, as usual, if an error occurs. Report the + * error too. + */ +static expr *segment_part (expr *e) { + long seg; + + if (!is_reloc(e)) { + error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value"); + return NULL; + } + + seg = reloc_seg(e); + if (seg == NO_SEG) { + error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value"); + return NULL; + } else if (seg & SEG_ABS) + return scalarvect(seg & ~SEG_ABS); + else { + expr *f = tempptr++; + tempptr++->type = 0; + f->type = EXPR_SEGBASE+outfmt->segbase(seg+1); + f->value = 1; + return f; + } +} + +/* + * Recursive-descent parser. Called with a single boolean operand, + * which is TRUE if the evaluation is critical (i.e. unresolved + * symbols are an error condition). Must update the global `i' to + * reflect the token after the parsed string. May return NULL. 
+ * + * evaluate() should report its own errors: on return it is assumed + * that if NULL has been returned, the error has already been + * reported. + */ + +/* + * Grammar parsed is: + * + * expr : expr0 [ WRT expr6 ] + * expr0 : expr1 [ {|} expr1] + * expr1 : expr2 [ {^} expr2] + * expr2 : expr3 [ {&} expr3] + * expr3 : expr4 [ {<<,>>} expr4...] + * expr4 : expr5 [ {+,-} expr5...] + * expr5 : expr6 [ {*,/,%,//,%%} expr6...] + * expr6 : { ~,+,-,SEG } expr6 + * | (expr0) + * | symbol + * | $ + * | number + */ + +static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int); +static expr *expr4(int), *expr5(int), *expr6(int); + +static expr *expr0(int critical) { + expr *e, *f; + + e = expr1(critical); + if (!e) + return NULL; + while (i == '|') { + i = nexttoken(); + f = expr1(critical); + if (!f) + return NULL; + if (!is_simple(e) || !is_simple(f)) { + error(ERR_NONFATAL, "`|' operator may only be applied to" + " scalar values"); + } + e = scalarvect (reloc_value(e) | reloc_value(f)); + } + return e; +} + +static expr *expr1(int critical) { + expr *e, *f; + + e = expr2(critical); + if (!e) + return NULL; + while (i == '^') { + i = nexttoken(); + f = expr2(critical); + if (!f) + return NULL; + if (!is_simple(e) || !is_simple(f)) { + error(ERR_NONFATAL, "`^' operator may only be applied to" + " scalar values"); + } + e = scalarvect (reloc_value(e) ^ reloc_value(f)); + } + return e; +} + +static expr *expr2(int critical) { + expr *e, *f; + + e = expr3(critical); + if (!e) + return NULL; + while (i == '&') { + i = nexttoken(); + f = expr3(critical); + if (!f) + return NULL; + if (!is_simple(e) || !is_simple(f)) { + error(ERR_NONFATAL, "`&' operator may only be applied to" + " scalar values"); + } + e = scalarvect (reloc_value(e) & reloc_value(f)); + } + return e; +} + +static expr *expr3(int critical) { + expr *e, *f; + + e = expr4(critical); + if (!e) + return NULL; + while (i == TOKEN_SHL || i == TOKEN_SHR) { + int j = i; + i = nexttoken(); + f = expr4(critical); + 
if (!f) + return NULL; + if (!is_simple(e) || !is_simple(f)) { + error(ERR_NONFATAL, "shift operator may only be applied to" + " scalar values"); + } + switch (j) { + case TOKEN_SHL: + e = scalarvect (reloc_value(e) << reloc_value(f)); + break; + case TOKEN_SHR: + e = scalarvect (((unsigned long)reloc_value(e)) >> + reloc_value(f)); + break; + } + } + return e; +} + +static expr *expr4(int critical) { + expr *e, *f; + + e = expr5(critical); + if (!e) + return NULL; + while (i == '+' || i == '-') { + int j = i; + i = nexttoken(); + f = expr5(critical); + if (!f) + return NULL; + switch (j) { + case '+': + e = add_vectors (e, f); + break; + case '-': + e = add_vectors (e, scalar_mult(f, -1L)); + break; + } + } + return e; +} + +static expr *expr5(int critical) { + expr *e, *f; + + e = expr6(critical); + if (!e) + return NULL; + while (i == '*' || i == '/' || i == '*' || + i == TOKEN_SDIV || i == TOKEN_SMOD) { + int j = i; + i = nexttoken(); + f = expr6(critical); + if (!f) + return NULL; + if (j != '*' && (!is_simple(e) || !is_simple(f))) { + error(ERR_NONFATAL, "division operator may only be applied to" + " scalar values"); + return NULL; + } + if (j != '*' && reloc_value(f) == 0) { + error(ERR_NONFATAL, "division by zero"); + return NULL; + } + switch (j) { + case '*': + if (is_simple(e)) + e = scalar_mult (f, reloc_value(e)); + else if (is_simple(f)) + e = scalar_mult (e, reloc_value(f)); + else { + error(ERR_NONFATAL, "unable to multiply two " + "non-scalar objects"); + return NULL; + } + break; + case '/': + e = scalarvect (((unsigned long)reloc_value(e)) / + ((unsigned long)reloc_value(f))); + break; + case '%': + e = scalarvect (((unsigned long)reloc_value(e)) % + ((unsigned long)reloc_value(f))); + break; + case TOKEN_SDIV: + e = scalarvect (((signed long)reloc_value(e)) / + ((signed long)reloc_value(f))); + break; + case TOKEN_SMOD: + e = scalarvect (((signed long)reloc_value(e)) % + ((signed long)reloc_value(f))); + break; + } + } + return e; +} + +static 
expr *expr6(int critical) { + expr *e; + long label_seg, label_ofs; + + if (i == '-') { + i = nexttoken(); + e = expr6(critical); + if (!e) + return NULL; + return scalar_mult (e, -1L); + } else if (i == '+') { + i = nexttoken(); + return expr6(critical); + } else if (i == '~') { + i = nexttoken(); + e = expr6(critical); + if (!e) + return NULL; + if (!is_simple(e)) { + error(ERR_NONFATAL, "`~' operator may only be applied to" + " scalar values"); + return NULL; + } + return scalarvect(~reloc_value(e)); + } else if (i == TOKEN_SEG) { + i = nexttoken(); + e = expr6(critical); + if (!e) + return NULL; + return segment_part(e); + } else if (i == '(') { + i = nexttoken(); + e = expr0(critical); + if (!e) + return NULL; + if (i != ')') { + error(ERR_NONFATAL, "expecting `)'"); + return NULL; + } + i = nexttoken(); + return e; + } else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID || + i == TOKEN_HERE || i == TOKEN_BASE) { + e = tempptr; + switch (i) { + case TOKEN_NUM: + e->type = EXPR_SIMPLE; + e->value = tokval.t_integer; + break; + case TOKEN_REG: + e->type = tokval.t_integer; + e->value = 1; + break; + case TOKEN_ID: + case TOKEN_HERE: + case TOKEN_BASE: + /* + * Since the whole line is parsed before the label it + * defines is given to the label manager, we have + * problems with lines such as + * + * end: TIMES 512-(end-start) DB 0 + * + * where `end' is not known on pass one, despite not + * really being a forward reference, and due to + * criticality it is _needed_. Hence we check our label + * against the currently defined one, and do our own + * resolution of it if we have to. 
+ */ + if (i == TOKEN_BASE) { + label_seg = seg; + label_ofs = 0; + } else if (i == TOKEN_HERE || !strcmp(tokval.t_charptr, label)) { + label_seg = seg; + label_ofs = ofs; + } else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) { + if (critical == 2) { + error (ERR_NONFATAL, "symbol `%s' undefined", + tokval.t_charptr); + return NULL; + } else if (critical == 1) { + error (ERR_NONFATAL, "symbol `%s' not defined before use", + tokval.t_charptr); + return NULL; + } else { + label_seg = seg; + label_ofs = ofs; + } + } + e->type = EXPR_SIMPLE; + e->value = label_ofs; + if (label_seg!=NO_SEG) { + tempptr++; + tempptr->type = EXPR_SEGBASE + label_seg; + tempptr->value = 1; + } + break; + } + tempptr++; + tempptr->type = 0; + tempptr++; + i = nexttoken(); + return e; + } else { + error(ERR_NONFATAL, "expression syntax error"); + return NULL; + } +} + +static expr *evaluate (int critical) { + expr *e; + expr *f = NULL; + + e = expr0 (critical); + if (!e) + return NULL; + + if (i == TOKEN_WRT) { + if (!is_reloc(e)) { + error(ERR_NONFATAL, "invalid left-hand operand to WRT"); + return NULL; + } + i = nexttoken(); /* eat the WRT */ + f = expr6 (critical); + if (!f) + return NULL; + } + e = scalar_mult (e, 1L); /* strip far-absolute segment part */ + if (f) { + expr *g = tempptr++; + tempptr++->type = 0; + g->type = EXPR_WRT; + if (!is_reloc(f)) { + error(ERR_NONFATAL, "invalid right-hand operand to WRT"); + return NULL; + } + g->value = reloc_seg(f); + if (g->value == NO_SEG) + g->value = reloc_value(f) | SEG_ABS; + else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) { + error(ERR_NONFATAL, "invalid right-hand operand to WRT"); + return NULL; + } + e = add_vectors (e, g); + } + return e; +} |