/* parser.c source line parser for the Netwide Assembler * * The Netwide Assembler is copyright (C) 1996 Simon Tatham and * Julian Hall. All rights reserved. The software is * redistributable under the licence given in the file "Licence" * distributed in the NASM archive. * * initial version 27/iii/95 by Simon Tatham */ #include #include #include #include #include #include "nasm.h" #include "nasmlib.h" #include "parser.h" #include "float.h" #include "names.c" static long reg_flags[] = { /* sizes and special flags */ 0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL, REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8, REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX, REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG, FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG }; enum { /* special tokens */ S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_QWORD, S_SHORT, S_TO, S_TWORD, S_WORD }; static char *special_names[] = { /* and the actual text */ "byte", "dword", "far", "long", "near", "qword", "short", "to", "tword", "word" }; static char *prefix_names[] = { "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne", "repnz", "repz", "times" }; /* * Evaluator datatype. Expressions, within the evaluator, are * stored as an array of these beasts, terminated by a record with * type==0. Mostly, it's a vector type: each type denotes some kind * of a component, and the value denotes the multiple of that * component present in the expression. The exception is the WRT * type, whose `value' field denotes the segment to which the * expression is relative. These segments will be segment-base * types, i.e. either odd segment values or SEG_ABS types. 
So it is * still valid to assume that anything with a `value' field of zero * is insignificant. */ typedef struct { long type; /* a register, or EXPR_xxx */ long value; /* must be >= 32 bits */ } expr; static void eval_reset(void); static expr *evaluate(int); /* * ASSUMPTION MADE HERE. The number of distinct register names * (i.e. possible "type" fields for an expr structure) does not * exceed 126. */ #define EXPR_SIMPLE 126 #define EXPR_WRT 127 #define EXPR_SEGBASE 128 static int is_reloc(expr *); static int is_simple(expr *); static int is_really_simple (expr *); static long reloc_value(expr *); static long reloc_seg(expr *); static long reloc_wrt(expr *); enum { /* token types, other than chars */ TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, TOKEN_ERRNUM, TOKEN_HERE, TOKEN_BASE, TOKEN_SPECIAL, TOKEN_PREFIX, TOKEN_SHL, TOKEN_SHR, TOKEN_SDIV, TOKEN_SMOD, TOKEN_SEG, TOKEN_WRT, TOKEN_FLOAT }; struct tokenval { long t_integer, t_inttwo; char *t_charptr; }; static char tempstorage[1024], *q; static int bsi (char *string, char **array, int size);/* binary search */ static int nexttoken (void); static int is_comma_next (void); static char *bufptr; static int i; static struct tokenval tokval; static lfunc labelfunc; static efunc error; static char *label; static struct ofmt *outfmt; static long seg, ofs; insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, char *buffer, insn *result, struct ofmt *output, efunc errfunc) { int operand; int critical; q = tempstorage; bufptr = buffer; labelfunc = lookup_label; outfmt = output; error = errfunc; seg = segment; ofs = offset; label = ""; i = nexttoken(); result->eops = NULL; /* must do this, whatever happens */ if (i==0) { /* blank line - ignore */ result->label = NULL; /* so, no label on it */ result->opcode = -1; /* and no instruction either */ return result; } if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX && (i!=TOKEN_REG || (REG_SREG & ~reg_flags[tokval.t_integer]))) { error 
(ERR_NONFATAL, "label or instruction expected" " at start of line"); result->label = NULL; result->opcode = -1; return result; } if (i == TOKEN_ID) { /* there's a label here */ label = result->label = tokval.t_charptr; i = nexttoken(); if (i == ':') { /* skip over the optional colon */ i = nexttoken(); } } else /* no label; so, moving swiftly on */ result->label = NULL; if (i==0) { result->opcode = -1; /* this line contains just a label */ return result; } result->nprefix = 0; result->times = 1; while (i == TOKEN_PREFIX || (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) { /* * Handle special case: the TIMES prefix. */ if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) { expr *value; i = nexttoken(); eval_reset(); value = evaluate (pass); if (!value) { /* but, error in evaluator */ result->opcode = -1; /* unrecoverable parse error: */ return result; /* ignore this instruction */ } if (!is_simple (value)) { error (ERR_NONFATAL, "non-constant argument supplied to TIMES"); result->times = 1; } else result->times = value->value; } else { if (result->nprefix == MAXPREFIX) error (ERR_NONFATAL, "instruction has more than %d prefixes", MAXPREFIX); else result->prefixes[result->nprefix++] = tokval.t_integer; i = nexttoken(); } } if (i != TOKEN_INSN) { error (ERR_NONFATAL, "parser: instruction expected"); result->opcode = -1; return result; } result->opcode = tokval.t_integer; result->condition = tokval.t_inttwo; /* * RESB, RESW and RESD cannot be satisfied with incorrectly * evaluated operands, since the correct values _must_ be known * on the first pass. Hence, even in pass one, we set the * `critical' flag on calling evaluate(), so that it will bomb * out on undefined symbols. Nasty, but there's nothing we can * do about it. * * For the moment, EQU has the same difficulty, so we'll * include that. 
*/ if (result->opcode == I_RESB || result->opcode == I_RESW || result->opcode == I_RESD || result->opcode == I_RESQ || result->opcode == I_REST || result->opcode == I_EQU) critical = pass; else critical = (pass==2 ? 2 : 0); if (result->opcode == I_DB || result->opcode == I_DW || result->opcode == I_DD || result->opcode == I_DQ || result->opcode == I_DT) { extop *eop, **tail = &result->eops; int oper_num = 0; /* * Begin to read the DB/DW/DD/DQ/DT operands. */ while (1) { i = nexttoken(); if (i == 0) break; eop = *tail = nasm_malloc(sizeof(extop)); tail = &eop->next; eop->next = NULL; eop->type = EOT_NOTHING; oper_num++; if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) { eop->type = EOT_DB_STRING; eop->stringval = tokval.t_charptr; eop->stringlen = tokval.t_inttwo; i = nexttoken(); /* eat the comma */ continue; } if (i == TOKEN_FLOAT || i == '-') { long sign = +1L; if (i == '-') { char *save = bufptr; i = nexttoken(); sign = -1L; if (i != TOKEN_FLOAT) { bufptr = save; i = '-'; } } if (i == TOKEN_FLOAT) { eop->type = EOT_DB_STRING; eop->stringval = q; if (result->opcode == I_DD) eop->stringlen = 4; else if (result->opcode == I_DQ) eop->stringlen = 8; else if (result->opcode == I_DT) eop->stringlen = 10; else { error(ERR_NONFATAL, "floating-point constant" " encountered in `D%c' instruction", result->opcode == I_DW ? 
'W' : 'B'); eop->type = EOT_NOTHING; } q += eop->stringlen; if (!float_const (tokval.t_charptr, sign, (unsigned char *)eop->stringval, eop->stringlen, error)) eop->type = EOT_NOTHING; i = nexttoken(); /* eat the comma */ continue; } } /* anything else */ { expr *value; eval_reset(); value = evaluate (critical); if (!value) { /* but, error in evaluator */ result->opcode = -1;/* unrecoverable parse error: */ return result; /* ignore this instruction */ } if (is_reloc(value)) { eop->type = EOT_DB_NUMBER; eop->offset = reloc_value(value); eop->segment = reloc_seg(value); eop->wrt = reloc_wrt(value); } else { error (ERR_NONFATAL, "`%s' operand %d: expression is not simple" " or relocatable", insn_names[result->opcode], oper_num); } } } return result; } /* right. Now we begin to parse the operands. There may be up to three * of these, separated by commas, and terminated by a zero token. */ for (operand = 0; operand < 3; operand++) { expr *seg, *value; /* used most of the time */ int mref; /* is this going to be a memory ref? 
*/ result->oprs[operand].addr_size = 0;/* have to zero this whatever */ i = nexttoken(); if (i == 0) break; /* end of operands: get out of here */ result->oprs[operand].type = 0; /* so far, no override */ while (i == TOKEN_SPECIAL) {/* size specifiers */ switch ((int)tokval.t_integer) { case S_BYTE: result->oprs[operand].type |= BITS8; break; case S_WORD: result->oprs[operand].type |= BITS16; break; case S_DWORD: case S_LONG: result->oprs[operand].type |= BITS32; break; case S_QWORD: result->oprs[operand].type |= BITS64; break; case S_TWORD: result->oprs[operand].type |= BITS80; break; case S_TO: result->oprs[operand].type |= TO; break; case S_FAR: result->oprs[operand].type |= FAR; break; case S_NEAR: result->oprs[operand].type |= NEAR; break; case S_SHORT: result->oprs[operand].type |= SHORT; break; } i = nexttoken(); } if (i == '[') { /* memory reference */ i = nexttoken(); mref = TRUE; if (i == TOKEN_SPECIAL) { /* check for address size override */ switch ((int)tokval.t_integer) { case S_WORD: result->oprs[operand].addr_size = 16; break; case S_DWORD: case S_LONG: result->oprs[operand].addr_size = 32; break; default: error (ERR_NONFATAL, "invalid size specification in" " effective address"); } i = nexttoken(); } } else /* immediate operand, or register */ mref = FALSE; eval_reset(); value = evaluate (critical); if (!value) { /* error in evaluator */ result->opcode = -1; /* unrecoverable parse error: */ return result; /* ignore this instruction */ } if (i == ':' && mref) { /* it was seg:offset */ seg = value; /* so shift this into the segment */ i = nexttoken(); /* then skip the colon */ if (i == TOKEN_SPECIAL) { /* another check for size override */ switch ((int)tokval.t_integer) { case S_WORD: result->oprs[operand].addr_size = 16; break; case S_DWORD: case S_LONG: result->oprs[operand].addr_size = 32; break; default: error (ERR_NONFATAL, "invalid size specification in" " effective address"); } i = nexttoken(); } value = evaluate (critical); /* and get the 
offset */ if (!value) { /* but, error in evaluator */ result->opcode = -1; /* unrecoverable parse error: */ return result; /* ignore this instruction */ } } else seg = NULL; if (mref) { /* find ] at the end */ if (i != ']') { error (ERR_NONFATAL, "parser: expecting ]"); do { /* error recovery again */ i = nexttoken(); } while (i != 0 && i != ','); } else /* we got the required ] */ i = nexttoken(); } else { /* immediate operand */ if (i != 0 && i != ',' && i != ':') { error (ERR_NONFATAL, "comma or end of line expected"); do { /* error recovery */ i = nexttoken(); } while (i != 0 && i != ','); } else if (i == ':') { result->oprs[operand].type |= COLON; } } /* now convert the exprs returned from evaluate() into operand * descriptions... */ if (mref) { /* it's a memory reference */ expr *e = value; int b, i, s; /* basereg, indexreg, scale */ long o; /* offset */ if (seg) { /* segment override */ if (seg[1].type!=0 || seg->value!=1 || REG_SREG & ~reg_flags[seg->type]) error (ERR_NONFATAL, "invalid segment override"); else if (result->nprefix == MAXPREFIX) error (ERR_NONFATAL, "instruction has more than %d prefixes", MAXPREFIX); else result->prefixes[result->nprefix++] = seg->type; } b = i = -1, o = s = 0; if (e->type < EXPR_SIMPLE) { /* this bit's a register */ if (e->value == 1) /* in fact it can be basereg */ b = e->type; else /* no, it has to be indexreg */ i = e->type, s = e->value; e++; } if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */ if (e->value != 1) { /* it has to be indexreg */ if (i != -1) { /* but it can't be */ error(ERR_NONFATAL, "invalid effective address"); result->opcode = -1; return result; } else i = e->type, s = e->value; } else { /* it can be basereg */ if (b != -1) /* or can it? */ i = e->type, s = 1; else b = e->type; } e++; } if (e->type != 0) { /* is there an offset? */ if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? 
*/ error (ERR_NONFATAL, "invalid effective address"); result->opcode = -1; return result; } else { if (e->type == EXPR_SIMPLE) { o = e->value; e++; } if (e->type == EXPR_WRT) { result->oprs[operand].wrt = e->value; e++; } else result->oprs[operand].wrt = NO_SEG; if (e->type != 0) { /* is there a segment id? */ if (e->type < EXPR_SEGBASE) { error (ERR_NONFATAL, "invalid effective address"); result->opcode = -1; return result; } else result->oprs[operand].segment = (e->type - EXPR_SEGBASE); e++; } else result->oprs[operand].segment = NO_SEG; } } else { o = 0; result->oprs[operand].wrt = NO_SEG; result->oprs[operand].segment = NO_SEG; } if (e->type != 0) { /* there'd better be nothing left! */ error (ERR_NONFATAL, "invalid effective address"); result->opcode = -1; return result; } result->oprs[operand].type |= MEMORY; if (b==-1 && (i==-1 || s==0)) result->oprs[operand].type |= MEM_OFFS; result->oprs[operand].basereg = b; result->oprs[operand].indexreg = i; result->oprs[operand].scale = s; result->oprs[operand].offset = o; } else { /* it's not a memory reference */ if (is_reloc(value)) { /* it's immediate */ result->oprs[operand].type |= IMMEDIATE; result->oprs[operand].offset = reloc_value(value); result->oprs[operand].segment = reloc_seg(value); result->oprs[operand].wrt = reloc_wrt(value); if (is_simple(value) && reloc_value(value)==1) result->oprs[operand].type |= UNITY; } else { /* it's a register */ if (value->type>=EXPR_SIMPLE || value->value!=1) { error (ERR_NONFATAL, "invalid operand type"); result->opcode = -1; return result; } /* clear overrides, except TO which applies to FPU regs */ result->oprs[operand].type &= TO; result->oprs[operand].type |= REGISTER; result->oprs[operand].type |= reg_flags[value->type]; result->oprs[operand].basereg = value->type; } } } result->operands = operand; /* set operand count */ while (operand<3) /* clear remaining operands */ result->oprs[operand++].type = 0; /* * Transform RESW, RESD, RESQ, REST into RESB. 
*/ switch (result->opcode) { case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break; case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break; case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break; case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break; } return result; } static int is_comma_next (void) { char *p; p = bufptr; while (isspace(*p)) p++; return (*p == ',' || *p == ';' || !*p); } /* isidstart matches any character that may start an identifier, and isidchar * matches any character that may appear at places other than the start of an * identifier. E.g. a period may only appear at the start of an identifier * (for local labels), whereas a number may appear anywhere *but* at the * start. */ #define isidstart(c) ( isalpha(c) || (c)=='_' || (c)=='.' || (c)=='?' ) #define isidchar(c) ( isidstart(c) || isdigit(c) || (c)=='$' || (c)=='#' \ || (c)=='@' || (c)=='~' ) /* Ditto for numeric constants. */ #define isnumstart(c) ( isdigit(c) || (c)=='$' ) #define isnumchar(c) ( isalnum(c) ) /* This returns the numeric value of a given 'digit'. */ #define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0') /* * This tokeniser routine has only one side effect, that of * updating `bufptr'. Hence by saving `bufptr', lookahead may be * performed. */ static int nexttoken (void) { char ourcopy[256], *r, *s; while (isspace(*bufptr)) bufptr++; if (!*bufptr) return 0; /* we have a token; either an id, a number or a char */ if (isidstart(*bufptr) || (*bufptr == '$' && isidstart(bufptr[1]))) { /* now we've got an identifier */ int i; int is_sym = FALSE; if (*bufptr == '$') { is_sym = TRUE; bufptr++; } tokval.t_charptr = q; *q++ = *bufptr++; while (isidchar(*bufptr)) *q++ = *bufptr++; *q++ = '\0'; for (s=tokval.t_charptr, r=ourcopy; *s; s++) *r++ = tolower (*s); *r = '\0'; if (is_sym) return TOKEN_ID; /* bypass all other checks */ /* right, so we have an identifier sitting in temp storage. 
now, * is it actually a register or instruction name, or what? */ if ((tokval.t_integer=bsi(ourcopy, reg_names, elements(reg_names)))>=0) return TOKEN_REG; if ((tokval.t_integer=bsi(ourcopy, insn_names, elements(insn_names)))>=0) return TOKEN_INSN; for (i=0; i=0) return TOKEN_INSN; } if ((tokval.t_integer=bsi(ourcopy, prefix_names, elements(prefix_names)))>=0) { tokval.t_integer += PREFIX_ENUM_START; return TOKEN_PREFIX; } if ((tokval.t_integer=bsi(ourcopy, special_names, elements(special_names)))>=0) return TOKEN_SPECIAL; if (!strcmp(ourcopy, "seg")) return TOKEN_SEG; if (!strcmp(ourcopy, "wrt")) return TOKEN_WRT; return TOKEN_ID; } else if (*bufptr == '$' && !isnumchar(bufptr[1])) { /* * It's a $ sign with no following hex number; this must * mean it's a Here token ($), evaluating to the current * assembly location, or a Base token ($$), evaluating to * the base of the current segment. */ bufptr++; if (*bufptr == '$') { bufptr++; return TOKEN_BASE; } return TOKEN_HERE; } else if (isnumstart(*bufptr)) { /* now we've got a number */ char *r = q; int rn_error; *q++ = *bufptr++; while (isnumchar(*bufptr)) { *q++ = *bufptr++; } if (*bufptr == '.') { /* * a floating point constant */ *q++ = *bufptr++; while (isnumchar(*bufptr)) { *q++ = *bufptr++; } *q++ = '\0'; tokval.t_charptr = r; return TOKEN_FLOAT; } *q++ = '\0'; tokval.t_integer = readnum(r, &rn_error); if (rn_error) return TOKEN_ERRNUM; /* some malformation occurred */ tokval.t_charptr = NULL; return TOKEN_NUM; } else if (*bufptr == '\'' || *bufptr == '"') {/* a char constant */ char quote = *bufptr++, *r; r = tokval.t_charptr = bufptr; while (*bufptr && *bufptr != quote) bufptr++; tokval.t_inttwo = bufptr - r; /* store full version */ if (!*bufptr) return TOKEN_ERRNUM; /* unmatched quotes */ tokval.t_integer = 0; r = bufptr++; /* skip over final quote */ while (quote != *--r) { tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r; } return TOKEN_NUM; } else if (*bufptr == ';') { /* a comment has 
happened - stay */ return 0; } else if ((*bufptr == '>' || *bufptr == '<' || *bufptr == '/' || *bufptr == '%') && bufptr[1] == *bufptr) { bufptr += 2; return (bufptr[-2] == '>' ? TOKEN_SHR : bufptr[-2] == '<' ? TOKEN_SHL : bufptr[-2] == '/' ? TOKEN_SDIV : TOKEN_SMOD); } else /* just an ordinary char */ return (unsigned char) (*bufptr++); } /* return index of "string" in "array", or -1 if no match. */ static int bsi (char *string, char **array, int size) { int i = -1, j = size; /* always, i < index < j */ while (j-i >= 2) { int k = (i+j)/2; int l = strcmp(string, array[k]); if (l<0) /* it's in the first half */ j = k; else if (l>0) /* it's in the second half */ i = k; else /* we've got it :) */ return k; } return -1; /* we haven't got it :( */ } void cleanup_insn (insn *i) { extop *e; while (i->eops) { e = i->eops; i->eops = i->eops->next; nasm_free (e); } } /* ------------- Evaluator begins here ------------------ */ static expr exprtempstorage[1024], *tempptr; /* store exprs in here */ /* * Add two vector datatypes. We have some bizarre behaviour on far- * absolute segment types: we preserve them during addition _only_ * if one of the segments is a truly pure scalar. 
*/ static expr *add_vectors(expr *p, expr *q) { expr *r = tempptr; int preserve; preserve = is_really_simple(p) || is_really_simple(q); while (p->type && q->type && p->type < EXPR_SEGBASE+SEG_ABS && q->type < EXPR_SEGBASE+SEG_ABS) if (p->type > q->type) { tempptr->type = q->type; tempptr->value = q->value; tempptr++, q++; } else if (p->type < q->type) { tempptr->type = p->type; tempptr->value = p->value; tempptr++, p++; } else { /* *p and *q have same type */ tempptr->type = p->type; tempptr->value = p->value + q->value; tempptr++, p++, q++; } while (p->type && (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) { tempptr->type = p->type; tempptr->value = p->value; tempptr++, p++; } while (q->type && (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) { tempptr->type = q->type; tempptr->value = q->value; tempptr++, q++; } (tempptr++)->type = 0; return r; } /* * Multiply a vector by a scalar. Strip far-absolute segment part * if present. */ static expr *scalar_mult(expr *vect, long scalar) { expr *p = vect; while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) { p->value = scalar * (p->value); p++; } p->type = 0; return vect; } static expr *scalarvect (long scalar) { expr *p = tempptr; tempptr->type = EXPR_SIMPLE; tempptr->value = scalar; tempptr++; tempptr->type = 0; tempptr++; return p; } /* * Return TRUE if the argument is a simple scalar. (Or a far- * absolute, which counts.) */ static int is_simple (expr *vect) { while (vect->type && !vect->value) vect++; if (!vect->type) return 1; if (vect->type != EXPR_SIMPLE) return 0; do { vect++; } while (vect->type && !vect->value); if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0; return 1; } /* * Return TRUE if the argument is a simple scalar, _NOT_ a far- * absolute. 
*/ static int is_really_simple (expr *vect) { while (vect->type && !vect->value) vect++; if (!vect->type) return 1; if (vect->type != EXPR_SIMPLE) return 0; do { vect++; } while (vect->type && !vect->value); if (vect->type) return 0; return 1; } /* * Return TRUE if the argument is relocatable (i.e. a simple * scalar, plus at most one segment-base, plus possibly a WRT). */ static int is_reloc (expr *vect) { while (vect->type && !vect->value) vect++; if (!vect->type) return 1; if (vect->type < EXPR_SIMPLE) return 0; if (vect->type == EXPR_SIMPLE) { do { vect++; } while (vect->type && !vect->value); if (!vect->type) return 1; } do { vect++; } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); if (!vect->type) return 1; return 1; } /* * Return the scalar part of a relocatable vector. (Including * simple scalar vectors - those qualify as relocatable.) */ static long reloc_value (expr *vect) { while (vect->type && !vect->value) vect++; if (!vect->type) return 0; if (vect->type == EXPR_SIMPLE) return vect->value; else return 0; } /* * Return the segment number of a relocatable vector, or NO_SEG for * simple scalars. */ static long reloc_seg (expr *vect) { while (vect->type && (vect->type == EXPR_WRT || !vect->value)) vect++; if (vect->type == EXPR_SIMPLE) { do { vect++; } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); } if (!vect->type) return NO_SEG; else return vect->type - EXPR_SEGBASE; } /* * Return the WRT segment number of a relocatable vector, or NO_SEG * if no WRT part is present. */ static long reloc_wrt (expr *vect) { while (vect->type && vect->type < EXPR_WRT) vect++; if (vect->type == EXPR_WRT) { return vect->value; } else return NO_SEG; } static void eval_reset(void) { tempptr = exprtempstorage; /* initialise temporary storage */ } /* * The SEG operator: calculate the segment part of a relocatable * value. Return NULL, as usual, if an error occurs. Report the * error too. 
*/
static expr *segment_part (expr *e)
{
    long base;
    expr *out;

    /* reloc_seg() is consulted only once `e' is known to be relocatable. */
    if (!is_reloc(e) || (base = reloc_seg(e)) == NO_SEG) {
        error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
        return NULL;
    }

    if (base & SEG_ABS)                /* absolute segment: a plain number */
        return scalarvect(base & ~SEG_ABS);

    /* Otherwise a single segment-base term, as named by the output
     * format's segbase() hook. The two slots are claimed before the
     * hook is called. */
    out = tempptr;
    tempptr = out + 2;
    out[1].type = 0;
    out[0].type = EXPR_SEGBASE + outfmt->segbase(base + 1);
    out[0].value = 1;
    return out;
}

/*
 * Recursive-descent parser. evaluate() takes one argument, the
 * `critical' level: zero means unresolved symbols are tolerated,
 * non-zero means they are an error (1 and 2 differ only in the
 * message reported). It must update the global `i' to reflect the
 * token after the parsed string, and may return NULL.
 *
 * evaluate() should report its own errors: on return it is assumed
 * that if NULL has been returned, the error has already been
 * reported.
 */

/*
 * Grammar parsed is:
 *
 * expr  : expr0 [ WRT expr6 ]
 * expr0 : expr1 [ {|} expr1]
 * expr1 : expr2 [ {^} expr2]
 * expr2 : expr3 [ {&} expr3]
 * expr3 : expr4 [ {<<,>>} expr4...]
 * expr4 : expr5 [ {+,-} expr5...]
 * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
* expr6 : { ~,+,-,SEG } expr6 * | (expr0) * | symbol * | $ * | number */ static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int); static expr *expr4(int), *expr5(int), *expr6(int); static expr *expr0(int critical) { expr *e, *f; e = expr1(critical); if (!e) return NULL; while (i == '|') { i = nexttoken(); f = expr1(critical); if (!f) return NULL; if (!is_simple(e) || !is_simple(f)) { error(ERR_NONFATAL, "`|' operator may only be applied to" " scalar values"); } e = scalarvect (reloc_value(e) | reloc_value(f)); } return e; } static expr *expr1(int critical) { expr *e, *f; e = expr2(critical); if (!e) return NULL; while (i == '^') { i = nexttoken(); f = expr2(critical); if (!f) return NULL; if (!is_simple(e) || !is_simple(f)) { error(ERR_NONFATAL, "`^' operator may only be applied to" " scalar values"); } e = scalarvect (reloc_value(e) ^ reloc_value(f)); } return e; } static expr *expr2(int critical) { expr *e, *f; e = expr3(critical); if (!e) return NULL; while (i == '&') { i = nexttoken(); f = expr3(critical); if (!f) return NULL; if (!is_simple(e) || !is_simple(f)) { error(ERR_NONFATAL, "`&' operator may only be applied to" " scalar values"); } e = scalarvect (reloc_value(e) & reloc_value(f)); } return e; } static expr *expr3(int critical) { expr *e, *f; e = expr4(critical); if (!e) return NULL; while (i == TOKEN_SHL || i == TOKEN_SHR) { int j = i; i = nexttoken(); f = expr4(critical); if (!f) return NULL; if (!is_simple(e) || !is_simple(f)) { error(ERR_NONFATAL, "shift operator may only be applied to" " scalar values"); } switch (j) { case TOKEN_SHL: e = scalarvect (reloc_value(e) << reloc_value(f)); break; case TOKEN_SHR: e = scalarvect (((unsigned long)reloc_value(e)) >> reloc_value(f)); break; } } return e; } static expr *expr4(int critical) { expr *e, *f; e = expr5(critical); if (!e) return NULL; while (i == '+' || i == '-') { int j = i; i = nexttoken(); f = expr5(critical); if (!f) return NULL; switch (j) { case '+': e = add_vectors (e, f); break; 
case '-': e = add_vectors (e, scalar_mult(f, -1L)); break; } } return e; } static expr *expr5(int critical) { expr *e, *f; e = expr6(critical); if (!e) return NULL; while (i == '*' || i == '/' || i == '*' || i == TOKEN_SDIV || i == TOKEN_SMOD) { int j = i; i = nexttoken(); f = expr6(critical); if (!f) return NULL; if (j != '*' && (!is_simple(e) || !is_simple(f))) { error(ERR_NONFATAL, "division operator may only be applied to" " scalar values"); return NULL; } if (j != '*' && reloc_value(f) == 0) { error(ERR_NONFATAL, "division by zero"); return NULL; } switch (j) { case '*': if (is_simple(e)) e = scalar_mult (f, reloc_value(e)); else if (is_simple(f)) e = scalar_mult (e, reloc_value(f)); else { error(ERR_NONFATAL, "unable to multiply two " "non-scalar objects"); return NULL; } break; case '/': e = scalarvect (((unsigned long)reloc_value(e)) / ((unsigned long)reloc_value(f))); break; case '%': e = scalarvect (((unsigned long)reloc_value(e)) % ((unsigned long)reloc_value(f))); break; case TOKEN_SDIV: e = scalarvect (((signed long)reloc_value(e)) / ((signed long)reloc_value(f))); break; case TOKEN_SMOD: e = scalarvect (((signed long)reloc_value(e)) % ((signed long)reloc_value(f))); break; } } return e; } static expr *expr6(int critical) { expr *e; long label_seg, label_ofs; if (i == '-') { i = nexttoken(); e = expr6(critical); if (!e) return NULL; return scalar_mult (e, -1L); } else if (i == '+') { i = nexttoken(); return expr6(critical); } else if (i == '~') { i = nexttoken(); e = expr6(critical); if (!e) return NULL; if (!is_simple(e)) { error(ERR_NONFATAL, "`~' operator may only be applied to" " scalar values"); return NULL; } return scalarvect(~reloc_value(e)); } else if (i == TOKEN_SEG) { i = nexttoken(); e = expr6(critical); if (!e) return NULL; return segment_part(e); } else if (i == '(') { i = nexttoken(); e = expr0(critical); if (!e) return NULL; if (i != ')') { error(ERR_NONFATAL, "expecting `)'"); return NULL; } i = nexttoken(); return e; } else if (i == 
TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID || i == TOKEN_HERE || i == TOKEN_BASE) { e = tempptr; switch (i) { case TOKEN_NUM: e->type = EXPR_SIMPLE; e->value = tokval.t_integer; break; case TOKEN_REG: e->type = tokval.t_integer; e->value = 1; break; case TOKEN_ID: case TOKEN_HERE: case TOKEN_BASE: /* * Since the whole line is parsed before the label it * defines is given to the label manager, we have * problems with lines such as * * end: TIMES 512-(end-start) DB 0 * * where `end' is not known on pass one, despite not * really being a forward reference, and due to * criticality it is _needed_. Hence we check our label * against the currently defined one, and do our own * resolution of it if we have to. */ if (i == TOKEN_BASE) { label_seg = seg; label_ofs = 0; } else if (i == TOKEN_HERE || !strcmp(tokval.t_charptr, label)) { label_seg = seg; label_ofs = ofs; } else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) { if (critical == 2) { error (ERR_NONFATAL, "symbol `%s' undefined", tokval.t_charptr); return NULL; } else if (critical == 1) { error (ERR_NONFATAL, "symbol `%s' not defined before use", tokval.t_charptr); return NULL; } else { label_seg = seg; label_ofs = ofs; } } e->type = EXPR_SIMPLE; e->value = label_ofs; if (label_seg!=NO_SEG) { tempptr++; tempptr->type = EXPR_SEGBASE + label_seg; tempptr->value = 1; } break; } tempptr++; tempptr->type = 0; tempptr++; i = nexttoken(); return e; } else { error(ERR_NONFATAL, "expression syntax error"); return NULL; } } static expr *evaluate (int critical) { expr *e; expr *f = NULL; e = expr0 (critical); if (!e) return NULL; if (i == TOKEN_WRT) { if (!is_reloc(e)) { error(ERR_NONFATAL, "invalid left-hand operand to WRT"); return NULL; } i = nexttoken(); /* eat the WRT */ f = expr6 (critical); if (!f) return NULL; } e = scalar_mult (e, 1L); /* strip far-absolute segment part */ if (f) { expr *g = tempptr++; tempptr++->type = 0; g->type = EXPR_WRT; if (!is_reloc(f)) { error(ERR_NONFATAL, "invalid right-hand 
operand to WRT"); return NULL; } g->value = reloc_seg(f); if (g->value == NO_SEG) g->value = reloc_value(f) | SEG_ABS; else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) { error(ERR_NONFATAL, "invalid right-hand operand to WRT"); return NULL; } e = add_vectors (e, g); } return e; }