diff options
author | H. Peter Anvin <hpa@zytor.com> | 2007-08-30 22:35:34 +0000 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2007-08-30 22:35:34 +0000 |
commit | 74cc5e569c1c8bcdd886734e1ce4c2df741b5b07 (patch) | |
tree | 65fdefada17611dfd77275edc131c2d2d79d6b10 /nasmlib.c | |
parent | b1dabe44acca3ff927c024bb7875605a5439346c (diff) | |
download | nasm-74cc5e569c1c8bcdd886734e1ce4c2df741b5b07.tar.gz nasm-74cc5e569c1c8bcdd886734e1ce4c2df741b5b07.tar.bz2 nasm-74cc5e569c1c8bcdd886734e1ce4c2df741b5b07.zip |
Finishing touches on perfect hash tokenizer; actually turn the thing on
Finish the perfect hash tokenizer, and actually enable it.
Move stdscan() et al to a separate file, since it's not needed in any
of the clients of nasmlib other than nasm itself.
Run make alldeps.
Diffstat (limited to 'nasmlib.c')
-rw-r--r-- | nasmlib.c | 217 |
1 files changed, 1 insertions, 216 deletions
```diff
@@ -14,7 +14,7 @@
 
 #include "nasm.h"
 #include "nasmlib.h"
-#include "insns.h" /* For MAX_KEYWORD */
+#include "insns.h"
 
 int globalbits = 0; /* defined in nasm.h, works better here for ASM+DISASM */
 static efunc nasm_malloc_error;
@@ -709,221 +709,6 @@ const char *prefix_name(int token)
 }
 
 /*
- * Standard scanner routine used by parser.c and some output
- * formats. It keeps a succession of temporary-storage strings in
- * stdscan_tempstorage, which can be cleared using stdscan_reset.
- */
-static char **stdscan_tempstorage = NULL;
-static int stdscan_tempsize = 0, stdscan_templen = 0;
-#define STDSCAN_TEMP_DELTA 256
-
-static void stdscan_pop(void)
-{
-    nasm_free(stdscan_tempstorage[--stdscan_templen]);
-}
-
-void stdscan_reset(void)
-{
-    while (stdscan_templen > 0)
-        stdscan_pop();
-}
-
-/*
- * Unimportant cleanup is done to avoid confusing people who are trying
- * to debug real memory leaks
- */
-void nasmlib_cleanup(void)
-{
-    stdscan_reset();
-    nasm_free(stdscan_tempstorage);
-}
-
-static char *stdscan_copy(char *p, int len)
-{
-    char *text;
-
-    text = nasm_malloc(len + 1);
-    strncpy(text, p, len);
-    text[len] = '\0';
-
-    if (stdscan_templen >= stdscan_tempsize) {
-        stdscan_tempsize += STDSCAN_TEMP_DELTA;
-        stdscan_tempstorage = nasm_realloc(stdscan_tempstorage,
-                                           stdscan_tempsize *
-                                           sizeof(char *));
-    }
-    stdscan_tempstorage[stdscan_templen++] = text;
-
-    return text;
-}
-
-char *stdscan_bufptr = NULL;
-int stdscan(void *private_data, struct tokenval *tv)
-{
-    char ourcopy[MAX_KEYWORD + 1], *r, *s;
-
-    (void)private_data; /* Don't warn that this parameter is unused */
-
-    while (isspace(*stdscan_bufptr))
-        stdscan_bufptr++;
-    if (!*stdscan_bufptr)
-        return tv->t_type = 0;
-
-    /* we have a token; either an id, a number or a char */
-    if (isidstart(*stdscan_bufptr) ||
-        (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) {
-        /* now we've got an identifier */
-        uint32_t i;
-        int is_sym = FALSE;
-
-        if (*stdscan_bufptr == '$') {
-            is_sym = TRUE;
-            stdscan_bufptr++;
-        }
-
-        r = stdscan_bufptr++;
-        /* read the entire buffer to advance the buffer pointer but... */
-        while (isidchar(*stdscan_bufptr))
-            stdscan_bufptr++;
-
-        /* ... copy only up to IDLEN_MAX-1 characters */
-        tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r < IDLEN_MAX ?
-                                     stdscan_bufptr - r : IDLEN_MAX - 1);
-
-        if (is_sym || stdscan_bufptr - r > MAX_KEYWORD)
-            return tv->t_type = TOKEN_ID; /* bypass all other checks */
-
-        for (s = tv->t_charptr, r = ourcopy; *s; s++)
-            *r++ = tolower(*s);
-        *r = '\0';
-        /* right, so we have an identifier sitting in temp storage. now,
-         * is it actually a register or instruction name, or what? */
-        if ((tv->t_integer = bsi(ourcopy, reg_names,
-                                 elements(reg_names))) >= 0) {
-            tv->t_integer += EXPR_REG_START;
-            return tv->t_type = TOKEN_REG;
-        } else if ((tv->t_integer = bsi(ourcopy, insn_names,
-                                        elements(insn_names))) >= 0) {
-            return tv->t_type = TOKEN_INSN;
-        }
-        for (i = 0; i < elements(icn); i++)
-            if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) {
-                char *p = ourcopy + strlen(icn[i]);
-                tv->t_integer = ico[i];
-                if ((tv->t_inttwo = bsi(p, conditions,
-                                        elements(conditions))) >= 0)
-                    return tv->t_type = TOKEN_INSN;
-            }
-        if ((tv->t_integer = bsi(ourcopy, prefix_names,
-                                 elements(prefix_names))) >= 0) {
-            tv->t_integer += PREFIX_ENUM_START;
-            return tv->t_type = TOKEN_PREFIX;
-        }
-        if ((tv->t_integer = bsi(ourcopy, special_names,
-                                 elements(special_names))) >= 0)
-            return tv->t_type = TOKEN_SPECIAL;
-        if (!nasm_stricmp(ourcopy, "seg"))
-            return tv->t_type = TOKEN_SEG;
-        if (!nasm_stricmp(ourcopy, "wrt"))
-            return tv->t_type = TOKEN_WRT;
-        return tv->t_type = TOKEN_ID;
-    } else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) {
-        /*
-         * It's a $ sign with no following hex number; this must
-         * mean it's a Here token ($), evaluating to the current
-         * assembly location, or a Base token ($$), evaluating to
-         * the base of the current segment.
-         */
-        stdscan_bufptr++;
-        if (*stdscan_bufptr == '$') {
-            stdscan_bufptr++;
-            return tv->t_type = TOKEN_BASE;
-        }
-        return tv->t_type = TOKEN_HERE;
-    } else if (isnumstart(*stdscan_bufptr)) { /* now we've got a number */
-        int rn_error;
-
-        r = stdscan_bufptr++;
-        while (isnumchar(*stdscan_bufptr))
-            stdscan_bufptr++;
-
-        if (*stdscan_bufptr == '.') {
-            /*
-             * a floating point constant
-             */
-            stdscan_bufptr++;
-            while (isnumchar(*stdscan_bufptr) ||
-                   ((stdscan_bufptr[-1] == 'e'
-                     || stdscan_bufptr[-1] == 'E')
-                    && (*stdscan_bufptr == '-' || *stdscan_bufptr == '+'))) {
-                stdscan_bufptr++;
-            }
-            tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r);
-            return tv->t_type = TOKEN_FLOAT;
-        }
-        r = stdscan_copy(r, stdscan_bufptr - r);
-        tv->t_integer = readnum(r, &rn_error);
-        stdscan_pop();
-        if (rn_error)
-            return tv->t_type = TOKEN_ERRNUM; /* some malformation occurred */
-        tv->t_charptr = NULL;
-        return tv->t_type = TOKEN_NUM;
-    } else if (*stdscan_bufptr == '\'' || *stdscan_bufptr == '"') { /* a char constant */
-        char quote = *stdscan_bufptr++, *r;
-        int rn_warn;
-        r = tv->t_charptr = stdscan_bufptr;
-        while (*stdscan_bufptr && *stdscan_bufptr != quote)
-            stdscan_bufptr++;
-        tv->t_inttwo = stdscan_bufptr - r; /* store full version */
-        if (!*stdscan_bufptr)
-            return tv->t_type = TOKEN_ERRNUM; /* unmatched quotes */
-        stdscan_bufptr++; /* skip over final quote */
-        tv->t_integer = readstrnum(r, tv->t_inttwo, &rn_warn);
-        /* FIXME: rn_warn is not checked! */
-        return tv->t_type = TOKEN_NUM;
-    } else if (*stdscan_bufptr == ';') { /* a comment has happened - stay */
-        return tv->t_type = 0;
-    } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '>') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_SHR;
-    } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '<') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_SHL;
-    } else if (stdscan_bufptr[0] == '/' && stdscan_bufptr[1] == '/') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_SDIV;
-    } else if (stdscan_bufptr[0] == '%' && stdscan_bufptr[1] == '%') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_SMOD;
-    } else if (stdscan_bufptr[0] == '=' && stdscan_bufptr[1] == '=') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_EQ;
-    } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '>') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_NE;
-    } else if (stdscan_bufptr[0] == '!' && stdscan_bufptr[1] == '=') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_NE;
-    } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '=') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_LE;
-    } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '=') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_GE;
-    } else if (stdscan_bufptr[0] == '&' && stdscan_bufptr[1] == '&') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_DBL_AND;
-    } else if (stdscan_bufptr[0] == '^' && stdscan_bufptr[1] == '^') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_DBL_XOR;
-    } else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_DBL_OR;
-    } else /* just an ordinary char */
-        return tv->t_type = (uint8_t)(*stdscan_bufptr++);
-}
-
-/*
  * Return TRUE if the argument is a simple scalar. (Or a far-
  * absolute, which counts.)
  */
```