summary | refs | log | tree | commit | diff
path: root/nasmlib.c
diff options
context:
space:
mode:
author: H. Peter Anvin <hpa@zytor.com> 2007-08-30 22:35:34 +0000
committer: H. Peter Anvin <hpa@zytor.com> 2007-08-30 22:35:34 +0000
commit: 74cc5e569c1c8bcdd886734e1ce4c2df741b5b07 (patch)
tree: 65fdefada17611dfd77275edc131c2d2d79d6b10 /nasmlib.c
parent: b1dabe44acca3ff927c024bb7875605a5439346c (diff)
download: nasm-74cc5e569c1c8bcdd886734e1ce4c2df741b5b07.tar.gz
nasm-74cc5e569c1c8bcdd886734e1ce4c2df741b5b07.tar.bz2
nasm-74cc5e569c1c8bcdd886734e1ce4c2df741b5b07.zip
Finishing touches on perfect hash tokenizer; actually turn the thing on
Finish the perfect hash tokenizer, and actually enable it. Move stdscan() et al. to a separate file, since they are not needed by any client of nasmlib other than nasm itself. Run `make alldeps`.
Diffstat (limited to 'nasmlib.c')
-rw-r--r-- nasmlib.c 217
1 file changed, 1 insertion, 216 deletions
diff --git a/nasmlib.c b/nasmlib.c
index 64d85b9..b171c6f 100644
--- a/nasmlib.c
+++ b/nasmlib.c
@@ -14,7 +14,7 @@
#include "nasm.h"
#include "nasmlib.h"
-#include "insns.h" /* For MAX_KEYWORD */
+#include "insns.h"
int globalbits = 0; /* defined in nasm.h, works better here for ASM+DISASM */
static efunc nasm_malloc_error;
@@ -709,221 +709,6 @@ const char *prefix_name(int token)
}
/*
- * Standard scanner routine used by parser.c and some output
- * formats. It keeps a succession of temporary-storage strings in
- * stdscan_tempstorage, which can be cleared using stdscan_reset.
- */
-static char **stdscan_tempstorage = NULL;
-static int stdscan_tempsize = 0, stdscan_templen = 0;
-#define STDSCAN_TEMP_DELTA 256
-
-static void stdscan_pop(void)
-{
- nasm_free(stdscan_tempstorage[--stdscan_templen]);
-}
-
-void stdscan_reset(void)
-{
- while (stdscan_templen > 0)
- stdscan_pop();
-}
-
-/*
- * Unimportant cleanup is done to avoid confusing people who are trying
- * to debug real memory leaks
- */
-void nasmlib_cleanup(void)
-{
- stdscan_reset();
- nasm_free(stdscan_tempstorage);
-}
-
-static char *stdscan_copy(char *p, int len)
-{
- char *text;
-
- text = nasm_malloc(len + 1);
- strncpy(text, p, len);
- text[len] = '\0';
-
- if (stdscan_templen >= stdscan_tempsize) {
- stdscan_tempsize += STDSCAN_TEMP_DELTA;
- stdscan_tempstorage = nasm_realloc(stdscan_tempstorage,
- stdscan_tempsize *
- sizeof(char *));
- }
- stdscan_tempstorage[stdscan_templen++] = text;
-
- return text;
-}
-
-char *stdscan_bufptr = NULL;
-int stdscan(void *private_data, struct tokenval *tv)
-{
- char ourcopy[MAX_KEYWORD + 1], *r, *s;
-
- (void)private_data; /* Don't warn that this parameter is unused */
-
- while (isspace(*stdscan_bufptr))
- stdscan_bufptr++;
- if (!*stdscan_bufptr)
- return tv->t_type = 0;
-
- /* we have a token; either an id, a number or a char */
- if (isidstart(*stdscan_bufptr) ||
- (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) {
- /* now we've got an identifier */
- uint32_t i;
- int is_sym = FALSE;
-
- if (*stdscan_bufptr == '$') {
- is_sym = TRUE;
- stdscan_bufptr++;
- }
-
- r = stdscan_bufptr++;
- /* read the entire buffer to advance the buffer pointer but... */
- while (isidchar(*stdscan_bufptr))
- stdscan_bufptr++;
-
- /* ... copy only up to IDLEN_MAX-1 characters */
- tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r < IDLEN_MAX ?
- stdscan_bufptr - r : IDLEN_MAX - 1);
-
- if (is_sym || stdscan_bufptr - r > MAX_KEYWORD)
- return tv->t_type = TOKEN_ID; /* bypass all other checks */
-
- for (s = tv->t_charptr, r = ourcopy; *s; s++)
- *r++ = tolower(*s);
- *r = '\0';
- /* right, so we have an identifier sitting in temp storage. now,
- * is it actually a register or instruction name, or what? */
- if ((tv->t_integer = bsi(ourcopy, reg_names,
- elements(reg_names))) >= 0) {
- tv->t_integer += EXPR_REG_START;
- return tv->t_type = TOKEN_REG;
- } else if ((tv->t_integer = bsi(ourcopy, insn_names,
- elements(insn_names))) >= 0) {
- return tv->t_type = TOKEN_INSN;
- }
- for (i = 0; i < elements(icn); i++)
- if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) {
- char *p = ourcopy + strlen(icn[i]);
- tv->t_integer = ico[i];
- if ((tv->t_inttwo = bsi(p, conditions,
- elements(conditions))) >= 0)
- return tv->t_type = TOKEN_INSN;
- }
- if ((tv->t_integer = bsi(ourcopy, prefix_names,
- elements(prefix_names))) >= 0) {
- tv->t_integer += PREFIX_ENUM_START;
- return tv->t_type = TOKEN_PREFIX;
- }
- if ((tv->t_integer = bsi(ourcopy, special_names,
- elements(special_names))) >= 0)
- return tv->t_type = TOKEN_SPECIAL;
- if (!nasm_stricmp(ourcopy, "seg"))
- return tv->t_type = TOKEN_SEG;
- if (!nasm_stricmp(ourcopy, "wrt"))
- return tv->t_type = TOKEN_WRT;
- return tv->t_type = TOKEN_ID;
- } else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) {
- /*
- * It's a $ sign with no following hex number; this must
- * mean it's a Here token ($), evaluating to the current
- * assembly location, or a Base token ($$), evaluating to
- * the base of the current segment.
- */
- stdscan_bufptr++;
- if (*stdscan_bufptr == '$') {
- stdscan_bufptr++;
- return tv->t_type = TOKEN_BASE;
- }
- return tv->t_type = TOKEN_HERE;
- } else if (isnumstart(*stdscan_bufptr)) { /* now we've got a number */
- int rn_error;
-
- r = stdscan_bufptr++;
- while (isnumchar(*stdscan_bufptr))
- stdscan_bufptr++;
-
- if (*stdscan_bufptr == '.') {
- /*
- * a floating point constant
- */
- stdscan_bufptr++;
- while (isnumchar(*stdscan_bufptr) ||
- ((stdscan_bufptr[-1] == 'e'
- || stdscan_bufptr[-1] == 'E')
- && (*stdscan_bufptr == '-' || *stdscan_bufptr == '+'))) {
- stdscan_bufptr++;
- }
- tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r);
- return tv->t_type = TOKEN_FLOAT;
- }
- r = stdscan_copy(r, stdscan_bufptr - r);
- tv->t_integer = readnum(r, &rn_error);
- stdscan_pop();
- if (rn_error)
- return tv->t_type = TOKEN_ERRNUM; /* some malformation occurred */
- tv->t_charptr = NULL;
- return tv->t_type = TOKEN_NUM;
- } else if (*stdscan_bufptr == '\'' || *stdscan_bufptr == '"') { /* a char constant */
- char quote = *stdscan_bufptr++, *r;
- int rn_warn;
- r = tv->t_charptr = stdscan_bufptr;
- while (*stdscan_bufptr && *stdscan_bufptr != quote)
- stdscan_bufptr++;
- tv->t_inttwo = stdscan_bufptr - r; /* store full version */
- if (!*stdscan_bufptr)
- return tv->t_type = TOKEN_ERRNUM; /* unmatched quotes */
- stdscan_bufptr++; /* skip over final quote */
- tv->t_integer = readstrnum(r, tv->t_inttwo, &rn_warn);
- /* FIXME: rn_warn is not checked! */
- return tv->t_type = TOKEN_NUM;
- } else if (*stdscan_bufptr == ';') { /* a comment has happened - stay */
- return tv->t_type = 0;
- } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '>') {
- stdscan_bufptr += 2;
- return tv->t_type = TOKEN_SHR;
- } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '<') {
- stdscan_bufptr += 2;
- return tv->t_type = TOKEN_SHL;
- } else if (stdscan_bufptr[0] == '/' && stdscan_bufptr[1] == '/') {
- stdscan_bufptr += 2;
- return tv->t_type = TOKEN_SDIV;
- } else if (stdscan_bufptr[0] == '%' && stdscan_bufptr[1] == '%') {
- stdscan_bufptr += 2;
- return tv->t_type = TOKEN_SMOD;
- } else if (stdscan_bufptr[0] == '=' && stdscan_bufptr[1] == '=') {
- stdscan_bufptr += 2;
- return tv->t_type = TOKEN_EQ;
- } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '>') {
- stdscan_bufptr += 2;
- return tv->t_type = TOKEN_NE;
- } else if (stdscan_bufptr[0] == '!' && stdscan_bufptr[1] == '=') {
- stdscan_bufptr += 2;
- return tv->t_type = TOKEN_NE;
- } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '=') {
- stdscan_bufptr += 2;
- return tv->t_type = TOKEN_LE;
- } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '=') {
- stdscan_bufptr += 2;
- return tv->t_type = TOKEN_GE;
- } else if (stdscan_bufptr[0] == '&' && stdscan_bufptr[1] == '&') {
- stdscan_bufptr += 2;
- return tv->t_type = TOKEN_DBL_AND;
- } else if (stdscan_bufptr[0] == '^' && stdscan_bufptr[1] == '^') {
- stdscan_bufptr += 2;
- return tv->t_type = TOKEN_DBL_XOR;
- } else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') {
- stdscan_bufptr += 2;
- return tv->t_type = TOKEN_DBL_OR;
- } else /* just an ordinary char */
- return tv->t_type = (uint8_t)(*stdscan_bufptr++);
-}
-
-/*
* Return TRUE if the argument is a simple scalar. (Or a far-
* absolute, which counts.)
*/