Finishing touches on perfect hash tokenizer; actually turn the thing on

Finish the perfect hash tokenizer, and actually enable it. Move stdscan() et al. to a separate file, since they are not needed by any client of nasmlib other than nasm itself. Run make alldeps.
author: H. Peter Anvin <hpa@zytor.com> 2007-08-30 22:35:34 +0000
committer: H. Peter Anvin <hpa@zytor.com> 2007-08-30 22:35:34 +0000
commit: 74cc5e569c1c8bcdd886734e1ce4c2df741b5b07 (patch)
tree: 65fdefada17611dfd77275edc131c2d2d79d6b10 /nasmlib.c
parent: b1dabe44acca3ff927c024bb7875605a5439346c (diff)
download: nasm-74cc5e569c1c8bcdd886734e1ce4c2df741b5b07.tar.gz
nasm-74cc5e569c1c8bcdd886734e1ce4c2df741b5b07.tar.bz2
nasm-74cc5e569c1c8bcdd886734e1ce4c2df741b5b07.zip
1 file changed, 1 insertion, 216 deletions
diff --git a/nasmlib.c b/nasmlib.c
index 64d85b9..b171c6f 100644
--- a/nasmlib.c
+++ b/nasmlib.c
@@ -14,7 +14,7 @@
 
 #include "nasm.h"
 #include "nasmlib.h"
-#include "insns.h"              /* For MAX_KEYWORD */
+#include "insns.h"
 
 int globalbits = 0;    /* defined in nasm.h, works better here for ASM+DISASM */
 static efunc nasm_malloc_error;
@@ -709,221 +709,6 @@ const char *prefix_name(int token)
 }
 
 /*
- * Standard scanner routine used by parser.c and some output
- * formats. It keeps a succession of temporary-storage strings in
- * stdscan_tempstorage, which can be cleared using stdscan_reset.
- */
-static char **stdscan_tempstorage = NULL;
-static int stdscan_tempsize = 0, stdscan_templen = 0;
-#define STDSCAN_TEMP_DELTA 256
-
-static void stdscan_pop(void)
-{
-    nasm_free(stdscan_tempstorage[--stdscan_templen]);
-}
-
-void stdscan_reset(void)
-{
-    while (stdscan_templen > 0)
-        stdscan_pop();
-}
-
-/*
- * Unimportant cleanup is done to avoid confusing people who are trying
- * to debug real memory leaks
- */
-void nasmlib_cleanup(void)
-{
-    stdscan_reset();
-    nasm_free(stdscan_tempstorage);
-}
-
-static char *stdscan_copy(char *p, int len)
-{
-    char *text;
-
-    text = nasm_malloc(len + 1);
-    strncpy(text, p, len);
-    text[len] = '\0';
-
-    if (stdscan_templen >= stdscan_tempsize) {
-        stdscan_tempsize += STDSCAN_TEMP_DELTA;
-        stdscan_tempstorage = nasm_realloc(stdscan_tempstorage,
-                                           stdscan_tempsize *
-                                           sizeof(char *));
-    }
-    stdscan_tempstorage[stdscan_templen++] = text;
-
-    return text;
-}
-
-char *stdscan_bufptr = NULL;
-int stdscan(void *private_data, struct tokenval *tv)
-{
-    char ourcopy[MAX_KEYWORD + 1], *r, *s;
-
-    (void)private_data;         /* Don't warn that this parameter is unused */
-
-    while (isspace(*stdscan_bufptr))
-        stdscan_bufptr++;
-    if (!*stdscan_bufptr)
-        return tv->t_type = 0;
-
-    /* we have a token; either an id, a number or a char */
-    if (isidstart(*stdscan_bufptr) ||
-        (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) {
-        /* now we've got an identifier */
-        uint32_t i;
-        int is_sym = FALSE;
-
-        if (*stdscan_bufptr == '$') {
-            is_sym = TRUE;
-            stdscan_bufptr++;
-        }
-
-        r = stdscan_bufptr++;
-        /* read the entire buffer to advance the buffer pointer but... */
-        while (isidchar(*stdscan_bufptr))
-            stdscan_bufptr++;
-
-        /* ... copy only up to IDLEN_MAX-1 characters */
-        tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r < IDLEN_MAX ?
-                                     stdscan_bufptr - r : IDLEN_MAX - 1);
-
-        if (is_sym || stdscan_bufptr - r > MAX_KEYWORD)
-            return tv->t_type = TOKEN_ID;       /* bypass all other checks */
-
-        for (s = tv->t_charptr, r = ourcopy; *s; s++)
-            *r++ = tolower(*s);
-        *r = '\0';
-        /* right, so we have an identifier sitting in temp storage. now,
-         * is it actually a register or instruction name, or what? */
-        if ((tv->t_integer = bsi(ourcopy, reg_names,
-                                 elements(reg_names))) >= 0) {
-            tv->t_integer += EXPR_REG_START;
-            return tv->t_type = TOKEN_REG;
-        } else if ((tv->t_integer = bsi(ourcopy, insn_names,
-                                        elements(insn_names))) >= 0) {
-            return tv->t_type = TOKEN_INSN;
-        }
-        for (i = 0; i < elements(icn); i++)
-            if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) {
-                char *p = ourcopy + strlen(icn[i]);
-                tv->t_integer = ico[i];
-                if ((tv->t_inttwo = bsi(p, conditions,
-                                        elements(conditions))) >= 0)
-                    return tv->t_type = TOKEN_INSN;
-            }
-        if ((tv->t_integer = bsi(ourcopy, prefix_names,
-                                 elements(prefix_names))) >= 0) {
-            tv->t_integer += PREFIX_ENUM_START;
-            return tv->t_type = TOKEN_PREFIX;
-        }
-        if ((tv->t_integer = bsi(ourcopy, special_names,
-                                 elements(special_names))) >= 0)
-            return tv->t_type = TOKEN_SPECIAL;
-        if (!nasm_stricmp(ourcopy, "seg"))
-            return tv->t_type = TOKEN_SEG;
-        if (!nasm_stricmp(ourcopy, "wrt"))
-            return tv->t_type = TOKEN_WRT;
-        return tv->t_type = TOKEN_ID;
-    } else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) {
-        /*
-         * It's a $ sign with no following hex number; this must
-         * mean it's a Here token ($), evaluating to the current
-         * assembly location, or a Base token ($$), evaluating to
-         * the base of the current segment.
-         */
-        stdscan_bufptr++;
-        if (*stdscan_bufptr == '$') {
-            stdscan_bufptr++;
-            return tv->t_type = TOKEN_BASE;
-        }
-        return tv->t_type = TOKEN_HERE;
-    } else if (isnumstart(*stdscan_bufptr)) {   /* now we've got a number */
-        int rn_error;
-
-        r = stdscan_bufptr++;
-        while (isnumchar(*stdscan_bufptr))
-            stdscan_bufptr++;
-
-        if (*stdscan_bufptr == '.') {
-            /*
-             * a floating point constant
-             */
-            stdscan_bufptr++;
-            while (isnumchar(*stdscan_bufptr) ||
-                   ((stdscan_bufptr[-1] == 'e'
-                     || stdscan_bufptr[-1] == 'E')
-                    && (*stdscan_bufptr == '-' || *stdscan_bufptr == '+'))) {
-                stdscan_bufptr++;
-            }
-            tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r);
-            return tv->t_type = TOKEN_FLOAT;
-        }
-        r = stdscan_copy(r, stdscan_bufptr - r);
-        tv->t_integer = readnum(r, &rn_error);
-        stdscan_pop();
-        if (rn_error)
-            return tv->t_type = TOKEN_ERRNUM;   /* some malformation occurred */
-        tv->t_charptr = NULL;
-        return tv->t_type = TOKEN_NUM;
-    } else if (*stdscan_bufptr == '\'' || *stdscan_bufptr == '"') {     /* a char constant */
-        char quote = *stdscan_bufptr++, *r;
-        int rn_warn;
-        r = tv->t_charptr = stdscan_bufptr;
-        while (*stdscan_bufptr && *stdscan_bufptr != quote)
-            stdscan_bufptr++;
-        tv->t_inttwo = stdscan_bufptr - r;      /* store full version */
-        if (!*stdscan_bufptr)
-            return tv->t_type = TOKEN_ERRNUM;   /* unmatched quotes */
-        stdscan_bufptr++;       /* skip over final quote */
-        tv->t_integer = readstrnum(r, tv->t_inttwo, &rn_warn);
-        /* FIXME: rn_warn is not checked! */
-        return tv->t_type = TOKEN_NUM;
-    } else if (*stdscan_bufptr == ';') {        /* a comment has happened - stay */
-        return tv->t_type = 0;
-    } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '>') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_SHR;
-    } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '<') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_SHL;
-    } else if (stdscan_bufptr[0] == '/' && stdscan_bufptr[1] == '/') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_SDIV;
-    } else if (stdscan_bufptr[0] == '%' && stdscan_bufptr[1] == '%') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_SMOD;
-    } else if (stdscan_bufptr[0] == '=' && stdscan_bufptr[1] == '=') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_EQ;
-    } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '>') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_NE;
-    } else if (stdscan_bufptr[0] == '!' && stdscan_bufptr[1] == '=') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_NE;
-    } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '=') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_LE;
-    } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '=') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_GE;
-    } else if (stdscan_bufptr[0] == '&' && stdscan_bufptr[1] == '&') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_DBL_AND;
-    } else if (stdscan_bufptr[0] == '^' && stdscan_bufptr[1] == '^') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_DBL_XOR;
-    } else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') {
-        stdscan_bufptr += 2;
-        return tv->t_type = TOKEN_DBL_OR;
-    } else                      /* just an ordinary char */
-        return tv->t_type = (uint8_t)(*stdscan_bufptr++);
-}
-
-/*
  * Return TRUE if the argument is a simple scalar. (Or a far-
  * absolute, which counts.)
  */
author	H. Peter Anvin <hpa@zytor.com>	2007-08-30 22:35:34 +0000
committer	H. Peter Anvin <hpa@zytor.com>	2007-08-30 22:35:34 +0000
commit	74cc5e569c1c8bcdd886734e1ce4c2df741b5b07 (patch)
tree	65fdefada17611dfd77275edc131c2d2d79d6b10 /nasmlib.c
parent	b1dabe44acca3ff927c024bb7875605a5439346c (diff)
download	nasm-74cc5e569c1c8bcdd886734e1ce4c2df741b5b07.tar.gz nasm-74cc5e569c1c8bcdd886734e1ce4c2df741b5b07.tar.bz2 nasm-74cc5e569c1c8bcdd886734e1ce4c2df741b5b07.zip