diff options
Diffstat (limited to 'src/llex.c')
-rw-r--r-- | src/llex.c | 461 |
1 files changed, 461 insertions, 0 deletions
diff --git a/src/llex.c b/src/llex.c new file mode 100644 index 0000000..6dc3193 --- /dev/null +++ b/src/llex.c @@ -0,0 +1,461 @@ +/* +** $Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp $ +** Lexical Analyzer +** See Copyright Notice in lua.h +*/ + + +#include <ctype.h> +#include <locale.h> +#include <string.h> + +#define llex_c +#define LUA_CORE + +#include "lua.h" + +#include "ldo.h" +#include "llex.h" +#include "lobject.h" +#include "lparser.h" +#include "lstate.h" +#include "lstring.h" +#include "ltable.h" +#include "lzio.h" + + + +#define next(ls) (ls->current = zgetc(ls->z)) + + + + +#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') + + +/* ORDER RESERVED */ +const char *const luaX_tokens [] = { + "and", "break", "do", "else", "elseif", + "end", "false", "for", "function", "if", + "in", "local", "nil", "not", "or", "repeat", + "return", "then", "true", "until", "while", + "..", "...", "==", ">=", "<=", "~=", + "<number>", "<name>", "<string>", "<eof>", + NULL +}; + + +#define save_and_next(ls) (save(ls, ls->current), next(ls)) + + +static void save (LexState *ls, int c) { + Mbuffer *b = ls->buff; + if (b->n + 1 > b->buffsize) { + size_t newsize; + if (b->buffsize >= MAX_SIZET/2) + luaX_lexerror(ls, "lexical element too long", 0); + newsize = b->buffsize * 2; + luaZ_resizebuffer(ls->L, b, newsize); + } + b->buffer[b->n++] = cast(char, c); +} + + +void luaX_init (lua_State *L) { + int i; + for (i=0; i<NUM_RESERVED; i++) { + TString *ts = luaS_new(L, luaX_tokens[i]); + luaS_fix(ts); /* reserved words are never collected */ + lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN); + ts->tsv.reserved = cast_byte(i+1); /* reserved word */ + } +} + + +#define MAXSRC 80 + + +const char *luaX_token2str (LexState *ls, int token) { + if (token < FIRST_RESERVED) { + lua_assert(token == cast(unsigned char, token)); + return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) : + luaO_pushfstring(ls->L, "%c", token); + } + else + return luaX_tokens[token-FIRST_RESERVED]; +} + + +static const char *txtToken (LexState *ls, int token) { + switch (token) { + case TK_NAME: + case TK_STRING: + case TK_NUMBER: + save(ls, '\0'); + return luaZ_buffer(ls->buff); + default: + return luaX_token2str(ls, token); + } +} + + +void luaX_lexerror (LexState *ls, const char *msg, int token) { + char buff[MAXSRC]; + luaO_chunkid(buff, getstr(ls->source), MAXSRC); + msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg); + if (token) + luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token)); + luaD_throw(ls->L, LUA_ERRSYNTAX); +} + + +void luaX_syntaxerror (LexState *ls, const char *msg) { + luaX_lexerror(ls, msg, ls->t.token); +} + + +TString *luaX_newstring (LexState *ls, const char *str, size_t l) { + lua_State *L = ls->L; + TString *ts = luaS_newlstr(L, str, l); + TValue *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */ + if (ttisnil(o)) + setbvalue(o, 1); /* make sure `str' will not be collected */ + return ts; +} + + +static void inclinenumber (LexState *ls) { + int old = ls->current; + lua_assert(currIsNewline(ls)); + next(ls); /* skip `\n' or `\r' */ + if (currIsNewline(ls) && ls->current != old) + next(ls); /* skip `\n\r' or `\r\n' */ + if (++ls->linenumber >= MAX_INT) + luaX_syntaxerror(ls, "chunk has too many lines"); +} + + +void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) { + ls->decpoint = '.'; + ls->L = L; + ls->lookahead.token = TK_EOS; /* no look-ahead token */ + ls->z = z; + ls->fs = NULL; + ls->linenumber = 1; + ls->lastline = 1; + ls->source = source; + luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ + next(ls); /* read first char */ +} + + + +/* +** ======================================================= +** LEXICAL ANALYZER +** ======================================================= +*/ + + + +static int check_next (LexState *ls, const char *set) { + if (!strchr(set, ls->current)) + return 0; + save_and_next(ls); + return 1; +} + + +static void buffreplace (LexState *ls, char from, char to) { + size_t n = luaZ_bufflen(ls->buff); + char *p = luaZ_buffer(ls->buff); + while (n--) + if (p[n] == from) p[n] = to; +} + + +static void trydecpoint (LexState *ls, SemInfo *seminfo) { + /* format error: try to update decimal point separator */ + struct lconv *cv = localeconv(); + char old = ls->decpoint; + ls->decpoint = (cv ? cv->decimal_point[0] : '.'); + buffreplace(ls, old, ls->decpoint); /* try updated decimal separator */ + if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) { + /* format error with correct decimal point: no more options */ + buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ + luaX_lexerror(ls, "malformed number", TK_NUMBER); + } +} + + +/* LUA_NUMBER */ +static void read_numeral (LexState *ls, SemInfo *seminfo) { + lua_assert(isdigit(ls->current)); + do { + save_and_next(ls); + } while (isdigit(ls->current) || ls->current == '.'); + if (check_next(ls, "Ee")) /* `E'? */ + check_next(ls, "+-"); /* optional exponent sign */ + while (isalnum(ls->current) || ls->current == '_') + save_and_next(ls); + save(ls, '\0'); + buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */ + if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) /* format error? */ + trydecpoint(ls, seminfo); /* try to update decimal point separator */ +} + + +static int skip_sep (LexState *ls) { + int count = 0; + int s = ls->current; + lua_assert(s == '[' || s == ']'); + save_and_next(ls); + while (ls->current == '=') { + save_and_next(ls); + count++; + } + return (ls->current == s) ? count : (-count) - 1; +} + + +static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { + int cont = 0; + (void)(cont); /* avoid warnings when `cont' is not used */ + save_and_next(ls); /* skip 2nd `[' */ + if (currIsNewline(ls)) /* string starts with a newline? */ + inclinenumber(ls); /* skip it */ + for (;;) { + switch (ls->current) { + case EOZ: + luaX_lexerror(ls, (seminfo) ? "unfinished long string" : + "unfinished long comment", TK_EOS); + break; /* to avoid warnings */ +#if defined(LUA_COMPAT_LSTR) + case '[': { + if (skip_sep(ls) == sep) { + save_and_next(ls); /* skip 2nd `[' */ + cont++; +#if LUA_COMPAT_LSTR == 1 + if (sep == 0) + luaX_lexerror(ls, "nesting of [[...]] is deprecated", '['); +#endif + } + break; + } +#endif + case ']': { + if (skip_sep(ls) == sep) { + save_and_next(ls); /* skip 2nd `]' */ +#if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2 + cont--; + if (sep == 0 && cont >= 0) break; +#endif + goto endloop; + } + break; + } + case '\n': + case '\r': { + save(ls, '\n'); + inclinenumber(ls); + if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ + break; + } + default: { + if (seminfo) save_and_next(ls); + else next(ls); + } + } + } endloop: + if (seminfo) + seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), + luaZ_bufflen(ls->buff) - 2*(2 + sep)); +} + + +static void read_string (LexState *ls, int del, SemInfo *seminfo) { + save_and_next(ls); + while (ls->current != del) { + switch (ls->current) { + case EOZ: + luaX_lexerror(ls, "unfinished string", TK_EOS); + continue; /* to avoid warnings */ + case '\n': + case '\r': + luaX_lexerror(ls, "unfinished string", TK_STRING); + continue; /* to avoid warnings */ + case '\\': { + int c; + next(ls); /* do not save the `\' */ + switch (ls->current) { + case 'a': c = '\a'; break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + case '\n': /* go through */ + case '\r': save(ls, '\n'); inclinenumber(ls); continue; + case EOZ: continue; /* will raise an error next loop */ + default: { + if (!isdigit(ls->current)) + save_and_next(ls); /* handles \\, \", \', and \? */ + else { /* \xxx */ + int i = 0; + c = 0; + do { + c = 10*c + (ls->current-'0'); + next(ls); + } while (++i<3 && isdigit(ls->current)); + if (c > UCHAR_MAX) + luaX_lexerror(ls, "escape sequence too large", TK_STRING); + save(ls, c); + } + continue; + } + } + save(ls, c); + next(ls); + continue; + } + default: + save_and_next(ls); + } + } + save_and_next(ls); /* skip delimiter */ + seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, + luaZ_bufflen(ls->buff) - 2); +} + + +static int llex (LexState *ls, SemInfo *seminfo) { + luaZ_resetbuffer(ls->buff); + for (;;) { + switch (ls->current) { + case '\n': + case '\r': { + inclinenumber(ls); + continue; + } + case '-': { + next(ls); + if (ls->current != '-') return '-'; + /* else is a comment */ + next(ls); + if (ls->current == '[') { + int sep = skip_sep(ls); + luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ + if (sep >= 0) { + read_long_string(ls, NULL, sep); /* long comment */ + luaZ_resetbuffer(ls->buff); + continue; + } + } + /* else short comment */ + while (!currIsNewline(ls) && ls->current != EOZ) + next(ls); + continue; + } + case '[': { + int sep = skip_sep(ls); + if (sep >= 0) { + read_long_string(ls, seminfo, sep); + return TK_STRING; + } + else if (sep == -1) return '['; + else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING); + } + case '=': { + next(ls); + if (ls->current != '=') return '='; + else { next(ls); return TK_EQ; } + } + case '<': { + next(ls); + if (ls->current != '=') return '<'; + else { next(ls); return TK_LE; } + } + case '>': { + next(ls); + if (ls->current != '=') return '>'; + else { next(ls); return TK_GE; } + } + case '~': { + next(ls); + if (ls->current != '=') return '~'; + else { next(ls); return TK_NE; } + } + case '"': + case '\'': { + read_string(ls, ls->current, seminfo); + return TK_STRING; + } + case '.': { + save_and_next(ls); + if (check_next(ls, ".")) { + if (check_next(ls, ".")) + return TK_DOTS; /* ... */ + else return TK_CONCAT; /* .. */ + } + else if (!isdigit(ls->current)) return '.'; + else { + read_numeral(ls, seminfo); + return TK_NUMBER; + } + } + case EOZ: { + return TK_EOS; + } + default: { + if (isspace(ls->current)) { + lua_assert(!currIsNewline(ls)); + next(ls); + continue; + } + else if (isdigit(ls->current)) { + read_numeral(ls, seminfo); + return TK_NUMBER; + } + else if (isalpha(ls->current) || ls->current == '_') { + /* identifier or reserved word */ + TString *ts; + do { + save_and_next(ls); + } while (isalnum(ls->current) || ls->current == '_'); + ts = luaX_newstring(ls, luaZ_buffer(ls->buff), + luaZ_bufflen(ls->buff)); + if (ts->tsv.reserved > 0) /* reserved word? */ + return ts->tsv.reserved - 1 + FIRST_RESERVED; + else { + seminfo->ts = ts; + return TK_NAME; + } + } + else { + int c = ls->current; + next(ls); + return c; /* single-char tokens (+ - / ...) */ + } + } + } + } +} + + +void luaX_next (LexState *ls) { + ls->lastline = ls->linenumber; + if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */ + ls->t = ls->lookahead; /* use this one */ + ls->lookahead.token = TK_EOS; /* and discharge it */ + } + else + ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ +} + + +void luaX_lookahead (LexState *ls) { + lua_assert(ls->lookahead.token == TK_EOS); + ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); +} + |