diff options
author | H. Peter Anvin <hpa@zytor.com> | 2007-10-19 13:10:46 -0700 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2007-10-19 13:10:46 -0700 |
commit | 2ef4aac272ff0d2ac0bf630f11e6d8d3e19b27d5 (patch) | |
tree | 1fb0b1e1b0116950ba3836ea9a4fda9edba1f939 | |
parent | a8eace2b79b1068e54c4af93c41b6e58ba879b83 (diff) | |
download | nasm-2ef4aac272ff0d2ac0bf630f11e6d8d3e19b27d5.tar.gz nasm-2ef4aac272ff0d2ac0bf630f11e6d8d3e19b27d5.tar.bz2 nasm-2ef4aac272ff0d2ac0bf630f11e6d8d3e19b27d5.zip |
Allow underscores in numbers; better detection of FP
- Allow underscores as group separators in numbers, for example:
0x1234_5678 is now a legal number. The underscore is simply ignored;
it carries no meaning.
- Recognize dotless floating-point numbers, such as "1e30". This
entails distinguishing hexadecimal numbers in the scanner, since
e.g. 0x1e30 is a perfectly legitimate hex constant.
-rw-r--r-- | float.c | 103 | ||||
-rw-r--r-- | nasmlib.c | 25 | ||||
-rw-r--r-- | stdscan.c | 60 |
3 files changed, 110 insertions, 78 deletions
@@ -113,6 +113,53 @@ static int float_multiply(uint16_t * to, uint16_t * from) /* * --------------------------------------------------------------------------- + * read an exponent; returns INT32_MAX on error + * --------------------------------------------------------------------------- + */ +int32_t read_exponent(const char *string, int32_t max) +{ + int32_t i = 0; + bool neg = false; + + if (*string == '+') { + string++; + } else if (*string == '-') { + neg = true; + string++; + } + while (*string) { + if (*string >= '0' && *string <= '9') { + i = (i * 10) + (*string - '0'); + + /* + * To ensure that underflows and overflows are + * handled properly we must avoid wraparounds of + * the signed integer value that is used to hold + * the exponent. Therefore we cap the exponent at + * +/-5000, which is slightly more/less than + * what's required for normal and denormal numbers + * in single, double, and extended precision, but + * sufficient to avoid signed integer wraparound. + */ + if (i > max) { + break; + } + } else if (*string == '_') { + /* do nothing */ + } else { + error(ERR_NONFATAL, + "invalid character in floating-point constant %s: '%c'", + "exponent", *string); + return INT32_MAX; + } + string++; + } + + return neg ? 
-i : i; +} + +/* + * --------------------------------------------------------------------------- * convert * --------------------------------------------------------------------------- */ @@ -161,7 +208,6 @@ static bool ieee_flconvert(const char *string, uint16_t * mant, } } } else if (*string == '_') { - /* do nothing */ } else { error(ERR_NONFATAL, @@ -171,48 +217,15 @@ static bool ieee_flconvert(const char *string, uint16_t * mant, } string++; } + if (*string) { - int32_t i = 0; - bool neg = false; - string++; /* eat the E */ - if (*string == '+') { - string++; - } else if (*string == '-') { - neg = true; - string++; - } - while (*string) { - if (*string >= '0' && *string <= '9') { - i = (i * 10) + (*string - '0'); - - /* - * To ensure that underflows and overflows are - * handled properly we must avoid wraparounds of - * the signed integer value that is used to hold - * the exponent. Therefore we cap the exponent at - * +/-5000, which is slightly more/less than - * what's required for normal and denormal numbers - * in single, double, and extended precision, but - * sufficient to avoid signed integer wraparound. 
- */ - if (i > 5000) { - break; - } - } else if (*string == '_') { + int32_t e; - /* do nothing */ - } else { - error(ERR_NONFATAL, - "invalid character in floating-point constant %s: '%c'", - "exponent", *string); - return false; - } - string++; - } - if (neg) { - i = 0 - i; - } - tenpwr += i; + string++; /* eat the E */ + e = read_exponent(string, 5000); + if (e == INT32_MAX) + return false; + tenpwr += e; } /* @@ -480,8 +493,14 @@ static bool ieee_flconvert_hex(const char *string, uint16_t * mant, twopwr -= 4; } } else if (c == 'p' || c == 'P') { - twopwr += atoi(string); + int32_t e; + e = read_exponent(string, 16384); + if (e == INT32_MAX) + return false; + twopwr += e; break; + } else if (c == '_') { + /* ignore */ } else { error(ERR_NONFATAL, "floating-point constant: `%c' is invalid character", c); @@ -193,7 +193,7 @@ char *nasm_strsep(char **stringp, const char *delim) #endif -#define lib_isnumchar(c) ( isalnum(c) || (c) == '$') +#define lib_isnumchar(c) (isalnum(c) || (c) == '$' || (c) == '_') #define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? 
(c)-'A'+10 : (c)-'0') int64_t readnum(char *str, bool *error) @@ -274,16 +274,19 @@ int64_t readnum(char *str, bool *error) result = 0; while (*r && r < q) { - if (*r < '0' || (*r > '9' && *r < 'A') - || (digit = numvalue(*r)) >= radix) { - *error = true; - return 0; - } - if (result > checklimit || (result == checklimit && digit >= last)) { - warn = true; - } - - result = radix * result + digit; + if (*r != '_') { + if (*r < '0' || (*r > '9' && *r < 'A') + || (digit = numvalue(*r)) >= radix) { + *error = true; + return 0; + } + if (result > checklimit || + (result == checklimit && digit >= last)) { + warn = true; + } + + result = radix * result + digit; + } r++; } @@ -116,34 +116,44 @@ int stdscan(void *private_data, struct tokenval *tv) return tv->t_type = TOKEN_HERE; } else if (isnumstart(*stdscan_bufptr)) { /* now we've got a number */ bool rn_error; + bool is_hex = false; + bool is_float = false; + char c; r = stdscan_bufptr++; - while (isnumchar(*stdscan_bufptr)) - stdscan_bufptr++; - if (*stdscan_bufptr == '.') { - /* - * a floating point constant - */ - stdscan_bufptr++; - while (isnumchar(*stdscan_bufptr) || - ((stdscan_bufptr[-1] == 'e' - || stdscan_bufptr[-1] == 'E' - || stdscan_bufptr[-1] == 'p' - || stdscan_bufptr[-1] == 'P') - && (*stdscan_bufptr == '-' || *stdscan_bufptr == '+'))) { - stdscan_bufptr++; - } - tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r); - return tv->t_type = TOKEN_FLOAT; - } - r = stdscan_copy(r, stdscan_bufptr - r); - tv->t_integer = readnum(r, &rn_error); - stdscan_pop(); - if (rn_error) - return tv->t_type = TOKEN_ERRNUM; /* some malformation occurred */ - tv->t_charptr = NULL; - return tv->t_type = TOKEN_NUM; + if (r[0] == '$' || (r[0] == '0' || (r[1] == 'x' || r[1] == 'X'))) + is_hex = true; + + for (;;) { + c = *stdscan_bufptr++; + + if ((!is_hex && (c == 'e' || c == 'E')) || + (c == 'P' || c == 'p')) { + is_float = true; + if (*stdscan_bufptr == '+' || *stdscan_bufptr == '-') + stdscan_bufptr++; + } else if 
(isnumchar(c) || c == '_') + ; /* just advance */ + else if (c == '.') + is_float = true; + else + break; + } + stdscan_bufptr--; /* Point to first character beyond number */ + + if (is_float) { + tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r); + return tv->t_type = TOKEN_FLOAT; + } else { + r = stdscan_copy(r, stdscan_bufptr - r); + tv->t_integer = readnum(r, &rn_error); + stdscan_pop(); + if (rn_error) + return tv->t_type = TOKEN_ERRNUM; /* some malformation occurred */ + tv->t_charptr = NULL; + return tv->t_type = TOKEN_NUM; + } } else if (*stdscan_bufptr == '\'' || *stdscan_bufptr == '"') { /* a char constant */ char quote = *stdscan_bufptr++, *r; bool rn_warn; |