summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2007-10-19 13:10:46 -0700
committerH. Peter Anvin <hpa@zytor.com>2007-10-19 13:10:46 -0700
commit2ef4aac272ff0d2ac0bf630f11e6d8d3e19b27d5 (patch)
tree1fb0b1e1b0116950ba3836ea9a4fda9edba1f939
parenta8eace2b79b1068e54c4af93c41b6e58ba879b83 (diff)
downloadnasm-2ef4aac272ff0d2ac0bf630f11e6d8d3e19b27d5.tar.gz
nasm-2ef4aac272ff0d2ac0bf630f11e6d8d3e19b27d5.tar.bz2
nasm-2ef4aac272ff0d2ac0bf630f11e6d8d3e19b27d5.zip
Allow underscores in numbers; better detection of FP
- Allow underscores as group separators in numbers, for example: 0x1234_5678 is now a legal number. The underscore is just ignored; it adds no meaning.
- Recognize dotless floating-point numbers, such as "1e30". This entails distinguishing hexadecimal numbers in the scanner, since e.g. 0x1e30 is a perfectly legitimate hex constant.
-rw-r--r--float.c103
-rw-r--r--nasmlib.c25
-rw-r--r--stdscan.c60
3 files changed, 110 insertions, 78 deletions
diff --git a/float.c b/float.c
index ec37775..3aa6e5f 100644
--- a/float.c
+++ b/float.c
@@ -113,6 +113,53 @@ static int float_multiply(uint16_t * to, uint16_t * from)
/*
* ---------------------------------------------------------------------------
+ * parse a signed decimal exponent ('_' is skipped; stops once the magnitude exceeds max); returns INT32_MAX on error
+ * ---------------------------------------------------------------------------
+ */
+int32_t read_exponent(const char *string, int32_t max)
+{
+ int32_t i = 0;
+ bool neg = false;
+
+ if (*string == '+') {
+ string++;
+ } else if (*string == '-') {
+ neg = true;
+ string++;
+ }
+ while (*string) {
+ if (*string >= '0' && *string <= '9') {
+ i = (i * 10) + (*string - '0');
+
+ /*
+ * To ensure that underflows and overflows are
+ * handled properly we must avoid wraparounds of
+ * the signed integer value that is used to hold
+ * the exponent. Therefore we stop accumulating
+ * digits as soon as the magnitude exceeds the
+ * caller-supplied max; the result is then at most
+ * 10*max + 9, and any characters after that point
+ * are left unexamined (no error is reported).
+ */
+ if (i > max) {
+ break;
+ }
+ } else if (*string == '_') {
+ /* underscore is only a digit-group separator: skip it */
+ } else {
+ error(ERR_NONFATAL,
+ "invalid character in floating-point constant %s: '%c'",
+ "exponent", *string);
+ return INT32_MAX;
+ }
+ string++;
+ }
+
+ return neg ? -i : i;
+}
+
+/*
+ * ---------------------------------------------------------------------------
* convert
* ---------------------------------------------------------------------------
*/
@@ -161,7 +208,6 @@ static bool ieee_flconvert(const char *string, uint16_t * mant,
}
}
} else if (*string == '_') {
-
/* do nothing */
} else {
error(ERR_NONFATAL,
@@ -171,48 +217,15 @@ static bool ieee_flconvert(const char *string, uint16_t * mant,
}
string++;
}
+
if (*string) {
- int32_t i = 0;
- bool neg = false;
- string++; /* eat the E */
- if (*string == '+') {
- string++;
- } else if (*string == '-') {
- neg = true;
- string++;
- }
- while (*string) {
- if (*string >= '0' && *string <= '9') {
- i = (i * 10) + (*string - '0');
-
- /*
- * To ensure that underflows and overflows are
- * handled properly we must avoid wraparounds of
- * the signed integer value that is used to hold
- * the exponent. Therefore we cap the exponent at
- * +/-5000, which is slightly more/less than
- * what's required for normal and denormal numbers
- * in single, double, and extended precision, but
- * sufficient to avoid signed integer wraparound.
- */
- if (i > 5000) {
- break;
- }
- } else if (*string == '_') {
+ int32_t e;
- /* do nothing */
- } else {
- error(ERR_NONFATAL,
- "invalid character in floating-point constant %s: '%c'",
- "exponent", *string);
- return false;
- }
- string++;
- }
- if (neg) {
- i = 0 - i;
- }
- tenpwr += i;
+ string++; /* eat the E */
+ e = read_exponent(string, 5000);
+ if (e == INT32_MAX)
+ return false;
+ tenpwr += e;
}
/*
@@ -480,8 +493,14 @@ static bool ieee_flconvert_hex(const char *string, uint16_t * mant,
twopwr -= 4;
}
} else if (c == 'p' || c == 'P') {
- twopwr += atoi(string);
+ int32_t e;
+ e = read_exponent(string, 16384);
+ if (e == INT32_MAX)
+ return false;
+ twopwr += e;
break;
+ } else if (c == '_') {
+ /* ignore */
} else {
error(ERR_NONFATAL,
"floating-point constant: `%c' is invalid character", c);
diff --git a/nasmlib.c b/nasmlib.c
index 1951cff..7f7fdef 100644
--- a/nasmlib.c
+++ b/nasmlib.c
@@ -193,7 +193,7 @@ char *nasm_strsep(char **stringp, const char *delim)
#endif
-#define lib_isnumchar(c) ( isalnum(c) || (c) == '$')
+#define lib_isnumchar(c) (isalnum(c) || (c) == '$' || (c) == '_')
#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')
int64_t readnum(char *str, bool *error)
@@ -274,16 +274,19 @@ int64_t readnum(char *str, bool *error)
result = 0;
while (*r && r < q) {
- if (*r < '0' || (*r > '9' && *r < 'A')
- || (digit = numvalue(*r)) >= radix) {
- *error = true;
- return 0;
- }
- if (result > checklimit || (result == checklimit && digit >= last)) {
- warn = true;
- }
-
- result = radix * result + digit;
+ if (*r != '_') {
+ if (*r < '0' || (*r > '9' && *r < 'A')
+ || (digit = numvalue(*r)) >= radix) {
+ *error = true;
+ return 0;
+ }
+ if (result > checklimit ||
+ (result == checklimit && digit >= last)) {
+ warn = true;
+ }
+
+ result = radix * result + digit;
+ }
r++;
}
diff --git a/stdscan.c b/stdscan.c
index e06fb84..3979144 100644
--- a/stdscan.c
+++ b/stdscan.c
@@ -116,34 +116,44 @@ int stdscan(void *private_data, struct tokenval *tv)
return tv->t_type = TOKEN_HERE;
} else if (isnumstart(*stdscan_bufptr)) { /* now we've got a number */
bool rn_error;
+ bool is_hex = false;
+ bool is_float = false;
+ char c;
r = stdscan_bufptr++;
- while (isnumchar(*stdscan_bufptr))
- stdscan_bufptr++;
- if (*stdscan_bufptr == '.') {
- /*
- * a floating point constant
- */
- stdscan_bufptr++;
- while (isnumchar(*stdscan_bufptr) ||
- ((stdscan_bufptr[-1] == 'e'
- || stdscan_bufptr[-1] == 'E'
- || stdscan_bufptr[-1] == 'p'
- || stdscan_bufptr[-1] == 'P')
- && (*stdscan_bufptr == '-' || *stdscan_bufptr == '+'))) {
- stdscan_bufptr++;
- }
- tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r);
- return tv->t_type = TOKEN_FLOAT;
- }
- r = stdscan_copy(r, stdscan_bufptr - r);
- tv->t_integer = readnum(r, &rn_error);
- stdscan_pop();
- if (rn_error)
- return tv->t_type = TOKEN_ERRNUM; /* some malformation occurred */
- tv->t_charptr = NULL;
- return tv->t_type = TOKEN_NUM;
+ if (r[0] == '$' || (r[0] == '0' || (r[1] == 'x' || r[1] == 'X')))
+ is_hex = true;
+
+ for (;;) {
+ c = *stdscan_bufptr++;
+
+ if ((!is_hex && (c == 'e' || c == 'E')) ||
+ (c == 'P' || c == 'p')) {
+ is_float = true;
+ if (*stdscan_bufptr == '+' || *stdscan_bufptr == '-')
+ stdscan_bufptr++;
+ } else if (isnumchar(c) || c == '_')
+ ; /* just advance */
+ else if (c == '.')
+ is_float = true;
+ else
+ break;
+ }
+ stdscan_bufptr--; /* Point to first character beyond number */
+
+ if (is_float) {
+ tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r);
+ return tv->t_type = TOKEN_FLOAT;
+ } else {
+ r = stdscan_copy(r, stdscan_bufptr - r);
+ tv->t_integer = readnum(r, &rn_error);
+ stdscan_pop();
+ if (rn_error)
+ return tv->t_type = TOKEN_ERRNUM; /* some malformation occurred */
+ tv->t_charptr = NULL;
+ return tv->t_type = TOKEN_NUM;
+ }
} else if (*stdscan_bufptr == '\'' || *stdscan_bufptr == '"') { /* a char constant */
char quote = *stdscan_bufptr++, *r;
bool rn_warn;