diff options
author | H. Peter Anvin <hpa@zytor.com> | 2007-10-29 20:20:12 -0700 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2007-10-29 20:20:12 -0700 |
commit | 2ce0274303000c95f5f503f1fb0db86fd108b06e (patch) | |
tree | ac275b173a622172cac59dec5c1c374463cb9d2b | |
parent | 052c0bd4843e9222711bec40093ce294d0e32540 (diff) | |
download | nasm-2ce0274303000c95f5f503f1fb0db86fd108b06e.tar.gz nasm-2ce0274303000c95f5f503f1fb0db86fd108b06e.tar.bz2 nasm-2ce0274303000c95f5f503f1fb0db86fd108b06e.zip |
Use a 32-bit floating-point limb size; support 8-bit float
Use a 32-bit limb size ("like a digit, but bigger") for floating-point
conversion. This cuts the number of multiplications per constant by a
factor of four.
This means supporting fractional-limb-sized numbers, so while we're at
it, add support for 8-bit floating point numbers (apparently used in
graphics and in audio compression applications.)
-rw-r--r-- | doc/nasmdoc.src | 25 | ||||
-rw-r--r-- | eval.c | 1 | ||||
-rw-r--r-- | float.c | 344 | ||||
-rw-r--r-- | nasm.h | 1 | ||||
-rw-r--r-- | parser.c | 5 | ||||
-rw-r--r-- | test/float.asm | 37 | ||||
-rw-r--r-- | test/floatexp.asm | 26 | ||||
-rw-r--r-- | tokens.dat | 1 |
8 files changed, 275 insertions, 165 deletions
diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src index 02cd8af..2cfa005 100644 --- a/doc/nasmdoc.src +++ b/doc/nasmdoc.src @@ -1164,7 +1164,6 @@ file. They can be invoked in a wide range of ways: \c dt 1.234567e20 ; extended-precision float \c{DT} and \c{DO} do not accept \i{numeric constants} as operands. -\c{DB} does not accept \i{floating-point} numbers as operands. \S{resb} \c{RESB} and friends: Declaring \i{Uninitialized} Data @@ -1410,10 +1409,11 @@ when they are operands to \c{dw}. \S{fltconst} \I{floating-point, constants}Floating-Point Constants \i{Floating-point} constants are acceptable only as arguments to -\i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT}, and \i\c{DO}, or as arguments -to the special operators \i\c{__float16__}, \i\c{__float32__}, -\i\c{__float64__}, \i\c{__float80m__}, \i\c{__float80e__}, -\i\c{__float128l__}, and \i\c{__float128h__}. +\i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT}, and \i\c{DO}, or as +arguments to the special operators \i\c{__float8__}, +\i\c{__float16__}, \i\c{__float32__}, \i\c{__float64__}, +\i\c{__float80m__}, \i\c{__float80e__}, \i\c{__float128l__}, and +\i\c{__float128h__}. Floating-point constants are expressed in the traditional form: digits, then a period, then optionally more digits, then optionally an @@ -1427,14 +1427,21 @@ notation. Some examples: -\c dw -0.5 ; IEEE half precision +\c db -0.2 ; "Quarter precision" +\c dw -0.5 ; IEEE 754r/SSE5 half precision \c dd 1.2 ; an easy one -\c dd 0x1p+2 ; 1.0x2^2 = 4.0 +\c dd 0x1p+2 ; 1.0x2^2 = 4.0 \c dq 1.e10 ; 10,000,000,000 \c dq 1.e+10 ; synonymous with 1.e10 \c dq 1.e-10 ; 0.000 000 000 1 \c dt 3.141592653589793238462 ; pi -\c do 1.e+4000 ; IEEE quad precision +\c do 1.e+4000 ; IEEE 754r quad precision + +The 8-bit "quarter-precision" floating-point format is +sign:exponent:mantissa = 1:4:3 with an exponent bias of 7. This +appears to be the most frequently used 8-bit floating-point format, +although it is not covered by any formal standard. This is sometimes +called a "\i{minifloat}." The special operators are used to produce floating-point numbers in other contexts. They produce the binary representation of a specific @@ -1452,7 +1459,7 @@ For example: ... would assign the binary representation of pi as a 64-bit floating point number into \c{RAX}. This is exactly equivalent to: -\c mov rax,0x401921fb54442d18 +\c mov rax,0x400921fb54442d18 NASM cannot do compile-time arithmetic on floating-point constants. This is because NASM is designed to be portable - although it always @@ -612,6 +612,7 @@ static expr *eval_floatize(enum floatize type) static const struct { int bytes, start, len; } formats[] = { + { 1, 0, 1 }, /* FLOAT_8 */ { 2, 0, 2 }, /* FLOAT_16 */ { 4, 0, 4 }, /* FLOAT_32 */ { 8, 0, 8 }, /* FLOAT_64 */ @@ -34,19 +34,38 @@ static enum float_round rc = FLOAT_RC_NEAR; /* rounding control */ * ----------- */ +/* "A limb is like a digit but bigger */ +typedef uint32_t fp_limb; +typedef uint64_t fp_2limb; + +#define LIMB_BITS 32 +#define LIMB_BYTES (LIMB_BITS/8) +#define LIMB_TOP_BIT ((fp_limb)1 << (LIMB_BITS-1)) +#define LIMB_MASK ((fp_limb)(~0)) +#define LIMB_ALL_BYTES ((fp_limb)0x01010101) +#define LIMB_BYTE(x) ((x)*LIMB_ALL_BYTES) + +#if defined(__i386__) || defined(__x86_64__) +#define put(a,b) (*(uint32_t *)(a) = (b)) +#else +#define put(a,b) (((a)[0] = (b)), \ + ((a)[1] = (b) >> 8), \ + ((a)[2] = (b) >> 16), \ + ((a)[3] = (b) >> 24)) +#endif + /* 112 bits + 64 bits for accuracy + 16 bits for rounding */ -#define MANT_WORDS 12 +#define MANT_LIMBS 6 /* 52 digits fit in 176 bits because 10^53 > 2^176 > 10^52 */ #define MANT_DIGITS 52 -/* the format and the argument list depend on MANT_WORDS */ -#define MANT_FMT "%04x%04x_%04x%04x_%04x%04x_%04x%04x_%04x%04x_%04x%04x" +/* the format and the argument list depend on MANT_LIMBS */ +#define MANT_FMT "%08x_%08x_%08x_%08x_%08x_%08x" #define MANT_ARG SOME_ARG(mant, 0) #define SOME_ARG(a,i) (a)[(i)+0], (a)[(i)+1], (a)[(i)+2], (a)[(i)+3], \ - (a)[(i)+4], (a)[(i)+5], (a)[(i)+6], (a)[(i)+7], (a)[(i)+8], \ - (a)[(i)+9], (a)[(i)+10], (a)[(i)+11] + (a)[(i)+4], (a)[(i)+5] /* * --------------------------------------------------------------------------- @@ -65,10 +84,10 @@ static enum float_round rc = FLOAT_RC_NEAR; /* rounding control */ * multiply * --------------------------------------------------------------------------- */ -static int float_multiply(uint16_t * to, uint16_t * from) +static int float_multiply(fp_limb *to, fp_limb *from) { - uint32_t temp[MANT_WORDS * 2]; - int32_t i, j; + fp_2limb temp[MANT_LIMBS * 2]; + int i, j; /* * guaranteed that top bit of 'from' is set -- so we only have @@ -79,32 +98,32 @@ static int float_multiply(uint16_t * to, uint16_t * from) memset(temp, 0, sizeof temp); - for (i = 0; i < MANT_WORDS; i++) { - for (j = 0; j < MANT_WORDS; j++) { - uint32_t n; - n = (uint32_t) to[i] * (uint32_t) from[j]; - temp[i + j] += n >> 16; - temp[i + j + 1] += n & 0xFFFF; + for (i = 0; i < MANT_LIMBS; i++) { + for (j = 0; j < MANT_LIMBS; j++) { + fp_2limb n; + n = (fp_2limb) to[i] * (fp_2limb) from[j]; + temp[i + j] += n >> LIMB_BITS; + temp[i + j + 1] += (fp_limb)n; } } - for (i = MANT_WORDS * 2; --i;) { - temp[i - 1] += temp[i] >> 16; - temp[i] &= 0xFFFF; + for (i = MANT_LIMBS * 2; --i;) { + temp[i - 1] += temp[i] >> LIMB_BITS; + temp[i] &= LIMB_MASK; } dprintf(("%s=" MANT_FMT "_" MANT_FMT "\n", "temp", SOME_ARG(temp, 0), - SOME_ARG(temp, MANT_WORDS))); + SOME_ARG(temp, MANT_LIMBS))); - if (temp[0] & 0x8000) { - for (i = 0; i < MANT_WORDS; i++) { - to[i] = temp[i] & 0xFFFF; + if (temp[0] & LIMB_TOP_BIT) { + for (i = 0; i < MANT_LIMBS; i++) { + to[i] = temp[i] & LIMB_MASK; } dprintf(("%s=" MANT_FMT " (%i)\n", "prod", SOME_ARG(to, 0), 0)); return 0; } else { - for (i = 0; i < MANT_WORDS; i++) { - to[i] = (temp[i] << 1) + !!(temp[i + 1] & 0x8000); + for (i = 0; i < MANT_LIMBS; i++) { + to[i] = (temp[i] << 1) + !!(temp[i + 1] & LIMB_TOP_BIT); } dprintf(("%s=" MANT_FMT " (%i)\n", "prod", SOME_ARG(to, 0), -1)); return -1; @@ -162,13 +181,13 @@ static int32_t read_exponent(const char *string, int32_t max) * convert * --------------------------------------------------------------------------- */ -static bool ieee_flconvert(const char *string, uint16_t * mant, +static bool ieee_flconvert(const char *string, fp_limb *mant, int32_t * exponent) { char digits[MANT_DIGITS]; char *p, *q, *r; - uint16_t mult[MANT_WORDS], bit; - uint16_t *m; + fp_limb mult[MANT_LIMBS], bit; + fp_limb *m; int32_t tenpwr, twopwr; int32_t extratwos; bool started, seendot, warned; @@ -243,16 +262,16 @@ static bool ieee_flconvert(const char *string, uint16_t * mant, /* * Now convert [digits,p) to our internal representation. */ - bit = 0x8000; - for (m = mant; m < mant + MANT_WORDS; m++) { + bit = LIMB_TOP_BIT; + for (m = mant; m < mant + MANT_LIMBS; m++) { *m = 0; } m = mant; q = digits; started = false; twopwr = 0; - while (m < mant + MANT_WORDS) { - uint16_t carry = 0; + while (m < mant + MANT_LIMBS) { + fp_limb carry = 0; while (p > q && !p[-1]) { p--; } @@ -276,7 +295,7 @@ static bool ieee_flconvert(const char *string, uint16_t * mant, } if (started) { if (bit == 1) { - bit = 0x8000; + bit = LIMB_TOP_BIT; m++; } else { bit >>= 1; @@ -299,10 +318,10 @@ static bool ieee_flconvert(const char *string, uint16_t * mant, * Now multiply 'mant' by 5^tenpwr. */ if (tenpwr < 0) { /* mult = 5^-1 = 0.2 */ - for (m = mult; m < mult + MANT_WORDS - 1; m++) { - *m = 0xCCCC; + for (m = mult; m < mult + MANT_LIMBS - 1; m++) { + *m = LIMB_BYTE(0xcc); } - mult[MANT_WORDS - 1] = 0xCCCD; + mult[MANT_LIMBS - 1] = LIMB_BYTE(0xcc)+1; extratwos = -2; tenpwr = -tenpwr; @@ -314,8 +333,8 @@ static bool ieee_flconvert(const char *string, uint16_t * mant, * the exponent parsing code, this shouldn't happen though. */ } else if (tenpwr > 0) { /* mult = 5^+1 = 5.0 */ - mult[0] = 0xA000; - for (m = mult + 1; m < mult + MANT_WORDS; m++) { + mult[0] = (fp_limb)5 << (LIMB_BITS-3); /* 0xA000... */ + for (m = mult + 1; m < mult + MANT_LIMBS; m++) { *m = 0; } extratwos = 3; @@ -358,68 +377,102 @@ static bool ieee_flconvert(const char *string, uint16_t * mant, /* * --------------------------------------------------------------------------- + * operations of specific bits + * --------------------------------------------------------------------------- + */ + +/* Set a bit, using *bigendian* bit numbering (0 = MSB) */ +static void set_bit(fp_limb *mant, int bit) +{ + mant[bit/LIMB_BITS] |= LIMB_TOP_BIT >> (bit & (LIMB_BITS-1)); +} + +/* Test a single bit */ +static int test_bit(const fp_limb *mant, int bit) +{ + return (mant[bit/LIMB_BITS] >> (~bit & (LIMB_BITS-1))) & 1; +} + +/* Report if the mantissa value is all zero */ +static bool is_zero(const fp_limb *mant) +{ + int i; + + for (i = 0; i < MANT_LIMBS; i++) + if (mant[i]) + return false; + + return true; +} + +/* + * --------------------------------------------------------------------------- * round a mantissa off after i words * --------------------------------------------------------------------------- */ #define ROUND_COLLECT_BITS \ - for (j = i; j < MANT_WORDS; j++) { \ - m = m | mant[j]; \ - } + do { \ + m = mant[i] & (2*bit-1); \ + for (j = i+1; j < MANT_LIMBS; j++) \ + m = m | mant[j]; \ + } while (0) #define ROUND_ABS_DOWN \ - for (j = i; j < MANT_WORDS; j++) { \ - mant[j] = 0x0000; \ - } + do { \ + mant[i] &= ~(bit-1); \ + for (j = i+1; j < MANT_LIMBS; j++) \ + mant[j] = 0; \ + return false; \ + } while (0) #define ROUND_ABS_UP \ do { \ - ++mant[--i]; \ - mant[i] &= 0xFFFF; \ - } while (i > 0 && !mant[i]); \ - return (!i && !mant[i]); - -static bool ieee_round(int sign, uint16_t * mant, int32_t i) + mant[i] = (mant[i] & ~(bit-1)) + bit; \ + for (j = i+1; j < MANT_LIMBS; j++) \ + mant[j] = 0; \ + while (i > 0 && !mant[i]) \ + ++mant[--i]; \ + return !mant[0]; \ + } while (0) + +static bool ieee_round(bool minus, fp_limb *mant, int bits) { - uint16_t m = 0; + fp_limb m = 0; int32_t j; - if ((sign == 0x0000) || (sign == 0x8000)) { - if (rc == FLOAT_RC_NEAR) { - if (mant[i] & 0x8000) { - mant[i] &= 0x7FFF; - ROUND_COLLECT_BITS; - mant[i] |= 0x8000; - if (m) { - ROUND_ABS_UP; - } else { - if (mant[i - 1] & 1) { - ROUND_ABS_UP; - } else { - ROUND_ABS_DOWN; - } - } - } else { - ROUND_ABS_DOWN; - } - } else if (((sign == 0x0000) && (rc == FLOAT_RC_DOWN)) - || ((sign == 0x8000) && (rc == FLOAT_RC_UP))) { - ROUND_COLLECT_BITS; - if (m) { - ROUND_ABS_DOWN; - } - } else if (((sign == 0x0000) && (rc == FLOAT_RC_UP)) - || ((sign == 0x8000) && (rc == FLOAT_RC_DOWN))) { - ROUND_COLLECT_BITS; - if (m) { - ROUND_ABS_UP; - } - } else if (rc == FLOAT_RC_ZERO) { - ROUND_ABS_DOWN; - } else { - error(ERR_PANIC, "float_round() can't handle rc=%i", rc); - } + int i = bits / LIMB_BITS; + int p = bits % LIMB_BITS; + fp_limb bit = LIMB_TOP_BIT >> p; + + if (rc == FLOAT_RC_NEAR) { + if (mant[i] & bit) { + mant[i] &= ~bit; + ROUND_COLLECT_BITS; + mant[i] |= bit; + if (m) { + ROUND_ABS_UP; + } else { + if (test_bit(mant, bits-1)) { + ROUND_ABS_UP; + } else { + ROUND_ABS_DOWN; + } + } + } else { + ROUND_ABS_DOWN; + } + } else if (rc == FLOAT_RC_ZERO || + rc == (minus ? FLOAT_RC_UP : FLOAT_RC_DOWN)) { + ROUND_ABS_DOWN; } else { - error(ERR_PANIC, "float_round() can't handle sign=%i", sign); + /* rc == (minus ? FLOAT_RC_DOWN : FLOAT_RC_UP) */ + /* Round toward +/- infinity */ + ROUND_COLLECT_BITS; + if (m) { + ROUND_ABS_UP; + } else { + ROUND_ABS_DOWN; + } } return false; } @@ -437,17 +490,17 @@ static unsigned int hexval(char c) /* Handle floating-point numbers with radix 2^bits and binary exponent */ static bool ieee_flconvert_bin(const char *string, int bits, - uint16_t * mant, int32_t * exponent) + fp_limb *mant, int32_t *exponent) { static const int log2tbl[16] = { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 }; - uint16_t mult[MANT_WORDS + 1], *mp; + fp_limb mult[MANT_LIMBS + 1], *mp; int ms; int32_t twopwr; bool seendot, seendigit; unsigned char c; int radix = 1 << bits; - unsigned int v; + fp_limb v; twopwr = 0; seendot = seendigit = false; @@ -469,9 +522,9 @@ static bool ieee_flconvert_bin(const char *string, int bits, if (!seendigit && v) { int l = log2tbl[v]; - seendigit = 1; + seendigit = true; mp = mult; - ms = 15-l; + ms = (LIMB_BITS-1)-l; twopwr = seendot ? twopwr-bits+l : l+1-bits; } @@ -480,9 +533,9 @@ static bool ieee_flconvert_bin(const char *string, int bits, if (ms <= 0) { *mp |= v >> -ms; mp++; - if (mp > &mult[MANT_WORDS]) - mp = &mult[MANT_WORDS]; /* Guard slot */ - ms += 16; + if (mp > &mult[MANT_LIMBS]) + mp = &mult[MANT_LIMBS]; /* Guard slot */ + ms += LIMB_BITS; } *mp |= v << ms; ms -= bits; @@ -510,10 +563,10 @@ static bool ieee_flconvert_bin(const char *string, int bits, } if (!seendigit) { - memset(mant, 0, 2 * MANT_WORDS); /* Zero */ + memset(mant, 0, sizeof mult); /* Zero */ *exponent = 0; } else { - memcpy(mant, mult, 2 * MANT_WORDS); + memcpy(mant, mult, sizeof mult); *exponent = twopwr; } @@ -523,22 +576,22 @@ static bool ieee_flconvert_bin(const char *string, int bits, /* * Shift a mantissa to the right by i bits. */ -static void ieee_shr(uint16_t * mant, int i) +static void ieee_shr(fp_limb *mant, int i) { - uint16_t n, m; + fp_limb n, m; int j = 0; int sr, sl, offs; - sr = i%16; sl = 16-sr; - offs = i/16; + sr = i % LIMB_BITS; sl = LIMB_BITS-sr; + offs = i/LIMB_BITS; if (sr == 0) { if (offs) - for (j = MANT_WORDS-1; j >= offs; j--) + for (j = MANT_LIMBS-1; j >= offs; j--) mant[j] = mant[j-offs]; } else { - n = mant[MANT_WORDS-1-offs] >> sr; - for (j = MANT_WORDS-1; j > offs; j--) { + n = mant[MANT_LIMBS-1-offs] >> sr; + for (j = MANT_LIMBS-1; j > offs; j--) { m = mant[j-offs-1]; mant[j] = (m << sl) | n; n = m >> sr; @@ -549,36 +602,6 @@ static void ieee_shr(uint16_t * mant, int i) mant[j--] = 0; } -#if defined(__i386__) || defined(__x86_64__) -#define put(a,b) (*(uint16_t *)(a) = (b)) -#else -#define put(a,b) (((a)[0] = (b)), ((a)[1] = (b) >> 8)) -#endif - -/* Set a bit, using *bigendian* bit numbering (0 = MSB) */ -static void set_bit(uint16_t *mant, int bit) -{ - mant[bit >> 4] |= 1 << (~bit & 15); -} - -/* Test a single bit */ -static int test_bit(const uint16_t *mant, int bit) -{ - return (mant[bit >> 4] >> (~bit & 15)) & 1; -} - -/* Report if the mantissa value is all zero */ -static bool is_zero(const uint16_t *mant) -{ - int i; - - for (i = 0; i < MANT_WORDS; i++) - if (mant[i]) - return false; - - return true; -} - /* Produce standard IEEE formats, with implicit or explicit integer bit; this makes the following assumptions: @@ -588,7 +611,7 @@ static bool is_zero(const uint16_t *mant) - the exponent bias is 2^(n-1)-1 for an n-bit exponent */ struct ieee_format { - int words; + int bytes; int mantissa; /* Fractional bits in the mantissa */ int explicit; /* Explicit integer */ int exponent; /* Bits in the exponent */ @@ -601,12 +624,16 @@ struct ieee_format { * The 32- and 64-bit formats are the original IEEE 754 formats. * * The 80-bit format is x87-specific, but widely used. + * + * The 8-bit format appears to be the consensus 8-bit floating-point + * format. It is apparently used in graphics applications. */ -static const struct ieee_format ieee_16 = { 1, 10, 0, 5 }; -static const struct ieee_format ieee_32 = { 2, 23, 0, 8 }; -static const struct ieee_format ieee_64 = { 4, 52, 0, 11 }; -static const struct ieee_format ieee_80 = { 5, 63, 1, 15 }; -static const struct ieee_format ieee_128 = { 8, 112, 0, 15 }; +static const struct ieee_format ieee_8 = { 1, 3, 0, 4 }; +static const struct ieee_format ieee_16 = { 2, 10, 0, 5 }; +static const struct ieee_format ieee_32 = { 4, 23, 0, 8 }; +static const struct ieee_format ieee_64 = { 8, 52, 0, 11 }; +static const struct ieee_format ieee_80 = { 10, 63, 1, 15 }; +static const struct ieee_format ieee_128 = { 16, 112, 0, 15 }; /* Types of values we can generate */ enum floats { @@ -618,20 +645,21 @@ enum floats { FL_SNAN }; -static int to_float(const char *str, int sign, uint8_t * result, +static int to_float(const char *str, int s, uint8_t * result, const struct ieee_format *fmt) { - uint16_t mant[MANT_WORDS], *mp; + fp_limb mant[MANT_LIMBS], *mp, m; int32_t exponent = 0; int32_t expmax = 1 << (fmt->exponent - 1); - uint16_t one_mask = 0x8000 >> ((fmt->exponent+fmt->explicit) % 16); - int one_pos = (fmt->exponent+fmt->explicit)/16; + fp_limb one_mask = LIMB_TOP_BIT >> + ((fmt->exponent+fmt->explicit) % LIMB_BITS); + int one_pos = (fmt->exponent+fmt->explicit)/LIMB_BITS; int i; int shift; enum floats type; bool ok; - - sign = (sign < 0 ? 0x8000 : 0); + bool minus = s < 0; + int bits = fmt->bytes * 8; if (str[0] == '_') { /* Special tokens */ @@ -689,7 +717,7 @@ static int to_float(const char *str, int sign, uint8_t * result, if (!ok) { type = FL_QNAN; - } else if (mant[0] & 0x8000) { + } else if (mant[0] & LIMB_TOP_BIT) { /* * Non-zero. */ @@ -728,13 +756,13 @@ static int to_float(const char *str, int sign, uint8_t * result, shift = -(exponent + expmax - 2 - fmt->exponent) + fmt->explicit; ieee_shr(mant, shift); - ieee_round(sign, mant, fmt->words); + ieee_round(minus, mant, bits); if (mant[one_pos] & one_mask) { /* One's position is set, we rounded up into normal range */ exponent = 1; if (!fmt->explicit) mant[one_pos] &= ~one_mask; /* remove explicit one */ - mant[0] |= exponent << (15 - fmt->exponent); + mant[0] |= exponent << (LIMB_BITS-1 - fmt->exponent); } else { if (daz || is_zero(mant)) { /* Flush denormals to zero */ @@ -754,7 +782,7 @@ static int to_float(const char *str, int sign, uint8_t * result, case FL_NORMAL: exponent += expmax - 1; ieee_shr(mant, fmt->exponent+fmt->explicit); - ieee_round(sign, mant, fmt->words); + ieee_round(minus, mant, bits); /* did we scale up by one? */ if (test_bit(mant, fmt->exponent+fmt->explicit-1)) { ieee_shr(mant, 1); @@ -770,7 +798,7 @@ static int to_float(const char *str, int sign, uint8_t * result, if (!fmt->explicit) mant[one_pos] &= ~one_mask; /* remove explicit one */ - mant[0] |= exponent << (15 - fmt->exponent); + mant[0] |= exponent << (LIMB_BITS-1 - fmt->exponent); break; case FL_INFINITY: @@ -778,7 +806,8 @@ static int to_float(const char *str, int sign, uint8_t * result, case FL_SNAN: overflow: memset(mant, 0, sizeof mant); - mant[0] = ((1 << fmt->exponent)-1) << (15 - fmt->exponent); + mant[0] = (((fp_limb)1 << fmt->exponent)-1) + << (LIMB_BITS-1 - fmt->exponent); if (fmt->explicit) mant[one_pos] |= one_mask; if (type == FL_QNAN) @@ -788,12 +817,17 @@ static int to_float(const char *str, int sign, uint8_t * result, break; } - mant[0] |= sign; + mant[0] |= minus ? LIMB_TOP_BIT : 0; + + m = mant[fmt->bytes/LIMB_BYTES]; + for (i = LIMB_BYTES-(fmt->bytes % LIMB_BYTES); i < LIMB_BYTES; i++) + *result++ = m >> (i*8); - for (mp = &mant[fmt->words], i = 0; i < fmt->words; i++) { - uint16_t m = *--mp; + for (mp = &mant[fmt->bytes/LIMB_BYTES], i = 0; + i < fmt->bytes; i += LIMB_BYTES) { + m = *--mp; put(result, m); - result += 2; + result += LIMB_BYTES; } return 1; /* success */ @@ -805,6 +839,8 @@ int float_const(const char *number, int32_t sign, uint8_t * result, error = err; switch (bytes) { + case 1: + return to_float(number, sign, result, &ieee_8); case 2: return to_float(number, sign, result, &ieee_16); case 4: @@ -170,6 +170,7 @@ enum { /* token types, other than chars */ }; enum floatize { + FLOAT_8, FLOAT_16, FLOAT_32, FLOAT_64, @@ -369,6 +369,9 @@ insn *parse_line(int pass, char *buffer, insn * result, eop->type = EOT_DB_STRING; result->eops_float = true; switch (result->opcode) { + case I_DB: + eop->stringlen = 1; + break; case I_DW: eop->stringlen = 2; break; @@ -386,7 +389,7 @@ insn *parse_line(int pass, char *buffer, insn * result, break; default: error(ERR_NONFATAL, "floating-point constant" - " encountered in `db' instruction"); + " encountered in unknown instruction"); /* * fix suggested by Pedro Gimeno... original line * was: diff --git a/test/float.asm b/test/float.asm index 759b61c..df2d96c 100644 --- a/test/float.asm +++ b/test/float.asm @@ -2,6 +2,36 @@ ; Test of floating-point formats ; +; 8-bit + db 1.0 + db +1.0 + db -1.0 + db 1.5 + db +1.5 + db -1.5 + db 0.0 + db +0.0 + db -0.0 + db 1.83203125 + db +1.83203125 + db -1.83203125 + db 1.83203125e1 + db +1.83203125e1 + db -1.83203125e1 + db 1.83203125e-1 + db +1.83203125e-1 + db -1.83203125e-1 + db 1.13203125e-2 ; Denormal! + db +1.13203125e-2 ; Denormal! + db -1.13203125e-2 ; Denormal! + db __Infinity__ + db +__Infinity__ + db -__Infinity__ + db __NaN__ + db __QNaN__ + db __SNaN__ + db 3.1415926535_8979323846_2643383279_5028841971_6939937510_5 + ; 16-bit dw 1.0 dw +1.0 @@ -30,6 +60,7 @@ dw __NaN__ dw __QNaN__ dw __SNaN__ + dw 3.1415926535_8979323846_2643383279_5028841971_6939937510_5 ; 32-bit dd 1.0 @@ -59,6 +90,7 @@ dd __NaN__ dd __QNaN__ dd __SNaN__ + dd 3.1415926535_8979323846_2643383279_5028841971_6939937510_5 ; 64-bit dq 1.0 @@ -88,7 +120,8 @@ dq __NaN__ dq __QNaN__ dq __SNaN__ - + dq 3.1415926535_8979323846_2643383279_5028841971_6939937510_5 + ; 80-bit dt 1.0 dt +1.0 @@ -117,6 +150,7 @@ dt __NaN__ dt __QNaN__ dt __SNaN__ + dt 3.1415926535_8979323846_2643383279_5028841971_6939937510_5 ; 128-bit do 1.0 @@ -146,3 +180,4 @@ do __NaN__ do __QNaN__ do __SNaN__ + do 3.1415926535_8979323846_2643383279_5028841971_6939937510_5 diff --git a/test/floatexp.asm b/test/floatexp.asm index 7e6a01c..b3d14f7 100644 --- a/test/floatexp.asm +++ b/test/floatexp.asm @@ -3,6 +3,32 @@ ; Test of floating-point formats ; +; 8-bit + mov al,__float8__(1.0) + mov al,__float8__(+1.0) + mov al,__float8__(-1.0) + mov al,__float8__(0.0) + mov al,__float8__(+0.0) + mov al,__float8__(-0.0) + mov al,__float8__(1.83203125) + mov al,__float8__(+1.83203125) + mov al,__float8__(-1.83203125) + mov al,__float8__(1.83203125e1) + mov al,__float8__(+1.83203125e1) + mov al,__float8__(-1.83203125e1) + mov al,__float8__(1.83203125e-1) + mov al,__float8__(+1.83203125e-1) + mov al,__float8__(-1.83203125e-1) + mov al,__float8__(1.13203125e-2) ; Denormal! + mov al,__float8__(+1.13203125e-2) ; Denormal! + mov al,__float8__(-1.13203125e-2) ; Denormal! + mov al,__float8__(__Infinity__) + mov al,__float8__(+__Infinity__) + mov al,__float8__(-__Infinity__) + mov al,__float8__(__NaN__) + mov al,__float8__(__QNaN__) + mov al,__float8__(__SNaN__) + ; 16-bit mov ax,__float16__(1.0) mov ax,__float16__(+1.0) @@ -43,6 +43,7 @@ __qnan__ __snan__ % TOKEN_FLOATIZE, 0, FLOAT_{__float*__} +__float8__ __float16__ __float32__ __float64__ |