diff options
author | dotnet-bot <dotnet-bot@microsoft.com> | 2015-01-30 14:14:42 -0800 |
---|---|---|
committer | dotnet-bot <dotnet-bot@microsoft.com> | 2015-01-30 14:14:42 -0800 |
commit | ef1e2ab328087c61a6878c1e84f4fc5d710aebce (patch) | |
tree | dee1bbb89e9d722e16b0d1485e3cdd1b6c8e2cfa /src/palrt/decarith.cpp | |
download | coreclr-ef1e2ab328087c61a6878c1e84f4fc5d710aebce.tar.gz coreclr-ef1e2ab328087c61a6878c1e84f4fc5d710aebce.tar.bz2 coreclr-ef1e2ab328087c61a6878c1e84f4fc5d710aebce.zip |
Initial commit to populate CoreCLR repo
[tfs-changeset: 1407945]
Diffstat (limited to 'src/palrt/decarith.cpp')
-rw-r--r-- | src/palrt/decarith.cpp | 1268 |
1 files changed, 1268 insertions, 0 deletions
diff --git a/src/palrt/decarith.cpp b/src/palrt/decarith.cpp new file mode 100644 index 0000000000..03b206924d --- /dev/null +++ b/src/palrt/decarith.cpp @@ -0,0 +1,1268 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// +// + +// +// =========================================================================== +// File: decarith.cpp +// +// =========================================================================== +/*** +* +*Purpose: +* Implement arithmetic for Decimal data type. +* +*Implementation Notes: +* +*****************************************************************************/ + +#include "common.h" + +#include <oleauto.h> +#include "convert.h" + +//*********************************************************************** +// +// Additional Decimal and Int64 definitions +// +#define COPYDEC(dest, src) {DECIMAL_SIGNSCALE(dest) = DECIMAL_SIGNSCALE(src); DECIMAL_HI32(dest) = DECIMAL_HI32(src); \ + DECIMAL_MID32(dest) = DECIMAL_MID32(src); DECIMAL_LO32(dest) = DECIMAL_LO32(src); } + +#define DEC_SCALE_MAX 28 +#define POWER10_MAX 9 + +// The following functions are defined in the classlibnative\bcltype\decimal.cpp +ULONG Div96By32(ULONG *rgulNum, ULONG ulDen); +ULONG Div96By64(ULONG *rgulNum, SPLIT64 sdlDen); +ULONG Div128By96(ULONG *rgulNum, ULONG *rgulDen); +int ScaleResult(ULONG *rgulRes, int iHiRes, int iScale); +ULONG IncreaseScale(ULONG *rgulNum, ULONG ulPwr); + +//*********************************************************************** +// +// Data tables +// + +static ULONG rgulPower10[POWER10_MAX+1] = {1, 10, 100, 1000, 10000, 100000, 1000000, + 10000000, 100000000, 1000000000}; + +struct DECOVFL +{ + ULONG Hi; + ULONG Mid; +}; + +static DECOVFL PowerOvfl[] = { +// This is a table of the largest values that can be in the upper two +// ULONGs of a 96-bit number that will not overflow when multiplied +// by a given power. For the upper word, this is a table of +// 2^32 / 10^n for 1 <= n <= 9. For the lower word, this is the +// remaining fraction part * 2^32. 2^32 = 4294967296. +// + { 429496729UL, 2576980377UL }, // 10^1 remainder 0.6 + { 42949672UL, 4123168604UL }, // 10^2 remainder 0.16 + { 4294967UL, 1271310319UL }, // 10^3 remainder 0.616 + { 429496UL, 3133608139UL }, // 10^4 remainder 0.1616 + { 42949UL, 2890341191UL }, // 10^5 remainder 0.51616 + { 4294UL, 4154504685UL }, // 10^6 remainder 0.551616 + { 429UL, 2133437386UL }, // 10^7 remainder 0.9551616 + { 42UL, 4078814305UL }, // 10^8 remainder 0.09991616 +// { 4UL, 1266874889UL }, // 10^9 remainder 0.709551616 +}; + +#define OVFL_MAX_9_HI 4 +#define OVFL_MAX_9_MID 1266874889 + +#define OVFL_MAX_5_HI 42949 +#define OVFL_MAX_5_MID 2890341191 + +#define OVFL_MAX_1_HI 429496729 + + + +//*********************************************************************** +// +// static helper functions +// + +/*** +* FullDiv64By32 +* +* Entry: +* pdlNum - Pointer to 64-bit dividend +* ulDen - 32-bit divisor +* +* Purpose: +* Do full divide, yielding 64-bit result and 32-bit remainder. +* +* Exit: +* Quotient overwrites dividend. +* Returns remainder. +* +* Exceptions: +* None. +* +***********************************************************************/ + +ULONG FullDiv64By32(DWORDLONG *pdlNum, ULONG ulDen) +{ + SPLIT64 sdlTmp; + SPLIT64 sdlRes; + + sdlTmp.int64 = *pdlNum; + sdlRes.u.Hi = 0; + + if (sdlTmp.u.Hi >= ulDen) { + // DivMod64by32 returns quotient in Lo, remainder in Hi. + // + sdlRes.u.Lo = sdlTmp.u.Hi; + sdlRes.int64 = DivMod64by32(sdlRes.int64, ulDen); + sdlTmp.u.Hi = sdlRes.u.Hi; + sdlRes.u.Hi = sdlRes.u.Lo; + } + + sdlTmp.int64 = DivMod64by32(sdlTmp.int64, ulDen); + sdlRes.u.Lo = sdlTmp.u.Lo; + *pdlNum = sdlRes.int64; + return sdlTmp.u.Hi; +} + + + + +/*** +* SearchScale +* +* Entry: +* ulResHi - Top ULONG of quotient +* ulResLo - Middle ULONG of quotient +* iScale - Scale factor of quotient, range -DEC_SCALE_MAX to DEC_SCALE_MAX +* +* Purpose: +* Determine the max power of 10, <= 9, that the quotient can be scaled +* up by and still fit in 96 bits. +* +* Exit: +* Returns power of 10 to scale by, -1 if overflow error. +* +***********************************************************************/ + +int SearchScale(ULONG ulResHi, ULONG ulResLo, int iScale) +{ + int iCurScale; + + // Quick check to stop us from trying to scale any more. + // + if (ulResHi > OVFL_MAX_1_HI || iScale >= DEC_SCALE_MAX) { + iCurScale = 0; + goto HaveScale; + } + + if (iScale > DEC_SCALE_MAX - 9) { + // We can't scale by 10^9 without exceeding the max scale factor. + // See if we can scale to the max. If not, we'll fall into + // standard search for scale factor. + // + iCurScale = DEC_SCALE_MAX - iScale; + if (ulResHi < PowerOvfl[iCurScale - 1].Hi) + goto HaveScale; + + if (ulResHi == PowerOvfl[iCurScale - 1].Hi) { + UpperEq: + if (ulResLo >= PowerOvfl[iCurScale - 1].Mid) + iCurScale--; + goto HaveScale; + } + } + else if (ulResHi < OVFL_MAX_9_HI || (ulResHi == OVFL_MAX_9_HI && + ulResLo < OVFL_MAX_9_MID)) + return 9; + + // Search for a power to scale by < 9. Do a binary search + // on PowerOvfl[]. + // + iCurScale = 5; + if (ulResHi < OVFL_MAX_5_HI) + iCurScale = 7; + else if (ulResHi > OVFL_MAX_5_HI) + iCurScale = 3; + else + goto UpperEq; + + // iCurScale is 3 or 7. + // + if (ulResHi < PowerOvfl[iCurScale - 1].Hi) + iCurScale++; + else if (ulResHi > PowerOvfl[iCurScale - 1].Hi) + iCurScale--; + else + goto UpperEq; + + // iCurScale is 2, 4, 6, or 8. + // + // In all cases, we already found we could not use the power one larger. + // So if we can use this power, it is the biggest, and we're done. If + // we can't use this power, the one below it is correct for all cases + // unless it's 10^1 -- we might have to go to 10^0 (no scaling). + // + if (ulResHi > PowerOvfl[iCurScale - 1].Hi) + iCurScale--; + + if (ulResHi == PowerOvfl[iCurScale - 1].Hi) + goto UpperEq; + +HaveScale: + // iCurScale = largest power of 10 we can scale by without overflow, + // iCurScale < 9. See if this is enough to make scale factor + // positive if it isn't already. + // + if (iCurScale + iScale < 0) + iCurScale = -1; + + return iCurScale; +} + +/*** +* DecFixInt +* +* Entry: +* pdecRes - Pointer to Decimal result location +* pdecIn - Pointer to Decimal operand +* +* Purpose: +* Chop the value to integer. Return remainder so Int() function +* can round down if non-zero. +* +* Exit: +* Returns remainder. +* +* Exceptions: +* None. +* +***********************************************************************/ + +ULONG DecFixInt(LPDECIMAL pdecRes, LPDECIMAL pdecIn) +{ + ULONG rgulNum[3]; + ULONG ulRem; + ULONG ulPwr; + int iScale; + + if (pdecIn->u.u.scale > 0) { + rgulNum[0] = pdecIn->v.v.Lo32; + rgulNum[1] = pdecIn->v.v.Mid32; + rgulNum[2] = pdecIn->Hi32; + iScale = pdecIn->u.u.scale; + pdecRes->u.u.sign = pdecIn->u.u.sign; + ulRem = 0; + + do { + if (iScale > POWER10_MAX) + ulPwr = ulTenToNine; + else + ulPwr = rgulPower10[iScale]; + + ulRem |= Div96By32(rgulNum, ulPwr); + iScale -= 9; + }while (iScale > 0); + + pdecRes->v.v.Lo32 = rgulNum[0]; + pdecRes->v.v.Mid32 = rgulNum[1]; + pdecRes->Hi32 = rgulNum[2]; + pdecRes->u.u.scale = 0; + + return ulRem; + } + + COPYDEC(*pdecRes, *pdecIn) + return 0; +} + + +//*********************************************************************** +// +// +// + +//********************************************************************** +// +// VarDecMul - Decimal Multiply +// +//********************************************************************** + +STDAPI VarDecMul(LPDECIMAL pdecL, LPDECIMAL pdecR, LPDECIMAL pdecRes) +{ + SPLIT64 sdlTmp; + SPLIT64 sdlTmp2; + SPLIT64 sdlTmp3; + int iScale; + int iHiProd; + ULONG ulPwr; + ULONG ulRemLo; + ULONG ulRemHi; + ULONG rgulProd[6]; + + iScale = pdecL->u.u.scale + pdecR->u.u.scale; + + if ((pdecL->Hi32 | pdecL->v.v.Mid32 | pdecR->Hi32 | pdecR->v.v.Mid32) == 0) + { + // Upper 64 bits are zero. + // + sdlTmp.int64 = UInt32x32To64(pdecL->v.v.Lo32, pdecR->v.v.Lo32); + if (iScale > DEC_SCALE_MAX) + { + // Result iScale is too big. Divide result by power of 10 to reduce it. + // If the amount to divide by is > 19 the result is guaranteed + // less than 1/2. [max value in 64 bits = 1.84E19] + // + iScale -= DEC_SCALE_MAX; + if (iScale > 19) + { +ReturnZero: + DECIMAL_SETZERO(*pdecRes); + return NOERROR; + } + if (iScale > POWER10_MAX) + { + // Divide by 1E10 first, to get the power down to a 32-bit quantity. + // 1E10 itself doesn't fit in 32 bits, so we'll divide by 2.5E9 now + // then multiply the next divisor by 4 (which will be a max of 4E9). + // + ulRemLo = FullDiv64By32(&sdlTmp.int64, ulTenToTenDiv4); + ulPwr = rgulPower10[iScale - 10] << 2; + } + else + { + ulPwr = rgulPower10[iScale]; + ulRemLo = 0; + } + + // Power to divide by fits in 32 bits. + // + ulRemHi = FullDiv64By32(&sdlTmp.int64, ulPwr); + + // Round result. See if remainder >= 1/2 of divisor. + // Divisor is a power of 10, so it is always even. + // + ulPwr >>= 1; + if (ulRemHi >= ulPwr && (ulRemHi > ulPwr || (ulRemLo | (sdlTmp.u.Lo & 1)))) + sdlTmp.int64++; + + iScale = DEC_SCALE_MAX; + } + DECIMAL_LO32(*pdecRes) = sdlTmp.u.Lo; + DECIMAL_MID32(*pdecRes) = sdlTmp.u.Hi; + DECIMAL_HI32(*pdecRes) = 0; + } + else + { + + // At least one operand has bits set in the upper 64 bits. + // + // Compute and accumulate the 9 partial products into a + // 192-bit (24-byte) result. + // + // [l-h][l-m][l-l] left high, middle, low + // x [r-h][r-m][r-l] right high, middle, low + // ------------------------------ + // + // [0-h][0-l] l-l * r-l + // [1ah][1al] l-l * r-m + // [1bh][1bl] l-m * r-l + // [2ah][2al] l-m * r-m + // [2bh][2bl] l-l * r-h + // [2ch][2cl] l-h * r-l + // [3ah][3al] l-m * r-h + // [3bh][3bl] l-h * r-m + // [4-h][4-l] l-h * r-h + // ------------------------------ + // [p-5][p-4][p-3][p-2][p-1][p-0] prod[] array + // + sdlTmp.int64 = UInt32x32To64(pdecL->v.v.Lo32, pdecR->v.v.Lo32); + rgulProd[0] = sdlTmp.u.Lo; + + sdlTmp2.int64 = UInt32x32To64(pdecL->v.v.Lo32, pdecR->v.v.Mid32) + sdlTmp.u.Hi; + + sdlTmp.int64 = UInt32x32To64(pdecL->v.v.Mid32, pdecR->v.v.Lo32); + sdlTmp.int64 += sdlTmp2.int64; // this could generate carry + rgulProd[1] = sdlTmp.u.Lo; + if (sdlTmp.int64 < sdlTmp2.int64) // detect carry + sdlTmp2.u.Hi = 1; + else + sdlTmp2.u.Hi = 0; + sdlTmp2.u.Lo = sdlTmp.u.Hi; + + sdlTmp.int64 = UInt32x32To64(pdecL->v.v.Mid32, pdecR->v.v.Mid32) + sdlTmp2.int64; + + if (pdecL->Hi32 | pdecR->Hi32) { + // Highest 32 bits is non-zero. Calculate 5 more partial products. + // + sdlTmp2.int64 = UInt32x32To64(pdecL->v.v.Lo32, pdecR->Hi32); + sdlTmp.int64 += sdlTmp2.int64; // this could generate carry + if (sdlTmp.int64 < sdlTmp2.int64) // detect carry + sdlTmp3.u.Hi = 1; + else + sdlTmp3.u.Hi = 0; + + sdlTmp2.int64 = UInt32x32To64(pdecL->Hi32, pdecR->v.v.Lo32); + sdlTmp.int64 += sdlTmp2.int64; // this could generate carry + rgulProd[2] = sdlTmp.u.Lo; + if (sdlTmp.int64 < sdlTmp2.int64) // detect carry + sdlTmp3.u.Hi++; + sdlTmp3.u.Lo = sdlTmp.u.Hi; + + sdlTmp.int64 = UInt32x32To64(pdecL->v.v.Mid32, pdecR->Hi32); + sdlTmp.int64 += sdlTmp3.int64; // this could generate carry + if (sdlTmp.int64 < sdlTmp3.int64) // detect carry + sdlTmp3.u.Hi = 1; + else + sdlTmp3.u.Hi = 0; + + sdlTmp2.int64 = UInt32x32To64(pdecL->Hi32, pdecR->v.v.Mid32); + sdlTmp.int64 += sdlTmp2.int64; // this could generate carry + rgulProd[3] = sdlTmp.u.Lo; + if (sdlTmp.int64 < sdlTmp2.int64) // detect carry + sdlTmp3.u.Hi++; + sdlTmp3.u.Lo = sdlTmp.u.Hi; + + sdlTmp.int64 = UInt32x32To64(pdecL->Hi32, pdecR->Hi32) + sdlTmp3.int64; + rgulProd[4] = sdlTmp.u.Lo; + rgulProd[5] = sdlTmp.u.Hi; + + iHiProd = 5; + } + else { + rgulProd[2] = sdlTmp.u.Lo; + rgulProd[3] = sdlTmp.u.Hi; + iHiProd = 3; + } + + // Check for leading zero ULONGs on the product + // + while (rgulProd[iHiProd] == 0) { + iHiProd--; + if (iHiProd < 0) + goto ReturnZero; + } + + iScale = ScaleResult(rgulProd, iHiProd, iScale); + if (iScale == -1) + return DISP_E_OVERFLOW; + + pdecRes->v.v.Lo32 = rgulProd[0]; + pdecRes->v.v.Mid32 = rgulProd[1]; + pdecRes->Hi32 = rgulProd[2]; + } + + pdecRes->u.u.sign = pdecR->u.u.sign ^ pdecL->u.u.sign; + pdecRes->u.u.scale = (char)iScale; + return NOERROR; +} + + +//********************************************************************** +// +// VarDecAdd - Decimal Addition +// VarDecSub - Decimal Subtraction +// +//********************************************************************** + +static HRESULT DecAddSub(LPDECIMAL pdecL, LPDECIMAL pdecR, LPDECIMAL pdecRes, char bSign); + +STDAPI VarDecAdd(LPDECIMAL pdecL, LPDECIMAL pdecR, LPDECIMAL pdecRes) +{ + return DecAddSub(pdecL, pdecR, pdecRes, 0); +} + + +STDAPI VarDecSub(LPDECIMAL pdecL, LPDECIMAL pdecR, LPDECIMAL pdecRes) +{ + return DecAddSub(pdecL, pdecR, pdecRes, DECIMAL_NEG); +} + + +static HRESULT DecAddSub(LPDECIMAL pdecL, LPDECIMAL pdecR, LPDECIMAL pdecRes, char bSign) +{ + ULONG rgulNum[6]; + ULONG ulPwr; + int iScale; + int iHiProd; + int iCur; + SPLIT64 sdlTmp; + DECIMAL decRes; + DECIMAL decTmp; + LPDECIMAL pdecTmp; + + bSign ^= (pdecR->u.u.sign ^ pdecL->u.u.sign) & DECIMAL_NEG; + + if (pdecR->u.u.scale == pdecL->u.u.scale) { + // Scale factors are equal, no alignment necessary. + // + decRes.u.signscale = pdecL->u.signscale; + +AlignedAdd: + if (bSign) { + // Signs differ - subtract + // + DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*pdecL) - DECIMAL_LO64_GET(*pdecR)); + DECIMAL_HI32(decRes) = DECIMAL_HI32(*pdecL) - DECIMAL_HI32(*pdecR); + + // Propagate carry + // + if (DECIMAL_LO64_GET(decRes) > DECIMAL_LO64_GET(*pdecL)) { + decRes.Hi32--; + if (decRes.Hi32 >= pdecL->Hi32) + goto SignFlip; + } + else if (decRes.Hi32 > pdecL->Hi32) { + // Got negative result. Flip its sign. + // +SignFlip: + DECIMAL_LO64_SET(decRes, -(LONGLONG)DECIMAL_LO64_GET(decRes)); + decRes.Hi32 = ~decRes.Hi32; + if (DECIMAL_LO64_GET(decRes) == 0) + decRes.Hi32++; + decRes.u.u.sign ^= DECIMAL_NEG; + } + + } + else { + // Signs are the same - add + // + DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*pdecL) + DECIMAL_LO64_GET(*pdecR)); + decRes.Hi32 = pdecL->Hi32 + pdecR->Hi32; + + // Propagate carry + // + if (DECIMAL_LO64_GET(decRes) < DECIMAL_LO64_GET(*pdecL)) { + decRes.Hi32++; + if (decRes.Hi32 <= pdecL->Hi32) + goto AlignedScale; + } + else if (decRes.Hi32 < pdecL->Hi32) { +AlignedScale: + // The addition carried above 96 bits. Divide the result by 10, + // dropping the scale factor. + // + if (decRes.u.u.scale == 0) + return DISP_E_OVERFLOW; + decRes.u.u.scale--; + + sdlTmp.u.Lo = decRes.Hi32; + sdlTmp.u.Hi = 1; + sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10); + decRes.Hi32 = sdlTmp.u.Lo; + + sdlTmp.u.Lo = decRes.v.v.Mid32; + sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10); + decRes.v.v.Mid32 = sdlTmp.u.Lo; + + sdlTmp.u.Lo = decRes.v.v.Lo32; + sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10); + decRes.v.v.Lo32 = sdlTmp.u.Lo; + + // See if we need to round up. + // + if (sdlTmp.u.Hi >= 5 && (sdlTmp.u.Hi > 5 || (decRes.v.v.Lo32 & 1))) { + DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(decRes)+1) + if (DECIMAL_LO64_GET(decRes) == 0) + decRes.Hi32++; + } + } + } + } + else { + // Scale factors are not equal. Assume that a larger scale + // factor (more decimal places) is likely to mean that number + // is smaller. Start by guessing that the right operand has + // the larger scale factor. The result will have the larger + // scale factor. + // + decRes.u.u.scale = pdecR->u.u.scale; // scale factor of "smaller" + decRes.u.u.sign = pdecL->u.u.sign; // but sign of "larger" + iScale = decRes.u.u.scale - pdecL->u.u.scale; + + if (iScale < 0) { + // Guessed scale factor wrong. Swap operands. + // + iScale = -iScale; + decRes.u.u.scale = pdecL->u.u.scale; + decRes.u.u.sign ^= bSign; + pdecTmp = pdecR; + pdecR = pdecL; + pdecL = pdecTmp; + } + + // *pdecL will need to be multiplied by 10^iScale so + // it will have the same scale as *pdecR. We could be + // extending it to up to 192 bits of precision. + // + if (iScale <= POWER10_MAX) { + // Scaling won't make it larger than 4 ULONGs + // + ulPwr = rgulPower10[iScale]; + DECIMAL_LO64_SET(decTmp, UInt32x32To64(pdecL->v.v.Lo32, ulPwr)); + sdlTmp.int64 = UInt32x32To64(pdecL->v.v.Mid32, ulPwr); + sdlTmp.int64 += decTmp.v.v.Mid32; + decTmp.v.v.Mid32 = sdlTmp.u.Lo; + decTmp.Hi32 = sdlTmp.u.Hi; + sdlTmp.int64 = UInt32x32To64(pdecL->Hi32, ulPwr); + sdlTmp.int64 += decTmp.Hi32; + if (sdlTmp.u.Hi == 0) { + // Result fits in 96 bits. Use standard aligned add. + // + decTmp.Hi32 = sdlTmp.u.Lo; + pdecL = &decTmp; + goto AlignedAdd; + } + rgulNum[0] = decTmp.v.v.Lo32; + rgulNum[1] = decTmp.v.v.Mid32; + rgulNum[2] = sdlTmp.u.Lo; + rgulNum[3] = sdlTmp.u.Hi; + iHiProd = 3; + } + else { + // Have to scale by a bunch. Move the number to a buffer + // where it has room to grow as it's scaled. + // + rgulNum[0] = pdecL->v.v.Lo32; + rgulNum[1] = pdecL->v.v.Mid32; + rgulNum[2] = pdecL->Hi32; + iHiProd = 2; + + // Scan for zeros in the upper words. + // + if (rgulNum[2] == 0) { + iHiProd = 1; + if (rgulNum[1] == 0) { + iHiProd = 0; + if (rgulNum[0] == 0) { + // Left arg is zero, return right. + // + DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*pdecR)); + decRes.Hi32 = pdecR->Hi32; + decRes.u.u.sign ^= bSign; + goto RetDec; + } + } + } + + // Scaling loop, up to 10^9 at a time. iHiProd stays updated + // with index of highest non-zero ULONG. + // + for (; iScale > 0; iScale -= POWER10_MAX) { + if (iScale > POWER10_MAX) + ulPwr = ulTenToNine; + else + ulPwr = rgulPower10[iScale]; + + sdlTmp.u.Hi = 0; + for (iCur = 0; iCur <= iHiProd; iCur++) { + sdlTmp.int64 = UInt32x32To64(rgulNum[iCur], ulPwr) + sdlTmp.u.Hi; + rgulNum[iCur] = sdlTmp.u.Lo; + } + + if (sdlTmp.u.Hi != 0) + // We're extending the result by another ULONG. + rgulNum[++iHiProd] = sdlTmp.u.Hi; + } + } + + // Scaling complete, do the add. Could be subtract if signs differ. + // + sdlTmp.u.Lo = rgulNum[0]; + sdlTmp.u.Hi = rgulNum[1]; + + if (bSign) { + // Signs differ, subtract. + // + DECIMAL_LO64_SET(decRes, sdlTmp.int64 - DECIMAL_LO64_GET(*pdecR)); + decRes.Hi32 = rgulNum[2] - pdecR->Hi32; + + // Propagate carry + // + if (DECIMAL_LO64_GET(decRes) > sdlTmp.int64) { + decRes.Hi32--; + if (decRes.Hi32 >= rgulNum[2]) + goto LongSub; + } + else if (decRes.Hi32 > rgulNum[2]) { +LongSub: + // If rgulNum has more than 96 bits of precision, then we need to + // carry the subtraction into the higher bits. If it doesn't, + // then we subtracted in the wrong order and have to flip the + // sign of the result. + // + if (iHiProd <= 2) + goto SignFlip; + + iCur = 3; + while(rgulNum[iCur++]-- == 0); + if (rgulNum[iHiProd] == 0) + iHiProd--; + } + } + else { + // Signs the same, add. + // + DECIMAL_LO64_SET(decRes, sdlTmp.int64 + DECIMAL_LO64_GET(*pdecR)); + decRes.Hi32 = rgulNum[2] + pdecR->Hi32; + + // Propagate carry + // + if (DECIMAL_LO64_GET(decRes) < sdlTmp.int64) { + decRes.Hi32++; + if (decRes.Hi32 <= rgulNum[2]) + goto LongAdd; + } + else if (decRes.Hi32 < rgulNum[2]) { +LongAdd: + // Had a carry above 96 bits. + // + iCur = 3; + do { + if (iHiProd < iCur) { + rgulNum[iCur] = 1; + iHiProd = iCur; + break; + } + }while (++rgulNum[iCur++] == 0); + } + } + + if (iHiProd > 2) { + rgulNum[0] = decRes.v.v.Lo32; + rgulNum[1] = decRes.v.v.Mid32; + rgulNum[2] = decRes.Hi32; + decRes.u.u.scale = ScaleResult(rgulNum, iHiProd, decRes.u.u.scale); + if (decRes.u.u.scale == (BYTE) -1) + return DISP_E_OVERFLOW; + + decRes.v.v.Lo32 = rgulNum[0]; + decRes.v.v.Mid32 = rgulNum[1]; + decRes.Hi32 = rgulNum[2]; + } + } + +RetDec: + COPYDEC(*pdecRes, decRes) + return NOERROR; +} + + +//********************************************************************** +// +// VarDecDiv - Decimal Divide +// +//********************************************************************** + +STDAPI VarDecDiv(LPDECIMAL pdecL, LPDECIMAL pdecR, LPDECIMAL pdecRes) +{ + ULONG rgulQuo[3]; + ULONG rgulQuoSave[3]; + ULONG rgulRem[4]; + ULONG rgulDivisor[3]; + ULONG ulPwr; + ULONG ulTmp; + ULONG ulTmp1; + SPLIT64 sdlTmp; + SPLIT64 sdlDivisor; + int iScale; + int iCurScale; + + iScale = pdecL->u.u.scale - pdecR->u.u.scale; + rgulDivisor[0] = pdecR->v.v.Lo32; + rgulDivisor[1] = pdecR->v.v.Mid32; + rgulDivisor[2] = pdecR->Hi32; + + if (rgulDivisor[1] == 0 && rgulDivisor[2] == 0) { + // Divisor is only 32 bits. Easy divide. + // + if (rgulDivisor[0] == 0) + return DISP_E_DIVBYZERO; + + rgulQuo[0] = pdecL->v.v.Lo32; + rgulQuo[1] = pdecL->v.v.Mid32; + rgulQuo[2] = pdecL->Hi32; + rgulRem[0] = Div96By32(rgulQuo, rgulDivisor[0]); + + for (;;) { + if (rgulRem[0] == 0) { + if (iScale < 0) { + iCurScale = min(9, -iScale); + goto HaveScale; + } + break; + } + + // We have computed a quotient based on the natural scale + // ( <dividend scale> - <divisor scale> ). We have a non-zero + // remainder, so now we should increase the scale if possible to + // include more quotient bits. + // + // If it doesn't cause overflow, we'll loop scaling by 10^9 and + // computing more quotient bits as long as the remainder stays + // non-zero. If scaling by that much would cause overflow, we'll + // drop out of the loop and scale by as much as we can. + // + // Scaling by 10^9 will overflow if rgulQuo[2].rgulQuo[1] >= 2^32 / 10^9 + // = 4.294 967 296. So the upper limit is rgulQuo[2] == 4 and + // rgulQuo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+. Since + // quotient bits in rgulQuo[0] could be all 1's, then 1,266,874,888 + // is the largest value in rgulQuo[1] (when rgulQuo[2] == 4) that is + // assured not to overflow. + // + iCurScale = SearchScale(rgulQuo[2], rgulQuo[1], iScale); + if (iCurScale == 0) { + // No more scaling to be done, but remainder is non-zero. + // Round quotient. + // + ulTmp = rgulRem[0] << 1; + if (ulTmp < rgulRem[0] || (ulTmp >= rgulDivisor[0] && + (ulTmp > rgulDivisor[0] || (rgulQuo[0] & 1)))) { +RoundUp: + if (++rgulQuo[0] == 0) + if (++rgulQuo[1] == 0) + rgulQuo[2]++; + } + break; + } + + if (iCurScale == -1) + return DISP_E_OVERFLOW; + +HaveScale: + ulPwr = rgulPower10[iCurScale]; + iScale += iCurScale; + + if (IncreaseScale(rgulQuo, ulPwr) != 0) + return DISP_E_OVERFLOW; + + sdlTmp.int64 = DivMod64by32(UInt32x32To64(rgulRem[0], ulPwr), rgulDivisor[0]); + rgulRem[0] = sdlTmp.u.Hi; + + rgulQuo[0] += sdlTmp.u.Lo; + if (rgulQuo[0] < sdlTmp.u.Lo) { + if (++rgulQuo[1] == 0) + rgulQuo[2]++; + } + } // for (;;) + } + else { + // Divisor has bits set in the upper 64 bits. + // + // Divisor must be fully normalized (shifted so bit 31 of the most + // significant ULONG is 1). Locate the MSB so we know how much to + // normalize by. The dividend will be shifted by the same amount so + // the quotient is not changed. + // + if (rgulDivisor[2] == 0) + ulTmp = rgulDivisor[1]; + else + ulTmp = rgulDivisor[2]; + + iCurScale = 0; + if (!(ulTmp & 0xFFFF0000)) { + iCurScale += 16; + ulTmp <<= 16; + } + if (!(ulTmp & 0xFF000000)) { + iCurScale += 8; + ulTmp <<= 8; + } + if (!(ulTmp & 0xF0000000)) { + iCurScale += 4; + ulTmp <<= 4; + } + if (!(ulTmp & 0xC0000000)) { + iCurScale += 2; + ulTmp <<= 2; + } + if (!(ulTmp & 0x80000000)) { + iCurScale++; + ulTmp <<= 1; + } + + // Shift both dividend and divisor left by iCurScale. + // + sdlTmp.int64 = DECIMAL_LO64_GET(*pdecL) << iCurScale; + rgulRem[0] = sdlTmp.u.Lo; + rgulRem[1] = sdlTmp.u.Hi; + sdlTmp.u.Lo = pdecL->v.v.Mid32; + sdlTmp.u.Hi = pdecL->Hi32; + sdlTmp.int64 <<= iCurScale; + rgulRem[2] = sdlTmp.u.Hi; + rgulRem[3] = (pdecL->Hi32 >> (31 - iCurScale)) >> 1; + + sdlDivisor.u.Lo = rgulDivisor[0]; + sdlDivisor.u.Hi = rgulDivisor[1]; + sdlDivisor.int64 <<= iCurScale; + + if (rgulDivisor[2] == 0) { + // Have a 64-bit divisor in sdlDivisor. The remainder + // (currently 96 bits spread over 4 ULONGs) will be < divisor. + // + sdlTmp.u.Lo = rgulRem[2]; + sdlTmp.u.Hi = rgulRem[3]; + + rgulQuo[2] = 0; + rgulQuo[1] = Div96By64(&rgulRem[1], sdlDivisor); + rgulQuo[0] = Div96By64(rgulRem, sdlDivisor); + + for (;;) { + if ((rgulRem[0] | rgulRem[1]) == 0) { + if (iScale < 0) { + iCurScale = min(9, -iScale); + goto HaveScale64; + } + break; + } + + // Remainder is non-zero. Scale up quotient and remainder by + // powers of 10 so we can compute more significant bits. + // + iCurScale = SearchScale(rgulQuo[2], rgulQuo[1], iScale); + if (iCurScale == 0) { + // No more scaling to be done, but remainder is non-zero. + // Round quotient. + // + sdlTmp.u.Lo = rgulRem[0]; + sdlTmp.u.Hi = rgulRem[1]; + if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 || + (sdlTmp.int64 == sdlDivisor.int64 && (rgulQuo[0] & 1))) + goto RoundUp; + break; + } + + if (iCurScale == -1) + return DISP_E_OVERFLOW; + +HaveScale64: + ulPwr = rgulPower10[iCurScale]; + iScale += iCurScale; + + if (IncreaseScale(rgulQuo, ulPwr) != 0) + return DISP_E_OVERFLOW; + + rgulRem[2] = 0; // rem is 64 bits, IncreaseScale uses 96 + IncreaseScale(rgulRem, ulPwr); + ulTmp = Div96By64(rgulRem, sdlDivisor); + rgulQuo[0] += ulTmp; + if (rgulQuo[0] < ulTmp) + if (++rgulQuo[1] == 0) + rgulQuo[2]++; + + } // for (;;) + } + else { + // Have a 96-bit divisor in rgulDivisor[]. + // + // Start by finishing the shift left by iCurScale. + // + sdlTmp.u.Lo = rgulDivisor[1]; + sdlTmp.u.Hi = rgulDivisor[2]; + sdlTmp.int64 <<= iCurScale; + rgulDivisor[0] = sdlDivisor.u.Lo; + rgulDivisor[1] = sdlDivisor.u.Hi; + rgulDivisor[2] = sdlTmp.u.Hi; + + // The remainder (currently 96 bits spread over 4 ULONGs) + // will be < divisor. + // + rgulQuo[2] = 0; + rgulQuo[1] = 0; + rgulQuo[0] = Div128By96(rgulRem, rgulDivisor); + + for (;;) { + if ((rgulRem[0] | rgulRem[1] | rgulRem[2]) == 0) { + if (iScale < 0) { + iCurScale = min(9, -iScale); + goto HaveScale96; + } + break; + } + + // Remainder is non-zero. Scale up quotient and remainder by + // powers of 10 so we can compute more significant bits. + // + iCurScale = SearchScale(rgulQuo[2], rgulQuo[1], iScale); + if (iCurScale == 0) { + // No more scaling to be done, but remainder is non-zero. + // Round quotient. + // + if (rgulRem[2] >= 0x80000000) + goto RoundUp; + + ulTmp = rgulRem[0] > 0x80000000; + ulTmp1 = rgulRem[1] > 0x80000000; + rgulRem[0] <<= 1; + rgulRem[1] = (rgulRem[1] << 1) + ulTmp; + rgulRem[2] = (rgulRem[2] << 1) + ulTmp1; + + if (rgulRem[2] > rgulDivisor[2] || rgulRem[2] == rgulDivisor[2] && + (rgulRem[1] > rgulDivisor[1] || rgulRem[1] == rgulDivisor[1] && + (rgulRem[0] > rgulDivisor[0] || rgulRem[0] == rgulDivisor[0] && + (rgulQuo[0] & 1)))) + goto RoundUp; + break; + } + + if (iCurScale == -1) + return DISP_E_OVERFLOW; + +HaveScale96: + ulPwr = rgulPower10[iCurScale]; + iScale += iCurScale; + + if (IncreaseScale(rgulQuo, ulPwr) != 0) + return DISP_E_OVERFLOW; + + rgulRem[3] = IncreaseScale(rgulRem, ulPwr); + ulTmp = Div128By96(rgulRem, rgulDivisor); + rgulQuo[0] += ulTmp; + if (rgulQuo[0] < ulTmp) + if (++rgulQuo[1] == 0) + rgulQuo[2]++; + + } // for (;;) + } + } + + // No more remainder. Try extracting any extra powers of 10 we may have + // added. We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1. + // If a division by one of these powers returns a zero remainder, then + // we keep the quotient. If the remainder is not zero, then we restore + // the previous value. + // + // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10 + // we can extract. We use this as a quick test on whether to try a + // given power. + // + while ((rgulQuo[0] & 0xFF) == 0 && iScale >= 8) { + rgulQuoSave[0] = rgulQuo[0]; + rgulQuoSave[1] = rgulQuo[1]; + rgulQuoSave[2] = rgulQuo[2]; + + if (Div96By32(rgulQuoSave, 100000000) == 0) { + rgulQuo[0] = rgulQuoSave[0]; + rgulQuo[1] = rgulQuoSave[1]; + rgulQuo[2] = rgulQuoSave[2]; + iScale -= 8; + } + else + break; + } + + if ((rgulQuo[0] & 0xF) == 0 && iScale >= 4) { + rgulQuoSave[0] = rgulQuo[0]; + rgulQuoSave[1] = rgulQuo[1]; + rgulQuoSave[2] = rgulQuo[2]; + + if (Div96By32(rgulQuoSave, 10000) == 0) { + rgulQuo[0] = rgulQuoSave[0]; + rgulQuo[1] = rgulQuoSave[1]; + rgulQuo[2] = rgulQuoSave[2]; + iScale -= 4; + } + } + + if ((rgulQuo[0] & 3) == 0 && iScale >= 2) { + rgulQuoSave[0] = rgulQuo[0]; + rgulQuoSave[1] = rgulQuo[1]; + rgulQuoSave[2] = rgulQuo[2]; + + if (Div96By32(rgulQuoSave, 100) == 0) { + rgulQuo[0] = rgulQuoSave[0]; + rgulQuo[1] = rgulQuoSave[1]; + rgulQuo[2] = rgulQuoSave[2]; + iScale -= 2; + } + } + + if ((rgulQuo[0] & 1) == 0 && iScale >= 1) { + rgulQuoSave[0] = rgulQuo[0]; + rgulQuoSave[1] = rgulQuo[1]; + rgulQuoSave[2] = rgulQuo[2]; + + if (Div96By32(rgulQuoSave, 10) == 0) { + rgulQuo[0] = rgulQuoSave[0]; + rgulQuo[1] = rgulQuoSave[1]; + rgulQuo[2] = rgulQuoSave[2]; + iScale -= 1; + } + } + + pdecRes->Hi32 = rgulQuo[2]; + pdecRes->v.v.Mid32 = rgulQuo[1]; + pdecRes->v.v.Lo32 = rgulQuo[0]; + pdecRes->u.u.scale = iScale; + pdecRes->u.u.sign = pdecL->u.u.sign ^ pdecR->u.u.sign; + return NOERROR; +} + + +//********************************************************************** +// +// VarDecAbs - Decimal Absolute Value +// +//********************************************************************** + +STDAPI VarDecAbs(LPDECIMAL pdecOprd, LPDECIMAL pdecRes) +{ + COPYDEC(*pdecRes, *pdecOprd) + pdecRes->u.u.sign &= ~DECIMAL_NEG; + return NOERROR; +} + + +//********************************************************************** +// +// VarDecFix - Decimal Fix (chop to integer) +// +//********************************************************************** + +STDAPI VarDecFix(LPDECIMAL pdecOprd, LPDECIMAL pdecRes) +{ + DecFixInt(pdecRes, pdecOprd); + return NOERROR; +} + + +//********************************************************************** +// +// VarDecInt - Decimal Int (round down to integer) +// +//********************************************************************** + +STDAPI VarDecInt(LPDECIMAL pdecOprd, LPDECIMAL pdecRes) +{ + if (DecFixInt(pdecRes, pdecOprd) != 0 && (pdecRes->u.u.sign & DECIMAL_NEG)) { + // We have chopped off a non-zero amount from a negative value. Since + // we round toward -infinity, we must increase the integer result by + // 1 to make it more negative. This will never overflow because + // in order to have a remainder, we must have had a non-zero scale factor. + // Our scale factor is back to zero now. + // + DECIMAL_LO64_SET(*pdecRes, DECIMAL_LO64_GET(*pdecRes) + 1); + if (DECIMAL_LO64_GET(*pdecRes) == 0) + pdecRes->Hi32++; + } + return NOERROR; +} + + +//********************************************************************** +// +// VarDecNeg - Decimal Negate +// +//********************************************************************** + +STDAPI VarDecNeg(LPDECIMAL pdecOprd, LPDECIMAL pdecRes) +{ + COPYDEC(*pdecRes, *pdecOprd) + pdecRes->u.u.sign ^= DECIMAL_NEG; + return NOERROR; +} + + +//********************************************************************** +// +// VarDecCmp - Decimal Compare +// +//********************************************************************** + +STDAPI VarDecCmp(LPDECIMAL pdecL, LPDECIMAL pdecR) +{ + ULONG ulSgnL; + ULONG ulSgnR; + + // First check signs and whether either are zero. If both are + // non-zero and of the same sign, just use subtraction to compare. + // + ulSgnL = pdecL->v.v.Lo32 | pdecL->v.v.Mid32 | pdecL->Hi32; + ulSgnR = pdecR->v.v.Lo32 | pdecR->v.v.Mid32 | pdecR->Hi32; + if (ulSgnL != 0) + ulSgnL = (pdecL->u.u.sign & DECIMAL_NEG) | 1; + + if (ulSgnR != 0) + ulSgnR = (pdecR->u.u.sign & DECIMAL_NEG) | 1; + + // ulSgnL & ulSgnR have values 1, 0, or 0x81 depending on if the left/right + // operand is +, 0, or -. + // + if (ulSgnL == ulSgnR) { + if (ulSgnL == 0) // both are zero + return VARCMP_EQ; // return equal + + DECIMAL decRes; + + DecAddSub(pdecL, pdecR, &decRes, DECIMAL_NEG); + if (DECIMAL_LO64_GET(decRes) == 0 && decRes.Hi32 == 0) + return VARCMP_EQ; + if (decRes.u.u.sign & DECIMAL_NEG) + return VARCMP_LT; + return VARCMP_GT; + } + + // Signs are different. Used signed byte compares + // + if ((char)ulSgnL > (char)ulSgnR) + return VARCMP_GT; + return VARCMP_LT; +} + +STDAPI VarDecRound(LPDECIMAL pdecIn, int cDecimals, LPDECIMAL pdecRes) +{ + ULONG rgulNum[3]; + ULONG ulRem; + ULONG ulSticky; + ULONG ulPwr; + int iScale; + + if (cDecimals < 0) + return E_INVALIDARG; + + iScale = pdecIn->u.u.scale - cDecimals; + if (iScale > 0) + { + rgulNum[0] = pdecIn->v.v.Lo32; + rgulNum[1] = pdecIn->v.v.Mid32; + rgulNum[2] = pdecIn->Hi32; + pdecRes->u.u.sign = pdecIn->u.u.sign; + ulRem = ulSticky = 0; + + do { + ulSticky |= ulRem; + if (iScale > POWER10_MAX) + ulPwr = ulTenToNine; + else + ulPwr = rgulPower10[iScale]; + + ulRem = Div96By32(rgulNum, ulPwr); + iScale -= 9; + }while (iScale > 0); + + // Now round. ulRem has last remainder, ulSticky has sticky bits. + // To do IEEE rounding, we add LSB of result to sticky bits so + // either causes round up if remainder * 2 == last divisor. + // + ulSticky |= rgulNum[0] & 1; + ulRem = (ulRem << 1) + (ulSticky != 0); + if (ulPwr < ulRem && + ++rgulNum[0] == 0 && + ++rgulNum[1] == 0 + ) + ++rgulNum[2]; + + pdecRes->v.v.Lo32 = rgulNum[0]; + pdecRes->v.v.Mid32 = rgulNum[1]; + pdecRes->Hi32 = rgulNum[2]; + pdecRes->u.u.scale = cDecimals; + return NOERROR; + } + + COPYDEC(*pdecRes, *pdecIn) + return NOERROR; +} |