summaryrefslogtreecommitdiff
path: root/src/jit
diff options
context:
space:
mode:
authorChris McKinsey <chrismck@microsoft.com>2016-03-09 15:03:48 -0800
committerChris McKinsey <chrismck@microsoft.com>2016-03-10 19:30:00 -0800
commit2a487c287a732572bb8043fd9f3e17f32c855c35 (patch)
treeb13901c09801967d7ed4e4fe26f8a261c214ae2e /src/jit
parentefc4eba1b0f05fe7da21c83221e856a53cee6f34 (diff)
downloadcoreclr-2a487c287a732572bb8043fd9f3e17f32c855c35.tar.gz
coreclr-2a487c287a732572bb8043fd9f3e17f32c855c35.tar.bz2
coreclr-2a487c287a732572bb8043fd9f3e17f32c855c35.zip
Fix bad codegen for initblk with large fill value
When the initialization value of an init block is constant, we attempt to generate an inline expansion sequence to execute the stores. In order to generate wider stores, the constant value is expanded so that each byte position holds the initial byte value. In some cases the constant on the initblk may be a value not in the range 0..255, and this constant expansion will generate the wrong value. This change fixes the x64, x86, and arm64 (comment only) expansions to use only the lower byte value of this constant. The ARM32 backend already does this, and the other JIT32/JIT64 JITs shipping in Desktop also do this correctly.
Diffstat (limited to 'src/jit')
-rw-r--r--src/jit/lowerarm64.cpp15
-rw-r--r--src/jit/lowerxarch.cpp17
2 files changed, 21 insertions, 11 deletions
diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp
index 465800acd3..c51f7b052e 100644
--- a/src/jit/lowerarm64.cpp
+++ b/src/jit/lowerarm64.cpp
@@ -1038,16 +1038,21 @@ Lowering::TreeNodeInfoInitBlockStore(GenTreeBlkOp* blkNode)
&& initVal->IsCnsIntOrI())
{
ssize_t size = blockSize->gtIntCon.gtIconVal;
- // Replace the integer constant in initVal
- // to fill an 8-byte word with the fill value of the InitBlk
- assert(initVal->gtIntCon.gtIconVal == (initVal->gtIntCon.gtIconVal & 0xFF));
+ // The fill value of an initblk is interpreted to hold a
+ // value of (unsigned int8) however a constant of any size
+ // may practically reside on the evaluation stack. So extract
+ // the lower byte out of the initVal constant and replicate
+ // it to a larger constant whose size is sufficient to support
+ // the largest width store of the desired inline expansion.
+
+ ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
if (size < REGSIZE_BYTES)
{
- initVal->gtIntCon.gtIconVal = 0x01010101 * initVal->gtIntCon.gtIconVal;
+ initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
}
else
{
- initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * initVal->gtIntCon.gtIconVal;
+ initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill;
initVal->gtType = TYP_LONG;
}
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 5a2e42924d..c9ea7d18b1 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -1769,21 +1769,26 @@ Lowering::TreeNodeInfoInitBlockStore(GenTreeBlkOp* blkNode)
// Always favor unrolling vs rep stos.
if (size <= INITBLK_UNROLL_LIMIT && initVal->IsCnsIntOrI())
{
- // Replace the integer constant in initVal
- // to fill an 8-byte word with the fill value of the InitBlk
- assert(initVal->gtIntCon.gtIconVal == (initVal->gtIntCon.gtIconVal & 0xFF));
+ // The fill value of an initblk is interpreted to hold a
+ // value of (unsigned int8) however a constant of any size
+ // may practically reside on the evaluation stack. So extract
+ // the lower byte out of the initVal constant and replicate
+ // it to a larger constant whose size is sufficient to support
+ // the largest width store of the desired inline expansion.
+
+ ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
#ifdef _TARGET_AMD64_
if (size < REGSIZE_BYTES)
{
- initVal->gtIntCon.gtIconVal = 0x01010101 * initVal->gtIntCon.gtIconVal;
+ initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
}
else
{
- initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * initVal->gtIntCon.gtIconVal;
+ initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill;
initVal->gtType = TYP_LONG;
}
#else // !_TARGET_AMD64_
- initVal->gtIntCon.gtIconVal = 0x01010101 * initVal->gtIntCon.gtIconVal;
+ initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
#endif // !_TARGET_AMD64_
MakeSrcContained(blkNode, blockSize);