diff options
author | Kyungwoo Lee <kyulee@microsoft.com> | 2016-03-25 11:19:25 -0700 |
---|---|---|
committer | Kyungwoo Lee <kyulee@microsoft.com> | 2016-03-29 13:42:16 -0700 |
commit | 68c70fa6f7207ee4acfd7f6cf511e91d46bdcf53 (patch) | |
tree | 788dbda0f4fdb6801b0b076550a30bc3fa5e2309 | |
parent | 587cbb77129c31e93c5b5ccbe03bb6c41dc5821c (diff) | |
download | coreclr-68c70fa6f7207ee4acfd7f6cf511e91d46bdcf53.tar.gz coreclr-68c70fa6f7207ee4acfd7f6cf511e91d46bdcf53.tar.bz2 coreclr-68c70fa6f7207ee4acfd7f6cf511e91d46bdcf53.zip |
ARM64: Fix Frame with compLocallocUsed
When compLocallocUsed is true, the stack pointer changes at run time.
So the JIT must restore the stack pointer from the frame pointer in the epilog.
Previously, we simply copied fp to sp, which is only valid for frameType
= 1.
This implements the correct restore sequence for the other frameTypes (2 and 3).
For frameType = 3, I also simplified the prolog sequence generation so that
this feature is supported consistently.
-rw-r--r-- | src/jit/codegen.h | 3 | ||||
-rw-r--r-- | src/jit/codegencommon.cpp | 101 | ||||
-rw-r--r-- | tests/arm64/Tests.lst | 30 |
3 files changed, 60 insertions, 74 deletions
diff --git a/src/jit/codegen.h b/src/jit/codegen.h index a0423e8972..f1ad82db38 100644 --- a/src/jit/codegen.h +++ b/src/jit/codegen.h @@ -499,8 +499,7 @@ protected: #if defined(_TARGET_ARM64_) - void genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog, - /* IN OUT */ bool* pUnwindStarted); + void genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog); #else // !defined(_TARGET_ARM64_) diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 6eb5e0a522..4f8f8a9584 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -5679,25 +5679,11 @@ void CodeGen::genPushCalleeSavedRegisters() assert(spAdjustment3 > 0); assert((spAdjustment3 % 16) == 0); - // Try to push the frame pointer setup down, so the unwind codes match better (there is no corresponding instruction in the epilog). - bool isFPEstablished = false; - int maxFPOffset = spAdjustment3 + alignmentAdjustment2; - if (!emitter::emitIns_valid_imm_for_add(maxFPOffset, EA_PTRSIZE)) - { - getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, alignmentAdjustment2); - compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2); - isFPEstablished = true; - } + getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, alignmentAdjustment2); + compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2); genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegZeroed); offset += spAdjustment3; - - if (!isFPEstablished) - { - getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, maxFPOffset); - compiler->unwindSetFrameReg(REG_FPBASE, maxFPOffset); - isFPEstablished = true; - } } else { @@ -6308,18 +6294,10 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) #elif defined(_TARGET_ARM64_) -void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog, - /* IN OUT */ bool* pUnwindStarted) +void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) { assert(compiler->compGeneratingEpilog); - // 
We're going to generate an unwindable instruction. If not, we need to optimize this. - if (!*pUnwindStarted) - { - compiler->unwindBegEpilog(); - *pUnwindStarted = true; - } - regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; if (isFramePointerUsed()) @@ -6343,13 +6321,28 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512)) { frameType = 1; + if (compiler->compLocallocUsed) + { + // Restore sp from fp + // mov sp, fp + inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE); + compiler->unwindSetFrameReg(REG_FPBASE, 0); + } regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP. calleeSaveSPOffset = totalFrameSize - genCountBits(regsToRestoreMask) * REGSIZE_BYTES; + } else if (totalFrameSize <= 512) { frameType = 2; + if (compiler->compLocallocUsed) + { + // Restore sp from fp + // sub sp, fp, #outsz + getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, compiler->lvaOutgoingArgSpaceSize); + compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize); + } regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP. calleeSaveSPOffset = totalFrameSize - genCountBits(regsToRestoreMask) * REGSIZE_BYTES; @@ -6381,12 +6374,22 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned; assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == REGSIZE_BYTES)); - // Generate: - // add sp,sp,#outsz ; if #outsz is not 16-byte aligned, we need to be more careful - int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2; - assert(spAdjustment3 > 0); - assert((spAdjustment3 % 16) == 0); - genStackPointerAdjustment(spAdjustment3, REG_IP0, nullptr); + if (compiler->compLocallocUsed) + { + // Restore sp from fp. 
No need to update sp after this since we've set up fp before adjusting sp in prolog. + // sub sp, fp, #alignmentAdjustment2 + getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2); + compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2); + } + else + { + // Generate: + // add sp,sp,#outsz ; if #outsz is not 16-byte aligned, we need to be more careful + int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2; + assert(spAdjustment3 > 0); + assert((spAdjustment3 % 16) == 0); + genStackPointerAdjustment(spAdjustment3, REG_IP0, nullptr); + } // Generate: // ldp fp,lr,[sp] @@ -6395,6 +6398,14 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool } else { + if (compiler->compLocallocUsed) + { + // Restore sp from fp + // sub sp, fp, #outsz + getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, compiler->lvaOutgoingArgSpaceSize); + compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize); + } + // Generate: // ldp fp,lr,[sp,#outsz] // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if it's large @@ -9392,33 +9403,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0); - // We delay starting the unwind codes until we have an instruction which we know - // needs an unwind code. - - bool unwindStarted = false; - - // Tear down the stack frame + compiler->unwindBegEpilog(); - if (compiler->compLocallocUsed) - { - if (!unwindStarted) - { - compiler->unwindBegEpilog(); - unwindStarted = true; - } - // mov FP into SP - inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE); - compiler->unwindSetFrameReg(REG_FPBASE, 0); - } - - genPopCalleeSavedRegistersAndFreeLclFrame(jmpEpilog, &unwindStarted); - - if (!unwindStarted) - { - // If we haven't generated anything yet, we're certainly going to generate at least one instruction next. 
- compiler->unwindBegEpilog(); - unwindStarted = true; - } + genPopCalleeSavedRegistersAndFreeLclFrame(jmpEpilog); if (jmpEpilog) { diff --git a/tests/arm64/Tests.lst b/tests/arm64/Tests.lst index c9153af99f..e0fc9b31f2 100644 --- a/tests/arm64/Tests.lst +++ b/tests/arm64/Tests.lst @@ -11155,98 +11155,98 @@ RelativePath=JIT\jit64\localloc\call\call03_dynamic\call03_dynamic.exe WorkingDir=JIT\jit64\localloc\call\call03_dynamic Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [call03_large.exe_1620] RelativePath=JIT\jit64\localloc\call\call03_large\call03_large.exe WorkingDir=JIT\jit64\localloc\call\call03_large Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [call03_small.exe_1621] RelativePath=JIT\jit64\localloc\call\call03_small\call03_small.exe WorkingDir=JIT\jit64\localloc\call\call03_small Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [call04_dynamic.exe_1622] RelativePath=JIT\jit64\localloc\call\call04_dynamic\call04_dynamic.exe WorkingDir=JIT\jit64\localloc\call\call04_dynamic Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [call04_large.exe_1623] RelativePath=JIT\jit64\localloc\call\call04_large\call04_large.exe WorkingDir=JIT\jit64\localloc\call\call04_large Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [call04_small.exe_1624] RelativePath=JIT\jit64\localloc\call\call04_small\call04_small.exe WorkingDir=JIT\jit64\localloc\call\call04_small Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [call05_dynamic.exe_1625] RelativePath=JIT\jit64\localloc\call\call05_dynamic\call05_dynamic.exe WorkingDir=JIT\jit64\localloc\call\call05_dynamic 
Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [call05_large.exe_1626] RelativePath=JIT\jit64\localloc\call\call05_large\call05_large.exe WorkingDir=JIT\jit64\localloc\call\call05_large Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [call05_small.exe_1627] RelativePath=JIT\jit64\localloc\call\call05_small\call05_small.exe WorkingDir=JIT\jit64\localloc\call\call05_small Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [call06_dynamic.exe_1628] RelativePath=JIT\jit64\localloc\call\call06_dynamic\call06_dynamic.exe WorkingDir=JIT\jit64\localloc\call\call06_dynamic Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [call06_large.exe_1629] RelativePath=JIT\jit64\localloc\call\call06_large\call06_large.exe WorkingDir=JIT\jit64\localloc\call\call06_large Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [call06_small.exe_1630] RelativePath=JIT\jit64\localloc\call\call06_small\call06_small.exe WorkingDir=JIT\jit64\localloc\call\call06_small Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [call07_dynamic.exe_1631] RelativePath=JIT\jit64\localloc\call\call07_dynamic\call07_dynamic.exe WorkingDir=JIT\jit64\localloc\call\call07_dynamic Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [call07_small.exe_1632] RelativePath=JIT\jit64\localloc\call\call07_small\call07_small.exe WorkingDir=JIT\jit64\localloc\call\call07_small Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [eh01_dynamic.exe_1633] 
RelativePath=JIT\jit64\localloc\eh\eh01_dynamic\eh01_dynamic.exe @@ -26310,7 +26310,7 @@ RelativePath=JIT\Methodical\localloc\call\call01_small\call01_small.exe WorkingDir=JIT\Methodical\localloc\call\call01_small Expected=100 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS HostStyle=Any [verify01_dynamic.exe_3817] RelativePath=JIT\Methodical\localloc\verify\verify01_dynamic\verify01_dynamic.exe |