diff options
author | Egor Chesakov <Egor.Chesakov@microsoft.com> | 2018-12-05 15:29:48 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-12-05 15:29:48 -0800 |
commit | f5f19dbbc2612390d145bfa7c896784fbc643436 (patch) | |
tree | 2e2b51934562d4873800d36b2ca691a475b61069 | |
parent | 5bb1b41dd19bca8f25b6d048c27d6708b86326cb (diff) | |
download | coreclr-f5f19dbbc2612390d145bfa7c896784fbc643436.tar.gz coreclr-f5f19dbbc2612390d145bfa7c896784fbc643436.tar.bz2 coreclr-f5f19dbbc2612390d145bfa7c896784fbc643436.zip |
Refactor genAllocLclFrame into two architecture-specific functions (#21074)
-rw-r--r-- | src/jit/codegenarmarch.cpp | 134 | ||||
-rw-r--r-- | src/jit/codegencommon.cpp | 284 | ||||
-rw-r--r-- | src/jit/codegenxarch.cpp | 183 |
3 files changed, 316 insertions, 285 deletions
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp index bba8c6a1de..9b6132802f 100644 --- a/src/jit/codegenarmarch.cpp +++ b/src/jit/codegenarmarch.cpp @@ -3838,4 +3838,138 @@ void CodeGen::genStructReturn(GenTree* treeNode) } // op1 must be multi-reg GT_CALL } + +//------------------------------------------------------------------------ +// genAllocLclFrame: Probe the stack and allocate the local stack frame: subtract from SP. +// +// Notes: +// On ARM64, this only does the probing; allocating the frame is done when +// callee-saved registers are saved. +// +void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) +{ + assert(compiler->compGeneratingProlog); + + if (frameSize == 0) + { + return; + } + + const target_size_t pageSize = compiler->eeGetPageSize(); + + assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg)); + + if (frameSize < pageSize) + { +#ifdef _TARGET_ARM_ + // Frame size is (0x0008..0x1000) + inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE); +#endif // _TARGET_ARM_ + } + else if (frameSize < compiler->getVeryLargeFrameSize()) + { + // Frame size is (0x1000..0x3000) + + instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)pageSize); + getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg); + regSet.verifyRegUsed(initReg); + *pInitRegZeroed = false; // The initReg does not contain zero + + if (frameSize >= 0x2000) + { + instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -2 * (ssize_t)pageSize); + getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg); + regSet.verifyRegUsed(initReg); + } + +#ifdef _TARGET_ARM64_ + compiler->unwindPadding(); +#else // !_TARGET_ARM64_ + instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize); + compiler->unwindPadding(); + getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, REG_SPBASE, REG_SPBASE, initReg); +#endif // !_TARGET_ARM64_ + } + else + { + // Frame size >= 0x3000 + assert(frameSize >= compiler->getVeryLargeFrameSize()); + + // Emit the following sequence to 'tickle' the pages. + // Note it is important that stack pointer not change until this is + // complete since the tickles could cause a stack overflow, and we + // need to be able to crawl the stack afterward (which means the + // stack pointer needs to be known). + + instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg); + + // + // Can't have a label inside the ReJIT padding area + // + genPrologPadForReJit(); + + // TODO-ARM64-Bug?: set the availMask properly! + regMaskTP availMask = + (regSet.rsGetModifiedRegsMask() & RBM_ALLINT) | RBM_R12 | RBM_LR; // Set of available registers + availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live + availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg + + regNumber rOffset = initReg; + regNumber rLimit; + regNumber rTemp; + regMaskTP tempMask; + + // We pick the next lowest register number for rTemp + noway_assert(availMask != RBM_NONE); + tempMask = genFindLowestBit(availMask); + rTemp = genRegNumFromMask(tempMask); + availMask &= ~tempMask; + + // We pick the next lowest register number for rLimit + noway_assert(availMask != RBM_NONE); + tempMask = genFindLowestBit(availMask); + rLimit = genRegNumFromMask(tempMask); + availMask &= ~tempMask; + + // TODO-LdStArch-Bug?: review this. The first time we load from [sp+0] which will always succeed. That doesn't + // make sense. + // TODO-ARM64-CQ: we could probably use ZR on ARM64 instead of rTemp. + // + // mov rLimit, -frameSize + // loop: + // ldr rTemp, [sp+rOffset] + // sub rOffset, 0x1000 // Note that 0x1000 on ARM32 uses the funky Thumb immediate encoding + // cmp rOffset, rLimit + // jge loop + noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int + instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(int)frameSize); + getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, rTemp, REG_SPBASE, rOffset); + regSet.verifyRegUsed(rTemp); +#if defined(_TARGET_ARM_) + getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rOffset, pageSize); +#elif defined(_TARGET_ARM64_) + getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize); +#endif // _TARGET_ARM64_ + getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rOffset, rLimit); + getEmitter()->emitIns_J(INS_bhi, NULL, -4); + + *pInitRegZeroed = false; // The initReg does not contain zero + + compiler->unwindPadding(); + +#ifdef _TARGET_ARM_ + inst_RV_RV(INS_add, REG_SPBASE, rLimit, TYP_I_IMPL); +#endif // _TARGET_ARM_ + } + +#ifdef _TARGET_ARM_ + compiler->unwindAllocStack(frameSize); + + if (!doubleAlignOrFramePointerUsed()) + { + psiAdjustStackLevel(frameSize); + } +#endif // _TARGET_ARM_ +} + #endif // _TARGET_ARMARCH_ diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index baaacaab15..e90a7f6567 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -5277,290 +5277,6 @@ void CodeGen::genPushCalleeSavedRegisters() #endif // _TARGET_* } -/*----------------------------------------------------------------------------- - * - * Probe the stack and allocate the local stack frame: subtract from SP. - * On ARM64, this only does the probing; allocating the frame is done when callee-saved registers are saved. - */ - -void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) -{ - assert(compiler->compGeneratingProlog); - - if (frameSize == 0) - { - return; - } - - const target_size_t pageSize = compiler->eeGetPageSize(); - -#ifdef _TARGET_ARM_ - assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg)); -#endif // _TARGET_ARM_ - -#ifdef _TARGET_XARCH_ - if (frameSize == REGSIZE_BYTES) - { - // Frame size is the same as register size. - inst_RV(INS_push, REG_EAX, TYP_I_IMPL); - } - else -#endif // _TARGET_XARCH_ - if (frameSize < pageSize) - { -#ifndef _TARGET_ARM64_ - // Frame size is (0x0008..0x1000) - inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE); -#endif // !_TARGET_ARM64_ - } - else if (frameSize < compiler->getVeryLargeFrameSize()) - { - // Frame size is (0x1000..0x3000) - CLANG_FORMAT_COMMENT_ANCHOR; - -#if CPU_LOAD_STORE_ARCH - instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)pageSize); - getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg); - regSet.verifyRegUsed(initReg); - *pInitRegZeroed = false; // The initReg does not contain zero -#else - getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, -(int)pageSize); -#endif - - if (frameSize >= 0x2000) - { -#if CPU_LOAD_STORE_ARCH - instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -2 * (ssize_t)pageSize); - getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg); - regSet.verifyRegUsed(initReg); -#else - getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, -2 * (int)pageSize); -#endif - } - -#ifdef _TARGET_ARM64_ - compiler->unwindPadding(); -#else // !_TARGET_ARM64_ -#if CPU_LOAD_STORE_ARCH - instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize); - compiler->unwindPadding(); - getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, REG_SPBASE, REG_SPBASE, initReg); -#else - inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE); -#endif -#endif // !_TARGET_ARM64_ - } - else - { - // Frame size >= 0x3000 - assert(frameSize >= compiler->getVeryLargeFrameSize()); - - // Emit the following sequence to 'tickle' the pages. - // Note it is important that stack pointer not change until this is - // complete since the tickles could cause a stack overflow, and we - // need to be able to crawl the stack afterward (which means the - // stack pointer needs to be known). - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef _TARGET_XARCH_ - bool pushedStubParam = false; - if (compiler->info.compPublishStubParam && (REG_SECRET_STUB_PARAM == initReg)) - { - // push register containing the StubParam - inst_RV(INS_push, REG_SECRET_STUB_PARAM, TYP_I_IMPL); - pushedStubParam = true; - } -#endif // !_TARGET_XARCH_ - -#if CPU_LOAD_STORE_ARCH || !defined(_TARGET_UNIX_) - instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg); -#endif - - // - // Can't have a label inside the ReJIT padding area - // - genPrologPadForReJit(); - -#if CPU_LOAD_STORE_ARCH - - // TODO-ARM64-Bug?: set the availMask properly! - regMaskTP availMask = - (regSet.rsGetModifiedRegsMask() & RBM_ALLINT) | RBM_R12 | RBM_LR; // Set of available registers - availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live - availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg - - regNumber rOffset = initReg; - regNumber rLimit; - regNumber rTemp; - regMaskTP tempMask; - - // We pick the next lowest register number for rTemp - noway_assert(availMask != RBM_NONE); - tempMask = genFindLowestBit(availMask); - rTemp = genRegNumFromMask(tempMask); - availMask &= ~tempMask; - - // We pick the next lowest register number for rLimit - noway_assert(availMask != RBM_NONE); - tempMask = genFindLowestBit(availMask); - rLimit = genRegNumFromMask(tempMask); - availMask &= ~tempMask; - - // TODO-LdStArch-Bug?: review this. The first time we load from [sp+0] which will always succeed. That doesn't - // make sense. - // TODO-ARM64-CQ: we could probably use ZR on ARM64 instead of rTemp. - // - // mov rLimit, -frameSize - // loop: - // ldr rTemp, [sp+rOffset] - // sub rOffset, 0x1000 // Note that 0x1000 on ARM32 uses the funky Thumb immediate encoding - // cmp rOffset, rLimit - // jge loop - noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int - instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(int)frameSize); - getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, rTemp, REG_SPBASE, rOffset); - regSet.verifyRegUsed(rTemp); -#if defined(_TARGET_ARM_) - getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rOffset, pageSize); -#elif defined(_TARGET_ARM64_) - getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize); -#endif // _TARGET_ARM64_ - getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rOffset, rLimit); - getEmitter()->emitIns_J(INS_bhi, NULL, -4); - -#else // !CPU_LOAD_STORE_ARCH - -#ifndef _TARGET_UNIX_ - // Code size for each instruction. We need this because the - // backward branch is hard-coded with the number of bytes to branch. - // The encoding differs based on the architecture and what register is - // used (namely, using RAX has a smaller encoding). - // - // loop: - // For x86 - // test [esp + eax], eax 3 - // sub eax, 0x1000 5 - // cmp EAX, -frameSize 5 - // jge loop 2 - // - // For AMD64 using RAX - // test [rsp + rax], rax 4 - // sub rax, 0x1000 6 - // cmp rax, -frameSize 6 - // jge loop 2 - // - // For AMD64 using RBP - // test [rsp + rbp], rbp 4 - // sub rbp, 0x1000 7 - // cmp rbp, -frameSize 7 - // jge loop 2 - - getEmitter()->emitIns_R_ARR(INS_TEST, EA_PTRSIZE, initReg, REG_SPBASE, initReg, 0); - inst_RV_IV(INS_sub, initReg, pageSize, EA_PTRSIZE); - inst_RV_IV(INS_cmp, initReg, -((ssize_t)frameSize), EA_PTRSIZE); - - int bytesForBackwardJump; -#ifdef _TARGET_AMD64_ - assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets. - bytesForBackwardJump = ((initReg == REG_EAX) ? -18 : -20); -#else // !_TARGET_AMD64_ - assert(initReg == REG_EAX); - bytesForBackwardJump = -15; -#endif // !_TARGET_AMD64_ - - inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop -#else // _TARGET_UNIX_ - // Code size for each instruction. We need this because the - // backward branch is hard-coded with the number of bytes to branch. - // The encoding differs based on the architecture and what register is - // used (namely, using RAX has a smaller encoding). - // - // For x86 - // lea eax, [esp - frameSize] - // loop: - // lea esp, [esp - pageSize] 7 - // test [esp], eax 3 - // cmp esp, eax 2 - // jge loop 2 - // lea rsp, [rbp + frameSize] - // - // For AMD64 using RAX - // lea rax, [rsp - frameSize] - // loop: - // lea rsp, [rsp - pageSize] 8 - // test [rsp], rax 4 - // cmp rsp, rax 3 - // jge loop 2 - // lea rsp, [rax + frameSize] - // - // For AMD64 using RBP - // lea rbp, [rsp - frameSize] - // loop: - // lea rsp, [rsp - pageSize] 8 - // test [rsp], rbp 4 - // cmp rsp, rbp 3 - // jge loop 2 - // lea rsp, [rbp + frameSize] - - int sPageSize = (int)pageSize; - - getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, initReg, REG_SPBASE, -((ssize_t)frameSize)); // get frame border - - getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -sPageSize); - getEmitter()->emitIns_R_AR(INS_TEST, EA_PTRSIZE, initReg, REG_SPBASE, 0); - inst_RV_RV(INS_cmp, REG_SPBASE, initReg); - - int bytesForBackwardJump; -#ifdef _TARGET_AMD64_ - assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets. - bytesForBackwardJump = -17; -#else // !_TARGET_AMD64_ - assert(initReg == REG_EAX); - bytesForBackwardJump = -14; -#endif // !_TARGET_AMD64_ - - inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop - - getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, initReg, frameSize); // restore stack pointer -#endif // _TARGET_UNIX_ - -#endif // !CPU_LOAD_STORE_ARCH - - *pInitRegZeroed = false; // The initReg does not contain zero - -#ifdef _TARGET_XARCH_ - if (pushedStubParam) - { - // pop eax - inst_RV(INS_pop, REG_SECRET_STUB_PARAM, TYP_I_IMPL); - regSet.verifyRegUsed(REG_SECRET_STUB_PARAM); - } -#endif // _TARGET_XARCH_ - -#if CPU_LOAD_STORE_ARCH - compiler->unwindPadding(); -#endif - -#if CPU_LOAD_STORE_ARCH -#ifndef _TARGET_ARM64_ - inst_RV_RV(INS_add, REG_SPBASE, rLimit, TYP_I_IMPL); -#endif // !_TARGET_ARM64_ -#else - // sub esp, frameSize 6 - inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE); -#endif - } - -#ifndef _TARGET_ARM64_ - compiler->unwindAllocStack(frameSize); - - if (!doubleAlignOrFramePointerUsed()) - { - psiAdjustStackLevel(frameSize); - } -#endif // !_TARGET_ARM64_ -} - #if defined(_TARGET_ARM_) void CodeGen::genPushFltRegs(regMaskTP regMask) diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index e81f5bab03..f63fe142ce 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -2158,13 +2158,194 @@ void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode) offset += genTypeSize(type); } - varDsc->lvRegNum = REG_STK; + varDsc->lvRegNum = REG_STK; #else // !UNIX_AMD64_ABI && !_TARGET_X86_ assert(!"Unreached"); #endif // !UNIX_AMD64_ABI && !_TARGET_X86_ } //------------------------------------------------------------------------ +// genAllocLclFrame: Probe the stack and allocate the local stack frame: subtract from SP. +// +void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) +{ + assert(compiler->compGeneratingProlog); + + if (frameSize == 0) + { + return; + } + + const target_size_t pageSize = compiler->eeGetPageSize(); + + if (frameSize == REGSIZE_BYTES) + { + // Frame size is the same as register size. + inst_RV(INS_push, REG_EAX, TYP_I_IMPL); + } + else if (frameSize < pageSize) + { + // Frame size is (0x0008..0x1000) + inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE); + } + else if (frameSize < compiler->getVeryLargeFrameSize()) + { + // Frame size is (0x1000..0x3000) + + getEmitter()->emitIns_AR_R(INS_test, EA_PTRSIZE, REG_EAX, REG_SPBASE, -(int)pageSize); + + if (frameSize >= 0x2000) + { + getEmitter()->emitIns_AR_R(INS_test, EA_PTRSIZE, REG_EAX, REG_SPBASE, -2 * (int)pageSize); + } + + inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE); + } + else + { + // Frame size >= 0x3000 + assert(frameSize >= compiler->getVeryLargeFrameSize()); + + // Emit the following sequence to 'tickle' the pages. + // Note it is important that stack pointer not change until this is + // complete since the tickles could cause a stack overflow, and we + // need to be able to crawl the stack afterward (which means the + // stack pointer needs to be known). + + bool pushedStubParam = false; + if (compiler->info.compPublishStubParam && (REG_SECRET_STUB_PARAM == initReg)) + { + // push register containing the StubParam + inst_RV(INS_push, REG_SECRET_STUB_PARAM, TYP_I_IMPL); + pushedStubParam = true; + } + +#ifndef _TARGET_UNIX_ + instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg); +#endif + + // + // Can't have a label inside the ReJIT padding area + // + genPrologPadForReJit(); + +#ifndef _TARGET_UNIX_ + // Code size for each instruction. We need this because the + // backward branch is hard-coded with the number of bytes to branch. + // The encoding differs based on the architecture and what register is + // used (namely, using RAX has a smaller encoding). + // + // loop: + // For x86 + // test [esp + eax], eax 3 + // sub eax, 0x1000 5 + // cmp EAX, -frameSize 5 + // jge loop 2 + // + // For AMD64 using RAX + // test [rsp + rax], rax 4 + // sub rax, 0x1000 6 + // cmp rax, -frameSize 6 + // jge loop 2 + // + // For AMD64 using RBP + // test [rsp + rbp], rbp 4 + // sub rbp, 0x1000 7 + // cmp rbp, -frameSize 7 + // jge loop 2 + + getEmitter()->emitIns_R_ARR(INS_test, EA_PTRSIZE, initReg, REG_SPBASE, initReg, 0); + inst_RV_IV(INS_sub, initReg, pageSize, EA_PTRSIZE); + inst_RV_IV(INS_cmp, initReg, -((ssize_t)frameSize), EA_PTRSIZE); + + int bytesForBackwardJump; +#ifdef _TARGET_AMD64_ + assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets. + bytesForBackwardJump = ((initReg == REG_EAX) ? -18 : -20); +#else // !_TARGET_AMD64_ + assert(initReg == REG_EAX); + bytesForBackwardJump = -15; +#endif // !_TARGET_AMD64_ + + // Branch backwards to start of loop + inst_IV(INS_jge, bytesForBackwardJump); +#else // _TARGET_UNIX_ + // Code size for each instruction. We need this because the + // backward branch is hard-coded with the number of bytes to branch. + // The encoding differs based on the architecture and what register is + // used (namely, using RAX has a smaller encoding). + // + // For x86 + // lea eax, [esp - frameSize] + // loop: + // lea esp, [esp - pageSize] 7 + // test [esp], eax 3 + // cmp esp, eax 2 + // jge loop 2 + // lea rsp, [rbp + frameSize] + // + // For AMD64 using RAX + // lea rax, [rsp - frameSize] + // loop: + // lea rsp, [rsp - pageSize] 8 + // test [rsp], rax 4 + // cmp rsp, rax 3 + // jge loop 2 + // lea rsp, [rax + frameSize] + // + // For AMD64 using RBP + // lea rbp, [rsp - frameSize] + // loop: + // lea rsp, [rsp - pageSize] 8 + // test [rsp], rbp 4 + // cmp rsp, rbp 3 + // jge loop 2 + // lea rsp, [rbp + frameSize] + + int sPageSize = (int)pageSize; + + getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, initReg, REG_SPBASE, -((ssize_t)frameSize)); // get frame border + + getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -sPageSize); + getEmitter()->emitIns_R_AR(INS_test, EA_PTRSIZE, initReg, REG_SPBASE, 0); + inst_RV_RV(INS_cmp, REG_SPBASE, initReg); + + int bytesForBackwardJump; +#ifdef _TARGET_AMD64_ + assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets. + bytesForBackwardJump = -17; +#else // !_TARGET_AMD64_ + assert(initReg == REG_EAX); + bytesForBackwardJump = -14; +#endif // !_TARGET_AMD64_ + + inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop + + getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, initReg, frameSize); // restore stack pointer +#endif // _TARGET_UNIX_ + + *pInitRegZeroed = false; // The initReg does not contain zero + + if (pushedStubParam) + { + // pop eax + inst_RV(INS_pop, REG_SECRET_STUB_PARAM, TYP_I_IMPL); + regSet.verifyRegUsed(REG_SECRET_STUB_PARAM); + } + + // sub esp, frameSize 6 + inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE); + } + + compiler->unwindAllocStack(frameSize); + + if (!doubleAlignOrFramePointerUsed()) + { + psiAdjustStackLevel(frameSize); + } +} + +//------------------------------------------------------------------------ // genLclHeap: Generate code for localloc. // // Arguments: |