summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Egor Chesakov <Egor.Chesakov@microsoft.com> 2018-12-05 15:29:48 -0800
committer GitHub <noreply@github.com> 2018-12-05 15:29:48 -0800
commit f5f19dbbc2612390d145bfa7c896784fbc643436 (patch)
tree 2e2b51934562d4873800d36b2ca691a475b61069
parent 5bb1b41dd19bca8f25b6d048c27d6708b86326cb (diff)
download coreclr-f5f19dbbc2612390d145bfa7c896784fbc643436.tar.gz
coreclr-f5f19dbbc2612390d145bfa7c896784fbc643436.tar.bz2
coreclr-f5f19dbbc2612390d145bfa7c896784fbc643436.zip
Refactor genAllocLclFrame into two architecture-specific functions (#21074)
-rw-r--r-- src/jit/codegenarmarch.cpp 134
-rw-r--r-- src/jit/codegencommon.cpp 284
-rw-r--r-- src/jit/codegenxarch.cpp 183
3 files changed, 316 insertions, 285 deletions
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp
index bba8c6a1de..9b6132802f 100644
--- a/src/jit/codegenarmarch.cpp
+++ b/src/jit/codegenarmarch.cpp
@@ -3838,4 +3838,138 @@ void CodeGen::genStructReturn(GenTree* treeNode)
} // op1 must be multi-reg GT_CALL
}
+
+//------------------------------------------------------------------------
+// genAllocLclFrame: Probe the stack and allocate the local stack frame: subtract from SP.
+//
+// Notes:
+// On ARM64, this only does the probing; allocating the frame is done when callee-saved registers are saved.
+// For frameSize >= pageSize, initReg is used as a scratch register and *pInitRegZeroed is set to false.
+//
+void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
+{
+ assert(compiler->compGeneratingProlog);
+
+ if (frameSize == 0)
+ {
+ return;
+ }
+
+ const target_size_t pageSize = compiler->eeGetPageSize();
+
+ assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
+
+ if (frameSize < pageSize)
+ {
+#ifdef _TARGET_ARM_
+ // Frame size is (0x0008..0x1000)
+ inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
+#endif // _TARGET_ARM_
+ }
+ else if (frameSize < compiler->getVeryLargeFrameSize())
+ {
+ // Frame size is (0x1000..0x3000)
+
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)pageSize);
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
+ regSet.verifyRegUsed(initReg);
+ *pInitRegZeroed = false; // The initReg does not contain zero
+
+ if (frameSize >= 0x2000)
+ {
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -2 * (ssize_t)pageSize);
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
+ regSet.verifyRegUsed(initReg);
+ }
+
+#ifdef _TARGET_ARM64_
+ compiler->unwindPadding();
+#else // !_TARGET_ARM64_
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize);
+ compiler->unwindPadding();
+ getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, REG_SPBASE, REG_SPBASE, initReg);
+#endif // !_TARGET_ARM64_
+ }
+ else
+ {
+ // Frame size >= 0x3000
+ assert(frameSize >= compiler->getVeryLargeFrameSize());
+
+ // Emit the following sequence to 'tickle' the pages.
+ // Note it is important that stack pointer not change until this is
+ // complete since the tickles could cause a stack overflow, and we
+ // need to be able to crawl the stack afterward (which means the
+ // stack pointer needs to be known).
+
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+
+ //
+ // Can't have a label inside the ReJIT padding area
+ //
+ genPrologPadForReJit();
+
+ // TODO-ARM64-Bug?: set the availMask properly!
+ regMaskTP availMask =
+ (regSet.rsGetModifiedRegsMask() & RBM_ALLINT) | RBM_R12 | RBM_LR; // Set of available registers
+ availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live
+ availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg
+
+ regNumber rOffset = initReg;
+ regNumber rLimit;
+ regNumber rTemp;
+ regMaskTP tempMask;
+
+ // We pick the lowest available register number for rTemp
+ noway_assert(availMask != RBM_NONE);
+ tempMask = genFindLowestBit(availMask);
+ rTemp = genRegNumFromMask(tempMask);
+ availMask &= ~tempMask;
+
+ // We pick the next lowest register number for rLimit
+ noway_assert(availMask != RBM_NONE);
+ tempMask = genFindLowestBit(availMask);
+ rLimit = genRegNumFromMask(tempMask);
+ availMask &= ~tempMask;
+
+ // TODO-LdStArch-Bug?: review this. The first time we load from [sp+0] which will always succeed. That doesn't
+ // make sense.
+ // TODO-ARM64-CQ: we could probably use ZR on ARM64 instead of rTemp.
+ //
+ // mov rLimit, -frameSize
+ // loop:
+ // ldr rTemp, [sp+rOffset]
+ // sub rOffset, pageSize // Note that 0x1000 on ARM32 uses the funky Thumb immediate encoding
+ // cmp rOffset, rLimit
+ // bhi loop // emitted below as INS_bhi with a hard-coded backward displacement of -4
+ noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(int)frameSize);
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, rTemp, REG_SPBASE, rOffset);
+ regSet.verifyRegUsed(rTemp);
+#if defined(_TARGET_ARM_)
+ getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rOffset, pageSize);
+#elif defined(_TARGET_ARM64_)
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize);
+#endif // _TARGET_ARM64_
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rOffset, rLimit);
+ getEmitter()->emitIns_J(INS_bhi, NULL, -4);
+
+ *pInitRegZeroed = false; // The initReg does not contain zero
+
+ compiler->unwindPadding();
+
+#ifdef _TARGET_ARM_
+ inst_RV_RV(INS_add, REG_SPBASE, rLimit, TYP_I_IMPL);
+#endif // _TARGET_ARM_
+ }
+
+#ifdef _TARGET_ARM_
+ compiler->unwindAllocStack(frameSize);
+
+ if (!doubleAlignOrFramePointerUsed())
+ {
+ psiAdjustStackLevel(frameSize);
+ }
+#endif // _TARGET_ARM_
+}
+
#endif // _TARGET_ARMARCH_
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index baaacaab15..e90a7f6567 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -5277,290 +5277,6 @@ void CodeGen::genPushCalleeSavedRegisters()
#endif // _TARGET_*
}
-/*-----------------------------------------------------------------------------
- *
- * Probe the stack and allocate the local stack frame: subtract from SP.
- * On ARM64, this only does the probing; allocating the frame is done when callee-saved registers are saved.
- */
-
-void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
-{
- assert(compiler->compGeneratingProlog);
-
- if (frameSize == 0)
- {
- return;
- }
-
- const target_size_t pageSize = compiler->eeGetPageSize();
-
-#ifdef _TARGET_ARM_
- assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
-#endif // _TARGET_ARM_
-
-#ifdef _TARGET_XARCH_
- if (frameSize == REGSIZE_BYTES)
- {
- // Frame size is the same as register size.
- inst_RV(INS_push, REG_EAX, TYP_I_IMPL);
- }
- else
-#endif // _TARGET_XARCH_
- if (frameSize < pageSize)
- {
-#ifndef _TARGET_ARM64_
- // Frame size is (0x0008..0x1000)
- inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
-#endif // !_TARGET_ARM64_
- }
- else if (frameSize < compiler->getVeryLargeFrameSize())
- {
- // Frame size is (0x1000..0x3000)
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if CPU_LOAD_STORE_ARCH
- instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)pageSize);
- getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
- regSet.verifyRegUsed(initReg);
- *pInitRegZeroed = false; // The initReg does not contain zero
-#else
- getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, -(int)pageSize);
-#endif
-
- if (frameSize >= 0x2000)
- {
-#if CPU_LOAD_STORE_ARCH
- instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -2 * (ssize_t)pageSize);
- getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
- regSet.verifyRegUsed(initReg);
-#else
- getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, -2 * (int)pageSize);
-#endif
- }
-
-#ifdef _TARGET_ARM64_
- compiler->unwindPadding();
-#else // !_TARGET_ARM64_
-#if CPU_LOAD_STORE_ARCH
- instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize);
- compiler->unwindPadding();
- getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, REG_SPBASE, REG_SPBASE, initReg);
-#else
- inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
-#endif
-#endif // !_TARGET_ARM64_
- }
- else
- {
- // Frame size >= 0x3000
- assert(frameSize >= compiler->getVeryLargeFrameSize());
-
- // Emit the following sequence to 'tickle' the pages.
- // Note it is important that stack pointer not change until this is
- // complete since the tickles could cause a stack overflow, and we
- // need to be able to crawl the stack afterward (which means the
- // stack pointer needs to be known).
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef _TARGET_XARCH_
- bool pushedStubParam = false;
- if (compiler->info.compPublishStubParam && (REG_SECRET_STUB_PARAM == initReg))
- {
- // push register containing the StubParam
- inst_RV(INS_push, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
- pushedStubParam = true;
- }
-#endif // !_TARGET_XARCH_
-
-#if CPU_LOAD_STORE_ARCH || !defined(_TARGET_UNIX_)
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
-#endif
-
- //
- // Can't have a label inside the ReJIT padding area
- //
- genPrologPadForReJit();
-
-#if CPU_LOAD_STORE_ARCH
-
- // TODO-ARM64-Bug?: set the availMask properly!
- regMaskTP availMask =
- (regSet.rsGetModifiedRegsMask() & RBM_ALLINT) | RBM_R12 | RBM_LR; // Set of available registers
- availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live
- availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg
-
- regNumber rOffset = initReg;
- regNumber rLimit;
- regNumber rTemp;
- regMaskTP tempMask;
-
- // We pick the next lowest register number for rTemp
- noway_assert(availMask != RBM_NONE);
- tempMask = genFindLowestBit(availMask);
- rTemp = genRegNumFromMask(tempMask);
- availMask &= ~tempMask;
-
- // We pick the next lowest register number for rLimit
- noway_assert(availMask != RBM_NONE);
- tempMask = genFindLowestBit(availMask);
- rLimit = genRegNumFromMask(tempMask);
- availMask &= ~tempMask;
-
- // TODO-LdStArch-Bug?: review this. The first time we load from [sp+0] which will always succeed. That doesn't
- // make sense.
- // TODO-ARM64-CQ: we could probably use ZR on ARM64 instead of rTemp.
- //
- // mov rLimit, -frameSize
- // loop:
- // ldr rTemp, [sp+rOffset]
- // sub rOffset, 0x1000 // Note that 0x1000 on ARM32 uses the funky Thumb immediate encoding
- // cmp rOffset, rLimit
- // jge loop
- noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int
- instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(int)frameSize);
- getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, rTemp, REG_SPBASE, rOffset);
- regSet.verifyRegUsed(rTemp);
-#if defined(_TARGET_ARM_)
- getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rOffset, pageSize);
-#elif defined(_TARGET_ARM64_)
- getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize);
-#endif // _TARGET_ARM64_
- getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rOffset, rLimit);
- getEmitter()->emitIns_J(INS_bhi, NULL, -4);
-
-#else // !CPU_LOAD_STORE_ARCH
-
-#ifndef _TARGET_UNIX_
- // Code size for each instruction. We need this because the
- // backward branch is hard-coded with the number of bytes to branch.
- // The encoding differs based on the architecture and what register is
- // used (namely, using RAX has a smaller encoding).
- //
- // loop:
- // For x86
- // test [esp + eax], eax 3
- // sub eax, 0x1000 5
- // cmp EAX, -frameSize 5
- // jge loop 2
- //
- // For AMD64 using RAX
- // test [rsp + rax], rax 4
- // sub rax, 0x1000 6
- // cmp rax, -frameSize 6
- // jge loop 2
- //
- // For AMD64 using RBP
- // test [rsp + rbp], rbp 4
- // sub rbp, 0x1000 7
- // cmp rbp, -frameSize 7
- // jge loop 2
-
- getEmitter()->emitIns_R_ARR(INS_TEST, EA_PTRSIZE, initReg, REG_SPBASE, initReg, 0);
- inst_RV_IV(INS_sub, initReg, pageSize, EA_PTRSIZE);
- inst_RV_IV(INS_cmp, initReg, -((ssize_t)frameSize), EA_PTRSIZE);
-
- int bytesForBackwardJump;
-#ifdef _TARGET_AMD64_
- assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets.
- bytesForBackwardJump = ((initReg == REG_EAX) ? -18 : -20);
-#else // !_TARGET_AMD64_
- assert(initReg == REG_EAX);
- bytesForBackwardJump = -15;
-#endif // !_TARGET_AMD64_
-
- inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop
-#else // _TARGET_UNIX_
- // Code size for each instruction. We need this because the
- // backward branch is hard-coded with the number of bytes to branch.
- // The encoding differs based on the architecture and what register is
- // used (namely, using RAX has a smaller encoding).
- //
- // For x86
- // lea eax, [esp - frameSize]
- // loop:
- // lea esp, [esp - pageSize] 7
- // test [esp], eax 3
- // cmp esp, eax 2
- // jge loop 2
- // lea rsp, [rbp + frameSize]
- //
- // For AMD64 using RAX
- // lea rax, [rsp - frameSize]
- // loop:
- // lea rsp, [rsp - pageSize] 8
- // test [rsp], rax 4
- // cmp rsp, rax 3
- // jge loop 2
- // lea rsp, [rax + frameSize]
- //
- // For AMD64 using RBP
- // lea rbp, [rsp - frameSize]
- // loop:
- // lea rsp, [rsp - pageSize] 8
- // test [rsp], rbp 4
- // cmp rsp, rbp 3
- // jge loop 2
- // lea rsp, [rbp + frameSize]
-
- int sPageSize = (int)pageSize;
-
- getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, initReg, REG_SPBASE, -((ssize_t)frameSize)); // get frame border
-
- getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -sPageSize);
- getEmitter()->emitIns_R_AR(INS_TEST, EA_PTRSIZE, initReg, REG_SPBASE, 0);
- inst_RV_RV(INS_cmp, REG_SPBASE, initReg);
-
- int bytesForBackwardJump;
-#ifdef _TARGET_AMD64_
- assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets.
- bytesForBackwardJump = -17;
-#else // !_TARGET_AMD64_
- assert(initReg == REG_EAX);
- bytesForBackwardJump = -14;
-#endif // !_TARGET_AMD64_
-
- inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop
-
- getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, initReg, frameSize); // restore stack pointer
-#endif // _TARGET_UNIX_
-
-#endif // !CPU_LOAD_STORE_ARCH
-
- *pInitRegZeroed = false; // The initReg does not contain zero
-
-#ifdef _TARGET_XARCH_
- if (pushedStubParam)
- {
- // pop eax
- inst_RV(INS_pop, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
- regSet.verifyRegUsed(REG_SECRET_STUB_PARAM);
- }
-#endif // _TARGET_XARCH_
-
-#if CPU_LOAD_STORE_ARCH
- compiler->unwindPadding();
-#endif
-
-#if CPU_LOAD_STORE_ARCH
-#ifndef _TARGET_ARM64_
- inst_RV_RV(INS_add, REG_SPBASE, rLimit, TYP_I_IMPL);
-#endif // !_TARGET_ARM64_
-#else
- // sub esp, frameSize 6
- inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
-#endif
- }
-
-#ifndef _TARGET_ARM64_
- compiler->unwindAllocStack(frameSize);
-
- if (!doubleAlignOrFramePointerUsed())
- {
- psiAdjustStackLevel(frameSize);
- }
-#endif // !_TARGET_ARM64_
-}
-
#if defined(_TARGET_ARM_)
void CodeGen::genPushFltRegs(regMaskTP regMask)
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index e81f5bab03..f63fe142ce 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -2158,13 +2158,194 @@ void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode)
offset += genTypeSize(type);
}
- varDsc->lvRegNum = REG_STK;
+ varDsc->lvRegNum = REG_STK;
#else // !UNIX_AMD64_ABI && !_TARGET_X86_
assert(!"Unreached");
#endif // !UNIX_AMD64_ABI && !_TARGET_X86_
}
//------------------------------------------------------------------------
+// genAllocLclFrame: Probe the stack and allocate the local stack frame: subtract from SP.
+//
+void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
+{
+ assert(compiler->compGeneratingProlog);
+
+ if (frameSize == 0)
+ {
+ return;
+ }
+
+ const target_size_t pageSize = compiler->eeGetPageSize();
+
+ if (frameSize == REGSIZE_BYTES)
+ {
+ // Frame size is the same as register size.
+ inst_RV(INS_push, REG_EAX, TYP_I_IMPL);
+ }
+ else if (frameSize < pageSize)
+ {
+ // Frame size is (0x0008..0x1000)
+ inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
+ }
+ else if (frameSize < compiler->getVeryLargeFrameSize())
+ {
+ // Frame size is (0x1000..0x3000)
+
+ getEmitter()->emitIns_AR_R(INS_test, EA_PTRSIZE, REG_EAX, REG_SPBASE, -(int)pageSize);
+
+ if (frameSize >= 0x2000)
+ {
+ getEmitter()->emitIns_AR_R(INS_test, EA_PTRSIZE, REG_EAX, REG_SPBASE, -2 * (int)pageSize);
+ }
+
+ inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
+ }
+ else
+ {
+ // Frame size >= 0x3000
+ assert(frameSize >= compiler->getVeryLargeFrameSize());
+
+ // Emit the following sequence to 'tickle' the pages.
+ // Note it is important that stack pointer not change until this is
+ // complete since the tickles could cause a stack overflow, and we
+ // need to be able to crawl the stack afterward (which means the
+ // stack pointer needs to be known).
+
+ bool pushedStubParam = false;
+ if (compiler->info.compPublishStubParam && (REG_SECRET_STUB_PARAM == initReg))
+ {
+ // push register containing the StubParam
+ inst_RV(INS_push, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
+ pushedStubParam = true;
+ }
+
+#ifndef _TARGET_UNIX_
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+#endif
+
+ //
+ // Can't have a label inside the ReJIT padding area
+ //
+ genPrologPadForReJit();
+
+#ifndef _TARGET_UNIX_
+ // Code size for each instruction. We need this because the
+ // backward branch is hard-coded with the number of bytes to branch.
+ // The encoding differs based on the architecture and what register is
+ // used (namely, using RAX has a smaller encoding).
+ //
+ // loop:
+ // For x86
+ // test [esp + eax], eax 3
+ // sub eax, 0x1000 5
+ // cmp eax, -frameSize 5
+ // jge loop 2
+ //
+ // For AMD64 using RAX
+ // test [rsp + rax], rax 4
+ // sub rax, 0x1000 6
+ // cmp rax, -frameSize 6
+ // jge loop 2
+ //
+ // For AMD64 using RBP
+ // test [rsp + rbp], rbp 4
+ // sub rbp, 0x1000 7
+ // cmp rbp, -frameSize 7
+ // jge loop 2
+
+ getEmitter()->emitIns_R_ARR(INS_test, EA_PTRSIZE, initReg, REG_SPBASE, initReg, 0);
+ inst_RV_IV(INS_sub, initReg, pageSize, EA_PTRSIZE);
+ inst_RV_IV(INS_cmp, initReg, -((ssize_t)frameSize), EA_PTRSIZE);
+
+ int bytesForBackwardJump;
+#ifdef _TARGET_AMD64_
+ assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets.
+ bytesForBackwardJump = ((initReg == REG_EAX) ? -18 : -20);
+#else // !_TARGET_AMD64_
+ assert(initReg == REG_EAX);
+ bytesForBackwardJump = -15;
+#endif // !_TARGET_AMD64_
+
+ // Branch backwards to start of loop
+ inst_IV(INS_jge, bytesForBackwardJump);
+#else // _TARGET_UNIX_
+ // Code size for each instruction. We need this because the
+ // backward branch is hard-coded with the number of bytes to branch.
+ // The encoding differs based on the architecture and what register is
+ // used (namely, using RAX has a smaller encoding).
+ //
+ // For x86
+ // lea eax, [esp - frameSize]
+ // loop:
+ // lea esp, [esp - pageSize] 7
+ // test [esp], eax 3
+ // cmp esp, eax 2
+ // jge loop 2
+ // lea esp, [eax + frameSize]
+ //
+ // For AMD64 using RAX
+ // lea rax, [rsp - frameSize]
+ // loop:
+ // lea rsp, [rsp - pageSize] 8
+ // test [rsp], rax 4
+ // cmp rsp, rax 3
+ // jge loop 2
+ // lea rsp, [rax + frameSize]
+ //
+ // For AMD64 using RBP
+ // lea rbp, [rsp - frameSize]
+ // loop:
+ // lea rsp, [rsp - pageSize] 8
+ // test [rsp], rbp 4
+ // cmp rsp, rbp 3
+ // jge loop 2
+ // lea rsp, [rbp + frameSize]
+
+ int sPageSize = (int)pageSize;
+
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, initReg, REG_SPBASE, -((ssize_t)frameSize)); // get frame border
+
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -sPageSize);
+ getEmitter()->emitIns_R_AR(INS_test, EA_PTRSIZE, initReg, REG_SPBASE, 0);
+ inst_RV_RV(INS_cmp, REG_SPBASE, initReg);
+
+ int bytesForBackwardJump;
+#ifdef _TARGET_AMD64_
+ assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets.
+ bytesForBackwardJump = -17;
+#else // !_TARGET_AMD64_
+ assert(initReg == REG_EAX);
+ bytesForBackwardJump = -14;
+#endif // !_TARGET_AMD64_
+
+ inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop
+
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, initReg, frameSize); // restore stack pointer
+#endif // _TARGET_UNIX_
+
+ *pInitRegZeroed = false; // The initReg does not contain zero
+
+ if (pushedStubParam)
+ {
+ // pop the register containing the StubParam (pushed above)
+ inst_RV(INS_pop, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
+ regSet.verifyRegUsed(REG_SECRET_STUB_PARAM);
+ }
+
+ // sub esp, frameSize 6
+ inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
+ }
+
+ compiler->unwindAllocStack(frameSize);
+
+ if (!doubleAlignOrFramePointerUsed())
+ {
+ psiAdjustStackLevel(frameSize);
+ }
+}
+
+//------------------------------------------------------------------------
// genLclHeap: Generate code for localloc.
//
// Arguments: