From 7ba4d83ee42c5b17a5a4c0dd72772a51f689575d Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Mon, 11 Dec 2017 14:52:05 -0800 Subject: Fix arm/arm64 localloc In the loop, non-initialized memory case, the stack probing code had silent bad codegen, as follows: ``` mov r4, 0x4000 subs r4, sp, r4 bvc SHORT G_M3294_IG03 movs r4, 0 G_M3294_IG03: ldr r0, [sp] sub r0, sp, 0x1000 cmp r0, r4 blo SHORT G_M3294_IG04 mov sp, r0 // Before fix, this was "mov sp, r4" b SHORT G_M3294_IG03 G_M3294_IG04: ``` --- src/jit/codegenarm.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/jit/codegenarm.cpp') diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp index 69640b739d..34424a09bc 100644 --- a/src/jit/codegenarm.cpp +++ b/src/jit/codegenarm.cpp @@ -346,7 +346,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) // There are 2 ways depending from build version to generate code for localloc: // 1) For debug build where memory should be initialized we generate loop // which invoke push {tmpReg} N times. -// 2) Fore /o build However, we tickle the pages to ensure that SP is always +// 2) For non-debug build, we tickle the pages to ensure that SP is always // valid and is in sync with the "stack guard page". Amount of iteration // is N/eeGetPageSize(). // @@ -355,7 +355,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) // 1) It's not needed to generate loop for zero size allocation // 2) For small allocation (less than 4 store) we unroll loop // 3) For allocation less than eeGetPageSize() and when it's not needed to initialize -// memory to zero, we can just increment SP. +// memory to zero, we can just decrement SP. // // Notes: Size N should be aligned to STACK_ALIGN before any allocation // @@ -529,17 +529,17 @@ void CodeGen::genLclHeap(GenTreePtr tree) // to tickle the pages to ensure that SP is always valid and is // in sync with the "stack guard page". Note that in the worst // case SP is on the last byte of the guard page. Thus you must - // touch SP+0 first not SP+0x1000. + // touch SP-0 first not SP-0x1000. // // Another subtlety is that you don't want SP to be exactly on the // boundary of the guard page because PUSH is predecrement, thus // call setup would not touch the guard page but just beyond it // // Note that we go through a few hoops so that SP never points to - // illegal pages at any time during the ticking process + // illegal pages at any time during the tickling process // // subs regCnt, SP, regCnt // regCnt now holds ultimate SP - // jb Loop // result is smaller than orignial SP (no wrap around) + // bvc Loop // result is smaller than original SP (no wrap around) // mov regCnt, #0 // Overflow, pick lowest possible value // // Loop: @@ -565,7 +565,7 @@ void CodeGen::genLclHeap(GenTreePtr tree) inst_JMP(EJ_vc, loop); // branch if the V flag is not set - // Ups... Overflow, set regCnt to lowest possible value + // Overflow, set regCnt to lowest possible value instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt); genDefineTempLabel(loop); @@ -581,7 +581,7 @@ void CodeGen::genLclHeap(GenTreePtr tree) inst_JMP(jmpLTU, done); // Update SP to be at the next page of stack that we will tickle - getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt); + getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regTmp); // Jump to loop and tickle new stack address inst_JMP(EJ_jmp, loop); -- cgit v1.2.3