summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorSaeHie Park <saehie.park@samsung.com>2017-01-09 10:39:54 +0900
committerSaeHie Park <saehie.park@samsung.com>2017-02-07 09:02:00 +0900
commit5df45286968775ffccaf01669eba7c8baa9923e1 (patch)
tree139d7e41e020159d1fac2ed72943312a9cd08a5a /src
parent65b9cb35226c5395be531554e207bb4e2096092f (diff)
downloadcoreclr-5df45286968775ffccaf01669eba7c8baa9923e1.tar.gz
coreclr-5df45286968775ffccaf01669eba7c8baa9923e1.tar.bz2
coreclr-5df45286968775ffccaf01669eba7c8baa9923e1.zip
[x86/Linux] Stack align 16 bytes for JIT code
Change JIT code to align stack in 16 byte used in modern compiler
Diffstat (limited to 'src')
-rw-r--r--src/jit/codegencommon.cpp15
-rw-r--r--src/jit/codegenxarch.cpp43
-rw-r--r--src/jit/compiler.h19
-rw-r--r--src/jit/gentree.h27
-rw-r--r--src/jit/lclvars.cpp25
-rw-r--r--src/jit/lower.cpp5
-rw-r--r--src/jit/morph.cpp115
-rw-r--r--src/jit/target.h6
8 files changed, 211 insertions, 44 deletions
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 396ea77374..b1e474b755 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -3165,12 +3165,17 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
/* Check our max stack level. Needed for fgAddCodeRef().
We need to relax the assert as our estimation won't include code-gen
stack changes (which we know don't affect fgAddCodeRef()) */
- noway_assert(getEmitter()->emitMaxStackDepth <=
- (compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
- compiler->compHndBBtabCount + // Return address for locally-called finallys
- genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
- (compiler->compTailCallUsed ? 4 : 0))); // CORINFO_HELP_TAILCALL args
+ {
+ unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
+ compiler->compHndBBtabCount + // Return address for locally-called finallys
+ genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
+ (compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args
+#if defined(UNIX_X86_ABI)
+ maxAllowedStackDepth += genTypeStSz(TYP_INT) * 3; // stack align for x86 - allow up to 3 INT's for padding
#endif
+ noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth);
+ }
+#endif // EMIT_TRACK_STACK_DEPTH
*nativeSizeOfCode = codeSize;
compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 7367dbb8f1..c88b9592ee 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -2645,16 +2645,14 @@ void CodeGen::genLclHeap(GenTreePtr tree)
// Loop:
genDefineTempLabel(loop);
-#if defined(_TARGET_AMD64_)
- // Push two 8-byte zeros. This matches the 16-byte STACK_ALIGN value.
- static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
- inst_IV(INS_push_hide, 0); // --- push 8-byte 0
- inst_IV(INS_push_hide, 0); // --- push 8-byte 0
-#elif defined(_TARGET_X86_)
- // Push a single 4-byte zero. This matches the 4-byte STACK_ALIGN value.
- static_assert_no_msg(STACK_ALIGN == REGSIZE_BYTES);
- inst_IV(INS_push_hide, 0); // --- push 4-byte 0
-#endif // _TARGET_X86_
+ static_assert_no_msg((STACK_ALIGN % REGSIZE_BYTES) == 0);
+ unsigned const count = (STACK_ALIGN / REGSIZE_BYTES);
+
+ for (unsigned i = 0; i < count; i++)
+ {
+ inst_IV(INS_push_hide, 0); // --- push REG_SIZE bytes of 0
+ }
+ // Note that the stack must always be aligned to STACK_ALIGN bytes
// Decrement the loop counter and loop if not done.
inst_RV(INS_dec, regCnt, TYP_I_IMPL);
@@ -4894,9 +4892,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
stackArgBytes += argBytes;
}
else
- {
#endif // FEATURE_PUT_STRUCT_ARG_STK
+ {
stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
}
}
@@ -5135,6 +5133,15 @@ void CodeGen::genCallInstruction(GenTreePtr node)
retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
}
+#if defined(UNIX_X86_ABI)
+ // Put back the stack pointer if there was any padding for stack alignment
+ unsigned padStackAlign = call->fgArgInfo->GetPadStackAlign();
+ if (padStackAlign != 0)
+ {
+ inst_RV_IV(INS_add, REG_SPBASE, padStackAlign * TARGET_POINTER_SIZE, EA_PTRSIZE);
+ }
+#endif // UNIX_X86_ABI
+
// if it was a pinvoke we may have needed to get the address of a label
if (genPendingCallLabel)
{
@@ -7753,6 +7760,16 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
#ifdef _TARGET_X86_
+#if defined(UNIX_X86_ABI)
+ // For each call, the first stack argument carries the padding for alignment;
+ // if this value is not zero, use it to adjust ESP before pushing the argument.
+ unsigned argPadding = putArgStk->getArgPadding();
+ if (argPadding != 0)
+ {
+ inst_RV_IV(INS_sub, REG_SPBASE, argPadding * TARGET_POINTER_SIZE, EA_PTRSIZE);
+ }
+#endif
+
if (varTypeIsStruct(targetType))
{
(void)genAdjustStackForPutArgStk(putArgStk);
@@ -8070,7 +8087,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
slotAttr = EA_BYREF;
}
- const unsigned offset = i * 4;
+ const unsigned offset = i * TARGET_POINTER_SIZE;
if (srcAddrInReg)
{
getEmitter()->emitIns_AR_R(INS_push, slotAttr, REG_NA, srcRegNum, offset);
@@ -8079,7 +8096,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
{
getEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + offset);
}
- genStackLevel += 4;
+ genStackLevel += TARGET_POINTER_SIZE;
}
#else // !defined(_TARGET_X86_)
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 167d8090b5..6f57813417 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -1186,6 +1186,11 @@ struct fgArgTabEntry
unsigned alignment; // 1 or 2 (slots/registers)
unsigned lateArgInx; // index into gtCallLateArgs list
unsigned tmpNum; // the LclVar number if we had to force evaluation of this arg
+#if defined(UNIX_X86_ABI)
+ unsigned padStkAlign; // Count of number of padding slots for stack alignment. For each Call, only the first
+ // argument may have a value to emit "sub esp, n" to adjust the stack before pushing
+ // the argument.
+#endif
bool isSplit : 1; // True when this argument is split between the registers and OutArg area
bool needTmp : 1; // True when we force this argument's evaluation into a temp LclVar
@@ -1263,6 +1268,10 @@ class fgArgInfo
unsigned argCount; // Updatable arg count value
unsigned nextSlotNum; // Updatable slot count value
unsigned stkLevel; // Stack depth when we make this call (for x86)
+#if defined(UNIX_X86_ABI)
+ unsigned padStkAlign; // Count of number of padding slots for stack alignment. This value is used to turn back
+ // the stack pointer, after each Call, to the value it had before it was adjusted
+#endif
unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs)
bool hasRegArgs; // true if we have one or more register arguments
@@ -1312,6 +1321,10 @@ public:
void ArgsComplete();
+#if defined(UNIX_X86_ABI)
+ void ArgsAlignPadding();
+#endif
+
void SortArgs();
void EvalArgsToTemps();
@@ -1331,6 +1344,12 @@ public:
{
return nextSlotNum;
}
+#if defined(UNIX_X86_ABI)
+ unsigned GetPadStackAlign()
+ {
+ return padStkAlign;
+ }
+#endif
bool HasRegArgs()
{
return hasRegArgs;
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 10ba4b09a5..da61debf27 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -4546,6 +4546,9 @@ struct GenTreePhiArg : public GenTreeLclVarCommon
struct GenTreePutArgStk : public GenTreeUnOp
{
unsigned gtSlotNum; // Slot number of the argument to be passed on stack
+#if defined(UNIX_X86_ABI)
+ unsigned gtPadAlign; // Number of padding slots for stack alignment
+#endif
#if FEATURE_FASTTAILCALL
bool putInIncomingArgArea; // Whether this arg needs to be placed in incoming arg area.
@@ -4561,6 +4564,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type DEBUGARG(largeNode))
, gtSlotNum(slotNum)
+#if defined(UNIX_X86_ABI)
+ , gtPadAlign(0)
+#endif
, putInIncomingArgArea(_putInIncomingArgArea)
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
@@ -4582,6 +4588,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
, gtSlotNum(slotNum)
+#if defined(UNIX_X86_ABI)
+ , gtPadAlign(0)
+#endif
, putInIncomingArgArea(_putInIncomingArgArea)
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
@@ -4603,6 +4612,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type DEBUGARG(largeNode))
, gtSlotNum(slotNum)
+#if defined(UNIX_X86_ABI)
+ , gtPadAlign(0)
+#endif
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
@@ -4622,6 +4634,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
, gtSlotNum(slotNum)
+#if defined(UNIX_X86_ABI)
+ , gtPadAlign(0)
+#endif
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
@@ -4640,6 +4655,18 @@ struct GenTreePutArgStk : public GenTreeUnOp
return gtSlotNum * TARGET_POINTER_SIZE;
}
+#if defined(UNIX_X86_ABI)
+ unsigned getArgPadding()
+ {
+ return gtPadAlign;
+ }
+
+ void setArgPadding(unsigned padAlign)
+ {
+ gtPadAlign = padAlign;
+ }
+#endif
+
#ifdef FEATURE_PUT_STRUCT_ARG_STK
unsigned getArgSize()
{
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index 3af0813fa7..cef861e641 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -5773,6 +5773,7 @@ void Compiler::lvaAlignFrame()
#elif defined(_TARGET_X86_)
+#if DOUBLE_ALIGN
if (genDoubleAlign())
{
// Double Frame Alignement for x86 is handled in Compiler::lvaAssignVirtualFrameOffsetsToLocals()
@@ -5783,6 +5784,30 @@ void Compiler::lvaAlignFrame()
lvaIncrementFrameSize(sizeof(void*));
}
}
+#endif
+
+ if (STACK_ALIGN > REGSIZE_BYTES)
+ {
+ if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ // If we are not doing final layout, we don't know the exact value of compLclFrameSize
+ // and thus do not know how much we will need to add in order to be aligned.
+ // We add the maximum pad that we could ever have (which is 12)
+ lvaIncrementFrameSize(STACK_ALIGN - REGSIZE_BYTES);
+ }
+
+ // Align the stack with STACK_ALIGN value.
+ int adjustFrameSize = compLclFrameSize;
+#if defined(UNIX_X86_ABI)
+ // we need to consider spilled register(s) plus return address and/or EBP
+ int adjustCount = compCalleeRegsPushed + 1 + (codeGen->isFramePointerUsed() ? 1 : 0);
+ adjustFrameSize += (adjustCount * REGSIZE_BYTES) % STACK_ALIGN;
+#endif
+ if ((adjustFrameSize % STACK_ALIGN) != 0)
+ {
+ lvaIncrementFrameSize(STACK_ALIGN - (adjustFrameSize % STACK_ALIGN));
+ }
+ }
#else
NYI("TARGET specific lvaAlignFrame");
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 1ac4ef4cbf..ce383e22a6 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -943,6 +943,11 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots) DEBUGARG(call));
#endif
+#if defined(UNIX_X86_ABI)
+ assert((info->padStkAlign > 0 && info->numSlots > 0) || (info->padStkAlign == 0));
+ putArg->AsPutArgStk()->setArgPadding(info->padStkAlign);
+#endif
+
#ifdef FEATURE_PUT_STRUCT_ARG_STK
// If the ArgTabEntry indicates that this arg is a struct
// get and store the number of slots that are references.
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index 99ef15a3b8..17eb755b2c 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -855,9 +855,12 @@ fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs)
compiler = comp;
callTree = call;
assert(call->IsCall());
- argCount = 0; // filled in arg count, starts at zero
- nextSlotNum = INIT_ARG_STACK_SLOT;
- stkLevel = 0;
+ argCount = 0; // filled in arg count, starts at zero
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+ stkLevel = 0;
+#if defined(UNIX_X86_ABI)
+ padStkAlign = 0;
+#endif
argTableSize = numArgs; // the allocated table size
hasRegArgs = false;
@@ -897,9 +900,12 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
;
callTree = newCall;
assert(newCall->IsCall());
- argCount = 0; // filled in arg count, starts at zero
- nextSlotNum = INIT_ARG_STACK_SLOT;
- stkLevel = oldArgInfo->stkLevel;
+ argCount = 0; // filled in arg count, starts at zero
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+ stkLevel = oldArgInfo->stkLevel;
+#if defined(UNIX_X86_ABI)
+ padStkAlign = oldArgInfo->padStkAlign;
+#endif
argTableSize = oldArgInfo->argTableSize;
argsComplete = false;
argTable = nullptr;
@@ -1079,16 +1085,19 @@ fgArgTabEntryPtr fgArgInfo::AddRegArg(
{
fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
- curArgTabEntry->argNum = argNum;
- curArgTabEntry->node = node;
- curArgTabEntry->parent = parent;
- curArgTabEntry->regNum = regNum;
- curArgTabEntry->slotNum = 0;
- curArgTabEntry->numRegs = numRegs;
- curArgTabEntry->numSlots = 0;
- curArgTabEntry->alignment = alignment;
- curArgTabEntry->lateArgInx = (unsigned)-1;
- curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = regNum;
+ curArgTabEntry->slotNum = 0;
+ curArgTabEntry->numRegs = numRegs;
+ curArgTabEntry->numSlots = 0;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+#if defined(UNIX_X86_ABI)
+ curArgTabEntry->padStkAlign = 0;
+#endif
curArgTabEntry->isSplit = false;
curArgTabEntry->isTmp = false;
curArgTabEntry->needTmp = false;
@@ -1154,16 +1163,19 @@ fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum,
curArgTabEntry->isStruct = isStruct; // is this a struct arg
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
- curArgTabEntry->argNum = argNum;
- curArgTabEntry->node = node;
- curArgTabEntry->parent = parent;
- curArgTabEntry->regNum = REG_STK;
- curArgTabEntry->slotNum = nextSlotNum;
- curArgTabEntry->numRegs = 0;
- curArgTabEntry->numSlots = numSlots;
- curArgTabEntry->alignment = alignment;
- curArgTabEntry->lateArgInx = (unsigned)-1;
- curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = REG_STK;
+ curArgTabEntry->slotNum = nextSlotNum;
+ curArgTabEntry->numRegs = 0;
+ curArgTabEntry->numSlots = numSlots;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+#if defined(UNIX_X86_ABI)
+ curArgTabEntry->padStkAlign = 0;
+#endif
curArgTabEntry->isSplit = false;
curArgTabEntry->isTmp = false;
curArgTabEntry->needTmp = false;
@@ -1689,6 +1701,52 @@ void fgArgInfo::ArgsComplete()
argsComplete = true;
}
+#if defined(UNIX_X86_ABI)
+// Get the stack alignment value for a Call holding this object
+//
+// NOTE: This function will calculate number of padding slots, to align the
+// stack before pushing arguments to the stack. Padding value is stored in
+// the first argument in fgArgTabEntry structure padStkAlign member so that
+// code (sub esp, n) can be emitted before generating argument push in
+// fgArgTabEntry node. As a result, the stack will be aligned right before
+// making a "Call". After the Call, the stack is restored to its previous value
+// using fgArgInfo->padStkAlign, as we can't use the one in fgArgTabEntry.
+//
+void fgArgInfo::ArgsAlignPadding()
+{
+ // To get the padding amount, sum up all the slots and get the remainder for padding
+ unsigned curInx;
+ unsigned numSlots = 0;
+ fgArgTabEntryPtr firstArgTabEntry = nullptr;
+
+ for (curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+ if (curArgTabEntry->numSlots > 0)
+ {
+ // The argument may be REG_STK or constant or register that goes to stack
+ assert(nextSlotNum >= curArgTabEntry->slotNum);
+
+ numSlots += curArgTabEntry->numSlots;
+ if (firstArgTabEntry == nullptr)
+ {
+ // First argument will be used to hold the padding amount
+ firstArgTabEntry = curArgTabEntry;
+ }
+ }
+ }
+
+ if (firstArgTabEntry != nullptr)
+ {
+ const int numSlotsAligned = STACK_ALIGN / TARGET_POINTER_SIZE;
+ // Set stack align pad for the first argument
+ firstArgTabEntry->padStkAlign = AlignmentPad(numSlots, numSlotsAligned);
+ // Set also for fgArgInfo that will be used to reset stack pointer after the Call
+ this->padStkAlign = firstArgTabEntry->padStkAlign;
+ }
+}
+#endif // UNIX_X86_ABI
+
void fgArgInfo::SortArgs()
{
assert(argsComplete == true);
@@ -4211,6 +4269,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
if (!reMorphing)
{
call->fgArgInfo->ArgsComplete();
+
+#if defined(UNIX_X86_ABI)
+ call->fgArgInfo->ArgsAlignPadding();
+#endif // UNIX_X86_ABI
+
#ifdef LEGACY_BACKEND
call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
#if defined(_TARGET_ARM_)
diff --git a/src/jit/target.h b/src/jit/target.h
index 6330d52889..5b608ddfac 100644
--- a/src/jit/target.h
+++ b/src/jit/target.h
@@ -495,9 +495,15 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call.
#define CODE_ALIGN 1 // code alignment requirement
+#if !defined(UNIX_X86_ABI)
#define STACK_ALIGN 4 // stack alignment requirement
#define STACK_ALIGN_SHIFT 2 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
#define STACK_ALIGN_SHIFT_ALL 2 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units
+#else
+ #define STACK_ALIGN 16 // stack alignment requirement
+ #define STACK_ALIGN_SHIFT 4 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs (NOTE(review): a shift of 4 yields 16-byte units, not 4-byte DWORD_PTRs - confirm intended value)
+ #define STACK_ALIGN_SHIFT_ALL 4 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units
+#endif // !UNIX_X86_ABI
#define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ESI|RBM_EDI)
#define RBM_INT_CALLEE_TRASH (RBM_EAX|RBM_ECX|RBM_EDX)