summaryrefslogtreecommitdiff
path: root/src/jit
diff options
context:
space:
mode:
authorLubomir Litchev <LLITCHEV@users.noreply.github.com>2015-10-29 14:36:05 -0700
committerLubomir Litchev <LLITCHEV@users.noreply.github.com>2015-10-29 14:36:05 -0700
commite75289b2161d7d3ff412f79e7dd6ba82236d8694 (patch)
treee55a2dee7214f7b4235289ee80e3582e12d31542 /src/jit
parent2401877e39f675c188c1add8833256dd31e283d5 (diff)
parent9a185a39b8b07cb56c360147ca8ad14b191f69e1 (diff)
downloadcoreclr-e75289b2161d7d3ff412f79e7dd6ba82236d8694.tar.gz
coreclr-e75289b2161d7d3ff412f79e7dd6ba82236d8694.tar.bz2
coreclr-e75289b2161d7d3ff412f79e7dd6ba82236d8694.zip
Merge pull request #1893 from LLITCHEV/Issue1831
Fix putarg_stk for tail call functions.
Diffstat (limited to 'src/jit')
-rw-r--r--src/jit/codegenlinear.h10
-rw-r--r--src/jit/codegenxarch.cpp513
-rw-r--r--src/jit/gentree.h6
3 files changed, 312 insertions, 217 deletions
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 6a030eb926..a548e1727c 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -40,6 +40,7 @@
void genMathIntrinsic(GenTreePtr treeNode);
void genPutArgStk(GenTreePtr treeNode);
+ unsigned getBaseVarForPutArgStk(GenTreePtr treeNode);
#ifdef FEATURE_SIMD
instruction getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned *ival = nullptr);
@@ -104,7 +105,7 @@
void genConsumeBlockOp(GenTreeBlkOp* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg);
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- void genConsumePutArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg);
+ void genConsumePutStructArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum);
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
void genConsumeRegs(GenTree* tree);
@@ -131,8 +132,11 @@
void genCodeForCpBlkUnroll (GenTreeCpBlk* cpBlkNode);
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- void genCodeForPutArgRepMovs(GenTreePutArgStk* putArgStkNode);
- void genCodeForPutArgUnroll(GenTreePutArgStk* putArgStkNode);
+ void genPutStructArgStk(GenTreePtr treeNode
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned baseVarNum));
+
+ void genStructPutArgRepMovs(GenTreePutArgStk* putArgStkNode, unsigned baseVarNum);
+ void genStructPutArgUnroll(GenTreePutArgStk* putArgStkNode, unsigned baseVarNum);
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
void genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index d55a6b37f7..15337a52ea 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -2670,82 +2670,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_PUTARG_STK:
-#ifdef _TARGET_X86_
genPutArgStk(treeNode);
-#else // !_TARGET_X86_
- {
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-
- if (targetType == TYP_STRUCT)
- {
- genPutArgStk(treeNode);
- break;
- }
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- noway_assert(targetType != TYP_STRUCT);
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
-
- // Get argument offset on stack.
- // Here we cross check that argument offset hasn't changed from lowering to codegen since
- // we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
- int argOffset = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE;
-
-#ifdef DEBUG
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
- assert(curArgTabEntry);
- assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
-#endif
-
- GenTreePtr data = treeNode->gtOp.gtOp1;
- unsigned varNum;
-
-#if FEATURE_FASTTAILCALL
- bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
-#else
- const bool putInIncomingArgArea = false;
-#endif
- // Whether to setup stk arg in incoming or out-going arg area?
- // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area.
- // All other calls - stk arg is setup in out-going arg area.
- if (putInIncomingArgArea)
- {
- // The first varNum is guaranteed to be the first incoming arg of the method being compiled.
- // See lvaInitTypeRef() for the order in which lvaTable entries are initialized.
- varNum = 0;
-#ifdef DEBUG
- // This must be a fast tail call.
- assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
-
- // Since it is a fast tail call, the existence of first incoming arg is guaranteed
- // because fast tail call requires that in-coming arg area of caller is >= out-going
- // arg area required for tail call.
- LclVarDsc* varDsc = compiler->lvaTable;
- assert(varDsc != nullptr);
- assert(varDsc->lvIsRegArg && ((varDsc->lvArgReg == REG_ARG_0) || (varDsc->lvArgReg == REG_FLTARG_0)));
-#endif
- }
- else
- {
-#if FEATURE_FIXED_OUT_ARGS
- varNum = compiler->lvaOutgoingArgSpaceVar;
-#else // !FEATURE_FIXED_OUT_ARGS
- NYI_X86("Stack args for x86/RyuJIT");
- varNum = BAD_VAR_NUM;
-#endif // !FEATURE_FIXED_OUT_ARGS
- }
-
- if (data->isContained())
- {
- getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), varNum,
- argOffset, (int) data->AsIntConCommon()->IconValue());
- }
- else
- {
- genConsumeReg(data);
- getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, varNum, argOffset);
- }
- }
-#endif // !_TARGET_X86_
break;
case GT_PUTARG_REG:
@@ -3791,37 +3716,33 @@ void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst
}
}
-// Generate code for a store to some address + offset
-// baseNode: tree node which can be either a local address or arbitrary node
-// offset: distance from the baseNode from which to load
+//------------------------------------------------------------------------
+// genCodeForStoreOffset: Generate code to store a reg to [base + offset].
+//
+// Arguments:
+// ins - the instruction to generate.
+// size - the size that needs to be stored.
+// src - the register which needs to be stored.
+// baseNode - the base, relative to which to store the src register.
+// offset - the offset that is added to the baseNode to calculate the address to store into.
+//
+
void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset)
{
emitter *emit = getEmitter();
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (baseNode->OperGet() == GT_PUTARG_STK)
+ if (baseNode->OperIsLocalAddr())
{
- GenTreePutArgStk* putArgStkNode = baseNode->AsPutArgStk();
- assert(putArgStkNode->gtOp.gtOp1->isContained());
- assert(putArgStkNode->gtOp.gtOp1->gtOp.gtOper == GT_LDOBJ);
+ if (baseNode->gtOper == GT_LCL_FLD_ADDR)
+ {
+ offset += baseNode->gtLclFld.gtLclOffs;
+ }
- emit->emitIns_S_R(ins, size, src, compiler->lvaOutgoingArgSpaceVar,
- (putArgStkNode->gtSlotNum * TARGET_POINTER_SIZE) + offset);
+ emit->emitIns_S_R(ins, size, src, baseNode->AsLclVarCommon()->GetLclNum(), offset);
}
else
-#endif // #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
{
-
- if (baseNode->OperIsLocalAddr())
- {
- if (baseNode->gtOper == GT_LCL_FLD_ADDR)
- offset += baseNode->gtLclFld.gtLclOffs;
- emit->emitIns_S_R(ins, size, src, baseNode->gtLclVarCommon.gtLclNum, offset);
- }
- else
- {
- emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset);
- }
+ emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset);
}
}
@@ -3863,6 +3784,9 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeCpBlk* cpBlkNode)
assert(genIsValidFloatReg(xmmReg));
size_t slots = size / XMM_REGSIZE_BYTES;
+ // TODO: In the below code the load and store instructions are for 16 bytes, but the
+ // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
+ // this probably needs to be changed.
while (slots-- > 0)
{
// Load
@@ -3946,21 +3870,31 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeCpBlk* cpBlkNode)
}
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-// Generates PutArg code by performing a loop unroll
+
+//---------------------------------------------------------------------------------------------------------------//
+// genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling.
+//
+// Arguments:
+// putArgNode - the PutArgStk tree.
+// baseVarNum - the base var number, relative to which the by-val struct will be copied on the stack.
//
// TODO-Amd64-Unix: Try to share code with copyblk.
-// The difference for now is thethe putarg_stk contains it's children, while cpyblk not.
+// Need refactoring of copyblk before it could be used for putarg_stk.
+// The difference for now is that a putarg_stk contains its children, while cpyblk does not.
// This creates differences in code. After some significant refactoring it could be reused.
-void CodeGen::genCodeForPutArgUnroll(GenTreePutArgStk* putArgNode)
+
+void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
{
+ noway_assert(putArgNode->TypeGet() == TYP_STRUCT);
// Make sure we got the arguments of the cpblk operation in the right registers
GenTreePtr dstAddr = putArgNode;
GenTreePtr srcAddr = putArgNode->gtOp.gtOp1;
- size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE;
+ size_t size = putArgNode->getArgSize();
assert(size <= CPBLK_UNROLL_LIMIT);
emitter *emit = getEmitter();
+ unsigned putArgOffset = putArgNode->getArgOffset();
assert(srcAddr->isContained());
assert(srcAddr->gtOper == GT_LDOBJ);
@@ -3982,12 +3916,24 @@ void CodeGen::genCodeForPutArgUnroll(GenTreePutArgStk* putArgNode)
assert(genIsValidFloatReg(xmmReg));
size_t slots = size / XMM_REGSIZE_BYTES;
+ assert(putArgNode->gtGetOp1()->isContained());
+ assert(putArgNode->gtGetOp1()->gtOp.gtOper == GT_LDOBJ);
+
+ // TODO: In the below code the load and store instructions are for 16 bytes, but the
+ // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
+ // this probably needs to be changed.
while (slots-- > 0)
{
// Load
- genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr->gtOp.gtOp1, offset); // Load the address of the child of the LdObj node.
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr->gtGetOp1(), offset); // Load the address of the child of the LdObj node.
+
// Store
- genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
+ emit->emitIns_S_R(INS_movdqu,
+ EA_8BYTE,
+ xmmReg,
+ baseVarNum,
+ putArgOffset + offset);
+
offset += XMM_REGSIZE_BYTES;
}
}
@@ -3997,70 +3943,85 @@ void CodeGen::genCodeForPutArgUnroll(GenTreePutArgStk* putArgNode)
{
// Grab the integer temp register to emit the remaining loads and stores.
regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
-
+ assert(genIsValidIntReg(tmpReg));
+
if ((size & 8) != 0)
{
-#ifdef _TARGET_X86_
- // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
- for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4)
- {
- genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
- }
-#else // !_TARGET_X86_
genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
- genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
+
+ emit->emitIns_S_R(INS_mov,
+ EA_8BYTE,
+ tmpReg,
+ baseVarNum,
+ putArgOffset + offset);
+
offset += 8;
-#endif // !_TARGET_X86_
}
+
if ((size & 4) != 0)
{
genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
- genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+
+ emit->emitIns_S_R(INS_mov,
+ EA_4BYTE,
+ tmpReg,
+ baseVarNum,
+ putArgOffset + offset);
+
offset += 4;
}
+
if ((size & 2) != 0)
{
genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
- genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
+
+ emit->emitIns_S_R(INS_mov,
+ EA_2BYTE,
+ tmpReg,
+ baseVarNum,
+ putArgOffset + offset);
+
offset += 2;
}
+
if ((size & 1) != 0)
{
genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
- genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
+ emit->emitIns_S_R(INS_mov,
+ EA_1BYTE,
+ tmpReg,
+ baseVarNum,
+ putArgOffset + offset);
}
}
}
-// Generate code for CpBlk by using rep movs
+//------------------------------------------------------------------------
+// genStructPutArgRepMovs: Generates code for passing a struct arg by value on stack using Rep Movs.
+//
+// Arguments:
+// putArgNode - the PutArgStk tree.
+// baseVarNum - the base var number, relative to which the by-val struct bits will go.
+//
// Preconditions:
-// The size argument of the PutArgStk (for structs) is a constant and is between
-// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
-void CodeGen::genCodeForPutArgRepMovs(GenTreePutArgStk* putArgNode)
+// The size argument of the PutArgStk (for structs) is a constant and is between
+// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
+
+void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
{
+ assert(putArgNode->TypeGet() == TYP_STRUCT);
+ assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT);
+ assert(baseVarNum != BAD_VAR_NUM);
// Make sure we got the arguments of the cpblk operation in the right registers
GenTreePtr dstAddr = putArgNode;
- GenTreePtr srcAddr = putArgNode->gtOp.gtOp1;
-#ifdef DEBUG
- size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE;
-#endif // DEBUG
+ GenTreePtr srcAddr = putArgNode->gtGetOp1();
// Validate state.
assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
-
-#ifdef DEBUG
assert(srcAddr->isContained());
-#ifdef _TARGET_AMD64_
- assert(size > CPBLK_UNROLL_LIMIT);
-#else
- assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
-#endif
-
-#endif // DEBUG
- genConsumePutArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX);
+ genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX, baseVarNum);
instGen(INS_r_movsb);
}
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
@@ -5237,53 +5198,64 @@ void CodeGen::genConsumeOperands(GenTreeOp* tree)
}
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-void CodeGen::genConsumePutArgStk(GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
+//------------------------------------------------------------------------
+// genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node.
+//                            Also loads the src/dst addresses into the correct
+//                            registers for the rep mov operation.
+//
+// Arguments:
+// putArgNode - the PUTARG_STK tree.
+// dstReg - the dstReg for the rep move operation.
+// srcReg - the srcReg for the rep move operation.
+// sizeReg - the sizeReg for the rep move operation.
+//    baseVarNum - the base var num for placing the "by-value" args on the stack.
+//
+// Return Value:
+// None.
+//
+// Note: sizeReg can be REG_NA when this function is used to consume the dstReg and srcReg
+// for copying on the stack a struct with references.
+
+void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum)
{
+ assert(putArgNode->TypeGet() == TYP_STRUCT);
+ assert(baseVarNum != BAD_VAR_NUM);
+
// The putArgNode children are always contained. We should not consume any registers.
+ assert(putArgNode->gtGetOp1()->isContained());
GenTree* dst = putArgNode;
-#ifdef DEBUG
// Get the GT_ADDR node, which is GT_LCL_VAR_ADDR (asserted below.)
- GenTree* src = putArgNode->gtOp.gtOp1;
+ GenTree* src = putArgNode->gtGetOp1();
assert(src->OperGet() == GT_LDOBJ);
- src = src->gtOp.gtOp1;
-#else // !DEBUG
- // Get the GT_ADDR node, which is GT_LCL_VAR_ADDR (asserted below.)
- GenTree* src = putArgNode->gtOp.gtOp1->gtOp.gtOp1;
-#endif // !DEBUG
+ src = src->gtGetOp1();
- size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE;
+ size_t size = putArgNode->getArgSize();
GenTree* op1;
GenTree* op2;
- regNumber reg1, reg2, reg3;
op1 = dst;
- reg1 = dstReg;
op2 = src;
- reg2 = srcReg;
- reg3 = sizeReg;
- if (reg2 != REG_NA && op2->gtRegNum != REG_NA)
+ assert(dstReg != REG_NA);
+ assert(srcReg != REG_NA);
+
+ // Consume the registers only if they are not contained or set to REG_NA.
+ if (op2->gtRegNum != REG_NA)
{
genConsumeReg(op2);
}
- if ((reg1 != REG_NA) && (op1->gtRegNum != reg1))
+ // If the op1 is already in the dstReg - nothing to do.
+ // Otherwise load the op1 (GT_ADDR) into the dstReg to copy the struct on the stack by value.
+ if (op1->gtRegNum != dstReg)
{
-#if FEATURE_FIXED_OUT_ARGS
- // Generate LEA instruction to load the stack of the outgoing var + SlotNum offset in RDI.
- LclVarDsc * varDsc = &compiler->lvaTable[compiler->lvaOutgoingArgSpaceVar];
- int offset = varDsc->lvStkOffs + putArgNode->gtSlotNum * TARGET_POINTER_SIZE;
- // Outgoing area always on top of the stack (relative to rsp.)
- getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, reg1, REG_SPBASE, offset);
-#else // !FEATURE_FIXED_OUT_ARGS
- NYI_X86("Stack args for x86/RyuJIT");
-#endif // !FEATURE_FIXED_OUT_ARGS
-
+        // Generate an LEA instruction to load the address of the outgoing arg area (or the incoming arg area for tail calls) + the arg offset into RDI.
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, baseVarNum, putArgNode->getArgOffset());
}
- if (op2->gtRegNum != reg2)
+ if (op2->gtRegNum != srcReg)
{
if (src->OperIsLocalAddr())
{
@@ -5292,22 +5264,18 @@ void CodeGen::genConsumePutArgStk(GenTreePutArgStk* putArgNode, regNumber dstReg
GenTreeLclVarCommon* lclNode = src->AsLclVarCommon();
// Generate LEA instruction to load the LclVar address in RSI.
- LclVarDsc * varLclDsc = &compiler->lvaTable[lclNode->gtLclNum];
- int offset = varLclDsc->lvStkOffs;
-
- // Otutgoing area always on top of the stack (relative to rsp.)
- getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, reg2, (isFramePointerUsed() ? getFramePointerReg() : REG_SPBASE), offset);
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->gtLclNum, 0);
}
else
{
assert(src->gtRegNum != REG_NA);
- getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, reg2, src->gtRegNum);
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, srcReg, src->gtRegNum);
}
}
- if ((reg3 != REG_NA))
+ if (sizeReg != REG_NA)
{
- inst_RV_IV(INS_mov, reg3, size, EA_8BYTE);
+ inst_RV_IV(INS_mov, sizeReg, size, EA_8BYTE);
}
}
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
@@ -7357,40 +7325,183 @@ CodeGen::genMathIntrinsic(GenTreePtr treeNode)
genProduceReg(treeNode);
}
-#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-//---------------------------------------------------------------------
-// genPutArgStk - generate code for putting a struct arg on the stack by value.
-// In case there are references to heap object in the struct,
-// it generates the gcinfo as well.
+//-------------------------------------------------------------------------- //
+// getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg.
//
// Arguments
// treeNode - the GT_PUTARG_STK node
//
// Return value:
+// The number of the base variable.
+//
+unsigned
+CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_PUTARG_STK);
+
+ unsigned baseVarNum;
+#if FEATURE_FASTTAILCALL
+ bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
+#else
+ const bool putInIncomingArgArea = false;
+#endif
+ // Whether to setup stk arg in incoming or out-going arg area?
+ // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area.
+ // All other calls - stk arg is setup in out-going arg area.
+ if (putInIncomingArgArea)
+ {
+ // The first baseVarNum is guaranteed to be the first incoming arg of the method being compiled.
+ // See lvaInitTypeRef() for the order in which lvaTable entries are initialized.
+ baseVarNum = 0;
+#ifdef DEBUG
+ // This must be a fast tail call.
+ assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
+
+ // Since it is a fast tail call, the existence of first incoming arg is guaranteed
+ // because fast tail call requires that in-coming arg area of caller is >= out-going
+ // arg area required for tail call.
+ LclVarDsc* varDsc = compiler->lvaTable;
+ assert(varDsc != nullptr);
+ assert(varDsc->lvIsRegArg && ((varDsc->lvArgReg == REG_ARG_0) || (varDsc->lvArgReg == REG_FLTARG_0)));
+#endif
+ }
+ else
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ baseVarNum = compiler->lvaOutgoingArgSpaceVar;
+#else // !FEATURE_FIXED_OUT_ARGS
+ NYI_X86("Stack args for x86/RyuJIT");
+ baseVarNum = BAD_VAR_NUM;
+#endif // !FEATURE_FIXED_OUT_ARGS
+ }
+
+ return baseVarNum;
+}
+
+//--------------------------------------------------------------------- //
+// genPutArgStk - generate code for passing an arg on the stack.
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+// targetType - the type of the treeNode
+//
+// Return value:
// None
//
-void
+void
CodeGen::genPutArgStk(GenTreePtr treeNode)
{
-#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
- assert(treeNode->OperGet() == GT_PUTARG_STK);
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
var_types targetType = treeNode->TypeGet();
#ifdef _TARGET_X86_
noway_assert(targetType != TYP_STRUCT);
-#elif defined (FEATURE_UNIX_AMD64_STRUCT_PASSING)
- noway_assert(targetType == TYP_STRUCT);
+
+ // The following logic is applicable for x86 arch.
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+
+ GenTreePtr data = treeNode->gtOp.gtOp1;
+
+ // On a 32-bit target, all of the long arguments have been decomposed into
+ // a separate putarg_stk for each of the upper and lower halves.
+ noway_assert(targetType != TYP_LONG);
+
+ // Decrement SP.
+ int argSize = genTypeSize(genActualType(targetType));
+ inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
+
+ genStackLevel += argSize;
+
+ // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
+ if (data->isContained())
+ {
+ NYI_X86("Contained putarg_stk");
+
+ }
+ else
+ {
+ genConsumeReg(data);
+ getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
+ }
+#else // !_TARGET_X86_
+ {
+ unsigned baseVarNum = getBaseVarForPutArgStk(treeNode);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (targetType == TYP_STRUCT)
+ {
+ genPutStructArgStk(treeNode, baseVarNum);
+ return;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ noway_assert(targetType != TYP_STRUCT);
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+
+ // Get argument offset on stack.
+ // Here we cross check that argument offset hasn't changed from lowering to codegen since
+ // we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
+ int argOffset = treeNode->AsPutArgStk()->getArgOffset();
+
+#ifdef DEBUG
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
+ assert(curArgTabEntry);
+ assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
+#endif
+
+ GenTreePtr data = treeNode->gtGetOp1();
+
+ if (data->isContained())
+ {
+ getEmitter()->emitIns_S_I(ins_Store(targetType),
+ emitTypeSize(targetType),
+ baseVarNum,
+ argOffset,
+ (int)data->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ genConsumeReg(data);
+ getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, baseVarNum, argOffset);
+ }
+ }
+#endif // !_TARGET_X86_
+}
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+//---------------------------------------------------------------------
+// genPutStructArgStk - generate code for copying a struct arg on the stack by value.
+// In case there are references to heap object in the struct,
+// it generates the gcinfo as well.
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+// baseVarNum - the variable number relative to which to put the argument on the stack.
+//    For tail calls, baseVarNum is 0 (the first incoming arg).
+// For non tail calls this is the outgoingArgSpace.
+//
+// Return value:
+// None
+//
+void
+CodeGen::genPutStructArgStk(GenTreePtr treeNode
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned baseVarNum))
+{
+ assert(treeNode->OperGet() == GT_PUTARG_STK);
+ assert(baseVarNum != BAD_VAR_NUM);
+ var_types targetType = treeNode->TypeGet();
+ assert(targetType == TYP_STRUCT);
+
GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk();
if (putArgStk->gtNumberReferenceSlots == 0)
{
switch (putArgStk->gtPutArgStkKind)
{
case GenTreePutArgStk::PutArgStkKindRepInstr:
- genCodeForPutArgRepMovs(putArgStk);
+ genStructPutArgRepMovs(putArgStk, baseVarNum);
break;
case GenTreePutArgStk::PutArgStkKindUnroll:
- genCodeForPutArgUnroll(putArgStk);
+ genStructPutArgUnroll(putArgStk, baseVarNum);
break;
default:
unreached();
@@ -7402,7 +7513,7 @@ CodeGen::genPutArgStk(GenTreePtr treeNode)
// Consume these registers.
// They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
- genConsumePutArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA);
+ genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA, baseVarNum);
GenTreePtr dstAddr = putArgStk;
GenTreePtr srcAddr = putArgStk->gtOp.gtOp1;
gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet());
@@ -7458,7 +7569,10 @@ CodeGen::genPutArgStk(GenTreePtr treeNode)
// See emitGCVarLiveUpd function. If we could call it separately, we could do instGen(INS_movsq); and emission of gc info.
getEmitter()->emitIns_R_AR(ins_Load(TYP_REF), EA_GCREF, REG_RCX, REG_RSI, 0);
- getEmitter()->emitIns_S_R(ins_Store(TYP_REF), EA_GCREF, REG_RCX, compiler->lvaOutgoingArgSpaceVar,
+ getEmitter()->emitIns_S_R(ins_Store(TYP_REF),
+ EA_GCREF,
+ REG_RCX,
+ baseVarNum,
((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE));
getEmitter()->emitIns_R_I(INS_add, EA_8BYTE, REG_RSI, TARGET_POINTER_SIZE);
getEmitter()->emitIns_R_I(INS_add, EA_8BYTE, REG_RDI, TARGET_POINTER_SIZE);
@@ -7472,37 +7586,8 @@ CodeGen::genPutArgStk(GenTreePtr treeNode)
gcInfo.gcMarkRegSetNpt(RBM_RDI);
}
return;
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
-
- GenTreePtr data = treeNode->gtOp.gtOp1;
-
-#if !defined(_TARGET_64BIT_)
- // On a 64-bit target, all of the long arguments have been decomposed into
- // a separate putarg_stk for each of the upper and lower halves.
- noway_assert(targetType != TYP_LONG);
-#endif // !defined(_TARGET_64BIT_)
-
- // Decrement SP.
- int argSize = genTypeSize(genActualType(targetType));
- inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
-#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
- genStackLevel += argSize;
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-
- // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
- if (data->isContained())
- {
- NYI_X86("Contained putarg_stk");
-
- }
- else
- {
- genConsumeReg(data);
- getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
- }
}
-#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#endif //defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
/*****************************************************************************
*
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 431f5c85b1..331f87ae80 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -3368,6 +3368,12 @@ struct GenTreePutArgStk: public GenTreeUnOp
}
#endif // FEATURE_FASTTAILCALL
+ unsigned getArgOffset() { return gtSlotNum * TARGET_POINTER_SIZE; }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ unsigned getArgSize() { return gtNumSlots * TARGET_POINTER_SIZE; }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
//------------------------------------------------------------------------
// setGcPointers: Sets the number of references and the layout of the struct object returned by the VM.