Diffstat (limited to 'src')
-rw-r--r--  src/jit/codegenlinear.cpp |   7
-rw-r--r--  src/jit/codegenlinear.h   |   4
-rw-r--r--  src/jit/codegenxarch.cpp  | 404
-rw-r--r--  src/jit/gentree.cpp       |  55
-rw-r--r--  src/jit/gentree.h         |   8
-rw-r--r--  src/jit/lowerxarch.cpp    |  89
-rw-r--r--  src/jit/lsra.cpp          |  17
7 files changed, 428 insertions(+), 156 deletions(-)
diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp
index 68eb2ffe8f..1cff16349c 100644
--- a/src/jit/codegenlinear.cpp
+++ b/src/jit/codegenlinear.cpp
@@ -1186,14 +1186,15 @@ void CodeGen::genConsumeRegs(GenTree* tree)
#ifdef _TARGET_XARCH_
else if (tree->OperGet() == GT_LCL_VAR)
{
- // A contained lcl var must be living on stack and marked as reg optional.
+ // A contained lcl var must be living on stack and marked as reg optional, or not be a
+ // register candidate.
unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
LclVarDsc* varDsc = compiler->lvaTable + varNum;
noway_assert(varDsc->lvRegNum == REG_STK);
- noway_assert(tree->IsRegOptional());
+ noway_assert(tree->IsRegOptional() || !varDsc->lvLRACandidate);
- // Update the life of reg optional lcl var.
+ // Update the life of the lcl var.
genUpdateLife(tree);
}
#endif // _TARGET_XARCH_
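
The relaxed assertion admits contained lclVars that were never register candidates, not just reg-optional ones. A minimal standalone sketch of the new invariant, using illustrative names (LclInfo, consumeContainedLclVar) rather than the JIT's actual types:

    #include <cassert>

    // Illustrative stand-ins for the JIT's lclVar state (not the actual types).
    struct LclInfo
    {
        bool onStack;      // lvRegNum == REG_STK
        bool lraCandidate; // lvLRACandidate
        bool regOptional;  // the tree was marked reg-optional
    };

    void consumeContainedLclVar(const LclInfo& lcl)
    {
        // A contained lclVar must live on the stack, and must be either
        // reg-optional or not an LSRA register candidate at all.
        assert(lcl.onStack);
        assert(lcl.regOptional || !lcl.lraCandidate);
    }

    int main()
    {
        consumeContainedLclVar({true, false, false}); // non-candidate: now legal
        consumeContainedLclVar({true, true, true});   // reg-optional candidate
    }
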
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 6ad72cf781..25ad3f5637 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -167,7 +167,9 @@ void genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode);
#ifdef FEATURE_PUT_STRUCT_ARG_STK
#ifdef _TARGET_X86_
-bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk, bool isSrcInMemory);
+bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk);
+void genPushReg(var_types type, regNumber srcReg);
+void genPutArgStkFieldList(GenTreePutArgStk* putArgStk);
#endif // _TARGET_X86_
void genPutStructArgStk(GenTreePutArgStk* treeNode);
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 613137e59e..795098975b 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -564,6 +564,8 @@ void CodeGen::genCodeForLongUMod(GenTreeOp* node)
assert(dividend->OperGet() == GT_LONG);
assert(varTypeIsLong(dividend));
+ genConsumeOperands(node);
+
GenTree* const dividendLo = dividend->gtOp1;
GenTree* const dividendHi = dividend->gtOp2;
assert(!dividendLo->isContained());
@@ -574,8 +576,6 @@ void CodeGen::genCodeForLongUMod(GenTreeOp* node)
assert(divisor->gtIconVal >= 2);
assert(divisor->gtIconVal <= 0x3fffffff);
- genConsumeOperands(node);
-
// dividendLo must be in RAX; dividendHi must be in RDX
genCopyRegIfNeeded(dividendLo, REG_EAX);
genCopyRegIfNeeded(dividendHi, REG_EDX);
@@ -3349,6 +3349,8 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
regNumber intTmpReg = REG_NA;
regNumber longTmpReg = REG_NA;
#ifdef _TARGET_X86_
+ // On x86 we use an XMM register for both 16- and 8-byte chunks, but if the
+ // argument is less than 16 bytes, we will just use pushes.
if (size >= 8)
{
xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
@@ -3359,6 +3361,7 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
}
#else // !_TARGET_X86_
+ // On x64 we use an XMM register only for 16-byte chunks.
if (size >= XMM_REGSIZE_BYTES)
{
xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
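
Taken together, the two comments describe a target-dependent rule for reserving the XMM temporary. A compilable sketch of just that predicate, under a hypothetical needsXmmTemp helper (the integer-temp conditions are elided in this hunk):

    #include <cstdio>

    // Sketch: when an XMM temporary is reserved for the unrolled struct copy.
    bool needsXmmTemp(unsigned size, bool isX86)
    {
        const unsigned XMM_REGSIZE_BYTES = 16;
        // x86 copies 8-byte chunks through an XMM register too (arguments
        // smaller than 8 bytes are handled purely with pushes); x64 uses an
        // XMM register only for full 16-byte chunks.
        return isX86 ? (size >= 8) : (size >= XMM_REGSIZE_BYTES);
    }

    int main()
    {
        printf("size 12: x86=%d x64=%d\n", needsXmmTemp(12, true), needsXmmTemp(12, false)); // 1 0
    }
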
@@ -7486,29 +7489,234 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
//
// Arguments:
// putArgStk - the putArgStk node.
-// isSrcInMemory - true if the source of the putArgStk node is in
-// memory; false otherwise.
//
// Returns: true if the stack pointer was adjusted; false otherwise.
//
-bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk, bool isSrcInMemory)
+bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
{
- assert(isSrcInMemory || (putArgStk->gtOp1->OperGet() == GT_FIELD_LIST));
-
- // If this argument contains any GC pointers or is less than 16 bytes in size and is either in memory or composed
- // entirely of slot-like fields (i.e. integral-types, 4-byte-aligned fields that take up 4 bytes including any
- // padding), we will use a sequence of `push` instructions to store the argument to the stack.
const unsigned argSize = putArgStk->getArgSize();
- if ((putArgStk->gtNumberReferenceSlots != 0) ||
- ((argSize < 16) && (isSrcInMemory || (putArgStk->gtPutArgStkKind == GenTreePutArgStk::Kind::AllSlots))))
+
+ // If the gtPutArgStkKind is one of the push types, we do not pre-adjust the stack.
+ // This is set in Lowering, and is true if and only if:
+ // - This argument contains any GC pointers OR
+ // - It is a GT_FIELD_LIST OR
+ // - It is less than 16 bytes in size.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ switch (putArgStk->gtPutArgStkKind)
{
- return false;
+ case GenTreePutArgStk::Kind::RepInstr:
+ case GenTreePutArgStk::Kind::Unroll:
+ assert((putArgStk->gtNumberReferenceSlots == 0) && (putArgStk->gtGetOp1()->OperGet() != GT_FIELD_LIST) &&
+ (argSize >= 16));
+ break;
+ case GenTreePutArgStk::Kind::Push:
+ case GenTreePutArgStk::Kind::PushAllSlots:
+ assert((putArgStk->gtNumberReferenceSlots != 0) || (putArgStk->gtGetOp1()->OperGet() == GT_FIELD_LIST) ||
+ (argSize < 16));
+ break;
+ case GenTreePutArgStk::Kind::Invalid:
+ default:
+ assert(!"Uninitialized GenTreePutArgStk::Kind");
+ break;
}
+#endif // DEBUG
+ if (putArgStk->isPushKind())
+ {
+ m_pushStkArg = true;
+ return false;
+ }
+ m_pushStkArg = false;
inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE);
genStackLevel += argSize;
return true;
}
+
+//---------------------------------------------------------------------
+// genPutArgStkFieldList - generate code for passing a GT_FIELD_LIST arg on the stack.
+//
+// Arguments:
+//    putArgStk - the GT_PUTARG_STK node whose op1 is a GT_FIELD_LIST
+//
+// Return value:
+// None
+//
+void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
+{
+ GenTreeFieldList* const fieldList = putArgStk->gtOp1->AsFieldList();
+ assert(fieldList != nullptr);
+
+ // Set m_pushStkArg and pre-adjust the stack if necessary.
+ const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk);
+ // For now, we only support the "push" case; we will push a full slot for the first field of each slot
+ // within the struct.
+ assert((putArgStk->isPushKind()) && !preAdjustedStack && m_pushStkArg);
+
+ // If we have pre-adjusted the stack (and are simply storing the fields in order), set the offset to 0.
+ // (Note that this mode is not currently being used.)
+ // If we are pushing the arguments (i.e. we have not pre-adjusted the stack), then we are pushing them
+ // in reverse order, so we start with the current field offset at the size of the struct arg (which must be
+ // a multiple of the target pointer size).
+ unsigned currentOffset = (preAdjustedStack) ? 0 : putArgStk->getArgSize();
+ unsigned prevFieldOffset = currentOffset;
+ regNumber tmpReg = REG_NA;
+ if (putArgStk->gtRsvdRegs != RBM_NONE)
+ {
+ assert(genCountBits(putArgStk->gtRsvdRegs) == 1);
+ tmpReg = genRegNumFromMask(putArgStk->gtRsvdRegs);
+ assert(genIsValidIntReg(tmpReg));
+ }
+ for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
+ {
+ GenTree* const fieldNode = current->Current();
+ const unsigned fieldOffset = current->gtFieldOffset;
+ var_types fieldType = current->gtFieldType;
+
+ // Long-typed nodes should have been handled by the decomposition pass, and lowering should have sorted the
+ // field list in descending order by offset.
+ assert(!varTypeIsLong(fieldType));
+ assert(fieldOffset <= prevFieldOffset);
+
+ // Consume the register, if any, for this field. Note that genConsumeRegs() will appropriately
+ // update the liveness info for a lclVar that has been marked RegOptional, which hasn't been
+ // assigned a register, and which is therefore contained.
+ // Unlike genConsumeReg(), it handles the case where no registers are being consumed.
+ genConsumeRegs(fieldNode);
+ regNumber argReg = fieldNode->isContainedSpillTemp() ? REG_NA : fieldNode->gtRegNum;
+
+ // If the field is slot-like, we can use a push instruction to store the entire register no matter the type.
+ //
+ // The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up
+ // to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must
+ // not require rounding.
+ // NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise
+ // able to detect stores into the outgoing argument area of the stack on x86.
+ const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
+ int adjustment = roundUp(currentOffset - fieldOffset, 4);
+ if (fieldIsSlot)
+ {
+ fieldType = genActualType(fieldType);
+ unsigned pushSize = genTypeSize(fieldType);
+ assert((pushSize % 4) == 0);
+ adjustment -= pushSize;
+ while (adjustment != 0)
+ {
+ inst_IV(INS_push, 0);
+ currentOffset -= pushSize;
+ genStackLevel += pushSize;
+ adjustment -= pushSize;
+ }
+ m_pushStkArg = true;
+ }
+ else
+ {
+ m_pushStkArg = false;
+ // We always "push" floating point fields (i.e. they are full slot values that don't
+ // require special handling).
+ assert(varTypeIsIntegralOrI(fieldNode));
+ // If we can't push this field, it needs to be in a register so that we can store
+ // it to the stack location.
+ assert(tmpReg != REG_NA);
+ if (adjustment != 0)
+ {
+ // We can't push this field, so adjust the stack pointer down to the next
+ // slot boundary at or below fieldOffset and use stack-relative stores instead.
+ inst_RV_IV(INS_sub, REG_SPBASE, adjustment, EA_PTRSIZE);
+ currentOffset -= adjustment;
+ genStackLevel += adjustment;
+ }
+
+ // Does it need to be in a byte register?
+ // If so, we'll use tmpReg, which must have been allocated as a byte register.
+ // If it's already in a register, but not a byteable one, then move it.
+ if (varTypeIsByte(fieldType) && ((argReg == REG_NA) || ((genRegMask(argReg) & RBM_BYTE_REGS) == 0)))
+ {
+ noway_assert((genRegMask(tmpReg) & RBM_BYTE_REGS) != 0);
+ if (argReg != REG_NA)
+ {
+ inst_RV_RV(INS_mov, tmpReg, argReg, fieldType);
+ argReg = tmpReg;
+ }
+ }
+ }
+
+ if (argReg == REG_NA)
+ {
+ if (m_pushStkArg)
+ {
+ if (fieldNode->isContainedSpillTemp())
+ {
+ assert(fieldNode->IsRegOptional());
+ TempDsc* tmp = getSpillTempDsc(fieldNode);
+ getEmitter()->emitIns_S(INS_push, emitActualTypeSize(fieldNode->TypeGet()), tmp->tdTempNum(), 0);
+ compiler->tmpRlsTemp(tmp);
+ }
+ else
+ {
+ assert(varTypeIsIntegralOrI(fieldNode));
+ switch (fieldNode->OperGet())
+ {
+ case GT_LCL_VAR:
+ inst_TT(INS_push, fieldNode, 0, 0, emitActualTypeSize(fieldNode->TypeGet()));
+ break;
+ case GT_CNS_INT:
+ if (fieldNode->IsIconHandle())
+ {
+ inst_IV_handle(INS_push, fieldNode->gtIntCon.gtIconVal);
+ }
+ else
+ {
+ inst_IV(INS_push, fieldNode->gtIntCon.gtIconVal);
+ }
+ break;
+ default:
+ unreached();
+ }
+ }
+ currentOffset -= TARGET_POINTER_SIZE;
+ genStackLevel += TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ // The stack has been adjusted and we will load the field to tmpReg and then store it on the stack.
+ assert(varTypeIsIntegralOrI(fieldNode));
+ switch (fieldNode->OperGet())
+ {
+ case GT_LCL_VAR:
+ inst_RV_TT(INS_mov, tmpReg, fieldNode);
+ break;
+ case GT_CNS_INT:
+ genSetRegToConst(tmpReg, fieldNode->TypeGet(), fieldNode);
+ break;
+ default:
+ unreached();
+ }
+ genStoreRegToStackArg(fieldType, tmpReg, fieldOffset - currentOffset);
+ }
+ }
+ else
+ {
+ genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset);
+ if (m_pushStkArg)
+ {
+ // We always push a slot-rounded size
+ currentOffset -= genTypeSize(fieldType);
+ }
+ }
+
+ prevFieldOffset = fieldOffset;
+ }
+ if (currentOffset != 0)
+ {
+ // We don't expect padding at the beginning of a struct, but it could happen with explicit layout.
+ inst_RV_IV(INS_sub, REG_SPBASE, currentOffset, EA_PTRSIZE);
+ genStackLevel += currentOffset;
+ }
+}
#endif // _TARGET_X86_
//---------------------------------------------------------------------
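
The heart of genPutArgStkFieldList is the offset bookkeeping: fields arrive sorted by descending offset, currentOffset starts at the argument size, and each field either lands on a `push` (possibly after `push 0` padding) or forces a single `sub esp, roundUp(currentOffset - fieldOffset, 4)` followed by an esp-relative store. A self-contained model of that arithmetic, with hypothetical names (modelFieldListPushes, roundUp4); it prints the instruction sequence rather than emitting it:

    #include <cstdio>
    #include <vector>

    // Round up to a multiple of 4: the GC encoder requires the stack to stay
    // 4-byte aligned at all times.
    unsigned roundUp4(unsigned x) { return (x + 3) & ~3u; }

    // Model of the push-kind offset bookkeeping. Only field offsets matter for
    // slotness; the real code also tracks field types and sizes. `offsets`
    // must be sorted descending; argSize is a multiple of the pointer size (4).
    void modelFieldListPushes(const std::vector<unsigned>& offsets, unsigned argSize)
    {
        unsigned currentOffset   = argSize;
        unsigned prevFieldOffset = argSize;
        for (unsigned fieldOffset : offsets)
        {
            const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
            unsigned adjustment = roundUp4(currentOffset - fieldOffset);
            if (fieldIsSlot)
            {
                // Pad with `push 0` until the next push lands exactly on the field.
                adjustment -= 4;
                while (adjustment != 0)
                {
                    printf("push 0          ; padding slot\n");
                    currentOffset -= 4;
                    adjustment -= 4;
                }
                printf("push <field@%u>\n", fieldOffset);
                currentOffset -= 4;
            }
            else
            {
                // Not slot-aligned: adjust esp once, then store esp-relative.
                if (adjustment != 0)
                {
                    printf("sub esp, %u\n", adjustment);
                    currentOffset -= adjustment;
                }
                printf("mov [esp+%u], <field@%u>\n", fieldOffset - currentOffset, fieldOffset);
            }
            prevFieldOffset = fieldOffset;
        }
        if (currentOffset != 0)
        {
            printf("sub esp, %u     ; leading padding\n", currentOffset);
        }
    }

    int main()
    {
        // A 12-byte struct with fields at offsets 8, 6, and 0.
        modelFieldListPushes({8, 6, 0}, 12);
    }
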
@@ -7527,7 +7735,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
#ifdef _TARGET_X86_
if (varTypeIsStruct(targetType))
{
- m_pushStkArg = !genAdjustStackForPutArgStk(putArgStk, true);
+ (void)genAdjustStackForPutArgStk(putArgStk);
genPutStructArgStk(putArgStk);
return;
}
@@ -7541,10 +7749,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
// a separate putarg_stk for each of the upper and lower halves.
noway_assert(targetType != TYP_LONG);
- int argSize = genTypeSize(genActualType(targetType));
- genStackLevel += argSize;
+ const unsigned argSize = putArgStk->getArgSize();
+ assert((argSize % TARGET_POINTER_SIZE) == 0);
- // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
if (data->isContainedIntOrIImmed())
{
if (data->IsIconHandle())
@@ -7555,115 +7762,18 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
{
inst_IV(INS_push, data->gtIntCon.gtIconVal);
}
+ genStackLevel += argSize;
}
else if (data->OperGet() == GT_FIELD_LIST)
{
- GenTreeFieldList* const fieldList = data->AsFieldList();
- assert(fieldList != nullptr);
-
- m_pushStkArg = false;
- const int argSize = putArgStk->getArgSize();
- assert((argSize % TARGET_POINTER_SIZE) == 0);
-
- const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk, false);
-
- // If the stack was not pre-adjusted, set the current field offset to the size of the struct arg (which must be
- // a multiple of the target pointer size). Otherwise, set the offset to 0.
- int currentOffset = preAdjustedStack ? 0 : argSize;
- unsigned prevFieldOffset = argSize;
- for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
- {
- GenTree* const fieldNode = current->Current();
- const unsigned fieldOffset = current->gtFieldOffset;
- var_types fieldType = current->gtFieldType;
-
- // Long-typed nodes should have been handled by the decomposition pass, and lowering should have sorted the
- // field list in descending order by offset.
- assert(!varTypeIsLong(fieldType));
- assert(fieldOffset <= prevFieldOffset);
-
- // TODO-X86-CQ: If this is not a register candidate, or is not in a register,
- // make it contained.
- genConsumeReg(fieldNode);
-
- // If the field is slot-like, we can store the entire register no matter the type.
- const bool fieldIsSlot =
- varTypeIsIntegralOrI(fieldType) && ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
- if (fieldIsSlot)
- {
- fieldType = genActualType(fieldType);
- assert(genTypeSize(fieldType) == 4);
- }
-
- // We can use a push instruction for any slot-like field.
- //
- // NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise
- // able to detect stores into the outgoing argument area of the stack on x86.
- const bool usePush = !preAdjustedStack && fieldIsSlot;
- assert(usePush || !varTypeIsGC(fieldType));
-
- // Adjust the stack if necessary. If we are going to generate a `push` instruction, this moves the stack
- // pointer to (fieldOffset + sizeof(fieldType)) to account for the `push`.
- const int fieldSize = genTypeSize(fieldType);
- const int desiredOffset = current->gtFieldOffset + (usePush ? fieldSize : 0);
- if (currentOffset > desiredOffset)
- {
- assert(!preAdjustedStack);
-
- // The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up
- // to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must
- // not require rounding.
- const int adjustment = roundUp(currentOffset - desiredOffset, 4);
- assert(!usePush || (adjustment == (currentOffset - desiredOffset)));
- inst_RV_IV(INS_sub, REG_SPBASE, adjustment, EA_PTRSIZE);
- currentOffset -= adjustment;
- genStackLevel += adjustment;
- }
-
- // Note that the argReg may not be the lcl->gtRegNum, if it has been copied
- // or reloaded to a different register.
- const regNumber argReg = fieldNode->gtRegNum;
- if (usePush)
- {
- // Adjust the stack if necessary and push the field.
- // Push the field.
- inst_RV(INS_push, argReg, fieldType, emitTypeSize(fieldType));
- currentOffset -= fieldSize;
- genStackLevel += fieldSize;
- }
- else
- {
- assert(!m_pushStkArg);
- genStoreRegToStackArg(fieldType, argReg, desiredOffset - currentOffset);
- }
-
- prevFieldOffset = fieldOffset;
- }
-
- // Adjust the stack if necessary.
- if (currentOffset != 0)
- {
- inst_RV_IV(INS_sub, REG_SPBASE, currentOffset, EA_PTRSIZE);
- genStackLevel += currentOffset;
- }
- }
- else if (data->isContained())
- {
- NYI_X86("Contained putarg_stk of non-constant");
+ genPutArgStkFieldList(putArgStk);
}
else
{
+ // We should not see any contained nodes that are not immediates.
+ assert(!data->isContained());
genConsumeReg(data);
- if (varTypeIsIntegralOrI(targetType))
- {
- inst_RV(INS_push, data->gtRegNum, targetType);
- }
- else
- {
- // Decrement SP.
- inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
- getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
- }
+ genPushReg(targetType, data->gtRegNum);
}
#else // !_TARGET_X86_
{
@@ -7712,6 +7822,48 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
#endif // !_TARGET_X86_
}
+#ifdef _TARGET_X86_
+//---------------------------------------------------------------------
+// genPushReg: Push a register value onto the stack and adjust the stack level.
+//
+// Arguments:
+//    type   - the type of value to be stored
+//    srcReg - the register containing the value
+//
+// Notes:
+// For TYP_LONG, the srcReg must be a floating point register.
+// Otherwise, the register type must be consistent with the given type.
+//
+void CodeGen::genPushReg(var_types type, regNumber srcReg)
+{
+ unsigned size = genTypeSize(type);
+ if (varTypeIsIntegralOrI(type) && type != TYP_LONG)
+ {
+ assert(genIsValidIntReg(srcReg));
+ inst_RV(INS_push, srcReg, type);
+ }
+ else
+ {
+ instruction ins;
+ emitAttr attr = emitTypeSize(type);
+ if (type == TYP_LONG)
+ {
+ // On x86, the only way we can push a TYP_LONG from a register is if it is in an xmm reg.
+ // This is only used when we are pushing a struct from memory to memory, and basically is
+ // handling an 8-byte "chunk", as opposed to strictly a long type.
+ ins = INS_movq;
+ }
+ else
+ {
+ ins = ins_Store(type);
+ }
+ assert(genIsValidFloatReg(srcReg));
+ inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE);
+ getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0);
+ }
+ genStackLevel += size;
+}
+#endif // _TARGET_X86_
+
#if defined(FEATURE_PUT_STRUCT_ARG_STK)
// genStoreRegToStackArg: Store a register value into the stack argument area
//
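
genPushReg above distinguishes a true `push` (integral types other than TYP_LONG) from a `sub esp` plus store, where TYP_LONG is really an 8-byte struct chunk held in an XMM register and stored with movq. A small sketch of the selection, with a hypothetical describePush helper standing in for the emitter:

    #include <cstdio>
    #include <cstring>

    // Hypothetical helper: prints what genPushReg would emit for a given type.
    // typeName/size/isIntegral stand in for var_types, genTypeSize, and
    // varTypeIsIntegralOrI; <reg> placeholders stand in for real registers.
    void describePush(const char* typeName, unsigned size, bool isIntegral)
    {
        if (isIntegral && (strcmp(typeName, "TYP_LONG") != 0))
        {
            printf("push <intReg>              ; %s, %u bytes\n", typeName, size);
        }
        else
        {
            // TYP_LONG here is an 8-byte struct chunk held in an XMM register,
            // so it is stored with movq; float types use the normal float store.
            const char* ins = (strcmp(typeName, "TYP_LONG") == 0) ? "movq" : "movs[sd]";
            printf("sub esp, %u\n", size);
            printf("%s [esp], <xmmReg>         ; %s\n", ins, typeName);
        }
        // Either way the stack level grows by `size`.
    }

    int main()
    {
        describePush("TYP_INT", 4, true);
        describePush("TYP_LONG", 8, true);
        describePush("TYP_DOUBLE", 8, false);
    }
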
@@ -7776,16 +7928,7 @@ void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset
#ifdef _TARGET_X86_
if (m_pushStkArg)
{
- if (varTypeIsIntegralOrI(type) && type != TYP_LONG)
- {
- inst_RV(INS_push, srcReg, type);
- }
- else
- {
- inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE);
- getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0);
- }
- genStackLevel += size;
+ genPushReg(type, srcReg);
}
else
{
@@ -7834,6 +7977,9 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
case GenTreePutArgStk::Kind::Unroll:
genStructPutArgUnroll(putArgStk);
break;
+ case GenTreePutArgStk::Kind::Push:
+ genStructPutArgUnroll(putArgStk);
+ break;
default:
unreached();
}
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 2647b70d93..9749574d1f 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -11287,19 +11287,62 @@ void Compiler::gtDispTree(GenTreePtr tree,
{
printf(" (last use)");
}
- if (tree->OperIsCopyBlkOp())
+ if (tree->OperIsBlkOp())
{
- printf(" (copy)");
- }
- else if (tree->OperIsInitBlkOp())
- {
- printf(" (init)");
+ if (tree->OperIsCopyBlkOp())
+ {
+ printf(" (copy)");
+ }
+ else if (tree->OperIsInitBlkOp())
+ {
+ printf(" (init)");
+ }
+ if (tree->OperIsStoreBlk() && (tree->AsBlk()->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid))
+ {
+ switch (tree->AsBlk()->gtBlkOpKind)
+ {
+ case GenTreeBlk::BlkOpKindRepInstr:
+ printf(" (RepInstr)");
+ break;
+ case GenTreeBlk::BlkOpKindUnroll:
+ printf(" (Unroll)");
+ break;
+ case GenTreeBlk::BlkOpKindHelper:
+ printf(" (Helper)");
+ break;
+ default:
+ unreached();
+ }
+ }
}
else if (tree->OperIsFieldList())
{
printf(" %s at offset %d", varTypeName(tree->AsFieldList()->gtFieldType),
tree->AsFieldList()->gtFieldOffset);
}
+#if FEATURE_PUT_STRUCT_ARG_STK
+ else if ((tree->OperGet() == GT_PUTARG_STK) &&
+ (tree->AsPutArgStk()->gtPutArgStkKind != GenTreePutArgStk::Kind::Invalid))
+ {
+ switch (tree->AsPutArgStk()->gtPutArgStkKind)
+ {
+ case GenTreePutArgStk::Kind::RepInstr:
+ printf(" (RepInstr)");
+ break;
+ case GenTreePutArgStk::Kind::Unroll:
+ printf(" (Unroll)");
+ break;
+ case GenTreePutArgStk::Kind::Push:
+ printf(" (Push)");
+ break;
+ case GenTreePutArgStk::Kind::PushAllSlots:
+ printf(" (PushAllSlots)");
+ break;
+ default:
+ unreached();
+ }
+ }
+#endif // FEATURE_PUT_STRUCT_ARG_STK
IndirectAssignmentAnnotation* pIndirAnnote;
if (tree->gtOper == GT_ASG && GetIndirAssignMap()->Lookup(tree, &pIndirAnnote))
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index b46a8956d3..6518f91c57 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -566,7 +566,7 @@ public:
bool isContainedIntOrIImmed() const
{
- return isContained() && IsCnsIntOrI();
+ return isContained() && IsCnsIntOrI() && !isContainedSpillTemp();
}
bool isContainedFltOrDblImmed() const
@@ -4649,10 +4649,14 @@ struct GenTreePutArgStk : public GenTreeUnOp
// block node.
enum class Kind : __int8{
- Invalid, RepInstr, Unroll, AllSlots,
+ Invalid, RepInstr, Unroll, Push, PushAllSlots,
};
Kind gtPutArgStkKind;
+ bool isPushKind()
+ {
+ return (gtPutArgStkKind == Kind::Push) || (gtPutArgStkKind == Kind::PushAllSlots);
+ }
unsigned gtNumSlots; // Number of slots for the argument to be passed on stack
unsigned gtNumberReferenceSlots; // Number of reference slots.
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 2fee13f713..99d301ff91 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -2100,6 +2100,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
// Now that the fields have been sorted, initialize the LSRA info.
bool allFieldsAreSlots = true;
+ bool needsByteTemp = false;
unsigned prevOffset = putArgStk->getArgSize();
for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
{
@@ -2108,11 +2109,45 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
const unsigned fieldOffset = current->gtFieldOffset;
assert(fieldType != TYP_LONG);
- // TODO-X86-CQ: we could probably improve codegen here by marking all of the operands to field nodes that
- // we are going to `push` on the stack as reg-optional.
+ // For x86 we must mark all integral fields as contained or reg-optional, and handle them
+ // accordingly in code generation, since we may have up to 8 fields, which cannot all be in
+ // registers to be consumed atomically by the call.
+ if (varTypeIsIntegralOrI(fieldNode))
+ {
+ if (fieldNode->OperGet() == GT_LCL_VAR)
+ {
+ LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]);
+ if (varDsc->lvTracked && !varDsc->lvDoNotEnregister)
+ {
+ SetRegOptional(fieldNode);
+ }
+ else
+ {
+ MakeSrcContained(putArgStk, fieldNode);
+ }
+ }
+ else if (fieldNode->IsIntCnsFitsInI32())
+ {
+ MakeSrcContained(putArgStk, fieldNode);
+ }
+ else
+ {
+ // For the case where we cannot directly push the value, if we run out of registers,
+ // it would be better to defer computation until we are pushing the arguments rather
+ // than spilling, but this situation is not all that common, as most cases of promoted
+ // structs do not have a large number of fields, and of those most are lclVars or
+ // copy-propagated constants.
+ SetRegOptional(fieldNode);
+ }
+ }
+ else
+ {
+ assert(varTypeIsFloating(fieldNode));
+ }
- const bool fieldIsSlot =
- varTypeIsIntegralOrI(fieldType) && ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
+ // We can treat as a slot any field that is stored at a slot boundary, where the previous
+ // field is not in the same slot. (Note that we store the fields in reverse order.)
+ const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
if (!fieldIsSlot)
{
allFieldsAreSlots = false;
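
The containment policy just added is a three-way decision per integral field. A compact sketch under invented names (FieldDesc, placeField); the flags mirror the checks above (lvTracked && !lvDoNotEnregister, IsIntCnsFitsInI32):

    #include <cassert>

    // Invented stand-ins for the checks above (not the JIT's types).
    enum class FieldPlacement { RegOptional, Contained, InRegister };

    struct FieldDesc
    {
        bool isIntegral;
        bool isLclVar;
        bool isTrackedEnregisterable; // lvTracked && !lvDoNotEnregister
        bool isInt32Constant;         // IsIntCnsFitsInI32()
    };

    FieldPlacement placeField(const FieldDesc& f)
    {
        if (!f.isIntegral)
        {
            return FieldPlacement::InRegister; // floating-point fields keep a register
        }
        if (f.isLclVar)
        {
            return f.isTrackedEnregisterable ? FieldPlacement::RegOptional : FieldPlacement::Contained;
        }
        if (f.isInt32Constant)
        {
            return FieldPlacement::Contained; // pushed directly as an immediate
        }
        // Computed values stay reg-optional so they can spill rather than
        // hold registers across many sibling fields.
        return FieldPlacement::RegOptional;
    }

    int main()
    {
        assert(placeField({true, true, false, false}) == FieldPlacement::Contained);   // untracked lclVar
        assert(placeField({true, false, false, true}) == FieldPlacement::Contained);   // 32-bit constant
        assert(placeField({true, false, false, false}) == FieldPlacement::RegOptional); // computed value
    }
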
@@ -2120,8 +2155,9 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
{
// If this field is a slot--i.e. it is an integer field that is 4-byte aligned and takes up 4 bytes
// (including padding)--we can store the whole value rather than just the byte. Otherwise, we will
- // need a byte-addressable register for the store.
- fieldNode->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_NON_BYTE_REGS);
+ // need a byte-addressable register for the store. We will enforce this requirement on an internal
+ // register, which we can use to copy multiple byte values.
+ needsByteTemp = true;
}
}
@@ -2133,10 +2169,28 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
prevOffset = fieldOffset;
}
- // If all fields of this list are slots, set the copy kind.
+ // Set the copy kind.
+ // TODO-X86-CQ: Even if we are using push, if there are contiguous floating point fields, we should
+ // adjust the stack once for those fields. The latter is really best done in code generation, but
+ // this tuning should probably be undertaken as a whole.
+ // Also, if there are floating point fields, it may be better to use the "Unroll" mode
+ // of copying the struct as a whole, if the fields are not register candidates.
if (allFieldsAreSlots)
{
- putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::AllSlots;
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::PushAllSlots;
+ }
+ else
+ {
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
+ // If any of the fields cannot be stored with an actual push, we may need a temporary
+ // register to load the value before storing it to the stack location.
+ info->internalIntCount = 1;
+ regMaskTP regMask = l->allRegs(TYP_INT);
+ if (needsByteTemp)
+ {
+ regMask &= ~RBM_NON_BYTE_REGS;
+ }
+ info->setInternalCandidates(l, regMask);
}
return;
}
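
On x86 only EAX, EBX, ECX, and EDX have byte-sized encodings, so a non-slot byte field narrows the internal temp's candidate set. A sketch of the mask computation, with illustrative mask constants standing in for the JIT's RBM_* values:

    #include <cassert>
    #include <cstdint>

    // Illustrative x86 register masks (one bit per register), not the JIT's RBM_* values.
    const uint32_t RBM_EAX = 1u << 0, RBM_ECX = 1u << 1, RBM_EDX = 1u << 2, RBM_EBX = 1u << 3;
    const uint32_t RBM_ESI = 1u << 4, RBM_EDI = 1u << 5;
    const uint32_t RBM_ALLINT        = RBM_EAX | RBM_ECX | RBM_EDX | RBM_EBX | RBM_ESI | RBM_EDI;
    const uint32_t RBM_NON_BYTE_REGS = RBM_ESI | RBM_EDI; // no byte-sized encodings

    uint32_t internalTempCandidates(bool needsByteTemp)
    {
        uint32_t mask = RBM_ALLINT;
        if (needsByteTemp)
        {
            mask &= ~RBM_NON_BYTE_REGS; // the temp must be byte-addressable
        }
        return mask;
    }

    int main()
    {
        assert(internalTempCandidates(true) == (RBM_EAX | RBM_ECX | RBM_EDX | RBM_EBX));
        assert((internalTempCandidates(false) & RBM_ESI) != 0);
    }
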
@@ -2218,15 +2272,23 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
info->addInternalCandidates(l, l->internalFloatRegCandidates());
}
- // If src or dst are on stack, we don't have to generate the address into a register
- // because it's just some constant+SP
- putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Unroll;
+#ifdef _TARGET_X86_
+ if (size < XMM_REGSIZE_BYTES)
+ {
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Unroll;
+ }
}
#ifdef _TARGET_X86_
else if (putArgStk->gtNumberReferenceSlots != 0)
{
// On x86, we must use `push` to store GC references to the stack in order for the emitter to properly update
// the function's GC info. These `putargstk` nodes will generate a sequence of `push` instructions.
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
}
#endif // _TARGET_X86_
else
@@ -2496,6 +2558,7 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
info->setDstCandidates(l, RBM_RAX);
}
+ bool op2CanBeRegOptional = true;
#ifdef _TARGET_X86_
if (op1->OperGet() == GT_LONG)
{
@@ -2505,7 +2568,9 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
// Src count is actually 3, so increment.
assert(op2->IsCnsIntOrI());
+ assert(tree->OperGet() == GT_UMOD);
info->srcCount++;
+ op2CanBeRegOptional = false;
// This situation also requires an internal register.
info->internalIntCount = 1;
@@ -2526,7 +2591,7 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
{
MakeSrcContained(tree, op2);
}
- else
+ else if (op2CanBeRegOptional)
{
op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index b0ef498971..bb8c190e9a 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -5724,13 +5724,14 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio
}
}
- LsraLocation nextLocation = assignedInterval->getNextRefLocation();
+ RefPosition* nextRefPosition = assignedInterval->getNextRefPosition();
+ LsraLocation nextLocation = assignedInterval->getNextRefLocation();
// We should never spill a register that's occupied by an Interval with its next use at the current location.
// Normally this won't occur (unless we actually had more uses in a single node than there are registers),
// because we'll always find something with a later nextLocation, but it can happen in stress when
// we have LSRA_SELECT_NEAREST.
- if ((nextLocation == refLocation) && !refPosition->isFixedRegRef)
+ if ((nextLocation == refLocation) && !refPosition->isFixedRegRef && nextRefPosition->RequiresRegister())
{
continue;
}
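
The refinement above lets LSRA spill a register whose next use is at the current location when that use is reg-optional, i.e. does not actually require a register. A minimal sketch of the refined predicate, with invented names (Use, canSpillHere) rather than LSRA's data structures:

    #include <cassert>

    struct Use
    {
        unsigned location;
        bool     requiresRegister; // false for reg-optional uses
    };

    bool canSpillHere(const Use& nextUse, unsigned refLocation, bool isFixedRegRef)
    {
        // Old rule: never spill if the interval's next use is at this location.
        // New rule: such a use only blocks spilling if it truly needs a register.
        return !((nextUse.location == refLocation) && !isFixedRegRef && nextUse.requiresRegister);
    }

    int main()
    {
        assert(!canSpillHere({10, true}, 10, false)); // next use here and needs a reg
        assert(canSpillHere({10, false}, 10, false)); // reg-optional use: spilling is OK
        assert(canSpillHere({12, true}, 10, false));  // next use is later
    }
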
@@ -5815,7 +5816,17 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio
else
{
// Must have found a spill candidate.
- assert((farthestRefPhysRegRecord != nullptr) && (farthestLocation > refLocation || refPosition->isFixedRegRef));
+ assert(farthestRefPhysRegRecord != nullptr);
+ if ((farthestLocation == refLocation) && !refPosition->isFixedRegRef)
+ {
+ Interval* assignedInterval = farthestRefPhysRegRecord->assignedInterval;
+ RefPosition* nextRefPosition = assignedInterval->getNextRefPosition();
+ assert(!nextRefPosition->RequiresRegister());
+ }
+ else
+ {
+ assert(farthestLocation > refLocation || refPosition->isFixedRegRef);
+ }
}
#endif