summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Carol Eidt <carol.eidt@microsoft.com> 2016-10-27 09:37:40 -0700
committer: Carol Eidt <carol.eidt@microsoft.com> 2016-11-22 17:54:55 -0800
commit: 900445d95ee524b663e1eb859bca890e6d6eb517 (patch)
tree: 02aa3b54f3b00b4de8e6b58f596e9d21d230ce41 /src
parent: e8989c1a75588dede93fcd5e0db173044a850b74 (diff)
download: coreclr-900445d95ee524b663e1eb859bca890e6d6eb517.tar.gz
coreclr-900445d95ee524b663e1eb859bca890e6d6eb517.tar.bz2
coreclr-900445d95ee524b663e1eb859bca890e6d6eb517.zip
x86: not all fields of promoted struct need regs
This fixes DevDiv bug 278375, and includes a test case. It also improves the code generation for the general case of promoted structs, though tuning is still needed. The bug was a case where, after decomposing the long fields, we had 7 entries in the GT_FIELD_LIST, each of which was requesting a register. Since Lowering won't know which fields may be in registers, it must be prepared to handle fields in memory. Also, if the fields are not register candidates, we should make them contained and push them directly, for which we may need a temporary register. LSRA is changed to allow RegOptional spills that are used at the same location of the register being allocated. This exposed an issue with a spilled constant being treated as a contained constant (causing an assert because it is not encodable). Change codegen to use push for promoted structs (they are generally small-ish, and pushes of lclVars and constants are cheaper than loading into a register). This could be optimized for consecutive float or double fields. In the process of running jitStressRegs, I found that a test was throwing an exception but reporting success. I fixed the issue causing the exception and also changed the test to report failure if an exception is thrown. Also, on x86, GT_UMOD requires op2 to be in a register, so Lowering must not mark it as RegOptional.
Diffstat (limited to 'src')
-rw-r--r--src/jit/codegenlinear.cpp7
-rw-r--r--src/jit/codegenlinear.h4
-rw-r--r--src/jit/codegenxarch.cpp404
-rw-r--r--src/jit/gentree.cpp55
-rw-r--r--src/jit/gentree.h8
-rw-r--r--src/jit/lowerxarch.cpp89
-rw-r--r--src/jit/lsra.cpp17
7 files changed, 428 insertions, 156 deletions
diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp
index 68eb2ffe8f..1cff16349c 100644
--- a/src/jit/codegenlinear.cpp
+++ b/src/jit/codegenlinear.cpp
@@ -1186,14 +1186,15 @@ void CodeGen::genConsumeRegs(GenTree* tree)
#ifdef _TARGET_XARCH_
else if (tree->OperGet() == GT_LCL_VAR)
{
- // A contained lcl var must be living on stack and marked as reg optional.
+ // A contained lcl var must be living on stack and marked as reg optional, or not be a
+ // register candidate.
unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
LclVarDsc* varDsc = compiler->lvaTable + varNum;
noway_assert(varDsc->lvRegNum == REG_STK);
- noway_assert(tree->IsRegOptional());
+ noway_assert(tree->IsRegOptional() || !varDsc->lvLRACandidate);
- // Update the life of reg optional lcl var.
+ // Update the life of the lcl var.
genUpdateLife(tree);
}
#endif // _TARGET_XARCH_
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 6ad72cf781..25ad3f5637 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -167,7 +167,9 @@ void genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode);
#ifdef FEATURE_PUT_STRUCT_ARG_STK
#ifdef _TARGET_X86_
-bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk, bool isSrcInMemory);
+bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk);
+void genPushReg(var_types type, regNumber srcReg);
+void genPutArgStkFieldList(GenTreePutArgStk* putArgStk);
#endif // _TARGET_X86_
void genPutStructArgStk(GenTreePutArgStk* treeNode);
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 613137e59e..795098975b 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -564,6 +564,8 @@ void CodeGen::genCodeForLongUMod(GenTreeOp* node)
assert(dividend->OperGet() == GT_LONG);
assert(varTypeIsLong(dividend));
+ genConsumeOperands(node);
+
GenTree* const dividendLo = dividend->gtOp1;
GenTree* const dividendHi = dividend->gtOp2;
assert(!dividendLo->isContained());
@@ -574,8 +576,6 @@ void CodeGen::genCodeForLongUMod(GenTreeOp* node)
assert(divisor->gtIconVal >= 2);
assert(divisor->gtIconVal <= 0x3fffffff);
- genConsumeOperands(node);
-
// dividendLo must be in RAX; dividendHi must be in RDX
genCopyRegIfNeeded(dividendLo, REG_EAX);
genCopyRegIfNeeded(dividendHi, REG_EDX);
@@ -3349,6 +3349,8 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
regNumber intTmpReg = REG_NA;
regNumber longTmpReg = REG_NA;
#ifdef _TARGET_X86_
+ // On x86 we use an XMM register for both 16 and 8-byte chunks, but if it's
+ // less than 16 bytes, we will just be using pushes
if (size >= 8)
{
xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
@@ -3359,6 +3361,7 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
}
#else // !_TARGET_X86_
+ // On x64 we use an XMM register only for 16-byte chunks.
if (size >= XMM_REGSIZE_BYTES)
{
xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
@@ -7486,29 +7489,234 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
//
// Arguments:
// putArgStk - the putArgStk node.
-// isSrcInMemory - true if the source of the putArgStk node is in
-// memory; false otherwise.
//
// Returns: true if the stack pointer was adjusted; false otherwise.
//
-bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk, bool isSrcInMemory)
+bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
{
- assert(isSrcInMemory || (putArgStk->gtOp1->OperGet() == GT_FIELD_LIST));
-
- // If this argument contains any GC pointers or is less than 16 bytes in size and is either in memory or composed
- // entirely of slot-like fields (i.e. integral-types, 4-byte-aligned fields that take up 4 bytes including any
- // padding), we will use a sequence of `push` instructions to store the argument to the stack.
const unsigned argSize = putArgStk->getArgSize();
- if ((putArgStk->gtNumberReferenceSlots != 0) ||
- ((argSize < 16) && (isSrcInMemory || (putArgStk->gtPutArgStkKind == GenTreePutArgStk::Kind::AllSlots))))
+
+ // If the gtPutArgStkKind is one of the push types, we do not pre-adjust the stack.
+ // This is set in Lowering, and is true if and only if:
+ // - This argument contains any GC pointers OR
+ // - It is a GT_FIELD_LIST OR
+ // - It is less than 16 bytes in size.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ switch (putArgStk->gtPutArgStkKind)
{
- return false;
+ case GenTreePutArgStk::Kind::RepInstr:
+ case GenTreePutArgStk::Kind::Unroll:
+ assert((putArgStk->gtNumberReferenceSlots == 0) && (putArgStk->gtGetOp1()->OperGet() != GT_FIELD_LIST) &&
+ (argSize >= 16));
+ break;
+ case GenTreePutArgStk::Kind::Push:
+ case GenTreePutArgStk::Kind::PushAllSlots:
+ assert((putArgStk->gtNumberReferenceSlots != 0) || (putArgStk->gtGetOp1()->OperGet() == GT_FIELD_LIST) ||
+ (argSize < 16));
+ break;
+ case GenTreePutArgStk::Kind::Invalid:
+ default:
+ assert(!"Uninitialized GenTreePutArgStk::Kind");
+ break;
}
+#endif // DEBUG
+ if (putArgStk->isPushKind())
+ {
+ m_pushStkArg = true;
+ return false;
+ }
+ m_pushStkArg = false;
inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE);
genStackLevel += argSize;
return true;
}
+
+//---------------------------------------------------------------------
+// genPutArgStkFieldList - generate code for passing an arg on the stack.
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+// targetType - the type of the treeNode
+//
+// Return value:
+// None
+//
+void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
+{
+ GenTreeFieldList* const fieldList = putArgStk->gtOp1->AsFieldList();
+ assert(fieldList != nullptr);
+
+ // Set m_pushStkArg and pre-adjust the stack if necessary.
+ const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk);
+ // For now, we only support the "push" case; we will push a full slot for the first field of each slot
+ // within the struct.
+ assert((putArgStk->isPushKind()) && !preAdjustedStack && m_pushStkArg);
+
+    // If we have pre-adjusted the stack and are simply storing the fields in order, set the offset to 0.
+ // (Note that this mode is not currently being used.)
+ // If we are pushing the arguments (i.e. we have not pre-adjusted the stack), then we are pushing them
+ // in reverse order, so we start with the current field offset at the size of the struct arg (which must be
+ // a multiple of the target pointer size).
+ unsigned currentOffset = (preAdjustedStack) ? 0 : putArgStk->getArgSize();
+ unsigned prevFieldOffset = currentOffset;
+ regNumber tmpReg = REG_NA;
+ if (putArgStk->gtRsvdRegs != RBM_NONE)
+ {
+ assert(genCountBits(putArgStk->gtRsvdRegs) == 1);
+ tmpReg = genRegNumFromMask(putArgStk->gtRsvdRegs);
+ assert(genIsValidIntReg(tmpReg));
+ }
+ for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
+ {
+ GenTree* const fieldNode = current->Current();
+ const unsigned fieldOffset = current->gtFieldOffset;
+ var_types fieldType = current->gtFieldType;
+
+ // Long-typed nodes should have been handled by the decomposition pass, and lowering should have sorted the
+ // field list in descending order by offset.
+ assert(!varTypeIsLong(fieldType));
+ assert(fieldOffset <= prevFieldOffset);
+
+ // Consume the register, if any, for this field. Note that genConsumeRegs() will appropriately
+ // update the liveness info for a lclVar that has been marked RegOptional, which hasn't been
+ // assigned a register, and which is therefore contained.
+ // Unlike genConsumeReg(), it handles the case where no registers are being consumed.
+ genConsumeRegs(fieldNode);
+ regNumber argReg = fieldNode->isContainedSpillTemp() ? REG_NA : fieldNode->gtRegNum;
+
+ // If the field is slot-like, we can use a push instruction to store the entire register no matter the type.
+ //
+ // The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up
+ // to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must
+ // not require rounding.
+ // NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise
+ // able to detect stores into the outgoing argument area of the stack on x86.
+ const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
+ int adjustment = roundUp(currentOffset - fieldOffset, 4);
+ if (fieldIsSlot)
+ {
+ fieldType = genActualType(fieldType);
+ unsigned pushSize = genTypeSize(fieldType);
+ assert((pushSize % 4) == 0);
+ adjustment -= pushSize;
+ while (adjustment != 0)
+ {
+ inst_IV(INS_push, 0);
+ currentOffset -= pushSize;
+ genStackLevel += pushSize;
+ adjustment -= pushSize;
+ }
+ m_pushStkArg = true;
+ }
+ else
+ {
+ m_pushStkArg = false;
+ // We always "push" floating point fields (i.e. they are full slot values that don't
+ // require special handling).
+ assert(varTypeIsIntegralOrI(fieldNode));
+ // If we can't push this field, it needs to be in a register so that we can store
+ // it to the stack location.
+ assert(tmpReg != REG_NA);
+ if (adjustment != 0)
+ {
+ // This moves the stack pointer to fieldOffset.
+ // For this case, we must adjust the stack and generate stack-relative stores rather than pushes.
+ // Adjust the stack pointer to the next slot boundary.
+ inst_RV_IV(INS_sub, REG_SPBASE, adjustment, EA_PTRSIZE);
+ currentOffset -= adjustment;
+ genStackLevel += adjustment;
+ }
+
+ // Does it need to be in a byte register?
+ // If so, we'll use tmpReg, which must have been allocated as a byte register.
+ // If it's already in a register, but not a byteable one, then move it.
+ if (varTypeIsByte(fieldType) && ((argReg == REG_NA) || ((genRegMask(argReg) & RBM_BYTE_REGS) == 0)))
+ {
+ noway_assert((genRegMask(tmpReg) & RBM_BYTE_REGS) != 0);
+ if (argReg != REG_NA)
+ {
+ inst_RV_RV(INS_mov, tmpReg, argReg, fieldType);
+ argReg = tmpReg;
+ }
+ }
+ }
+
+ if (argReg == REG_NA)
+ {
+ if (m_pushStkArg)
+ {
+ if (fieldNode->isContainedSpillTemp())
+ {
+ assert(fieldNode->IsRegOptional());
+ TempDsc* tmp = getSpillTempDsc(fieldNode);
+ getEmitter()->emitIns_S(INS_push, emitActualTypeSize(fieldNode->TypeGet()), tmp->tdTempNum(), 0);
+ compiler->tmpRlsTemp(tmp);
+ }
+ else
+ {
+ assert(varTypeIsIntegralOrI(fieldNode));
+ switch (fieldNode->OperGet())
+ {
+ case GT_LCL_VAR:
+ inst_TT(INS_push, fieldNode, 0, 0, emitActualTypeSize(fieldNode->TypeGet()));
+ break;
+ case GT_CNS_INT:
+ if (fieldNode->IsIconHandle())
+ {
+ inst_IV_handle(INS_push, fieldNode->gtIntCon.gtIconVal);
+ }
+ else
+ {
+ inst_IV(INS_push, fieldNode->gtIntCon.gtIconVal);
+ }
+ break;
+ default:
+ unreached();
+ }
+ }
+ currentOffset -= TARGET_POINTER_SIZE;
+ genStackLevel += TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ // The stack has been adjusted and we will load the field to tmpReg and then store it on the stack.
+ assert(varTypeIsIntegralOrI(fieldNode));
+ switch (fieldNode->OperGet())
+ {
+ case GT_LCL_VAR:
+ inst_RV_TT(INS_mov, tmpReg, fieldNode);
+ break;
+ case GT_CNS_INT:
+ genSetRegToConst(tmpReg, fieldNode->TypeGet(), fieldNode);
+ break;
+ default:
+ unreached();
+ }
+ genStoreRegToStackArg(fieldType, tmpReg, fieldOffset - currentOffset);
+ }
+ }
+ else
+ {
+ genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset);
+ if (m_pushStkArg)
+ {
+ // We always push a slot-rounded size
+ currentOffset -= genTypeSize(fieldType);
+ }
+ }
+
+ prevFieldOffset = fieldOffset;
+ }
+ if (currentOffset != 0)
+ {
+ // We don't expect padding at the beginning of a struct, but it could happen with explicit layout.
+ inst_RV_IV(INS_sub, REG_SPBASE, currentOffset, EA_PTRSIZE);
+ genStackLevel += currentOffset;
+ }
+}
#endif // _TARGET_X86_
//---------------------------------------------------------------------
@@ -7527,7 +7735,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
#ifdef _TARGET_X86_
if (varTypeIsStruct(targetType))
{
- m_pushStkArg = !genAdjustStackForPutArgStk(putArgStk, true);
+ (void)genAdjustStackForPutArgStk(putArgStk);
genPutStructArgStk(putArgStk);
return;
}
@@ -7541,10 +7749,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
// a separate putarg_stk for each of the upper and lower halves.
noway_assert(targetType != TYP_LONG);
- int argSize = genTypeSize(genActualType(targetType));
- genStackLevel += argSize;
+ const unsigned argSize = putArgStk->getArgSize();
+ assert((argSize % TARGET_POINTER_SIZE) == 0);
- // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
if (data->isContainedIntOrIImmed())
{
if (data->IsIconHandle())
@@ -7555,115 +7762,18 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
{
inst_IV(INS_push, data->gtIntCon.gtIconVal);
}
+ genStackLevel += argSize;
}
else if (data->OperGet() == GT_FIELD_LIST)
{
- GenTreeFieldList* const fieldList = data->AsFieldList();
- assert(fieldList != nullptr);
-
- m_pushStkArg = false;
- const int argSize = putArgStk->getArgSize();
- assert((argSize % TARGET_POINTER_SIZE) == 0);
-
- const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk, false);
-
- // If the stack was not pre-adjusted, set the current field offset to the size of the struct arg (which must be
- // a multiple of the target pointer size). Otherwise, set the offset to 0.
- int currentOffset = preAdjustedStack ? 0 : argSize;
- unsigned prevFieldOffset = argSize;
- for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
- {
- GenTree* const fieldNode = current->Current();
- const unsigned fieldOffset = current->gtFieldOffset;
- var_types fieldType = current->gtFieldType;
-
- // Long-typed nodes should have been handled by the decomposition pass, and lowering should have sorted the
- // field list in descending order by offset.
- assert(!varTypeIsLong(fieldType));
- assert(fieldOffset <= prevFieldOffset);
-
- // TODO-X86-CQ: If this is not a register candidate, or is not in a register,
- // make it contained.
- genConsumeReg(fieldNode);
-
- // If the field is slot-like, we can store the entire register no matter the type.
- const bool fieldIsSlot =
- varTypeIsIntegralOrI(fieldType) && ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
- if (fieldIsSlot)
- {
- fieldType = genActualType(fieldType);
- assert(genTypeSize(fieldType) == 4);
- }
-
- // We can use a push instruction for any slot-like field.
- //
- // NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise
- // able to detect stores into the outgoing argument area of the stack on x86.
- const bool usePush = !preAdjustedStack && fieldIsSlot;
- assert(usePush || !varTypeIsGC(fieldType));
-
- // Adjust the stack if necessary. If we are going to generate a `push` instruction, this moves the stack
- // pointer to (fieldOffset + sizeof(fieldType)) to account for the `push`.
- const int fieldSize = genTypeSize(fieldType);
- const int desiredOffset = current->gtFieldOffset + (usePush ? fieldSize : 0);
- if (currentOffset > desiredOffset)
- {
- assert(!preAdjustedStack);
-
- // The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up
- // to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must
- // not require rounding.
- const int adjustment = roundUp(currentOffset - desiredOffset, 4);
- assert(!usePush || (adjustment == (currentOffset - desiredOffset)));
- inst_RV_IV(INS_sub, REG_SPBASE, adjustment, EA_PTRSIZE);
- currentOffset -= adjustment;
- genStackLevel += adjustment;
- }
-
- // Note that the argReg may not be the lcl->gtRegNum, if it has been copied
- // or reloaded to a different register.
- const regNumber argReg = fieldNode->gtRegNum;
- if (usePush)
- {
- // Adjust the stack if necessary and push the field.
- // Push the field.
- inst_RV(INS_push, argReg, fieldType, emitTypeSize(fieldType));
- currentOffset -= fieldSize;
- genStackLevel += fieldSize;
- }
- else
- {
- assert(!m_pushStkArg);
- genStoreRegToStackArg(fieldType, argReg, desiredOffset - currentOffset);
- }
-
- prevFieldOffset = fieldOffset;
- }
-
- // Adjust the stack if necessary.
- if (currentOffset != 0)
- {
- inst_RV_IV(INS_sub, REG_SPBASE, currentOffset, EA_PTRSIZE);
- genStackLevel += currentOffset;
- }
- }
- else if (data->isContained())
- {
- NYI_X86("Contained putarg_stk of non-constant");
+ genPutArgStkFieldList(putArgStk);
}
else
{
+ // We should not see any contained nodes that are not immediates.
+ assert(!data->isContained());
genConsumeReg(data);
- if (varTypeIsIntegralOrI(targetType))
- {
- inst_RV(INS_push, data->gtRegNum, targetType);
- }
- else
- {
- // Decrement SP.
- inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
- getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
- }
+ genPushReg(targetType, data->gtRegNum);
}
#else // !_TARGET_X86_
{
@@ -7712,6 +7822,48 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
#endif // !_TARGET_X86_
}
+#ifdef _TARGET_X86_
+// genPushReg: Push a register value onto the stack and adjust the stack level
+//
+// Arguments:
+// type - the type of value to be stored
+// reg - the register containing the value
+//
+// Notes:
+// For TYP_LONG, the srcReg must be a floating point register.
+// Otherwise, the register type must be consistent with the given type.
+//
+void CodeGen::genPushReg(var_types type, regNumber srcReg)
+{
+ unsigned size = genTypeSize(type);
+ if (varTypeIsIntegralOrI(type) && type != TYP_LONG)
+ {
+ assert(genIsValidIntReg(srcReg));
+ inst_RV(INS_push, srcReg, type);
+ }
+ else
+ {
+ instruction ins;
+ emitAttr attr = emitTypeSize(type);
+ if (type == TYP_LONG)
+ {
+ // On x86, the only way we can push a TYP_LONG from a register is if it is in an xmm reg.
+ // This is only used when we are pushing a struct from memory to memory, and basically is
+ // handling an 8-byte "chunk", as opposed to strictly a long type.
+ ins = INS_movq;
+ }
+ else
+ {
+ ins = ins_Store(type);
+ }
+ assert(genIsValidFloatReg(srcReg));
+ inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE);
+ getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0);
+ }
+ genStackLevel += size;
+}
+#endif // _TARGET_X86_
+
#if defined(FEATURE_PUT_STRUCT_ARG_STK)
// genStoreRegToStackArg: Store a register value into the stack argument area
//
@@ -7776,16 +7928,7 @@ void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset
#ifdef _TARGET_X86_
if (m_pushStkArg)
{
- if (varTypeIsIntegralOrI(type) && type != TYP_LONG)
- {
- inst_RV(INS_push, srcReg, type);
- }
- else
- {
- inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE);
- getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0);
- }
- genStackLevel += size;
+ genPushReg(type, srcReg);
}
else
{
@@ -7834,6 +7977,9 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
case GenTreePutArgStk::Kind::Unroll:
genStructPutArgUnroll(putArgStk);
break;
+ case GenTreePutArgStk::Kind::Push:
+ genStructPutArgUnroll(putArgStk);
+ break;
default:
unreached();
}
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 2647b70d93..9749574d1f 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -11287,19 +11287,62 @@ void Compiler::gtDispTree(GenTreePtr tree,
{
printf(" (last use)");
}
- if (tree->OperIsCopyBlkOp())
+ if (tree->OperIsBlkOp())
{
- printf(" (copy)");
- }
- else if (tree->OperIsInitBlkOp())
- {
- printf(" (init)");
+ if (tree->OperIsCopyBlkOp())
+ {
+ printf(" (copy)");
+ }
+ else if (tree->OperIsInitBlkOp())
+ {
+ printf(" (init)");
+ }
+ if (tree->OperIsStoreBlk() && (tree->AsBlk()->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid))
+ {
+ switch (tree->AsBlk()->gtBlkOpKind)
+ {
+ case GenTreeBlk::BlkOpKindRepInstr:
+ printf(" (RepInstr)");
+ break;
+ case GenTreeBlk::BlkOpKindUnroll:
+ printf(" (Unroll)");
+ break;
+ case GenTreeBlk::BlkOpKindHelper:
+ printf(" (Helper)");
+ break;
+ default:
+ unreached();
+ }
+ }
}
else if (tree->OperIsFieldList())
{
printf(" %s at offset %d", varTypeName(tree->AsFieldList()->gtFieldType),
tree->AsFieldList()->gtFieldOffset);
}
+#if FEATURE_PUT_STRUCT_ARG_STK
+ else if ((tree->OperGet() == GT_PUTARG_STK) &&
+ (tree->AsPutArgStk()->gtPutArgStkKind != GenTreePutArgStk::Kind::Invalid))
+ {
+ switch (tree->AsPutArgStk()->gtPutArgStkKind)
+ {
+ case GenTreePutArgStk::Kind::RepInstr:
+ printf(" (RepInstr)");
+ break;
+ case GenTreePutArgStk::Kind::Unroll:
+ printf(" (Unroll)");
+ break;
+ case GenTreePutArgStk::Kind::Push:
+ printf(" (Push)");
+ break;
+ case GenTreePutArgStk::Kind::PushAllSlots:
+ printf(" (PushAllSlots)");
+ break;
+ default:
+ unreached();
+ }
+ }
+#endif // FEATURE_PUT_STRUCT_ARG_STK
IndirectAssignmentAnnotation* pIndirAnnote;
if (tree->gtOper == GT_ASG && GetIndirAssignMap()->Lookup(tree, &pIndirAnnote))
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index b46a8956d3..6518f91c57 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -566,7 +566,7 @@ public:
bool isContainedIntOrIImmed() const
{
- return isContained() && IsCnsIntOrI();
+ return isContained() && IsCnsIntOrI() && !isContainedSpillTemp();
}
bool isContainedFltOrDblImmed() const
@@ -4649,10 +4649,14 @@ struct GenTreePutArgStk : public GenTreeUnOp
// block node.
enum class Kind : __int8{
- Invalid, RepInstr, Unroll, AllSlots,
+ Invalid, RepInstr, Unroll, Push, PushAllSlots,
};
Kind gtPutArgStkKind;
+ bool isPushKind()
+ {
+ return (gtPutArgStkKind == Kind::Push) || (gtPutArgStkKind == Kind::PushAllSlots);
+ }
unsigned gtNumSlots; // Number of slots for the argument to be passed on stack
unsigned gtNumberReferenceSlots; // Number of reference slots.
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 2fee13f713..99d301ff91 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -2100,6 +2100,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
// Now that the fields have been sorted, initialize the LSRA info.
bool allFieldsAreSlots = true;
+ bool needsByteTemp = false;
unsigned prevOffset = putArgStk->getArgSize();
for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
{
@@ -2108,11 +2109,45 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
const unsigned fieldOffset = current->gtFieldOffset;
assert(fieldType != TYP_LONG);
- // TODO-X86-CQ: we could probably improve codegen here by marking all of the operands to field nodes that
- // we are going to `push` on the stack as reg-optional.
+ // For x86 we must mark all integral fields as contained or reg-optional, and handle them
+ // accordingly in code generation, since we may have up to 8 fields, which cannot all be in
+ // registers to be consumed atomically by the call.
+ if (varTypeIsIntegralOrI(fieldNode))
+ {
+ if (fieldNode->OperGet() == GT_LCL_VAR)
+ {
+ LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]);
+ if (varDsc->lvTracked && !varDsc->lvDoNotEnregister)
+ {
+ SetRegOptional(fieldNode);
+ }
+ else
+ {
+ MakeSrcContained(putArgStk, fieldNode);
+ }
+ }
+ else if (fieldNode->IsIntCnsFitsInI32())
+ {
+ MakeSrcContained(putArgStk, fieldNode);
+ }
+ else
+ {
+ // For the case where we cannot directly push the value, if we run out of registers,
+ // it would be better to defer computation until we are pushing the arguments rather
+ // than spilling, but this situation is not all that common, as most cases of promoted
+ // structs do not have a large number of fields, and of those most are lclVars or
+ // copy-propagated constants.
+ SetRegOptional(fieldNode);
+ }
+ }
+ else
+ {
+ assert(varTypeIsFloating(fieldNode));
+ }
- const bool fieldIsSlot =
- varTypeIsIntegralOrI(fieldType) && ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
+ // We can treat as a slot any field that is stored at a slot boundary, where the previous
+ // field is not in the same slot. (Note that we store the fields in reverse order.)
+ const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
if (!fieldIsSlot)
{
allFieldsAreSlots = false;
@@ -2120,8 +2155,9 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
{
// If this field is a slot--i.e. it is an integer field that is 4-byte aligned and takes up 4 bytes
// (including padding)--we can store the whole value rather than just the byte. Otherwise, we will
- // need a byte-addressable register for the store.
- fieldNode->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_NON_BYTE_REGS);
+ // need a byte-addressable register for the store. We will enforce this requirement on an internal
+ // register, which we can use to copy multiple byte values.
+ needsByteTemp = true;
}
}
@@ -2133,10 +2169,28 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
prevOffset = fieldOffset;
}
- // If all fields of this list are slots, set the copy kind.
+ // Set the copy kind.
+ // TODO-X86-CQ: Even if we are using push, if there are contiguous floating point fields, we should
+ // adjust the stack once for those fields. The latter is really best done in code generation, but
+ // this tuning should probably be undertaken as a whole.
+ // Also, if there are floating point fields, it may be better to use the "Unroll" mode
+ // of copying the struct as a whole, if the fields are not register candidates.
if (allFieldsAreSlots)
{
- putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::AllSlots;
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::PushAllSlots;
+ }
+ else
+ {
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
+ // If any of the fields cannot be stored with an actual push, we may need a temporary
+ // register to load the value before storing it to the stack location.
+ info->internalIntCount = 1;
+ regMaskTP regMask = l->allRegs(TYP_INT);
+ if (needsByteTemp)
+ {
+ regMask &= ~RBM_NON_BYTE_REGS;
+ }
+ info->setInternalCandidates(l, regMask);
}
return;
}
@@ -2218,15 +2272,23 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
info->addInternalCandidates(l, l->internalFloatRegCandidates());
}
- // If src or dst are on stack, we don't have to generate the address into a register
- // because it's just some constant+SP
- putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Unroll;
+#ifdef _TARGET_X86_
+ if (size < XMM_REGSIZE_BYTES)
+ {
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Unroll;
+ }
}
#ifdef _TARGET_X86_
else if (putArgStk->gtNumberReferenceSlots != 0)
{
// On x86, we must use `push` to store GC references to the stack in order for the emitter to properly update
// the function's GC info. These `putargstk` nodes will generate a sequence of `push` instructions.
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
}
#endif // _TARGET_X86_
else
@@ -2496,6 +2558,7 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
info->setDstCandidates(l, RBM_RAX);
}
+ bool op2CanBeRegOptional = true;
#ifdef _TARGET_X86_
if (op1->OperGet() == GT_LONG)
{
@@ -2505,7 +2568,9 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
// Src count is actually 3, so increment.
assert(op2->IsCnsIntOrI());
+ assert(tree->OperGet() == GT_UMOD);
info->srcCount++;
+ op2CanBeRegOptional = false;
// This situation also requires an internal register.
info->internalIntCount = 1;
@@ -2526,7 +2591,7 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
{
MakeSrcContained(tree, op2);
}
- else
+ else if (op2CanBeRegOptional)
{
op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index b0ef498971..bb8c190e9a 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -5724,13 +5724,14 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio
}
}
- LsraLocation nextLocation = assignedInterval->getNextRefLocation();
+ RefPosition* nextRefPosition = assignedInterval->getNextRefPosition();
+ LsraLocation nextLocation = assignedInterval->getNextRefLocation();
// We should never spill a register that's occupied by an Interval with its next use at the current location.
// Normally this won't occur (unless we actually had more uses in a single node than there are registers),
// because we'll always find something with a later nextLocation, but it can happen in stress when
// we have LSRA_SELECT_NEAREST.
- if ((nextLocation == refLocation) && !refPosition->isFixedRegRef)
+ if ((nextLocation == refLocation) && !refPosition->isFixedRegRef && nextRefPosition->RequiresRegister())
{
continue;
}
@@ -5815,7 +5816,17 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio
else
{
// Must have found a spill candidate.
- assert((farthestRefPhysRegRecord != nullptr) && (farthestLocation > refLocation || refPosition->isFixedRegRef));
+ assert(farthestRefPhysRegRecord != nullptr);
+ if ((farthestLocation == refLocation) && !refPosition->isFixedRegRef)
+ {
+ Interval* assignedInterval = farthestRefPhysRegRecord->assignedInterval;
+ RefPosition* nextRefPosition = assignedInterval->getNextRefPosition();
+ assert(!nextRefPosition->RequiresRegister());
+ }
+ else
+ {
+ assert(farthestLocation > refLocation || refPosition->isFixedRegRef);
+ }
}
#endif