Diffstat (limited to 'src')
-rw-r--r--  src/jit/codegenlinear.cpp |   7
-rw-r--r--  src/jit/codegenlinear.h   |   4
-rw-r--r--  src/jit/codegenxarch.cpp  | 404
-rw-r--r--  src/jit/gentree.cpp       |  55
-rw-r--r--  src/jit/gentree.h         |   8
-rw-r--r--  src/jit/lowerxarch.cpp    |  89
-rw-r--r--  src/jit/lsra.cpp          |  17
7 files changed, 428 insertions, 156 deletions
diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp
index 68eb2ffe8f..1cff16349c 100644
--- a/src/jit/codegenlinear.cpp
+++ b/src/jit/codegenlinear.cpp
@@ -1186,14 +1186,15 @@ void CodeGen::genConsumeRegs(GenTree* tree)
 #ifdef _TARGET_XARCH_
     else if (tree->OperGet() == GT_LCL_VAR)
     {
-        // A contained lcl var must be living on stack and marked as reg optional.
+        // A contained lcl var must be living on stack and marked as reg optional, or not be a
+        // register candidate.
         unsigned   varNum = tree->AsLclVarCommon()->GetLclNum();
         LclVarDsc* varDsc = compiler->lvaTable + varNum;

         noway_assert(varDsc->lvRegNum == REG_STK);
-        noway_assert(tree->IsRegOptional());
+        noway_assert(tree->IsRegOptional() || !varDsc->lvLRACandidate);

-        // Update the life of reg optional lcl var.
+        // Update the life of the lcl var.
         genUpdateLife(tree);
     }
 #endif // _TARGET_XARCH_
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 6ad72cf781..25ad3f5637 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -167,7 +167,9 @@ void genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode);
 #ifdef FEATURE_PUT_STRUCT_ARG_STK
 #ifdef _TARGET_X86_
-bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk, bool isSrcInMemory);
+bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk);
+void genPushReg(var_types type, regNumber srcReg);
+void genPutArgStkFieldList(GenTreePutArgStk* putArgStk);
 #endif // _TARGET_X86_
 void genPutStructArgStk(GenTreePutArgStk* treeNode);
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 613137e59e..795098975b 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -564,6 +564,8 @@ void CodeGen::genCodeForLongUMod(GenTreeOp* node)
     assert(dividend->OperGet() == GT_LONG);
     assert(varTypeIsLong(dividend));

+    genConsumeOperands(node);
+
     GenTree* const dividendLo = dividend->gtOp1;
     GenTree* const dividendHi = dividend->gtOp2;
     assert(!dividendLo->isContained());
@@ -574,8 +576,6 @@ void CodeGen::genCodeForLongUMod(GenTreeOp* node)
     assert(divisor->gtIconVal >= 2);
     assert(divisor->gtIconVal <= 0x3fffffff);

-    genConsumeOperands(node);
-
     // dividendLo must be in RAX; dividendHi must be in RDX
     genCopyRegIfNeeded(dividendLo, REG_EAX);
     genCopyRegIfNeeded(dividendHi, REG_EDX);
@@ -3349,6 +3349,8 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
     regNumber intTmpReg  = REG_NA;
     regNumber longTmpReg = REG_NA;
 #ifdef _TARGET_X86_
+    // On x86 we use an XMM register for both 16 and 8-byte chunks, but if it's
+    // less than 16 bytes, we will just be using pushes
     if (size >= 8)
     {
         xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
@@ -3359,6 +3361,7 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
         intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
     }
 #else  // !_TARGET_X86_
+    // On x64 we use an XMM register only for 16-byte chunks.
     if (size >= XMM_REGSIZE_BYTES)
     {
         xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
@@ -7486,29 +7489,234 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
 //
 // Arguments:
 //    putArgStk - the putArgStk node.
-//    isSrcInMemory - true if the source of the putArgStk node is in
-//                    memory; false otherwise.
 //
 // Returns: true if the stack pointer was adjusted; false otherwise.
 //
-bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk, bool isSrcInMemory)
+bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
 {
-    assert(isSrcInMemory || (putArgStk->gtOp1->OperGet() == GT_FIELD_LIST));
-
-    // If this argument contains any GC pointers or is less than 16 bytes in size and is either in memory or composed
-    // entirely of slot-like fields (i.e. integral-types, 4-byte-aligned fields that take up 4 bytes including any
-    // padding), we will use a sequence of `push` instructions to store the argument to the stack.
     const unsigned argSize = putArgStk->getArgSize();
-    if ((putArgStk->gtNumberReferenceSlots != 0) ||
-        ((argSize < 16) && (isSrcInMemory || (putArgStk->gtPutArgStkKind == GenTreePutArgStk::Kind::AllSlots))))
+
+    // If the gtPutArgStkKind is one of the push types, we do not pre-adjust the stack.
+    // This is set in Lowering, and is true if and only if:
+    // - This argument contains any GC pointers OR
+    // - It is a GT_FIELD_LIST OR
+    // - It is less than 16 bytes in size.
+    CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+    switch (putArgStk->gtPutArgStkKind)
     {
-        return false;
+        case GenTreePutArgStk::Kind::RepInstr:
+        case GenTreePutArgStk::Kind::Unroll:
+            assert((putArgStk->gtNumberReferenceSlots == 0) && (putArgStk->gtGetOp1()->OperGet() != GT_FIELD_LIST) &&
+                   (argSize >= 16));
+            break;
+        case GenTreePutArgStk::Kind::Push:
+        case GenTreePutArgStk::Kind::PushAllSlots:
+            assert((putArgStk->gtNumberReferenceSlots != 0) || (putArgStk->gtGetOp1()->OperGet() == GT_FIELD_LIST) ||
+                   (argSize < 16));
+            break;
+        case GenTreePutArgStk::Kind::Invalid:
+        default:
+            assert(!"Uninitialized GenTreePutArgStk::Kind");
+            break;
     }
+#endif // DEBUG

+    if (putArgStk->isPushKind())
+    {
+        m_pushStkArg = true;
+        return false;
+    }
+    m_pushStkArg = false;
     inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE);
     genStackLevel += argSize;
     return true;
 }
+
+//---------------------------------------------------------------------
+// genPutArgStkFieldList - generate code for passing an arg on the stack.
+//
+// Arguments
+//    treeNode      - the GT_PUTARG_STK node
+//    targetType    - the type of the treeNode
+//
+// Return value:
+//    None
+//
+void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
+{
+    GenTreeFieldList* const fieldList = putArgStk->gtOp1->AsFieldList();
+    assert(fieldList != nullptr);
+
+    // Set m_pushStkArg and pre-adjust the stack if necessary.
+    const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk);
+    // For now, we only support the "push" case; we will push a full slot for the first field of each slot
+    // within the struct.
+    assert((putArgStk->isPushKind()) && !preAdjustedStack && m_pushStkArg);
+
+    // If we have pre-adjusted the stack and are simply storing the fields in order, set the offset to 0.
+    // (Note that this mode is not currently being used.)
+    // If we are pushing the arguments (i.e. we have not pre-adjusted the stack), then we are pushing them
+    // in reverse order, so we start with the current field offset at the size of the struct arg (which must be
+    // a multiple of the target pointer size).
+    unsigned  currentOffset   = (preAdjustedStack) ? 0 : putArgStk->getArgSize();
+    unsigned  prevFieldOffset = currentOffset;
+    regNumber tmpReg          = REG_NA;
+    if (putArgStk->gtRsvdRegs != RBM_NONE)
+    {
+        assert(genCountBits(putArgStk->gtRsvdRegs) == 1);
+        tmpReg = genRegNumFromMask(putArgStk->gtRsvdRegs);
+        assert(genIsValidIntReg(tmpReg));
+    }
+    for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
+    {
+        GenTree* const fieldNode   = current->Current();
+        const unsigned fieldOffset = current->gtFieldOffset;
+        var_types      fieldType   = current->gtFieldType;
+
+        // Long-typed nodes should have been handled by the decomposition pass, and lowering should have sorted the
+        // field list in descending order by offset.
+        assert(!varTypeIsLong(fieldType));
+        assert(fieldOffset <= prevFieldOffset);
+
+        // Consume the register, if any, for this field. Note that genConsumeRegs() will appropriately
+        // update the liveness info for a lclVar that has been marked RegOptional, which hasn't been
+        // assigned a register, and which is therefore contained.
+        // Unlike genConsumeReg(), it handles the case where no registers are being consumed.
+        genConsumeRegs(fieldNode);
+        regNumber argReg = fieldNode->isContainedSpillTemp() ? REG_NA : fieldNode->gtRegNum;
+
+        // If the field is slot-like, we can use a push instruction to store the entire register no matter the type.
+        //
+        // The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up
+        // to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must
+        // not require rounding.
+        // NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise
+        // able to detect stores into the outgoing argument area of the stack on x86.
+        const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
+        int        adjustment  = roundUp(currentOffset - fieldOffset, 4);
+        if (fieldIsSlot)
+        {
+            fieldType         = genActualType(fieldType);
+            unsigned pushSize = genTypeSize(fieldType);
+            assert((pushSize % 4) == 0);
+            adjustment -= pushSize;
+            while (adjustment != 0)
+            {
+                inst_IV(INS_push, 0);
+                currentOffset -= pushSize;
+                genStackLevel += pushSize;
+                adjustment -= pushSize;
+            }
+            m_pushStkArg = true;
+        }
+        else
+        {
+            m_pushStkArg = false;
+            // We always "push" floating point fields (i.e. they are full slot values that don't
+            // require special handling).
+            assert(varTypeIsIntegralOrI(fieldNode));
+            // If we can't push this field, it needs to be in a register so that we can store
+            // it to the stack location.
+            assert(tmpReg != REG_NA);
+            if (adjustment != 0)
+            {
+                // This moves the stack pointer to fieldOffset.
+                // For this case, we must adjust the stack and generate stack-relative stores rather than pushes.
+                // Adjust the stack pointer to the next slot boundary.
+                inst_RV_IV(INS_sub, REG_SPBASE, adjustment, EA_PTRSIZE);
+                currentOffset -= adjustment;
+                genStackLevel += adjustment;
+            }
+
+            // Does it need to be in a byte register?
+            // If so, we'll use tmpReg, which must have been allocated as a byte register.
+            // If it's already in a register, but not a byteable one, then move it.
+            if (varTypeIsByte(fieldType) && ((argReg == REG_NA) || ((genRegMask(argReg) & RBM_BYTE_REGS) == 0)))
+            {
+                noway_assert((genRegMask(tmpReg) & RBM_BYTE_REGS) != 0);
+                if (argReg != REG_NA)
+                {
+                    inst_RV_RV(INS_mov, tmpReg, argReg, fieldType);
+                    argReg = tmpReg;
+                }
+            }
+        }
+
+        if (argReg == REG_NA)
+        {
+            if (m_pushStkArg)
+            {
+                if (fieldNode->isContainedSpillTemp())
+                {
+                    assert(fieldNode->IsRegOptional());
+                    TempDsc* tmp = getSpillTempDsc(fieldNode);
+                    getEmitter()->emitIns_S(INS_push, emitActualTypeSize(fieldNode->TypeGet()), tmp->tdTempNum(), 0);
+                    compiler->tmpRlsTemp(tmp);
+                }
+                else
+                {
+                    assert(varTypeIsIntegralOrI(fieldNode));
+                    switch (fieldNode->OperGet())
+                    {
+                        case GT_LCL_VAR:
+                            inst_TT(INS_push, fieldNode, 0, 0, emitActualTypeSize(fieldNode->TypeGet()));
+                            break;
+                        case GT_CNS_INT:
+                            if (fieldNode->IsIconHandle())
+                            {
+                                inst_IV_handle(INS_push, fieldNode->gtIntCon.gtIconVal);
+                            }
+                            else
+                            {
+                                inst_IV(INS_push, fieldNode->gtIntCon.gtIconVal);
+                            }
+                            break;
+                        default:
+                            unreached();
+                    }
+                }
+                currentOffset -= TARGET_POINTER_SIZE;
+                genStackLevel += TARGET_POINTER_SIZE;
+            }
+            else
+            {
+                // The stack has been adjusted and we will load the field to tmpReg and then store it on the stack.
+                assert(varTypeIsIntegralOrI(fieldNode));
+                switch (fieldNode->OperGet())
+                {
+                    case GT_LCL_VAR:
+                        inst_RV_TT(INS_mov, tmpReg, fieldNode);
+                        break;
+                    case GT_CNS_INT:
+                        genSetRegToConst(tmpReg, fieldNode->TypeGet(), fieldNode);
+                        break;
+                    default:
+                        unreached();
+                }
+                genStoreRegToStackArg(fieldType, tmpReg, fieldOffset - currentOffset);
+            }
+        }
+        else
+        {
+            genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset);
+            if (m_pushStkArg)
+            {
+                // We always push a slot-rounded size
+                currentOffset -= genTypeSize(fieldType);
+            }
+        }
+
+        prevFieldOffset = fieldOffset;
+    }
+    if (currentOffset != 0)
+    {
+        // We don't expect padding at the beginning of a struct, but it could happen with explicit layout.
+        inst_RV_IV(INS_sub, REG_SPBASE, currentOffset, EA_PTRSIZE);
+        genStackLevel += currentOffset;
+    }
+}
 #endif // _TARGET_X86_

 //---------------------------------------------------------------------
@@ -7527,7 +7735,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
 #ifdef _TARGET_X86_
     if (varTypeIsStruct(targetType))
     {
-        m_pushStkArg = !genAdjustStackForPutArgStk(putArgStk, true);
+        (void)genAdjustStackForPutArgStk(putArgStk);
         genPutStructArgStk(putArgStk);
         return;
     }
@@ -7541,10 +7749,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
     // a separate putarg_stk for each of the upper and lower halves.
     noway_assert(targetType != TYP_LONG);

-    int argSize = genTypeSize(genActualType(targetType));
-    genStackLevel += argSize;
+    const unsigned argSize = putArgStk->getArgSize();
+    assert((argSize % TARGET_POINTER_SIZE) == 0);

-    // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
     if (data->isContainedIntOrIImmed())
     {
         if (data->IsIconHandle())
         {
@@ -7555,115 +7762,18 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
         {
             inst_IV(INS_push, data->gtIntCon.gtIconVal);
         }
+        genStackLevel += argSize;
     }
     else if (data->OperGet() == GT_FIELD_LIST)
     {
-        GenTreeFieldList* const fieldList = data->AsFieldList();
-        assert(fieldList != nullptr);
-
-        m_pushStkArg = false;
-
-        const int argSize = putArgStk->getArgSize();
-        assert((argSize % TARGET_POINTER_SIZE) == 0);
-
-        const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk, false);
-
-        // If the stack was not pre-adjusted, set the current field offset to the size of the struct arg (which must be
-        // a multiple of the target pointer size). Otherwise, set the offset to 0.
-        int currentOffset = preAdjustedStack ? 0 : argSize;
-
-        unsigned prevFieldOffset = argSize;
-        for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
-        {
-            GenTree* const fieldNode   = current->Current();
-            const unsigned fieldOffset = current->gtFieldOffset;
-            var_types      fieldType   = current->gtFieldType;
-
-            // Long-typed nodes should have been handled by the decomposition pass, and lowering should have sorted the
-            // field list in descending order by offset.
-            assert(!varTypeIsLong(fieldType));
-            assert(fieldOffset <= prevFieldOffset);
-
-            // TODO-X86-CQ: If this is not a register candidate, or is not in a register,
-            // make it contained.
-            genConsumeReg(fieldNode);
-
-            // If the field is slot-like, we can store the entire register no matter the type.
-            const bool fieldIsSlot =
-                varTypeIsIntegralOrI(fieldType) && ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
-            if (fieldIsSlot)
-            {
-                fieldType = genActualType(fieldType);
-                assert(genTypeSize(fieldType) == 4);
-            }
-
-            // We can use a push instruction for any slot-like field.
-            //
-            // NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise
-            // able to detect stores into the outgoing argument area of the stack on x86.
-            const bool usePush = !preAdjustedStack && fieldIsSlot;
-            assert(usePush || !varTypeIsGC(fieldType));
-
-            // Adjust the stack if necessary. If we are going to generate a `push` instruction, this moves the stack
-            // pointer to (fieldOffset + sizeof(fieldType)) to account for the `push`.
-            const int fieldSize     = genTypeSize(fieldType);
-            const int desiredOffset = current->gtFieldOffset + (usePush ? fieldSize : 0);
-            if (currentOffset > desiredOffset)
-            {
-                assert(!preAdjustedStack);
-
-                // The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up
-                // to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must
-                // not require rounding.
-                const int adjustment = roundUp(currentOffset - desiredOffset, 4);
-                assert(!usePush || (adjustment == (currentOffset - desiredOffset)));
-                inst_RV_IV(INS_sub, REG_SPBASE, adjustment, EA_PTRSIZE);
-                currentOffset -= adjustment;
-                genStackLevel += adjustment;
-            }
-
-            // Note that the argReg may not be the lcl->gtRegNum, if it has been copied
-            // or reloaded to a different register.
-            const regNumber argReg = fieldNode->gtRegNum;
-            if (usePush)
-            {
-                // Adjust the stack if necessary and push the field.
-                // Push the field.
-                inst_RV(INS_push, argReg, fieldType, emitTypeSize(fieldType));
-                currentOffset -= fieldSize;
-                genStackLevel += fieldSize;
-            }
-            else
-            {
-                assert(!m_pushStkArg);
-                genStoreRegToStackArg(fieldType, argReg, desiredOffset - currentOffset);
-            }
-
-            prevFieldOffset = fieldOffset;
-        }
-
-        // Adjust the stack if necessary.
-        if (currentOffset != 0)
-        {
-            inst_RV_IV(INS_sub, REG_SPBASE, currentOffset, EA_PTRSIZE);
-            genStackLevel += currentOffset;
-        }
-    }
-    else if (data->isContained())
-    {
-        NYI_X86("Contained putarg_stk of non-constant");
+        genPutArgStkFieldList(putArgStk);
     }
     else
     {
+        // We should not see any contained nodes that are not immediates.
+        assert(!data->isContained());
         genConsumeReg(data);
-        if (varTypeIsIntegralOrI(targetType))
-        {
-            inst_RV(INS_push, data->gtRegNum, targetType);
-        }
-        else
-        {
-            // Decrement SP.
-            inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
-            getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
-        }
+        genPushReg(targetType, data->gtRegNum);
     }
 #else // !_TARGET_X86_
     {
@@ -7712,6 +7822,48 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
 #endif // !_TARGET_X86_
 }

+#ifdef _TARGET_X86_
+// genPushReg: Push a register value onto the stack and adjust the stack level
+//
+// Arguments:
+//    type   - the type of value to be stored
+//    reg    - the register containing the value
+//
+// Notes:
+//    For TYP_LONG, the srcReg must be a floating point register.
+//    Otherwise, the register type must be consistent with the given type.
+//
+void CodeGen::genPushReg(var_types type, regNumber srcReg)
+{
+    unsigned size = genTypeSize(type);
+    if (varTypeIsIntegralOrI(type) && type != TYP_LONG)
+    {
+        assert(genIsValidIntReg(srcReg));
+        inst_RV(INS_push, srcReg, type);
+    }
+    else
+    {
+        instruction ins;
+        emitAttr    attr = emitTypeSize(type);
+        if (type == TYP_LONG)
+        {
+            // On x86, the only way we can push a TYP_LONG from a register is if it is in an xmm reg.
+            // This is only used when we are pushing a struct from memory to memory, and basically is
+            // handling an 8-byte "chunk", as opposed to strictly a long type.
+            ins = INS_movq;
+        }
+        else
+        {
+            ins = ins_Store(type);
+        }
+        assert(genIsValidFloatReg(srcReg));
+        inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE);
+        getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0);
+    }
+    genStackLevel += size;
+}
+#endif // _TARGET_X86_
+
 #if defined(FEATURE_PUT_STRUCT_ARG_STK)
 // genStoreRegToStackArg: Store a register value into the stack argument area
 //
@@ -7776,16 +7928,7 @@ void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset)
 #ifdef _TARGET_X86_
     if (m_pushStkArg)
     {
-        if (varTypeIsIntegralOrI(type) && type != TYP_LONG)
-        {
-            inst_RV(INS_push, srcReg, type);
-        }
-        else
-        {
-            inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE);
-            getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0);
-        }
-        genStackLevel += size;
+        genPushReg(type, srcReg);
     }
     else
     {
@@ -7834,6 +7977,9 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
         case GenTreePutArgStk::Kind::Unroll:
             genStructPutArgUnroll(putArgStk);
             break;
+        case GenTreePutArgStk::Kind::Push:
+            genStructPutArgUnroll(putArgStk);
+            break;
         default:
             unreached();
     }
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 2647b70d93..9749574d1f 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -11287,19 +11287,62 @@ void Compiler::gtDispTree(GenTreePtr tree,
         {
             printf(" (last use)");
         }
-        if (tree->OperIsCopyBlkOp())
+        if (tree->OperIsBlkOp())
         {
-            printf(" (copy)");
-        }
-        else if (tree->OperIsInitBlkOp())
-        {
-            printf(" (init)");
+            if (tree->OperIsCopyBlkOp())
+            {
+                printf(" (copy)");
+            }
+            else if (tree->OperIsInitBlkOp())
+            {
+                printf(" (init)");
+            }
+            if (tree->OperIsStoreBlk() && (tree->AsBlk()->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid))
+            {
+                switch (tree->AsBlk()->gtBlkOpKind)
+                {
+                    case GenTreeBlk::BlkOpKindRepInstr:
+                        printf(" (RepInstr)");
+                        break;
+                    case GenTreeBlk::BlkOpKindUnroll:
+                        printf(" (Unroll)");
+                        break;
+                    case GenTreeBlk::BlkOpKindHelper:
+                        printf(" (Helper)");
+                        break;
+                    default:
+                        unreached();
+                }
+            }
         }
         else if (tree->OperIsFieldList())
         {
             printf(" %s at offset %d", varTypeName(tree->AsFieldList()->gtFieldType),
                    tree->AsFieldList()->gtFieldOffset);
         }
+#if FEATURE_PUT_STRUCT_ARG_STK
+        else if ((tree->OperGet() == GT_PUTARG_STK) &&
+                 (tree->AsPutArgStk()->gtPutArgStkKind != GenTreePutArgStk::Kind::Invalid))
+        {
+            switch (tree->AsPutArgStk()->gtPutArgStkKind)
+            {
+                case GenTreePutArgStk::Kind::RepInstr:
+                    printf(" (RepInstr)");
+                    break;
+                case GenTreePutArgStk::Kind::Unroll:
+                    printf(" (Unroll)");
+                    break;
+                case GenTreePutArgStk::Kind::Push:
+                    printf(" (Push)");
+                    break;
+                case GenTreePutArgStk::Kind::PushAllSlots:
+                    printf(" (PushAllSlots)");
+                    break;
+                default:
+                    unreached();
+            }
+        }
+#endif // FEATURE_PUT_STRUCT_ARG_STK

         IndirectAssignmentAnnotation* pIndirAnnote;
         if (tree->gtOper == GT_ASG && GetIndirAssignMap()->Lookup(tree, &pIndirAnnote))
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index b46a8956d3..6518f91c57 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -566,7 +566,7 @@ public:
     bool isContainedIntOrIImmed() const
     {
-        return isContained() && IsCnsIntOrI();
+        return isContained() && IsCnsIntOrI() && !isContainedSpillTemp();
     }

     bool isContainedFltOrDblImmed() const
@@ -4649,10 +4649,14 @@ struct GenTreePutArgStk : public GenTreeUnOp
     // block node.

     enum class Kind : __int8{
-        Invalid, RepInstr, Unroll, AllSlots,
+        Invalid, RepInstr, Unroll, Push, PushAllSlots,
     };

     Kind gtPutArgStkKind;
+    bool isPushKind()
+    {
+        return (gtPutArgStkKind == Kind::Push) || (gtPutArgStkKind == Kind::PushAllSlots);
+    }

     unsigned gtNumSlots;             // Number of slots for the argument to be passed on stack
     unsigned gtNumberReferenceSlots; // Number of reference slots.
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 2fee13f713..99d301ff91 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -2100,6 +2100,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
         // Now that the fields have been sorted, initialize the LSRA info.
         bool     allFieldsAreSlots = true;
+        bool     needsByteTemp     = false;
         unsigned prevOffset        = putArgStk->getArgSize();
         for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
         {
@@ -2108,11 +2109,45 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
             const unsigned fieldOffset = current->gtFieldOffset;
             assert(fieldType != TYP_LONG);

-            // TODO-X86-CQ: we could probably improve codegen here by marking all of the operands to field nodes that
-            // we are going to `push` on the stack as reg-optional.
+            // For x86 we must mark all integral fields as contained or reg-optional, and handle them
+            // accordingly in code generation, since we may have up to 8 fields, which cannot all be in
+            // registers to be consumed atomically by the call.
+            if (varTypeIsIntegralOrI(fieldNode))
+            {
+                if (fieldNode->OperGet() == GT_LCL_VAR)
+                {
+                    LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]);
+                    if (varDsc->lvTracked && !varDsc->lvDoNotEnregister)
+                    {
+                        SetRegOptional(fieldNode);
+                    }
+                    else
+                    {
+                        MakeSrcContained(putArgStk, fieldNode);
+                    }
+                }
+                else if (fieldNode->IsIntCnsFitsInI32())
+                {
+                    MakeSrcContained(putArgStk, fieldNode);
+                }
+                else
+                {
+                    // For the case where we cannot directly push the value, if we run out of registers,
+                    // it would be better to defer computation until we are pushing the arguments rather
+                    // than spilling, but this situation is not all that common, as most cases of promoted
+                    // structs do not have a large number of fields, and of those most are lclVars or
+                    // copy-propagated constants.
+                    SetRegOptional(fieldNode);
+                }
+            }
+            else
+            {
+                assert(varTypeIsFloating(fieldNode));
+            }

-            const bool fieldIsSlot =
-                varTypeIsIntegralOrI(fieldType) && ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
+            // We can treat as a slot any field that is stored at a slot boundary, where the previous
+            // field is not in the same slot. (Note that we store the fields in reverse order.)
+            const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
             if (!fieldIsSlot)
             {
                 allFieldsAreSlots = false;
@@ -2120,8 +2155,9 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
                 {
                     // If this field is a slot--i.e. it is an integer field that is 4-byte aligned and takes up 4 bytes
                    // (including padding)--we can store the whole value rather than just the byte. Otherwise, we will
-                    // need a byte-addressable register for the store.
-                    fieldNode->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_NON_BYTE_REGS);
+                    // need a byte-addressable register for the store. We will enforce this requirement on an internal
+                    // register, which we can use to copy multiple byte values.
+                    needsByteTemp = true;
                 }
             }
@@ -2133,10 +2169,28 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
             prevOffset = fieldOffset;
         }

-        // If all fields of this list are slots, set the copy kind.
+        // Set the copy kind.
+        // TODO-X86-CQ: Even if we are using push, if there are contiguous floating point fields, we should
+        // adjust the stack once for those fields. The latter is really best done in code generation, but
+        // this tuning should probably be undertaken as a whole.
+        // Also, if there are floating point fields, it may be better to use the "Unroll" mode
+        // of copying the struct as a whole, if the fields are not register candidates.
         if (allFieldsAreSlots)
         {
-            putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::AllSlots;
+            putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::PushAllSlots;
+        }
+        else
+        {
+            putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
+            // If any of the fields cannot be stored with an actual push, we may need a temporary
+            // register to load the value before storing it to the stack location.
+            info->internalIntCount = 1;
+            regMaskTP regMask      = l->allRegs(TYP_INT);
+            if (needsByteTemp)
+            {
+                regMask &= ~RBM_NON_BYTE_REGS;
+            }
+            info->setInternalCandidates(l, regMask);
         }
         return;
     }
@@ -2218,15 +2272,23 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
             info->addInternalCandidates(l, l->internalFloatRegCandidates());
         }

-        // If src or dst are on stack, we don't have to generate the address into a register
-        // because it's just some constant+SP
-        putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Unroll;
+#ifdef _TARGET_X86_
+        if (size < XMM_REGSIZE_BYTES)
+        {
+            putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
+        }
+        else
+#endif // _TARGET_X86_
+        {
+            putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Unroll;
+        }
     }
 #ifdef _TARGET_X86_
     else if (putArgStk->gtNumberReferenceSlots != 0)
     {
         // On x86, we must use `push` to store GC references to the stack in order for the emitter to properly update
         // the function's GC info. These `putargstk` nodes will generate a sequence of `push` instructions.
+        putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
     }
 #endif // _TARGET_X86_
     else
@@ -2496,6 +2558,7 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
         info->setDstCandidates(l, RBM_RAX);
     }

+    bool op2CanBeRegOptional = true;
 #ifdef _TARGET_X86_
     if (op1->OperGet() == GT_LONG)
     {
@@ -2505,7 +2568,9 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)

         // Src count is actually 3, so increment.
         assert(op2->IsCnsIntOrI());
+        assert(tree->OperGet() == GT_UMOD);
         info->srcCount++;
+        op2CanBeRegOptional = false;

         // This situation also requires an internal register.
         info->internalIntCount = 1;
@@ -2526,7 +2591,7 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
     {
         MakeSrcContained(tree, op2);
     }
-    else
+    else if (op2CanBeRegOptional)
     {
         op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index b0ef498971..bb8c190e9a 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -5724,13 +5724,14 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPosition)
             }
         }

-        LsraLocation nextLocation = assignedInterval->getNextRefLocation();
+        RefPosition* nextRefPosition = assignedInterval->getNextRefPosition();
+        LsraLocation nextLocation    = assignedInterval->getNextRefLocation();

         // We should never spill a register that's occupied by an Interval with its next use at the current location.
         // Normally this won't occur (unless we actually had more uses in a single node than there are registers),
         // because we'll always find something with a later nextLocation, but it can happen in stress when
         // we have LSRA_SELECT_NEAREST.
-        if ((nextLocation == refLocation) && !refPosition->isFixedRegRef)
+        if ((nextLocation == refLocation) && !refPosition->isFixedRegRef && nextRefPosition->RequiresRegister())
         {
             continue;
         }
@@ -5815,7 +5816,17 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPosition)
     else
     {
         // Must have found a spill candidate.
-        assert((farthestRefPhysRegRecord != nullptr) && (farthestLocation > refLocation || refPosition->isFixedRegRef));
+        assert(farthestRefPhysRegRecord != nullptr);
+        if ((farthestLocation == refLocation) && !refPosition->isFixedRegRef)
+        {
+            Interval*    assignedInterval = farthestRefPhysRegRecord->assignedInterval;
+            RefPosition* nextRefPosition  = assignedInterval->getNextRefPosition();
+            assert(!nextRefPosition->RequiresRegister());
+        }
+        else
+        {
+            assert(farthestLocation > refLocation || refPosition->isFixedRegRef);
+        }
     }
 #endif
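
The new genPutArgStkFieldList keeps the whole struct-argument layout in terms of a single currentOffset that starts at the argument size and walks down toward zero, padding skipped slots with `push 0` and falling back to a `sub esp` plus stack-relative stores for fields that are not slot-aligned. What follows is a minimal standalone sketch of that bookkeeping only; the Field list, offsets, and printed mnemonics are illustrative assumptions, it is not JIT code, and it omits GC fields, spill temps, and byte-register handling.

// Standalone model of the push-based layout in genPutArgStkFieldList (x86).
// The field list below is hypothetical; real codegen walks GenTreeFieldList nodes.
#include <cstdio>
#include <vector>

struct Field
{
    unsigned offset; // byte offset of the field within the struct argument
    unsigned size;   // 1, 2, or 4 bytes (TYP_LONG fields are decomposed earlier)
};

int main()
{
    // A 16-byte struct argument; lowering guarantees descending order by offset.
    const std::vector<Field> fields  = {{12, 4}, {8, 4}, {5, 1}, {0, 4}};
    const unsigned           argSize = 16;

    unsigned currentOffset   = argSize; // push from the end of the arg toward offset 0
    unsigned prevFieldOffset = argSize;

    for (const Field& f : fields)
    {
        // A field is "slot-like" if it starts on a 4-byte boundary and no later field
        // shares its slot; such fields can be stored with a single push.
        const bool fieldIsSlot = ((f.offset % 4) == 0) && ((prevFieldOffset - f.offset) >= 4);
        unsigned   adjustment  = ((currentOffset - f.offset) + 3u) & ~3u; // round up to 4

        if (fieldIsSlot)
        {
            adjustment -= 4; // the push of the field itself covers one slot
            while (adjustment != 0)
            {
                std::printf("push 0              ; pad skipped slot\n");
                currentOffset -= 4;
                adjustment -= 4;
            }
            std::printf("push <field@%2u>     ; full 4-byte slot\n", f.offset);
            currentOffset -= 4;
        }
        else
        {
            if (adjustment != 0)
            {
                // Can't push a partial slot: adjust SP once, then store relative to it.
                std::printf("sub  esp, %u\n", adjustment);
                currentOffset -= adjustment;
            }
            std::printf("mov  [esp+%u], <field@%u> ; %u-byte store\n", f.offset - currentOffset, f.offset, f.size);
        }
        prevFieldOffset = f.offset;
    }

    if (currentOffset != 0)
    {
        // Leading padding (possible with explicit struct layout).
        std::printf("sub  esp, %u\n", currentOffset);
    }
    return 0;
}

For the sample field list this emits three pushes plus one sub/mov pair and consumes exactly argSize bytes of stack, which mirrors how the real code keeps genStackLevel in sync with both the pushes and the explicit SP adjustments.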