Diffstat (limited to 'src/jit/codegenxarch.cpp')
-rw-r--r--  src/jit/codegenxarch.cpp  1276
1 file changed, 1119 insertions, 157 deletions
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 076ba7c262..7064862c4c 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -785,7 +785,6 @@ void CodeGen::genCodeForBBlist()
#endif
/* Both stacks should always be empty on exit from a basic block */
-
noway_assert(genStackLevel == 0);
#ifdef _TARGET_AMD64_
@@ -1571,6 +1570,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
{
assert(!isRegCandidate);
+
emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)),
emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0);
genProduceReg(treeNode);
@@ -1618,85 +1618,98 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_STORE_LCL_FLD:
{
- noway_assert(targetType != TYP_STRUCT);
- noway_assert(!treeNode->InReg());
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!genStoreRegisterReturnInLclVar(treeNode))
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ noway_assert(!treeNode->InReg());
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
#ifdef FEATURE_SIMD
- // storing of TYP_SIMD12 (i.e. Vector3) field
- if (treeNode->TypeGet() == TYP_SIMD12)
- {
- genStoreLclFldTypeSIMD12(treeNode);
- break;
- }
+ // storing of TYP_SIMD12 (i.e. Vector3) field
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genStoreLclFldTypeSIMD12(treeNode);
+ break;
+ }
#endif
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- genConsumeRegs(op1);
- emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ genConsumeRegs(op1);
+ emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
+ }
}
break;
case GT_STORE_LCL_VAR:
{
- noway_assert(targetType != TYP_STRUCT);
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!genStoreRegisterReturnInLclVar(treeNode))
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
- unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
- // Ensure that lclVar nodes are typed correctly.
- assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet()));
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet()));
#if !defined(_TARGET_64BIT_)
- if (treeNode->TypeGet() == TYP_LONG)
- {
- genStoreLongLclVar(treeNode);
- break;
- }
+ if (treeNode->TypeGet() == TYP_LONG)
+ {
+ genStoreLongLclVar(treeNode);
+ break;
+ }
#endif // !defined(_TARGET_64BIT_)
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- genConsumeRegs(op1);
- if (treeNode->gtRegNum == REG_NA)
- {
- // stack store
- emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), emitTypeSize(treeNode), treeNode);
- varDsc->lvRegNum = REG_STK;
- }
- else
- {
- bool containedOp1 = op1->isContained();
- // Look for the case where we have a constant zero which we've marked for reuse,
- // but which isn't actually in the register we want. In that case, it's better to create
- // zero in the target register, because an xor is smaller than a copy. Note that we could
- // potentially handle this in the register allocator, but we can't always catch it there
- // because the target may not have a register allocated for it yet.
- if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) && op1->IsZero())
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ genConsumeRegs(op1);
+
+ if (treeNode->gtRegNum == REG_NA)
{
- op1->gtRegNum = REG_NA;
- op1->ResetReuseRegVal();
- containedOp1 = true;
+ // stack store
+ emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), emitTypeSize(treeNode), treeNode);
+ varDsc->lvRegNum = REG_STK;
}
- if (containedOp1)
+ else
{
- // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
- // must be a constant. However, in the future we might want to support a contained memory op.
- // This is a bit tricky because we have to decide it's contained before register allocation,
- // and this would be a case where, once that's done, we need to mark that node as always
- // requiring a register - which we always assume now anyway, but once we "optimize" that
- // we'll have to take cases like this into account.
- assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
- genSetRegToConst(treeNode->gtRegNum, targetType, op1);
+ bool containedOp1 = op1->isContained();
+ // Look for the case where we have a constant zero which we've marked for reuse,
+ // but which isn't actually in the register we want. In that case, it's better to create
+ // zero in the target register, because an xor is smaller than a copy. Note that we could
+ // potentially handle this in the register allocator, but we can't always catch it there
+ // because the target may not have a register allocated for it yet.
+ if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) && op1->IsZero())
+ {
+ op1->gtRegNum = REG_NA;
+ op1->ResetReuseRegVal();
+ containedOp1 = true;
+ }
+ if (containedOp1)
+ {
+ // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
+ // must be a constant. However, in the future we might want to support a contained memory op.
+ // This is a bit tricky because we have to decide it's contained before register allocation,
+ // and this would be a case where, once that's done, we need to mark that node as always
+ // requiring a register - which we always assume now anyway, but once we "optimize" that
+ // we'll have to take cases like this into account.
+ assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
+ genSetRegToConst(treeNode->gtRegNum, targetType, op1);
+ }
+ else if (op1->gtRegNum != treeNode->gtRegNum)
+ {
+ assert(op1->gtRegNum != REG_NA);
+ emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
+ }
}
- else if (op1->gtRegNum != treeNode->gtRegNum)
+ if (treeNode->gtRegNum != REG_NA)
{
- assert(op1->gtRegNum != REG_NA);
- emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
+ genProduceReg(treeNode);
}
}
- if (treeNode->gtRegNum != REG_NA)
- genProduceReg(treeNode);
}
break;
@@ -1717,6 +1730,15 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
GenTreePtr op1 = treeNode->gtOp.gtOp1;
if (targetType == TYP_VOID)
{
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
+ {
+ // The System V AMD64 ABI requires that when a struct is returned via a hidden
+ // return buffer argument, RAX must contain the value of the hidden retbuf arg.
+ emit->emitIns_R_S(INS_mov, EA_BYREF, REG_RAX, compiler->info.compRetBuffArg, 0);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
assert(op1 == nullptr);
}
#if !defined(_TARGET_64BIT_)
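For context, a minimal C++ sketch of the ABI rule the block above implements; the struct and function names are invented for illustration, only the rule itself is from the System V AMD64 ABI.

// A struct too large to be returned in registers is returned through a hidden
// buffer pointer supplied by the caller; on return the callee must also place
// that pointer in RAX, which is what the emitIns_R_S above reloads.
struct ByRetBuf { long a, b, c; };   // 24 bytes: classified MEMORY, returned via retbuf
ByRetBuf MakeByRetBuf()
{
    ByRetBuf v = { 1, 2, 3 };        // the callee writes the result through the hidden pointer
    return v;                        // and hands the same pointer back in RAX
}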
@@ -1742,53 +1764,233 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
#endif // !defined(_TARGET_64BIT_)
else
{
- assert(op1 != nullptr);
- noway_assert(op1->gtRegNum != REG_NA);
-
- // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
- // consumed a reg for the operand. This is because the variable
- // is dead after return. But we are issuing more instructions
- // like "profiler leave callback" after this consumption. So
- // if you are issuing more instructions after this point,
- // remember to keep the variable live up until the new method
- // exit point where it is actually dead.
- genConsumeReg(op1);
-
- regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
-#ifdef _TARGET_X86_
- if (varTypeIsFloating(treeNode))
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (treeNode->TypeGet() == TYP_STRUCT &&
+ treeNode->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
{
- if (genIsRegCandidateLocal(op1) && !compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegister)
+ GenTreeLclVarCommon* lclVarPtr = treeNode->gtOp.gtOp1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]);
+ assert(varDsc->lvDontPromote);
+
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ assert(structDesc.passedInRegisters);
+ assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+
+ regNumber retReg0 = REG_NA;
+ emitAttr size0 = EA_UNKNOWN;
+ unsigned offset0 = structDesc.eightByteOffsets[0];
+ regNumber retReg1 = REG_NA;
+ emitAttr size1 = EA_UNKNOWN;
+ unsigned offset1 = structDesc.eightByteOffsets[1];
+
+ bool firstIntUsed = false;
+ bool firstFloatUsed = false;
+
+ var_types type0 = TYP_UNKNOWN;
+ var_types type1 = TYP_UNKNOWN;
+
+ // Set the first eightbyte data
+ switch (structDesc.eightByteClassifications[0])
{
- // Store local variable to its home location, if necessary.
- if ((op1->gtFlags & GTF_REG_VAL) != 0)
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ retReg0 = REG_INTRET;
+ size0 = EA_4BYTE;
+ type0 = TYP_INT;
+ firstIntUsed = true;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ retReg0 = REG_LNGRET;
+ size0 = EA_8BYTE;
+ type0 = TYP_LONG;
+ firstIntUsed = true;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES);
+ retReg0 = REG_LNGRET;
+ size0 = EA_GCREF;
+ type0 = TYP_REF;
+ firstIntUsed = true;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ retReg0 = REG_FLOATRET;
+ size0 = EA_4BYTE;
+ type0 = TYP_FLOAT;
+ firstFloatUsed = true;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ retReg0 = REG_DOUBLERET;
+ size0 = EA_8BYTE;
+ type0 = TYP_DOUBLE;
+ firstFloatUsed = true;
+ }
+ else
{
- op1->gtFlags &= ~GTF_REG_VAL;
- inst_TT_RV(ins_Store(op1->gtType, compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)), op1, op1->gtRegNum);
+ assert(false && "Bat float type."); // Not possible.
}
- // Now, load it to the fp stack.
- getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
}
- else
+
+ // Set the second eight byte data
+ switch (structDesc.eightByteClassifications[1])
{
- // Spill the value, which should be in a register, then load it to the fp stack.
- // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
- op1->gtFlags |= GTF_SPILL;
- regSet.rsSpillTree(op1->gtRegNum, op1);
- op1->gtFlags |= GTF_SPILLED;
- op1->gtFlags &= ~GTF_SPILL;
-
- TempDsc* t = regSet.rsUnspillInPlace(op1);
- inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
- op1->gtFlags &= ~GTF_SPILLED;
- compiler->tmpRlsTemp(t);
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ if (firstIntUsed)
+ {
+ retReg1 = REG_INTRET_1;
+ }
+ else
+ {
+ retReg1 = REG_INTRET;
+ }
+ type1 = TYP_INT;
+ size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ if (firstIntUsed)
+ {
+ retReg1 = REG_LNGRET_1;
+ }
+ else
+ {
+ retReg1 = REG_LNGRET;
+ }
+ type1 = TYP_LONG;
+ size1 = EA_8BYTE;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES);
+ if (firstIntUsed)
+ {
+ retReg1 = REG_LNGRET_1;
+ }
+ else
+ {
+ retReg1 = REG_LNGRET;
+ }
+ type1 = TYP_REF;
+ size1 = EA_GCREF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ if (firstFloatUsed)
+ {
+ retReg1 = REG_FLOATRET_1;
+ }
+ else
+ {
+ retReg1 = REG_FLOATRET;
+ }
+ type1 = TYP_FLOAT;
+ size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ if (firstFloatUsed)
+ {
+ retReg1 = REG_DOUBLERET_1;
+ }
+ else
+ {
+ retReg1 = REG_DOUBLERET;
+ }
+ type1 = TYP_DOUBLE;
+ size1 = EA_8BYTE;
+ }
+ else
+ {
+ assert(false && "Bat float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
}
+
+ // Move the values into the return registers.
+ //
+ emit->emitIns_R_S(ins_Load(type0), size0, retReg0, lclVarPtr->gtLclNum, offset0);
+ emit->emitIns_R_S(ins_Load(type1), size1, retReg1, lclVarPtr->gtLclNum, offset1);
}
else
-#endif // _TARGET_X86_
- if (op1->gtRegNum != retReg)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
- inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
+ assert(op1 != nullptr);
+ noway_assert(op1->gtRegNum != REG_NA);
+
+ // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
+ // consumed a reg for the operand. This is because the variable
+ // is dead after return. But we are issuing more instructions
+ // like "profiler leave callback" after this consumption. So
+ // if you are issuing more instructions after this point,
+ // remember to keep the variable live up until the new method
+ // exit point where it is actually dead.
+ genConsumeReg(op1);
+
+ regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
+#ifdef _TARGET_X86_
+ if (varTypeIsFloating(treeNode))
+ {
+ if (genIsRegCandidateLocal(op1) && !compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegister)
+ {
+ // Store local variable to its home location, if necessary.
+ if ((op1->gtFlags & GTF_REG_VAL) != 0)
+ {
+ op1->gtFlags &= ~GTF_REG_VAL;
+ inst_TT_RV(ins_Store(op1->gtType, compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)), op1, op1->gtRegNum);
+ }
+ // Now, load it to the fp stack.
+ getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
+ }
+ else
+ {
+ // Spill the value, which should be in a register, then load it to the fp stack.
+ // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
+ op1->gtFlags |= GTF_SPILL;
+ regSet.rsSpillTree(op1->gtRegNum, op1);
+ op1->gtFlags |= GTF_SPILLED;
+ op1->gtFlags &= ~GTF_SPILL;
+
+ TempDsc* t = regSet.rsUnspillInPlace(op1);
+ inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
+ op1->gtFlags &= ~GTF_SPILLED;
+ compiler->tmpRlsTemp(t);
+ }
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ if (op1->gtRegNum != retReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
+ }
+ }
}
}
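As an illustration (invented type, not part of the diff), this is the kind of struct the new GT_RETURN path above handles: two eightbytes classifying as INTEGER and SSE, so per the System V AMD64 ABI the value comes back in RAX and XMM0, loaded straight from the local's stack home.

struct IntAndDouble                 // 16 bytes, two eightbytes
{
    long   i;                       // eightbyte 0: INTEGER -> RAX  (REG_LNGRET)
    double d;                       // eightbyte 1: SSE     -> XMM0 (REG_DOUBLERET)
};

IntAndDouble ReturnBoth()
{
    return IntAndDouble{ 42, 3.5 }; // both halves are loaded from the local and placed in the two return registers
}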
@@ -2468,6 +2670,14 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
genPutArgStk(treeNode);
#else // !_TARGET_X86_
{
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (targetType == TYP_STRUCT)
+ {
+ genPutArgStk(treeNode);
+ break;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
noway_assert(targetType != TYP_STRUCT);
assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
@@ -2536,8 +2746,9 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_PUTARG_REG:
{
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
noway_assert(targetType != TYP_STRUCT);
-
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
// commas show up here commonly, as part of a nullchk operation
GenTree *op1 = treeNode->gtOp.gtOp1;
// If child node is not already in the register we need, move it
@@ -2546,8 +2757,8 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType);
}
+ genProduceReg(treeNode);
}
- genProduceReg(treeNode);
break;
case GT_CALL:
@@ -2767,6 +2978,198 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
}
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+// This method handles storing a struct value returned in two registers to a
+// local's home stack location.
+// It returns true if this is a struct and the store of the returned
+// register value has been handled. It returns false otherwise.
+bool
+CodeGen::genStoreRegisterReturnInLclVar(GenTreePtr treeNode)
+{
+ if (treeNode->TypeGet() == TYP_STRUCT)
+ {
+ noway_assert(!treeNode->InReg());
+
+ GenTreeLclVarCommon* lclVarPtr = treeNode->AsLclVarCommon();
+
+ LclVarDsc * varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]);
+
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+
+ assert(structDesc.passedInRegisters);
+ assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ genConsumeRegs(op1);
+
+ regNumber retReg0 = REG_NA;
+ emitAttr size0 = EA_UNKNOWN;
+ unsigned offset0 = structDesc.eightByteOffsets[0];
+ regNumber retReg1 = REG_NA;
+ emitAttr size1 = EA_UNKNOWN;
+ unsigned offset1 = structDesc.eightByteOffsets[1];
+
+ bool firstIntUsed = false;
+ bool firstFloatUsed = false;
+
+ var_types type0 = TYP_UNKNOWN;
+ var_types type1 = TYP_UNKNOWN;
+
+ // Set the first eightbyte data
+ switch (structDesc.eightByteClassifications[0])
+ {
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ retReg0 = REG_INTRET;
+ size0 = EA_4BYTE;
+ type0 = TYP_INT;
+ firstIntUsed = true;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ retReg0 = REG_LNGRET;
+ size0 = EA_8BYTE;
+ type0 = TYP_LONG;
+ firstIntUsed = true;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES);
+ retReg0 = REG_LNGRET;
+ size0 = EA_GCREF;
+ type0 = TYP_REF;
+ firstIntUsed = true;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ retReg0 = REG_FLOATRET;
+ size0 = EA_4BYTE;
+ type0 = TYP_FLOAT;
+ firstFloatUsed = true;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ retReg0 = REG_DOUBLERET;
+ size0 = EA_8BYTE;
+ type0 = TYP_DOUBLE;
+ firstFloatUsed = true;
+ }
+ else
+ {
+ assert(false && "Bat float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
+ }
+
+ // Set the second eight byte data
+ switch (structDesc.eightByteClassifications[1])
+ {
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ if (firstIntUsed)
+ {
+ retReg1 = REG_INTRET_1;
+ }
+ else
+ {
+ retReg1 = REG_INTRET;
+ }
+ type1 = TYP_INT;
+ size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ if (firstIntUsed)
+ {
+ retReg1 = REG_LNGRET_1;
+ }
+ else
+ {
+ retReg1 = REG_LNGRET;
+ }
+ type1 = TYP_LONG;
+ size1 = EA_8BYTE;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES);
+ if (firstIntUsed)
+ {
+ retReg1 = REG_LNGRET_1;
+ }
+ else
+ {
+ retReg1 = REG_LNGRET;
+ }
+ type1 = TYP_REF;
+ size1 = EA_GCREF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ if (firstFloatUsed)
+ {
+ retReg1 = REG_FLOATRET_1;
+ }
+ else
+ {
+ retReg1 = REG_FLOATRET;
+ }
+ type1 = TYP_FLOAT;
+ size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ if (firstFloatUsed)
+ {
+ retReg1 = REG_DOUBLERET_1;
+ }
+ else
+ {
+ retReg1 = REG_DOUBLERET;
+ }
+ type1 = TYP_DOUBLE;
+ size1 = EA_8BYTE;
+ }
+ else
+ {
+ assert(false && "Bat float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
+ }
+
+ // Move the values into the return registers.
+ //
+
+ getEmitter()->emitIns_S_R(ins_Store(type0), size0, retReg0, lclVarPtr->gtLclNum, offset0);
+ getEmitter()->emitIns_S_R(ins_Store(type1), size1, retReg1, lclVarPtr->gtLclNum, offset1);
+
+ return true;
+ }
+
+ return false;
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
// Generate code for division (or mod) by power of two
// or negative powers of two. (meaning -1 * a power of two, not 2^(-1))
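A minimal standalone sketch (invented names, C++) of the register-pairing rule that both eightbyte switch blocks above encode with firstIntUsed/firstFloatUsed: the second eightbyte takes its class's second return register only when the first eightbyte already took the first one.

enum class EightbyteClass { Int, Sse };   // IntegerReference folds into Int for this purpose

// Returns 0 for the class's first return register (RAX / XMM0),
// 1 for its second (RDX / XMM1).
int SecondEightbyteRegIndex(EightbyteClass first, EightbyteClass second)
{
    return (first == second) ? 1 : 0;
}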
@@ -3366,40 +3769,55 @@ void CodeGen::genCodeForInitBlk(GenTreeInitBlk* initBlkNode)
// Generate code for a load from some address + offset
-// base: tree node which can be either a local address or arbitrary node
-// offset: distance from the base from which to load
-void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset)
+// baseNode: tree node which can be either a local address or arbitrary node
+// offset: distance from the baseNode from which to load
+void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset)
{
emitter *emit = getEmitter();
- if (base->OperIsLocalAddr())
+ if (baseNode->OperIsLocalAddr())
{
- if (base->gtOper == GT_LCL_FLD_ADDR)
- offset += base->gtLclFld.gtLclOffs;
- emit->emitIns_R_S(ins, size, dst, base->gtLclVarCommon.gtLclNum, offset);
+ if (baseNode->gtOper == GT_LCL_FLD_ADDR)
+ offset += baseNode->gtLclFld.gtLclOffs;
+ emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset);
}
else
{
- emit->emitIns_R_AR(ins, size, dst, base->gtRegNum, offset);
+ emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset);
}
}
// Generate code for a store to some address + offset
-// base: tree node which can be either a local address or arbitrary node
-// offset: distance from the base from which to load
-void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset)
+// baseNode: tree node which can be either a local address or arbitrary node
+// offset: distance from the baseNode from which to load
+void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset)
{
emitter *emit = getEmitter();
- if (base->OperIsLocalAddr())
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (baseNode->OperGet() == GT_PUTARG_STK)
{
- if (base->gtOper == GT_LCL_FLD_ADDR)
- offset += base->gtLclFld.gtLclOffs;
- emit->emitIns_S_R(ins, size, src, base->gtLclVarCommon.gtLclNum, offset);
+ GenTreePutArgStk* putArgStkNode = baseNode->AsPutArgStk();
+ assert(putArgStkNode->gtOp.gtOp1->isContained());
+ assert(putArgStkNode->gtOp.gtOp1->gtOp.gtOper == GT_LDOBJ);
+
+ emit->emitIns_S_R(ins, size, src, compiler->lvaOutgoingArgSpaceVar,
+ (putArgStkNode->gtSlotNum * TARGET_POINTER_SIZE) + offset);
}
else
+#endif // #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
{
- emit->emitIns_AR_R(ins, size, src, base->gtRegNum, offset);
+
+ if (baseNode->OperIsLocalAddr())
+ {
+ if (baseNode->gtOper == GT_LCL_FLD_ADDR)
+ offset += baseNode->gtLclFld.gtLclOffs;
+ emit->emitIns_S_R(ins, size, src, baseNode->gtLclVarCommon.gtLclNum, offset);
+ }
+ else
+ {
+ emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset);
+ }
}
}
@@ -3523,6 +3941,126 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeCpBlk* cpBlkNode)
instGen(INS_r_movsb);
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+// Generates PutArg code by performing a loop unroll
+//
+// TODO-Amd64-Unix: Try to share code with copyblk.
+// The difference for now is that the putarg_stk contains its children, while cpblk does not.
+// This creates differences in code. After some significant refactoring it could be reused.
+void CodeGen::genCodeForPutArgUnroll(GenTreePutArgStk* putArgNode)
+{
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ GenTreePtr dstAddr = putArgNode;
+ GenTreePtr srcAddr = putArgNode->gtOp.gtOp1;
+
+ size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE;
+ assert(size <= CPBLK_UNROLL_LIMIT);
+
+ emitter *emit = getEmitter();
+
+ assert(srcAddr->isContained());
+ assert(srcAddr->gtOper == GT_LDOBJ);
+
+ if (!srcAddr->gtOp.gtOp1->isContained())
+ {
+ genConsumeReg(srcAddr->gtOp.gtOp1);
+ }
+
+ unsigned offset = 0;
+
+ // If the size of this struct is 16 bytes or more,
+ // let's use SSE2 to be able to do 16-byte-at-a-time
+ // loads and stores.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ assert(putArgNode->gtRsvdRegs != RBM_NONE);
+ regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
+ assert(genIsValidFloatReg(xmmReg));
+ size_t slots = size / XMM_REGSIZE_BYTES;
+
+ while (slots-- > 0)
+ {
+ // Load
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr->gtOp.gtOp1, offset); // Load the address of the child of the LdObj node.
+ // Store
+ genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (15 bytes or less) if there's one.
+ if ((size & 0xf) != 0)
+ {
+ // Grab the integer temp register to emit the remaining loads and stores.
+ regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
+
+ if ((size & 8) != 0)
+ {
+#ifdef _TARGET_X86_
+ // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
+ for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ }
+#else // !_TARGET_X86_
+ genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
+ genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
+ offset += 8;
+#endif // !_TARGET_X86_
+ }
+ if ((size & 4) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ offset += 4;
+ }
+ if ((size & 2) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
+ genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
+ offset += 2;
+ }
+ if ((size & 1) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
+ genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
+ }
+ }
+}
+
+// Generate code for a PutArgStk struct copy by using rep movs
+// Preconditions:
+// The size argument of the PutArgStk (for structs) is a constant and is between
+// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
+void CodeGen::genCodeForPutArgRepMovs(GenTreePutArgStk* putArgNode)
+{
+
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ GenTreePtr dstAddr = putArgNode;
+ GenTreePtr srcAddr = putArgNode->gtOp.gtOp1;
+#ifdef DEBUG
+ size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE;
+#endif // DEBUG
+
+ // Validate state.
+ assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
+
+#ifdef DEBUG
+ assert(srcAddr->isContained());
+
+#ifdef _TARGET_AMD64_
+ assert(size > CPBLK_UNROLL_LIMIT);
+#else
+ assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
+#endif
+
+#endif // DEBUG
+ genConsumePutArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX);
+ instGen(INS_r_movsb);
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
// Generate code for CpObj nodes which copy structs that have interleaved
// GC pointers.
// This will generate a sequence of movsq instructions for the cases of non-gc members
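To illustrate the copy-size decomposition genCodeForPutArgUnroll above performs (16-byte XMM moves while at least 16 bytes remain, then an 8/4/2/1-byte tail), here is a hedged sketch; it only describes the sequence, while the JIT emits movdqu/mov instructions instead.

#include <cstddef>
#include <cstdio>

void DescribeUnrolledCopy(std::size_t size)     // size is assumed <= CPBLK_UNROLL_LIMIT
{
    std::size_t offset = 0;
    while (size - offset >= 16)                 // SSE2 16-byte chunks
    {
        std::printf("movdqu 16 bytes at +%zu\n", offset);
        offset += 16;
    }
    if (size & 8) { std::printf("mov 8 bytes at +%zu\n", offset); offset += 8; }
    if (size & 4) { std::printf("mov 4 bytes at +%zu\n", offset); offset += 4; }
    if (size & 2) { std::printf("mov 2 bytes at +%zu\n", offset); offset += 2; }
    if (size & 1) { std::printf("mov 1 byte  at +%zu\n", offset); }
}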
@@ -3686,7 +4224,7 @@ void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode)
{
#ifdef _TARGET_AMD64_
// Make sure we got the arguments of the cpblk operation in the right registers
- GenTreePtr blockSize = cpBlkNode->Size();
+ GenTreePtr blockSize = cpBlkNode->Size();
GenTreePtr dstAddr = cpBlkNode->Dest();
GenTreePtr srcAddr = cpBlkNode->Source();
@@ -3705,7 +4243,7 @@ void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode)
genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
#else // !_TARGET_AMD64_
- NYI_X86("Helper call for CpBlk");
+ noway_assert(false && "Helper call for CpBlk is not needed.");
#endif // !_TARGET_AMD64_
}
@@ -4558,7 +5096,9 @@ regNumber CodeGen::genConsumeReg(GenTree *tree)
// genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
genUpdateLife(tree);
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(tree->gtRegNum != REG_NA);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
// there are three cases where consuming a reg means clearing the bit in the live mask
// 1. it was not produced by a local
@@ -4678,6 +5218,82 @@ void CodeGen::genConsumeOperands(GenTreeOp* tree)
}
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+void CodeGen::genConsumePutArgStk(GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
+{
+ // The putArgNode children are always contained. We should not consume any registers.
+
+ GenTree* dst = putArgNode;
+
+#ifdef DEBUG
+ // Get the GT_ADDR node, which is GT_LCL_VAR_ADDR (asserted below.)
+ GenTree* src = putArgNode->gtOp.gtOp1;
+ assert(src->OperGet() == GT_LDOBJ);
+ src = src->gtOp.gtOp1;
+#else // !DEBUG
+ // Get the GT_ADDR node, which is GT_LCL_VAR_ADDR (asserted below.)
+ GenTree* src = putArgNode->gtOp.gtOp1->gtOp.gtOp1;
+#endif // !DEBUG
+
+ size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE;
+ GenTree* op1;
+ GenTree* op2;
+
+ regNumber reg1, reg2, reg3;
+ op1 = dst;
+ reg1 = dstReg;
+ op2 = src;
+ reg2 = srcReg;
+ reg3 = sizeReg;
+
+ if (reg2 != REG_NA && op2->gtRegNum != REG_NA)
+ {
+ genConsumeReg(op2);
+ }
+
+ if ((reg1 != REG_NA) && (op1->gtRegNum != reg1))
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // Generate an LEA instruction to load the address of the outgoing arg space var + SlotNum offset into RDI.
+ LclVarDsc * varDsc = &compiler->lvaTable[compiler->lvaOutgoingArgSpaceVar];
+ int offset = varDsc->lvStkOffs + putArgNode->gtSlotNum * TARGET_POINTER_SIZE;
+ // Outgoing area always on top of the stack (relative to rsp.)
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, reg1, REG_SPBASE, offset);
+#else // !FEATURE_FIXED_OUT_ARGS
+ NYI_X86("Stack args for x86/RyuJIT");
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+ }
+
+ if (op2->gtRegNum != reg2)
+ {
+ if (src->OperIsLocalAddr())
+ {
+ // The OperLocalAddr is always contained.
+ assert(src->isContained());
+ GenTreeLclVarCommon* lclNode = src->AsLclVarCommon();
+
+ // Generate LEA instruction to load the LclVar address in RSI.
+ LclVarDsc * varLclDsc = &compiler->lvaTable[lclNode->gtLclNum];
+ int offset = varLclDsc->lvStkOffs;
+
+ // Outgoing area always on top of the stack (relative to rsp.)
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, reg2, (isFramePointerUsed() ? getFramePointerReg() : REG_SPBASE), offset);
+ }
+ else
+ {
+ assert(src->gtRegNum != REG_NA);
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, reg2, src->gtRegNum);
+ }
+ }
+
+ if ((reg3 != REG_NA))
+ {
+ inst_RV_IV(INS_mov, reg3, size, EA_8BYTE);
+ }
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
void CodeGen::genConsumeBlockOp(GenTreeBlkOp* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
{
// We have to consume the registers, and perform any copies, in the actual execution order.
@@ -4827,7 +5443,6 @@ void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
}
}
-
// generates an ip-relative call or indirect call via reg ('call reg')
// pass in 'addr' for a relative call or 'base' for a indirect register call
// methHnd - optional, only used for pretty printing
@@ -4843,9 +5458,9 @@ void CodeGen::genEmitCall(int callType,
bool isJump,
bool isNoGC)
{
-#ifndef _TARGET_X86_
+#if !defined(_TARGET_X86_)
ssize_t argSize = 0;
-#endif // !_TARGET_X86_
+#endif // !defined(_TARGET_X86_)
getEmitter()->emitIns_Call(emitter::EmitCallType(callType),
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
@@ -4867,14 +5482,14 @@ void CodeGen::genEmitCall(int callType,
void CodeGen::genEmitCall(int callType,
CORINFO_METHOD_HANDLE methHnd,
INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo)
- GenTreeIndir* indir
+ GenTreeIndir* indir
X86_ARG(ssize_t argSize),
emitAttr retSize,
IL_OFFSETX ilOffset)
{
-#ifndef _TARGET_X86_
+#if !defined(_TARGET_X86_)
ssize_t argSize = 0;
-#endif // !_TARGET_X86_
+#endif // !defined(_TARGET_X86_)
genConsumeAddress(indir->Addr());
getEmitter()->emitIns_Call(emitter::EmitCallType(callType),
@@ -4920,13 +5535,49 @@ void CodeGen::genCallInstruction(GenTreePtr node)
if (curArgTabEntry->regNum == REG_STK)
continue;
- regNumber argReg = curArgTabEntry->regNum;
- genConsumeReg(argNode);
- if (argNode->gtRegNum != argReg)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Deal with multi register passed struct args.
+ if (argNode->OperGet() == GT_LIST)
{
- inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ for (; argListPtr; argListPtr = argListPtr->Rest(), iterationNum++)
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+ regNumber argReg = REG_NA;
+ if (iterationNum == 0)
+ {
+ argReg = curArgTabEntry->regNum;
+ }
+ else if (iterationNum == 1)
+ {
+ argReg = curArgTabEntry->otherRegNum;
+ }
+ else
+ {
+ assert(false); // Illegal state.
+ }
+
+ genConsumeReg(putArgRegNode);
+ if (putArgRegNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg, putArgRegNode->gtRegNum);
+ }
+ }
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ regNumber argReg = curArgTabEntry->regNum;
+ genConsumeReg(argNode);
+ if (argNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ }
}
+#if FEATURE_VARARG
// In the case of a varargs call,
// the ABI dictates that if we have floating point args,
// we must pass the enregistered arguments in both the
@@ -4937,9 +5588,10 @@ void CodeGen::genCallInstruction(GenTreePtr node)
instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
inst_RV_RV(ins, argNode->gtRegNum, targetReg);
}
+#endif // FEATURE_VARARG
}
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// The call will pop its arguments.
// for each putarg_stk:
ssize_t stackArgBytes = 0;
@@ -4949,16 +5601,31 @@ void CodeGen::genCallInstruction(GenTreePtr node)
GenTreePtr arg = args->gtOp.gtOp1;
if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
{
+#if defined(_TARGET_X86_)
assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG));
if (arg->OperGet() == GT_LONG)
{
assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK));
}
+#endif // defined(_TARGET_X86_)
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (genActualType(arg->TypeGet()) == TYP_STRUCT)
+ {
+ if (arg->OperGet() == GT_PUTARG_STK)
+ {
+ GenTreeLdObj* ldObj = arg->gtGetOp1()->AsLdObj();
+ stackArgBytes = compiler->info.compCompHnd->getClassSize(ldObj->gtClass);
+ }
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
}
args = args->gtOp.gtOp2;
}
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// Insert a null check on "this" pointer if asked.
if (call->NeedsNullCheck())
@@ -5056,9 +5723,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
(void*) target->AsIndir()->Base()->AsIntConCommon()->IconValue(),
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
stackArgBytes,
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
retSize,
ilOffset);
}
@@ -5070,9 +5737,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
target->AsIndir(),
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
stackArgBytes,
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
retSize,
ilOffset);
}
@@ -5086,9 +5753,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
nullptr, //addr
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
stackArgBytes,
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
retSize,
ilOffset,
genConsumeReg(target));
@@ -5153,9 +5820,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
addr,
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
stackArgBytes,
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
retSize,
ilOffset);
}
@@ -5168,10 +5835,10 @@ void CodeGen::genCallInstruction(GenTreePtr node)
genPendingCallLabel = nullptr;
}
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
// The call will pop its arguments.
genStackLevel -= stackArgBytes;
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
// Update GC info:
// All Callee arg registers are trashed and no longer contain any GC pointers.
@@ -5218,6 +5885,130 @@ void CodeGen::genCallInstruction(GenTreePtr node)
}
}
+//------------------------------------------------------------------------
+// genGetStructTypeSizeOffset: Gets the type, size and offset of the eightbytes of a struct for System V systems.
+//
+// Arguments:
+// 'structDesc' struct description
+// 'type0' returns the type of the first eightbyte.
+// 'type1' returns the type of the second eightbyte.
+// 'size0' returns the size of the first eightbyte.
+// 'size1' returns the size of the second eightbyte.
+// 'offset0' returns the offset of the first eightbyte.
+// 'offset1' returns the offset of the second eightbyte.
+//
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+void CodeGen::genGetStructTypeSizeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ var_types* type0, var_types* type1, emitAttr* size0, emitAttr* size1,
+ unsigned __int8* offset0, unsigned __int8* offset1)
+{
+ *size0 = EA_UNKNOWN;
+ *offset0 = structDesc.eightByteOffsets[0];
+ *size1 = EA_UNKNOWN;
+ *offset1 = structDesc.eightByteOffsets[1];
+
+ *type0 = TYP_UNKNOWN;
+ *type1 = TYP_UNKNOWN;
+
+ // Set the first eightbyte data
+ if (structDesc.eightByteCount >= 1)
+ {
+ switch (structDesc.eightByteClassifications[0])
+ {
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ *size0 = EA_4BYTE;
+ *type0 = TYP_INT;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ *size0 = EA_8BYTE;
+ *type0 = TYP_LONG;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES);
+ *size0 = EA_GCREF;
+ *type0 = TYP_REF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ *size0 = EA_4BYTE;
+ *type0 = TYP_FLOAT;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ *size0 = EA_8BYTE;
+ *type0 = TYP_DOUBLE;
+ }
+ else
+ {
+ assert(false && "Bat float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
+ }
+ }
+
+ // Set the second eight byte data
+ if (structDesc.eightByteCount == 2)
+ {
+ switch (structDesc.eightByteClassifications[1])
+ {
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ *type1 = TYP_INT;
+ *size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ *type1 = TYP_LONG;
+ *size1 = EA_8BYTE;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES);
+ *type1 = TYP_REF;
+ *size1 = EA_GCREF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ *type1 = TYP_FLOAT;
+ *size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ *type1 = TYP_DOUBLE;
+ *size1 = EA_8BYTE;
+ }
+ else
+ {
+ assert(false && "Bat float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
+ }
+ }
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
// Produce code for a GT_JMP node.
// The arguments of the caller need to be transferred to the callee before exiting the caller.
// The actual jump to callee is generated as part of caller epilog sequence.
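As a hedged example of the eightbyte description genGetStructTypeSizeOffset above would produce, the struct below (invented for illustration) has a 4-byte SSE first eightbyte and an 8-byte INTEGER second eightbyte.

struct FloatThenLong        // eightbyte offsets 0 and 8
{
    float f;                // eightbyte 0: SSE, 4 bytes     -> *type0 = TYP_FLOAT, *size0 = EA_4BYTE
    long  l;                // eightbyte 1: INTEGER, 8 bytes -> *type1 = TYP_LONG,  *size1 = EA_8BYTE
};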
@@ -5319,36 +6110,94 @@ void CodeGen::genJmpMethod(GenTreePtr jmp)
if (!varDsc->lvIsRegArg)
continue;
- // Register argument
- noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varDsc->lvType == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
- // Is register argument already in the right register?
- // If not load it from its stack location.
- var_types loadType = varDsc->lvaArgType();
- regNumber argReg = varDsc->lvArgReg; // incoming arg register
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ assert(structDesc.passedInRegisters);
- if (varDsc->lvRegNum != argReg)
- {
- assert(genIsValidReg(argReg));
+ emitAttr size0 = EA_UNKNOWN;
+ emitAttr size1 = EA_UNKNOWN;
+ unsigned __int8 offset0 = 0;
+ unsigned __int8 offset1 = 0;
+ var_types type0 = TYP_UNKNOWN;
+ var_types type1 = TYP_UNKNOWN;
+
+ // Get the eightbyte data
+ genGetStructTypeSizeOffset(structDesc, &type0, &type1, &size0, &size1, &offset0, &offset1);
+
+ // Move the values into the right registers.
+ //
+ if (type0 != TYP_UNKNOWN)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(type0), size0, varDsc->lvArgReg, varNum, offset0);
+
+ // Update varDsc->lvArgReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+ // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg);
+ gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0);
+ }
+
+ if (type1 != TYP_UNKNOWN)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(type1), size1, varDsc->lvOtherArgReg, varNum, offset1);
- getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+ // Update varDsc->lvArgReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+ // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg);
+ gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1);
+ }
- // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
- // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
- // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
- // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
- regSet.rsMaskVars |= genRegMask(argReg);
- gcInfo.gcMarkRegPtrVal(argReg, loadType);
if (varDsc->lvTracked)
{
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
}
}
+ else
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // Register argument
+ noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+ // Is register argument already in the right register?
+ // If not load it from its stack location.
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+
+ if (varDsc->lvRegNum != argReg)
+ {
+ assert(genIsValidReg(argReg));
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+
+ // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+ // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.rsMaskVars |= genRegMask(argReg);
+ gcInfo.gcMarkRegPtrVal(argReg, loadType);
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
+ }
+ }
+ }
+
+#if FEATURE_VARARG
// In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg register.
if (compiler->info.compIsVarArgs)
{
regNumber intArgReg;
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+
if (varTypeIsFloating(loadType))
{
intArgReg = compiler->getCallArgIntRegister(argReg);
@@ -5368,8 +6217,10 @@ void CodeGen::genJmpMethod(GenTreePtr jmp)
firstArgVarNum = varNum;
}
}
+#endif // FEATURE_VARARG
}
+#if FEATURE_VARARG
// Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
// load the remaining arg registers (both int and float) from the corresponding
// shadow stack slots. This is for the reason that we don't know the number and type
@@ -5409,7 +6260,7 @@ void CodeGen::genJmpMethod(GenTreePtr jmp)
getEmitter()->emitEnableGC();
}
}
-
+#endif // FEATURE_VARARG
}
// produce code for a GT_LEA subnode
@@ -6488,13 +7339,122 @@ CodeGen::genMathIntrinsic(GenTreePtr treeNode)
genProduceReg(treeNode);
}
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+//---------------------------------------------------------------------
+// genPutArgStk - generate code for putting a struct arg on the stack by value.
+// In case there are references to heap objects in the struct,
+// it generates the gcinfo as well.
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+//
+// Return value:
+// None
+//
void
CodeGen::genPutArgStk(GenTreePtr treeNode)
{
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(treeNode->OperGet() == GT_PUTARG_STK);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
var_types targetType = treeNode->TypeGet();
+#ifdef _TARGET_X86_
noway_assert(targetType != TYP_STRUCT);
+#elif defined (FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ noway_assert(targetType == TYP_STRUCT);
+
+ GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk();
+ if (putArgStk->gtNumberReferenceSlots == 0)
+ {
+ switch (putArgStk->gtPutArgStkKind)
+ {
+ case GenTreePutArgStk::PutArgStkKindRepInstr:
+ genCodeForPutArgRepMovs(putArgStk);
+ break;
+ case GenTreePutArgStk::PutArgStkKindUnroll:
+ genCodeForPutArgUnroll(putArgStk);
+ break;
+ default:
+ unreached();
+ }
+ }
+ else
+ {
+ // No need to disable GC the way COPYOBJ does. Here the refs are copied in atomic operations always.
+
+ // Consume these registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ genConsumePutArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA);
+ GenTreePtr dstAddr = putArgStk;
+ GenTreePtr srcAddr = putArgStk->gtOp.gtOp1;
+ gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet());
+ gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());
+
+ unsigned slots = putArgStk->gtNumSlots;
+
+ // We are always copying to the stack, so we don't need to use the write barrier.
+ BYTE* gcPtrs = putArgStk->gtGcPtrs;
+ unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots;
+
+ unsigned i = 0;
+ unsigned copiedSlots = 0;
+ while (i < slots)
+ {
+ switch (gcPtrs[i])
+ {
+ case TYPE_GC_NONE:
+ // Let's see if we can use rep movsq instead of a sequence of movsq instructions
+ // to save cycles and code size.
+ {
+ unsigned nonGcSlotCount = 0;
+
+ do
+ {
+ nonGcSlotCount++;
+ i++;
+ } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
+
+ // If we have a very small contiguous non-gc region, it's better just to
+ // emit a sequence of movsq instructions
+ if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ copiedSlots += nonGcSlotCount;
+ while (nonGcSlotCount > 0)
+ {
+ instGen(INS_movsq);
+ nonGcSlotCount--;
+ }
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
+ copiedSlots += nonGcSlotCount;
+ instGen(INS_r_movsq);
+ }
+ }
+ break;
+ default:
+ // We have a GC pointer
+ // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction,
+ // but the logic for emitting a GC info record is not available (it is internal for the emitter only.)
+ // See emitGCVarLiveUpd function. If we could call it separately, we could do instGen(INS_movsq); and emission of gc info.
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_REF), EA_GCREF, REG_RCX, REG_RSI, 0);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_REF), EA_GCREF, REG_RCX, compiler->lvaOutgoingArgSpaceVar,
+ ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE));
+ getEmitter()->emitIns_R_I(INS_add, EA_8BYTE, REG_RSI, TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_I(INS_add, EA_8BYTE, REG_RDI, TARGET_POINTER_SIZE);
+ copiedSlots++;
+ gcPtrCount--;
+ i++;
+ }
+ }
+
+ gcInfo.gcMarkRegSetNpt(RBM_RSI);
+ gcInfo.gcMarkRegSetNpt(RBM_RDI);
+ }
+ return;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
GenTreePtr data = treeNode->gtOp.gtOp1;
@@ -6508,7 +7468,9 @@ CodeGen::genPutArgStk(GenTreePtr treeNode)
// Decrement SP.
int argSize = genTypeSize(genActualType(targetType));
inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
genStackLevel += argSize;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
// TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
if (data->isContained())
@@ -6522,7 +7484,7 @@ CodeGen::genPutArgStk(GenTreePtr treeNode)
getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
}
}
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
/*****************************************************************************
*
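A hedged sketch of the slot walk the GC-reference path of genPutArgStk above performs: contiguous non-GC slots are copied with movsq (or rep movsq for longer runs), while each GC-reference slot gets an individually reported load/store. The names and the run-length limit below are stand-ins, not JIT code.

#include <cstdio>

enum SlotKind { NonGcSlot, GcSlot };
const unsigned kNonGcRunLimit = 4;              // stand-in for CPOBJ_NONGC_SLOTS_LIMIT

void DescribeGcAwareCopy(const SlotKind* slots, unsigned count)
{
    for (unsigned i = 0; i < count; )
    {
        if (slots[i] == NonGcSlot)
        {
            unsigned run = 0;
            while (i < count && slots[i] == NonGcSlot) { run++; i++; }
            if (run < kNonGcRunLimit)
                std::printf("%u x movsq\n", run);               // short run: unrolled movsq
            else
                std::printf("mov ecx, %u ; rep movsq\n", run);  // long run: rep movsq
        }
        else
        {
            std::printf("reported GC copy: mov rcx,[rsi] ; mov [outArgSpace+off],rcx\n");
            i++;                                                // one slot at a time so GC info stays precise
        }
    }
}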