diff options
Diffstat (limited to 'src/jit')
-rw-r--r-- | src/jit/codegencommon.cpp | 303 | ||||
-rw-r--r-- | src/jit/codegenlegacy.cpp | 2 | ||||
-rw-r--r-- | src/jit/codegenlinear.h | 21 | ||||
-rw-r--r-- | src/jit/codegenxarch.cpp | 1276 | ||||
-rw-r--r-- | src/jit/compiler.cpp | 121 | ||||
-rw-r--r-- | src/jit/compiler.h | 158 | ||||
-rw-r--r-- | src/jit/compiler.hpp | 15 | ||||
-rw-r--r-- | src/jit/ee_il_dll.cpp | 64 | ||||
-rw-r--r-- | src/jit/emit.cpp | 3 | ||||
-rw-r--r-- | src/jit/emitxarch.cpp | 5 | ||||
-rw-r--r-- | src/jit/flowgraph.cpp | 107 | ||||
-rw-r--r-- | src/jit/gentree.cpp | 117 | ||||
-rw-r--r-- | src/jit/gentree.h | 193 | ||||
-rw-r--r-- | src/jit/importer.cpp | 266 | ||||
-rw-r--r-- | src/jit/jit.h | 20 | ||||
-rw-r--r-- | src/jit/jitgcinfo.h | 1 | ||||
-rw-r--r-- | src/jit/lclvars.cpp | 613 | ||||
-rw-r--r-- | src/jit/lower.cpp | 231 | ||||
-rw-r--r-- | src/jit/lower.h | 4 | ||||
-rw-r--r-- | src/jit/lowerxarch.cpp | 323 | ||||
-rw-r--r-- | src/jit/lsra.cpp | 106 | ||||
-rw-r--r-- | src/jit/lsra.h | 9 | ||||
-rw-r--r-- | src/jit/morph.cpp | 825 | ||||
-rw-r--r-- | src/jit/regalloc.cpp | 2 | ||||
-rw-r--r-- | src/jit/scopeinfo.cpp | 62 | ||||
-rw-r--r-- | src/jit/target.h | 26 |
26 files changed, 4116 insertions, 757 deletions
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 0828a160c9..ea3cce6cc8 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -3648,7 +3648,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, RegState *regState) { #ifdef DEBUG - if (verbose) + if (verbose) printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int"); #endif @@ -3678,6 +3678,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, struct { unsigned varNum; // index into compiler->lvaTable[] for this register argument +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + var_types type; // the Jit type of this regArgTab entry +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register. // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to // argument register number 'x'. Only used when circular = true. @@ -3691,18 +3694,20 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, unsigned varNum; LclVarDsc * varDsc; - for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; - varNum++ , varDsc++) + varNum++, varDsc++) { /* Is this variable a register arg? */ - - if (!varDsc->lvIsParam) + if (!varDsc->lvIsParam) + { continue; + } - if (!varDsc->lvIsRegArg) + if (!varDsc->lvIsRegArg) + { continue; + } // When we have a promoted struct we have two possible LclVars that can represent the incoming argument // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField. 
@@ -3726,13 +3731,17 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, // For register arguments that are independent promoted structs we put the promoted field varNum in the regArgTab[] if (varDsc->lvPromoted) + { continue; + } } else { // For register arguments that are not independent promoted structs we put the parent struct varNum in the regArgTab[] if (varDsc->lvIsStructField) + { continue; + } } } @@ -3743,19 +3752,89 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, var_types regType = varDsc->TypeGet(); #endif // !_TARGET_ARM_ - if (isFloatRegType(regType) != doingFloat) - continue; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (regType != TYP_STRUCT) +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + // A struct might be passed partially in XMM register for System V calls. + // So a single arg might use both register files. + if (isFloatRegType(regType) != doingFloat) + { + continue; + } + } - /* Bingo - add it to our table */ - - regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType); - noway_assert(regArgNum < regState->rsCalleeRegArgNum); - noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better not be multiple vars representing this argument register) + int slots = 0; - regArgTab[regArgNum].varNum = varNum; - regArgTab[regArgNum].slot = 1; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + if (varDsc->TypeGet() == TYP_STRUCT) + { + CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); + assert(typeHnd != nullptr); + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + if (!structDesc.passedInRegisters) + { + // The var is not passed in registers. 
+ continue; + } - int slots = 1; + unsigned firstRegSlot = 0; + for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++) + { + regNumber regNum = varDsc->lvRegNumForSlot(slotCounter); + + var_types regType = compiler->getEightByteType(structDesc, slotCounter); + + regArgNum = genMapRegNumToRegArgNum(regNum, regType); + + if ((!doingFloat && + ((structDesc.eightByteClassifications[slotCounter] == SystemVClassificationTypeInteger) || + (structDesc.eightByteClassifications[slotCounter] == SystemVClassificationTypeIntegerReference))) || + (doingFloat && structDesc.eightByteClassifications[slotCounter] == SystemVClassificationTypeSSE)) + { + // Store the reg for the first slot. + if (slots == 0) + { + firstRegSlot = regArgNum; + } + + // Bingo - add it to our table + noway_assert(regArgNum < regState->rsCalleeRegArgNum); + noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better not be multiple vars representing this argument register) + regArgTab[regArgNum].varNum = varNum; + regArgTab[regArgNum].slot = (char)(slotCounter + 1); + regArgTab[regArgNum].type = regType; + slots++; + } + } + + if (slots == 0) + { + continue; // Nothing to do for this regState set. + } + + regArgNum = firstRegSlot; + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + // Bingo - add it to our table + regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType); + noway_assert(regArgNum < regState->rsCalleeRegArgNum); + // we better not have added it already (there better not be multiple vars representing this argument register) + noway_assert(regArgTab[regArgNum].slot == 0); + + // Set the register type. 
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + regArgTab[regArgNum].type = regType; +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + + regArgTab[regArgNum].varNum = varNum; + regArgTab[regArgNum].slot = 1; + + slots = 1; + } #ifdef _TARGET_ARM_ int lclSize = compiler->lvaLclSize(varNum); @@ -3778,9 +3857,23 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, for (int i = 0; i < slots; i ++) { +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // For structs passed in registers on System V systems, + // get the regType from the table for each slot. + if (regType == TYP_STRUCT) + { + regType = regArgTab[regArgNum + i].type; + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType); - assert((i > 0) || (regNum == varDsc->lvArgReg)); + // lvArgReg could be INT or FLOAT reg. So the following assertion doesn't hold. + // The type of the register depends on the classification of the first eightbyte + // of the struct. For information on classification refer to the System V x86_64 ABI at: + // http://www.x86-64.org/documentation/abi.pdf +#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + assert((i > 0) || (regNum == varDsc->lvArgReg)); +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // Is the arg dead on entry to the method ? 
if ((regArgMaskLive & genRegMask(regNum)) == 0) @@ -3831,8 +3924,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, /* If it goes on the stack or in a register that doesn't hold * an argument anymore -> CANNOT form a circular dependency */ - if ( varDsc->lvIsInReg() && - (genRegMask(regNum) & regArgMaskLive) ) + if (varDsc->lvIsInReg() && + (genRegMask(regNum) & regArgMaskLive)) { /* will trash another argument -> possible dependency * We may need several passes after the table is constructed @@ -3841,22 +3934,33 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, /* Maybe the argument stays in the register (IDEAL) */ if ((i == 0) && (varDsc->lvRegNum == regNum)) + { goto NON_DEP; + } +#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if ((i == 1) && (varDsc->TypeGet() == TYP_STRUCT) && + (varDsc->lvOtherReg == regNum)) + { + goto NON_DEP; + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && - (varDsc->lvOtherReg == regNum)) + (varDsc->lvOtherReg == regNum)) + { goto NON_DEP; + } if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) && - (REG_NEXT(varDsc->lvRegNum) == regNum)) + (REG_NEXT(varDsc->lvRegNum) == regNum)) + { goto NON_DEP; - + } regArgTab[regArgNum+i].circular = true; } else { NON_DEP: - regArgTab[regArgNum+i].circular = false; /* mark the argument register as free */ @@ -3870,7 +3974,6 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */ bool change = true; - if (regArgMaskLive) { /* Possible circular dependencies still exist; the previous pass was not enough @@ -3882,15 +3985,20 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, for (argNum = 0; argNum < regState->rsCalleeRegArgNum; argNum++) { - /* If we already marked the argument as non-circular then continue */ + // If we already marked the argument as non-circular then continue if 
(!regArgTab[argNum].circular) + { continue; + } if (regArgTab[argNum].slot == 0) // Not a register argument + { continue; + } - varNum = regArgTab[argNum].varNum; noway_assert(varNum < compiler->lvaCount); + varNum = regArgTab[argNum].varNum; + noway_assert(varNum < compiler->lvaCount); varDsc = compiler->lvaTable + varNum; noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg); @@ -3899,11 +4007,19 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, noway_assert(!regArgTab[argNum].stackArg); regNumber regNum = genMapRegArgNumToRegNum(argNum, varDsc->TypeGet()); + regNumber destRegNum; if (regArgTab[argNum].slot == 1) { destRegNum = varDsc->lvRegNum; } +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + else + { + assert(regArgTab[argNum].slot == 2); + destRegNum = varDsc->lvOtherReg; + } +#else // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG) { @@ -3915,7 +4031,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, assert(varDsc->TypeGet() == TYP_DOUBLE); destRegNum = REG_NEXT(varDsc->lvRegNum); } - +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if (genRegMask(destRegNum) & regArgMaskLive) { /* we are trashing a live argument register - record it */ @@ -3949,33 +4065,47 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, } #endif - // TODO-AMD64-Bug? - homing of float argument registers with circular dependencies. -#ifdef _TARGET_AMD64_ - NYI_IF((regArgMaskLive & RBM_FLTARG_REGS) != 0, "Homing of float argument registers with circular dependencies not implemented"); -#endif // _TARGET_AMD64_ + // LSRA allocates registers to incoming parameters in order and will not overwrite + // a register still holding a live parameter. 
+#ifndef LEGACY_BACKEND + noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) && "Homing of float argument registers with circular dependencies not implemented."); +#endif // LEGACY_BACKEND /* Now move the arguments to their locations. * First consider ones that go on the stack since they may * free some registers. */ regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start - for (argNum = 0; argNum < regState->rsCalleeRegArgNum; argNum++) { emitAttr size; - /* If the arg is dead on entry to the method, skip it */ + // If this is the wrong register file, just continue. +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (regArgTab[argNum].type == TYP_UNDEF) + { + // This could happen if the reg in regArgTab[argNum] is of the other register file - + // for System V register passed structs where the first reg is GPR and the second an XMM reg. + // The next register file processing will process it. + continue; + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // If the arg is dead on entry to the method, skip it if (regArgTab[argNum].processed) + { continue; + } if (regArgTab[argNum].slot == 0) // Not a register argument + { continue; + } varNum = regArgTab[argNum].varNum; noway_assert(varNum < compiler->lvaCount); varDsc = compiler->lvaTable + varNum; - /* If not a stack arg go to the next one */ + // If not a stack arg go to the next one #ifndef _TARGET_64BIT_ if (varDsc->lvType == TYP_LONG) @@ -3993,7 +4123,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, #endif // !_TARGET_64BIT_ { if (!regArgTab[argNum].stackArg) + { continue; + } } #if defined(_TARGET_ARM_) @@ -4021,10 +4153,15 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, { size = EA_SIZE(varDsc->lvSize()); #if defined(_TARGET_AMD64_) - storeType = (var_types) ((size <= 4) ? TYP_INT : TYP_I_IMPL); +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING + storeType = (var_types)((size <= 4) ? 
TYP_INT : TYP_I_IMPL); // Must be 1, 2, 4, or 8, or else it wouldn't be passed in a register noway_assert(EA_SIZE_IN_BYTES(size) <= 8); assert((EA_SIZE_IN_BYTES(size) & (EA_SIZE_IN_BYTES(size) - 1)) == 0); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING + storeType = regArgTab[argNum].type; + size = emitActualTypeSize(storeType); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #elif defined(_TARGET_ARM64_) // Must be <= 16 bytes or else it wouldn't be passed in registers noway_assert(EA_SIZE_IN_BYTES(size) <= 16); @@ -4060,7 +4197,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType); - /* Stack argument - if the ref count is 0 don't care about it */ + // Stack argument - if the ref count is 0 don't care about it if (!varDsc->lvOnFrame) { @@ -4084,6 +4221,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, regArgTab[argNum].processed = true; regArgMaskLive &= ~genRegMask(srcRegNum); + #if defined(_TARGET_ARM_) if (storeType == TYP_DOUBLE) { @@ -4094,7 +4232,6 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, } /* Process any circular dependencies */ - if (regArgMaskLive) { unsigned begReg, destReg, srcReg; @@ -4105,21 +4242,39 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, if (doingFloat) { +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) #if defined(_TARGET_ARM_) insCopy = INS_vmov; - +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + insCopy = INS_mov; +#else +#error Error. Wrong architecture. +#endif // Compute xtraReg here when we have a float argument assert(xtraReg == REG_NA); regMaskTP fpAvailMask; fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive; +#if defined(_TARGET_ARM_) fpAvailMask &= RBM_DBL_REGS; +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + fpAvailMask &= RBM_ALLFLOAT; +#else +#error Error. Wrong architecture. 
+#endif + if (fpAvailMask == RBM_NONE) { fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive; +#if defined(_TARGET_ARM_) fpAvailMask &= RBM_DBL_REGS; +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + fpAvailMask &= RBM_ALLFLOAT; +#else +#error Error. Wrong architecture. +#endif } assert(fpAvailMask != RBM_NONE); @@ -4135,23 +4290,30 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, for (argNum = 0; argNum < regState->rsCalleeRegArgNum; argNum++) { - /* If not a circular dependency then continue */ - + // If not a circular dependency then continue if (!regArgTab[argNum].circular) + { continue; + } - /* If already processed the dependency then continue */ + // If already processed the dependency then continue if (regArgTab[argNum].processed) + { continue; + } if (regArgTab[argNum].slot == 0) // Not a register argument + { continue; - + } + destReg = begReg = argNum; - srcReg = regArgTab[argNum].trashBy; noway_assert(srcReg < regState->rsCalleeRegArgNum); + srcReg = regArgTab[argNum].trashBy; + noway_assert(srcReg < regState->rsCalleeRegArgNum); - varNumDest = regArgTab[destReg].varNum; noway_assert(varNumDest < compiler->lvaCount); + varNumDest = regArgTab[destReg].varNum; + noway_assert(varNumDest < compiler->lvaCount); varDscDest = compiler->lvaTable + varNumDest; noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg); @@ -4376,6 +4538,18 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, varDsc = compiler->lvaTable + varNum; regNumber regNum = genMapRegArgNumToRegNum(argNum, varDsc->TypeGet()); + // If this is the wrong register file, just continue. +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (regArgTab[argNum].type == TYP_UNDEF) + { + // This could happen if the reg in regArgTab[argNum] is of the other register file - + // for System V register passed structs where the first reg is GPR and the second an XMM reg. + // The next register file processing will process it. 
+ regArgMaskLive &= ~genRegMask(regNum); + continue; + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg); #ifndef _WIN64 //Right now we think that incoming arguments are not pointer sized. When we eventually @@ -4506,7 +4680,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, } #endif } - + noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop } } @@ -6729,12 +6903,14 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, regNumber argReg = varDsc->lvArgReg; getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0); +#if FEATURE_VARARG if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType)) { regNumber intArgReg = compiler->getCallArgIntRegister(argReg); instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG); inst_RV_RV(ins, argReg, intArgReg, loadType); } +#endif // FEATURE_VARARG } // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using. 
@@ -8495,6 +8671,7 @@ void CodeGen::genFnProlog() #endif // !LEGACY_BACKEND RegState *regState; + FOREACH_REGISTER_FILE(regState) { if (regState->rsCalleeRegArgMaskLiveIn) @@ -10789,8 +10966,8 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize) //------------------------------------------------------------------------ // ARM-specific methods used by both the classic and RyuJIT //------------------------------------------------------------------------ -#ifdef _TARGET_ARM_ -CORINFO_CLASS_HANDLE Compiler::GetHfaClassHandle(GenTreePtr tree) +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +CORINFO_CLASS_HANDLE Compiler::GetStructClassHandle(GenTreePtr tree) { if (tree->TypeGet() == TYP_STRUCT) { @@ -10809,7 +10986,7 @@ CORINFO_CLASS_HANDLE Compiler::GetHfaClassHandle(GenTreePtr tree) case GT_RETURN: assert(tree->gtOp.gtOp1->gtOper == GT_LCL_VAR); - return GetHfaClassHandle(tree->gtOp.gtOp1); + return GetStructClassHandle(tree->gtOp.gtOp1); case GT_LDOBJ: return tree->gtLdObj.gtClass; @@ -10823,15 +11000,35 @@ CORINFO_CLASS_HANDLE Compiler::GetHfaClassHandle(GenTreePtr tree) case GT_ASG: assert(tree->gtOp.gtOp1->gtOper == GT_LCL_VAR || tree->gtOp.gtOp1->gtOper == GT_LCL_FLD); - return GetHfaClassHandle(tree->gtOp.gtOp1); - + return GetStructClassHandle(tree->gtOp.gtOp1); default: - unreached(); + return NO_CLASS_HANDLE; } } return NO_CLASS_HANDLE; } +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +bool Compiler::IsRegisterPassable(CORINFO_CLASS_HANDLE hClass) +{ + if (hClass == NO_CLASS_HANDLE) + { + return false; + } + + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + eeGetSystemVAmd64PassStructInRegisterDescriptor(hClass, &structDesc); + return structDesc.passedInRegisters; +} +bool Compiler::IsRegisterPassable(GenTreePtr tree) +{ + return IsRegisterPassable(GetStructClassHandle(tree)); +} +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + 
+#ifdef _TARGET_ARM_ bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass) { return varTypeIsFloating(GetHfaType(hClass)); @@ -10839,12 +11036,12 @@ bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass) bool Compiler::IsHfa(GenTreePtr tree) { - return IsHfa(GetHfaClassHandle(tree)); + return IsHfa(GetStructClassHandle(tree)); } var_types Compiler::GetHfaType(GenTreePtr tree) { - return (tree->TypeGet() == TYP_STRUCT) ? GetHfaType(GetHfaClassHandle(tree)) : TYP_UNDEF; + return (tree->TypeGet() == TYP_STRUCT) ? GetHfaType(GetStructClassHandle(tree)) : TYP_UNDEF; } unsigned Compiler::GetHfaSlots(GenTreePtr tree) diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp index e37322d3b4..0914f7d7d6 100644 --- a/src/jit/codegenlegacy.cpp +++ b/src/jit/codegenlegacy.cpp @@ -12870,7 +12870,7 @@ void CodeGen::genCodeForBBlist() genStackLevel = 0; #if FEATURE_STACK_FP_X87 genResetFPstkLevel(); -#endif //FEATURE_STACK_FP_X87 +#endif // FEATURE_STACK_FP_X87 #if !FEATURE_FIXED_OUT_ARGS /* Check for inserted throw blocks and adjust genStackLevel */ diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h index 57eac7ced4..6a030eb926 100644 --- a/src/jit/codegenlinear.h +++ b/src/jit/codegenlinear.h @@ -103,6 +103,10 @@ void genConsumeBlockOp(GenTreeBlkOp* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + void genConsumePutArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + void genConsumeRegs(GenTree* tree); void genConsumeOperands(GenTreeOp* tree); @@ -126,6 +130,11 @@ void genCodeForCpBlkUnroll (GenTreeCpBlk* cpBlkNode); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + void genCodeForPutArgRepMovs(GenTreePutArgStk* putArgStkNode); + void genCodeForPutArgUnroll(GenTreePutArgStk* putArgStkNode); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + void genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, 
unsigned offset); void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset); @@ -150,6 +159,18 @@ void genJmpMethod(GenTreePtr jmp); +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + void genGetStructTypeSizeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc, + var_types* type0, + var_types* type1, + emitAttr* size0, + emitAttr* size1, + unsigned __int8* offset0, + unsigned __int8* offset1); + + bool genStoreRegisterReturnInLclVar(GenTreePtr treeNode); +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + void genLclHeap(GenTreePtr tree); bool genIsRegCandidateLocal (GenTreePtr tree) diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 076ba7c262..7064862c4c 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -785,7 +785,6 @@ void CodeGen::genCodeForBBlist() #endif /* Both stacks should always be empty on exit from a basic block */ - noway_assert(genStackLevel == 0); #ifdef _TARGET_AMD64_ @@ -1571,6 +1570,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED)) { assert(!isRegCandidate); + emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0); genProduceReg(treeNode); @@ -1618,85 +1618,98 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_STORE_LCL_FLD: { - noway_assert(targetType != TYP_STRUCT); - noway_assert(!treeNode->InReg()); - assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (!genStoreRegisterReturnInLclVar(treeNode)) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + { + noway_assert(targetType != TYP_STRUCT); + noway_assert(!treeNode->InReg()); + assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); #ifdef FEATURE_SIMD - // storing of TYP_SIMD12 (i.e. 
Vector3) field - if (treeNode->TypeGet() == TYP_SIMD12) - { - genStoreLclFldTypeSIMD12(treeNode); - break; - } + // storing of TYP_SIMD12 (i.e. Vector3) field + if (treeNode->TypeGet() == TYP_SIMD12) + { + genStoreLclFldTypeSIMD12(treeNode); + break; + } #endif - GenTreePtr op1 = treeNode->gtOp.gtOp1; - genConsumeRegs(op1); - emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1); + GenTreePtr op1 = treeNode->gtOp.gtOp1; + genConsumeRegs(op1); + emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1); + } } break; case GT_STORE_LCL_VAR: { - noway_assert(targetType != TYP_STRUCT); - assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (!genStoreRegisterReturnInLclVar(treeNode)) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + { + noway_assert(targetType != TYP_STRUCT); + assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); - unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum; - LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); + unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum; + LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); - // Ensure that lclVar nodes are typed correctly. - assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet())); + // Ensure that lclVar nodes are typed correctly. 
+ assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet())); #if !defined(_TARGET_64BIT_) - if (treeNode->TypeGet() == TYP_LONG) - { - genStoreLongLclVar(treeNode); - break; - } + if (treeNode->TypeGet() == TYP_LONG) + { + genStoreLongLclVar(treeNode); + break; + } #endif // !defined(_TARGET_64BIT_) - GenTreePtr op1 = treeNode->gtOp.gtOp1; - genConsumeRegs(op1); - if (treeNode->gtRegNum == REG_NA) - { - // stack store - emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), emitTypeSize(treeNode), treeNode); - varDsc->lvRegNum = REG_STK; - } - else - { - bool containedOp1 = op1->isContained(); - // Look for the case where we have a constant zero which we've marked for reuse, - // but which isn't actually in the register we want. In that case, it's better to create - // zero in the target register, because an xor is smaller than a copy. Note that we could - // potentially handle this in the register allocator, but we can't always catch it there - // because the target may not have a register allocated for it yet. - if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) && op1->IsZero()) + GenTreePtr op1 = treeNode->gtOp.gtOp1; + genConsumeRegs(op1); + + if (treeNode->gtRegNum == REG_NA) { - op1->gtRegNum = REG_NA; - op1->ResetReuseRegVal(); - containedOp1 = true; + // stack store + emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), emitTypeSize(treeNode), treeNode); + varDsc->lvRegNum = REG_STK; } - if (containedOp1) + else { - // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register - // must be a constant. However, in the future we might want to support a contained memory op. 
- // This is a bit tricky because we have to decide it's contained before register allocation, - // and this would be a case where, once that's done, we need to mark that node as always - // requiring a register - which we always assume now anyway, but once we "optimize" that - // we'll have to take cases like this into account. - assert((op1->gtRegNum == REG_NA) && op1->OperIsConst()); - genSetRegToConst(treeNode->gtRegNum, targetType, op1); + bool containedOp1 = op1->isContained(); + // Look for the case where we have a constant zero which we've marked for reuse, + // but which isn't actually in the register we want. In that case, it's better to create + // zero in the target register, because an xor is smaller than a copy. Note that we could + // potentially handle this in the register allocator, but we can't always catch it there + // because the target may not have a register allocated for it yet. + if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) && op1->IsZero()) + { + op1->gtRegNum = REG_NA; + op1->ResetReuseRegVal(); + containedOp1 = true; + } + if (containedOp1) + { + // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register + // must be a constant. However, in the future we might want to support a contained memory op. + // This is a bit tricky because we have to decide it's contained before register allocation, + // and this would be a case where, once that's done, we need to mark that node as always + // requiring a register - which we always assume now anyway, but once we "optimize" that + // we'll have to take cases like this into account. 
+ assert((op1->gtRegNum == REG_NA) && op1->OperIsConst()); + genSetRegToConst(treeNode->gtRegNum, targetType, op1); + } + else if (op1->gtRegNum != treeNode->gtRegNum) + { + assert(op1->gtRegNum != REG_NA); + emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1); + } } - else if (op1->gtRegNum != treeNode->gtRegNum) + if (treeNode->gtRegNum != REG_NA) { - assert(op1->gtRegNum != REG_NA); - emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1); + genProduceReg(treeNode); } } - if (treeNode->gtRegNum != REG_NA) - genProduceReg(treeNode); } break; @@ -1717,6 +1730,15 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) GenTreePtr op1 = treeNode->gtOp.gtOp1; if (targetType == TYP_VOID) { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (compiler->info.compRetBuffArg != BAD_VAR_NUM) + { + // System V AMD64 spec requires that when a struct is returned by a hidden + // argument the RAX should contain the value of the hidden retbuf arg. + emit->emitIns_R_S(INS_mov, EA_BYREF, REG_RAX, compiler->info.compRetBuffArg, 0); + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + assert(op1 == nullptr); } #if !defined(_TARGET_64BIT_) @@ -1742,53 +1764,233 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) #endif // !defined(_TARGET_64BIT_) else { - assert(op1 != nullptr); - noway_assert(op1->gtRegNum != REG_NA); - - // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has - // consumed a reg for the operand. This is because the variable - // is dead after return. But we are issuing more instructions - // like "profiler leave callback" after this consumption. So - // if you are issuing more instructions after this point, - // remember to keep the variable live up until the new method - // exit point where it is actually dead. - genConsumeReg(op1); - - regNumber retReg = varTypeIsFloating(treeNode) ? 
REG_FLOATRET : REG_INTRET; -#ifdef _TARGET_X86_ - if (varTypeIsFloating(treeNode)) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (treeNode->TypeGet() == TYP_STRUCT && + treeNode->gtOp.gtOp1->OperGet() == GT_LCL_VAR) { - if (genIsRegCandidateLocal(op1) && !compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegister) + GenTreeLclVarCommon* lclVarPtr = treeNode->gtOp.gtOp1->AsLclVarCommon(); + LclVarDsc* varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]); + assert(varDsc->lvDontPromote); + + CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); + assert(typeHnd != nullptr); + + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + assert(structDesc.passedInRegisters); + assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + + regNumber retReg0 = REG_NA; + emitAttr size0 = EA_UNKNOWN; + unsigned offset0 = structDesc.eightByteOffsets[0]; + regNumber retReg1 = REG_NA; + emitAttr size1 = EA_UNKNOWN; + unsigned offset1 = structDesc.eightByteOffsets[1]; + + bool firstIntUsed = false; + bool firstFloatUsed = false; + + var_types type0 = TYP_UNKNOWN; + var_types type1 = TYP_UNKNOWN; + + // Set the first eightbyte data + switch (structDesc.eightByteClassifications[0]) { - // Store local variable to its home location, if necessary. 
- if ((op1->gtFlags & GTF_REG_VAL) != 0) + case SystemVClassificationTypeInteger: + if (structDesc.eightByteSizes[0] <= 4) + { + retReg0 = REG_INTRET; + size0 = EA_4BYTE; + type0 = TYP_INT; + firstIntUsed = true; + } + else if (structDesc.eightByteSizes[0] <= 8) + { + retReg0 = REG_LNGRET; + size0 = EA_8BYTE; + type0 = TYP_LONG; + firstIntUsed = true; + } + else + { + assert(false && "Bad int type."); + } + break; + case SystemVClassificationTypeIntegerReference: + assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES); + retReg0 = REG_LNGRET; + size0 = EA_GCREF; + type0 = TYP_REF; + firstIntUsed = true; + break; + case SystemVClassificationTypeSSE: + if (structDesc.eightByteSizes[0] <= 4) + { + retReg0 = REG_FLOATRET; + size0 = EA_4BYTE; + type0 = TYP_FLOAT; + firstFloatUsed = true; + } + else if (structDesc.eightByteSizes[0] <= 8) + { + retReg0 = REG_DOUBLERET; + size0 = EA_8BYTE; + type0 = TYP_DOUBLE; + firstFloatUsed = true; + } + else { - op1->gtFlags &= ~GTF_REG_VAL; - inst_TT_RV(ins_Store(op1->gtType, compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)), op1, op1->gtRegNum); + assert(false && "Bat float type."); // Not possible. } - // Now, load it to the fp stack. - getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0); + break; + default: + assert(false && "Bad EightByte classification."); + break; } - else + + // Set the second eight byte data + switch (structDesc.eightByteClassifications[1]) { - // Spill the value, which should be in a register, then load it to the fp stack. - // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet). 
- op1->gtFlags |= GTF_SPILL; - regSet.rsSpillTree(op1->gtRegNum, op1); - op1->gtFlags |= GTF_SPILLED; - op1->gtFlags &= ~GTF_SPILL; - - TempDsc* t = regSet.rsUnspillInPlace(op1); - inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0); - op1->gtFlags &= ~GTF_SPILLED; - compiler->tmpRlsTemp(t); + case SystemVClassificationTypeInteger: + if (structDesc.eightByteSizes[1] <= 4) + { + if (firstIntUsed) + { + retReg1 = REG_INTRET_1; + } + else + { + retReg1 = REG_INTRET; + } + type1 = TYP_INT; + size1 = EA_4BYTE; + } + else if (structDesc.eightByteSizes[1] <= 8) + { + if (firstIntUsed) + { + retReg1 = REG_LNGRET_1; + } + else + { + retReg1 = REG_LNGRET; + } + type1 = TYP_LONG; + size1 = EA_8BYTE; + } + else + { + assert(false && "Bad int type."); + } + break; + case SystemVClassificationTypeIntegerReference: + assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES); + if (firstIntUsed) + { + retReg1 = REG_LNGRET_1; + } + else + { + retReg1 = REG_LNGRET; + } + type1 = TYP_REF; + size1 = EA_GCREF; + break; + case SystemVClassificationTypeSSE: + if (structDesc.eightByteSizes[1] <= 4) + { + if (firstFloatUsed) + { + retReg1 = REG_FLOATRET_1; + } + else + { + retReg1 = REG_FLOATRET; + } + type1 = TYP_FLOAT; + size1 = EA_4BYTE; + } + else if (structDesc.eightByteSizes[1] <= 8) + { + if (firstFloatUsed) + { + retReg1 = REG_DOUBLERET_1; + } + else + { + retReg1 = REG_DOUBLERET; + } + type1 = TYP_DOUBLE; + size1 = EA_8BYTE; + } + else + { + assert(false && "Bat float type."); // Not possible. + } + break; + default: + assert(false && "Bad EightByte classification."); + break; } + + // Move the values into the return registers. 
+ // + emit->emitIns_R_S(ins_Load(type0), size0, retReg0, lclVarPtr->gtLclNum, offset0); + emit->emitIns_R_S(ins_Load(type1), size1, retReg1, lclVarPtr->gtLclNum, offset1); } else -#endif // _TARGET_X86_ - if (op1->gtRegNum != retReg) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { - inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType); + assert(op1 != nullptr); + noway_assert(op1->gtRegNum != REG_NA); + + // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has + // consumed a reg for the operand. This is because the variable + // is dead after return. But we are issuing more instructions + // like "profiler leave callback" after this consumption. So + // if you are issuing more instructions after this point, + // remember to keep the variable live up until the new method + // exit point where it is actually dead. + genConsumeReg(op1); + + regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET; +#ifdef _TARGET_X86_ + if (varTypeIsFloating(treeNode)) + { + if (genIsRegCandidateLocal(op1) && !compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegister) + { + // Store local variable to its home location, if necessary. + if ((op1->gtFlags & GTF_REG_VAL) != 0) + { + op1->gtFlags &= ~GTF_REG_VAL; + inst_TT_RV(ins_Store(op1->gtType, compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)), op1, op1->gtRegNum); + } + // Now, load it to the fp stack. + getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0); + } + else + { + // Spill the value, which should be in a register, then load it to the fp stack. + // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet). 
+ op1->gtFlags |= GTF_SPILL; + regSet.rsSpillTree(op1->gtRegNum, op1); + op1->gtFlags |= GTF_SPILLED; + op1->gtFlags &= ~GTF_SPILL; + + TempDsc* t = regSet.rsUnspillInPlace(op1); + inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0); + op1->gtFlags &= ~GTF_SPILLED; + compiler->tmpRlsTemp(t); + } + } + else +#endif // _TARGET_X86_ + { + if (op1->gtRegNum != retReg) + { + inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType); + } + } } } @@ -2468,6 +2670,14 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) genPutArgStk(treeNode); #else // !_TARGET_X86_ { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + + if (targetType == TYP_STRUCT) + { + genPutArgStk(treeNode); + break; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING noway_assert(targetType != TYP_STRUCT); assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); @@ -2536,8 +2746,9 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_PUTARG_REG: { +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING noway_assert(targetType != TYP_STRUCT); - +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING // commas show up here commonly, as part of a nullchk operation GenTree *op1 = treeNode->gtOp.gtOp1; // If child node is not already in the register we need, move it @@ -2546,8 +2757,8 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) { inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType); } + genProduceReg(treeNode); } - genProduceReg(treeNode); break; case GT_CALL: @@ -2767,6 +2978,198 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) } } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +// This method handles storing double register return struct value to a +// local homing stack location. +// It returns true if this is a struct and storing of the returned +// register value is handled. It returns false otherwise. 
+bool +CodeGen::genStoreRegisterReturnInLclVar(GenTreePtr treeNode) +{ + if (treeNode->TypeGet() == TYP_STRUCT) + { + noway_assert(!treeNode->InReg()); + + GenTreeLclVarCommon* lclVarPtr = treeNode->AsLclVarCommon(); + + LclVarDsc * varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]); + + CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); + assert(typeHnd != nullptr); + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + + assert(structDesc.passedInRegisters); + assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + + GenTreePtr op1 = treeNode->gtOp.gtOp1; + genConsumeRegs(op1); + + regNumber retReg0 = REG_NA; + emitAttr size0 = EA_UNKNOWN; + unsigned offset0 = structDesc.eightByteOffsets[0]; + regNumber retReg1 = REG_NA; + emitAttr size1 = EA_UNKNOWN; + unsigned offset1 = structDesc.eightByteOffsets[1]; + + bool firstIntUsed = false; + bool firstFloatUsed = false; + + var_types type0 = TYP_UNKNOWN; + var_types type1 = TYP_UNKNOWN; + + // Set the first eightbyte data + switch (structDesc.eightByteClassifications[0]) + { + case SystemVClassificationTypeInteger: + if (structDesc.eightByteSizes[0] <= 4) + { + retReg0 = REG_INTRET; + size0 = EA_4BYTE; + type0 = TYP_INT; + firstIntUsed = true; + } + else if (structDesc.eightByteSizes[0] <= 8) + { + retReg0 = REG_LNGRET; + size0 = EA_8BYTE; + type0 = TYP_LONG; + firstIntUsed = true; + } + else + { + assert(false && "Bad int type."); + } + break; + case SystemVClassificationTypeIntegerReference: + assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES); + retReg0 = REG_LNGRET; + size0 = EA_GCREF; + type0 = TYP_REF; + firstIntUsed = true; + break; + case SystemVClassificationTypeSSE: + if (structDesc.eightByteSizes[0] <= 4) + { + retReg0 = REG_FLOATRET; + size0 = EA_4BYTE; + type0 = TYP_FLOAT; + firstFloatUsed = true; + } + else if (structDesc.eightByteSizes[0] <= 8) + { 
+ retReg0 = REG_DOUBLERET; + size0 = EA_8BYTE; + type0 = TYP_DOUBLE; + firstFloatUsed = true; + } + else + { + assert(false && "Bat float type."); // Not possible. + } + break; + default: + assert(false && "Bad EightByte classification."); + break; + } + + // Set the second eight byte data + switch (structDesc.eightByteClassifications[1]) + { + case SystemVClassificationTypeInteger: + if (structDesc.eightByteSizes[1] <= 4) + { + if (firstIntUsed) + { + retReg1 = REG_INTRET_1; + } + else + { + retReg1 = REG_INTRET; + } + type1 = TYP_INT; + size1 = EA_4BYTE; + } + else if (structDesc.eightByteSizes[1] <= 8) + { + if (firstIntUsed) + { + retReg1 = REG_LNGRET_1; + } + else + { + retReg1 = REG_LNGRET; + } + type1 = TYP_LONG; + size1 = EA_8BYTE; + } + else + { + assert(false && "Bad int type."); + } + break; + case SystemVClassificationTypeIntegerReference: + assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES); + if (firstIntUsed) + { + retReg1 = REG_LNGRET_1; + } + else + { + retReg1 = REG_LNGRET; + } + type1 = TYP_REF; + size1 = EA_GCREF; + break; + case SystemVClassificationTypeSSE: + if (structDesc.eightByteSizes[1] <= 4) + { + if (firstFloatUsed) + { + retReg1 = REG_FLOATRET_1; + } + else + { + retReg1 = REG_FLOATRET; + } + type1 = TYP_FLOAT; + size1 = EA_4BYTE; + } + else if (structDesc.eightByteSizes[1] <= 8) + { + if (firstFloatUsed) + { + retReg1 = REG_DOUBLERET_1; + } + else + { + retReg1 = REG_DOUBLERET; + } + type1 = TYP_DOUBLE; + size1 = EA_8BYTE; + } + else + { + assert(false && "Bat float type."); // Not possible. + } + break; + default: + assert(false && "Bad EightByte classification."); + break; + } + + // Move the values into the return registers. 
+ // + + getEmitter()->emitIns_S_R(ins_Store(type0), size0, retReg0, lclVarPtr->gtLclNum, offset0); + getEmitter()->emitIns_S_R(ins_Store(type1), size1, retReg1, lclVarPtr->gtLclNum, offset1); + + return true; + } + + return false; +} +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING // Generate code for division (or mod) by power of two // or negative powers of two. (meaning -1 * a power of two, not 2^(-1)) @@ -3366,40 +3769,55 @@ void CodeGen::genCodeForInitBlk(GenTreeInitBlk* initBlkNode) // Generate code for a load from some address + offset -// base: tree node which can be either a local address or arbitrary node -// offset: distance from the base from which to load -void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset) +// baseNode: tree node which can be either a local address or arbitrary node +// offset: distance from the baseNode from which to load +void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset) { emitter *emit = getEmitter(); - if (base->OperIsLocalAddr()) + if (baseNode->OperIsLocalAddr()) { - if (base->gtOper == GT_LCL_FLD_ADDR) - offset += base->gtLclFld.gtLclOffs; - emit->emitIns_R_S(ins, size, dst, base->gtLclVarCommon.gtLclNum, offset); + if (baseNode->gtOper == GT_LCL_FLD_ADDR) + offset += baseNode->gtLclFld.gtLclOffs; + emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset); } else { - emit->emitIns_R_AR(ins, size, dst, base->gtRegNum, offset); + emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset); } } // Generate code for a store to some address + offset -// base: tree node which can be either a local address or arbitrary node -// offset: distance from the base from which to load -void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset) +// baseNode: tree node which can be either a local address or arbitrary node +// offset: 
distance from the baseNode from which to load +void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset) { emitter *emit = getEmitter(); - if (base->OperIsLocalAddr()) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (baseNode->OperGet() == GT_PUTARG_STK) { - if (base->gtOper == GT_LCL_FLD_ADDR) - offset += base->gtLclFld.gtLclOffs; - emit->emitIns_S_R(ins, size, src, base->gtLclVarCommon.gtLclNum, offset); + GenTreePutArgStk* putArgStkNode = baseNode->AsPutArgStk(); + assert(putArgStkNode->gtOp.gtOp1->isContained()); + assert(putArgStkNode->gtOp.gtOp1->gtOp.gtOper == GT_LDOBJ); + + emit->emitIns_S_R(ins, size, src, compiler->lvaOutgoingArgSpaceVar, + (putArgStkNode->gtSlotNum * TARGET_POINTER_SIZE) + offset); } else +#endif // #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING { - emit->emitIns_AR_R(ins, size, src, base->gtRegNum, offset); + + if (baseNode->OperIsLocalAddr()) + { + if (baseNode->gtOper == GT_LCL_FLD_ADDR) + offset += baseNode->gtLclFld.gtLclOffs; + emit->emitIns_S_R(ins, size, src, baseNode->gtLclVarCommon.gtLclNum, offset); + } + else + { + emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset); + } } } @@ -3523,6 +3941,126 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeCpBlk* cpBlkNode) instGen(INS_r_movsb); } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +// Generates PutArg code by performing a loop unroll +// +// TODO-Amd64-Unix: Try to share code with copyblk. +// The difference for now is thethe putarg_stk contains it's children, while cpyblk not. +// This creates differences in code. After some significant refactoring it could be reused. 
+void CodeGen::genCodeForPutArgUnroll(GenTreePutArgStk* putArgNode) +{ + // Make sure we got the arguments of the cpblk operation in the right registers + GenTreePtr dstAddr = putArgNode; + GenTreePtr srcAddr = putArgNode->gtOp.gtOp1; + + size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE; + assert(size <= CPBLK_UNROLL_LIMIT); + + emitter *emit = getEmitter(); + + assert(srcAddr->isContained()); + assert(srcAddr->gtOper == GT_LDOBJ); + + if (!srcAddr->gtOp.gtOp1->isContained()) + { + genConsumeReg(srcAddr->gtOp.gtOp1); + } + + unsigned offset = 0; + + // If the size of this struct is larger than 16 bytes + // let's use SSE2 to be able to do 16 byte at a time + // loads and stores. + if (size >= XMM_REGSIZE_BYTES) + { + assert(putArgNode->gtRsvdRegs != RBM_NONE); + regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT); + assert(genIsValidFloatReg(xmmReg)); + size_t slots = size / XMM_REGSIZE_BYTES; + + while (slots-- > 0) + { + // Load + genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr->gtOp.gtOp1, offset); // Load the address of the child of the LdObj node. + // Store + genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset); + offset += XMM_REGSIZE_BYTES; + } + } + + // Fill the remainder (15 bytes or less) if there's one. + if ((size & 0xf) != 0) + { + // Grab the integer temp register to emit the remaining loads and stores. + regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT); + + if ((size & 8) != 0) + { +#ifdef _TARGET_X86_ + // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs. 
+ for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4) + { + genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset); + genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset); + } +#else // !_TARGET_X86_ + genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset); + genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset); + offset += 8; +#endif // !_TARGET_X86_ + } + if ((size & 4) != 0) + { + genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset); + genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset); + offset += 4; + } + if ((size & 2) != 0) + { + genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset); + genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset); + offset += 2; + } + if ((size & 1) != 0) + { + genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset); + genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset); + } + } +} + +// Generate code for CpBlk by using rep movs +// Preconditions: +// The size argument of the PutArgStk (for structs) is a constant and is between +// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes. +void CodeGen::genCodeForPutArgRepMovs(GenTreePutArgStk* putArgNode) +{ + + // Make sure we got the arguments of the cpblk operation in the right registers + GenTreePtr dstAddr = putArgNode; + GenTreePtr srcAddr = putArgNode->gtOp.gtOp1; +#ifdef DEBUG + size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE; +#endif // DEBUG + + // Validate state. 
+ assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI)); + +#ifdef DEBUG + assert(srcAddr->isContained()); + +#ifdef _TARGET_AMD64_ + assert(size > CPBLK_UNROLL_LIMIT); +#else + assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT); +#endif + +#endif // DEBUG + genConsumePutArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX); + instGen(INS_r_movsb); +} +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + // Generate code for CpObj nodes wich copy structs that have interleaved // GC pointers. // This will generate a sequence of movsq instructions for the cases of non-gc members @@ -3686,7 +4224,7 @@ void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode) { #ifdef _TARGET_AMD64_ // Make sure we got the arguments of the cpblk operation in the right registers - GenTreePtr blockSize = cpBlkNode->Size(); + GenTreePtr blockSize = cpBlkNode->Size(); GenTreePtr dstAddr = cpBlkNode->Dest(); GenTreePtr srcAddr = cpBlkNode->Source(); @@ -3705,7 +4243,7 @@ void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode) genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN); #else // !_TARGET_AMD64_ - NYI_X86("Helper call for CpBlk"); + noway_assert(false && "Helper call for CpBlk is not needed."); #endif // !_TARGET_AMD64_ } @@ -4558,7 +5096,9 @@ regNumber CodeGen::genConsumeReg(GenTree *tree) // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar genUpdateLife(tree); +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING assert(tree->gtRegNum != REG_NA); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING // there are three cases where consuming a reg means clearing the bit in the live mask // 1. it was not produced by a local @@ -4678,6 +5218,82 @@ void CodeGen::genConsumeOperands(GenTreeOp* tree) } } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +void CodeGen::genConsumePutArgStk(GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg) +{ + // The putArgNode children are always contained. We should not consume any registers. 
+ + GenTree* dst = putArgNode; + +#ifdef DEBUG + // Get the GT_ADDR node, which is GT_LCL_VAR_ADDR (asserted below.) + GenTree* src = putArgNode->gtOp.gtOp1; + assert(src->OperGet() == GT_LDOBJ); + src = src->gtOp.gtOp1; +#else // !DEBUG + // Get the GT_ADDR node, which is GT_LCL_VAR_ADDR (asserted below.) + GenTree* src = putArgNode->gtOp.gtOp1->gtOp.gtOp1; +#endif // !DEBUG + + size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE; + GenTree* op1; + GenTree* op2; + + regNumber reg1, reg2, reg3; + op1 = dst; + reg1 = dstReg; + op2 = src; + reg2 = srcReg; + reg3 = sizeReg; + + if (reg2 != REG_NA && op2->gtRegNum != REG_NA) + { + genConsumeReg(op2); + } + + if ((reg1 != REG_NA) && (op1->gtRegNum != reg1)) + { +#if FEATURE_FIXED_OUT_ARGS + // Generate LEA instruction to load the stack of the outgoing var + SlotNum offset in RDI. + LclVarDsc * varDsc = &compiler->lvaTable[compiler->lvaOutgoingArgSpaceVar]; + int offset = varDsc->lvStkOffs + putArgNode->gtSlotNum * TARGET_POINTER_SIZE; + // Outgoing area always on top of the stack (relative to rsp.) + getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, reg1, REG_SPBASE, offset); +#else // !FEATURE_FIXED_OUT_ARGS + NYI_X86("Stack args for x86/RyuJIT"); +#endif // !FEATURE_FIXED_OUT_ARGS + + } + + if (op2->gtRegNum != reg2) + { + if (src->OperIsLocalAddr()) + { + // The OperLocalAddr is always contained. + assert(src->isContained()); + GenTreeLclVarCommon* lclNode = src->AsLclVarCommon(); + + // Generate LEA instruction to load the LclVar address in RSI. + LclVarDsc * varLclDsc = &compiler->lvaTable[lclNode->gtLclNum]; + int offset = varLclDsc->lvStkOffs; + + // Otutgoing area always on top of the stack (relative to rsp.) + getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, reg2, (isFramePointerUsed() ? 
getFramePointerReg() : REG_SPBASE), offset); + } + else + { + assert(src->gtRegNum != REG_NA); + getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, reg2, src->gtRegNum); + } + } + + if ((reg3 != REG_NA)) + { + inst_RV_IV(INS_mov, reg3, size, EA_8BYTE); + } +} +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + void CodeGen::genConsumeBlockOp(GenTreeBlkOp* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg) { // We have to consume the registers, and perform any copies, in the actual execution order. @@ -4827,7 +5443,6 @@ void CodeGen::genTransferRegGCState(regNumber dst, regNumber src) } } - // generates an ip-relative call or indirect call via reg ('call reg') // pass in 'addr' for a relative call or 'base' for a indirect register call // methHnd - optional, only used for pretty printing @@ -4843,9 +5458,9 @@ void CodeGen::genEmitCall(int callType, bool isJump, bool isNoGC) { -#ifndef _TARGET_X86_ +#if !defined(_TARGET_X86_) ssize_t argSize = 0; -#endif // !_TARGET_X86_ +#endif // !defined(_TARGET_X86_) getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) @@ -4867,14 +5482,14 @@ void CodeGen::genEmitCall(int callType, void CodeGen::genEmitCall(int callType, CORINFO_METHOD_HANDLE methHnd, INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) - GenTreeIndir* indir + GenTreeIndir* indir X86_ARG(ssize_t argSize), emitAttr retSize, IL_OFFSETX ilOffset) { -#ifndef _TARGET_X86_ +#if !defined(_TARGET_X86_) ssize_t argSize = 0; -#endif // !_TARGET_X86_ +#endif // !defined(_TARGET_X86_) genConsumeAddress(indir->Addr()); getEmitter()->emitIns_Call(emitter::EmitCallType(callType), @@ -4920,13 +5535,49 @@ void CodeGen::genCallInstruction(GenTreePtr node) if (curArgTabEntry->regNum == REG_STK) continue; - regNumber argReg = curArgTabEntry->regNum; - genConsumeReg(argNode); - if (argNode->gtRegNum != argReg) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // Deal with multi register passed struct args. 
+ if (argNode->OperGet() == GT_LIST) { - inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum); + GenTreeArgList* argListPtr = argNode->AsArgList(); + unsigned iterationNum = 0; + for (; argListPtr; argListPtr = argListPtr->Rest(), iterationNum++) + { + GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1; + assert(putArgRegNode->gtOper == GT_PUTARG_REG); + regNumber argReg = REG_NA; + if (iterationNum == 0) + { + argReg = curArgTabEntry->regNum; + } + else if (iterationNum == 1) + { + argReg = curArgTabEntry->otherRegNum; + } + else + { + assert(false); // Illegal state. + } + + genConsumeReg(putArgRegNode); + if (putArgRegNode->gtRegNum != argReg) + { + inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg, putArgRegNode->gtRegNum); + } + } + } + else +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + { + regNumber argReg = curArgTabEntry->regNum; + genConsumeReg(argNode); + if (argNode->gtRegNum != argReg) + { + inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum); + } } +#if FEATURE_VARARG // In the case of a varargs call, // the ABI dictates that if we have floating point args, // we must pass the enregistered arguments in both the @@ -4937,9 +5588,10 @@ void CodeGen::genCallInstruction(GenTreePtr node) instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG); inst_RV_RV(ins, argNode->gtRegNum, targetReg); } +#endif // FEATURE_VARARG } -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // The call will pop its arguments. 
// for each putarg_stk: ssize_t stackArgBytes = 0; @@ -4949,16 +5601,31 @@ void CodeGen::genCallInstruction(GenTreePtr node) GenTreePtr arg = args->gtOp.gtOp1; if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG)) { +#if defined(_TARGET_X86_) assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG)); if (arg->OperGet() == GT_LONG) { assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK)); } +#endif // defined(_TARGET_X86_) + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (genActualType(arg->TypeGet()) == TYP_STRUCT) + { + if (arg->OperGet() == GT_PUTARG_STK) + { + GenTreeLdObj* ldObj = arg->gtGetOp1()->AsLdObj(); + stackArgBytes = compiler->info.compCompHnd->getClassSize(ldObj->gtClass); + } + } + else +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + stackArgBytes += genTypeSize(genActualType(arg->TypeGet())); } args = args->gtOp.gtOp2; } -#endif // _TARGET_X86_ +#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // Insert a null check on "this" pointer if asked. 
if (call->NeedsNullCheck()) @@ -5056,9 +5723,9 @@ void CodeGen::genCallInstruction(GenTreePtr node) methHnd, INDEBUG_LDISASM_COMMA(sigInfo) (void*) target->AsIndir()->Base()->AsIntConCommon()->IconValue(), -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) stackArgBytes, -#endif // _TARGET_X86_ +#endif // defined(_TARGET_X86_) retSize, ilOffset); } @@ -5070,9 +5737,9 @@ void CodeGen::genCallInstruction(GenTreePtr node) methHnd, INDEBUG_LDISASM_COMMA(sigInfo) target->AsIndir(), -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) stackArgBytes, -#endif // _TARGET_X86_ +#endif // defined(_TARGET_X86_) retSize, ilOffset); } @@ -5086,9 +5753,9 @@ void CodeGen::genCallInstruction(GenTreePtr node) methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr, //addr -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) stackArgBytes, -#endif // _TARGET_X86_ +#endif // defined(_TARGET_X86_) retSize, ilOffset, genConsumeReg(target)); @@ -5153,9 +5820,9 @@ void CodeGen::genCallInstruction(GenTreePtr node) methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) stackArgBytes, -#endif // _TARGET_X86_ +#endif // _defined(_TARGET_X86_) retSize, ilOffset); } @@ -5168,10 +5835,10 @@ void CodeGen::genCallInstruction(GenTreePtr node) genPendingCallLabel = nullptr; } -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) // The call will pop its arguments. genStackLevel -= stackArgBytes; -#endif // _TARGET_X86_ +#endif // defined(_TARGET_X86_) // Update GC info: // All Callee arg registers are trashed and no longer contain any GC pointers. @@ -5218,6 +5885,130 @@ void CodeGen::genCallInstruction(GenTreePtr node) } } +//------------------------------------------------------------------------ +// genGetStructTypeSizeOffset: Gets the type, size and offset of the eightbytes of a struct for System V systems. +// +// Arguments: +// 'structDesc' struct description +// 'type0' returns the type of the first eightbyte. +// 'type1' returns the type of the second eightbyte. 
+// 'size0' returns the size of the first eightbyte. +// 'size1' returns the size of the second eightbyte. +// 'offset0' returns the offset of the first eightbyte. +// 'offset1' returns the offset of the second eightbyte. +// + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +void CodeGen::genGetStructTypeSizeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc, + var_types* type0, var_types* type1, emitAttr* size0, emitAttr* size1, + unsigned __int8* offset0, unsigned __int8* offset1) +{ + *size0 = EA_UNKNOWN; + *offset0 = structDesc.eightByteOffsets[0]; + *size1 = EA_UNKNOWN; + *offset1 = structDesc.eightByteOffsets[1]; + + *type0 = TYP_UNKNOWN; + *type1 = TYP_UNKNOWN; + + // Set the first eightbyte data + if (structDesc.eightByteCount >= 1) + { + switch (structDesc.eightByteClassifications[0]) + { + case SystemVClassificationTypeInteger: + if (structDesc.eightByteSizes[0] <= 4) + { + *size0 = EA_4BYTE; + *type0 = TYP_INT; + } + else if (structDesc.eightByteSizes[0] <= 8) + { + *size0 = EA_8BYTE; + *type0 = TYP_LONG; + } + else + { + assert(false && "Bad int type."); + } + break; + case SystemVClassificationTypeIntegerReference: + assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES); + *size0 = EA_GCREF; + *type0 = TYP_REF; + break; + case SystemVClassificationTypeSSE: + if (structDesc.eightByteSizes[0] <= 4) + { + *size0 = EA_4BYTE; + *type0 = TYP_FLOAT; + } + else if (structDesc.eightByteSizes[0] <= 8) + { + *size0 = EA_8BYTE; + *type0 = TYP_DOUBLE; + } + else + { + assert(false && "Bat float type."); // Not possible. 
+ } + break; + default: + assert(false && "Bad EightByte classification."); + break; + } + } + + // Set the second eight byte data + if (structDesc.eightByteCount == 2) + { + switch (structDesc.eightByteClassifications[1]) + { + case SystemVClassificationTypeInteger: + if (structDesc.eightByteSizes[1] <= 4) + { + *type1 = TYP_INT; + *size1 = EA_4BYTE; + } + else if (structDesc.eightByteSizes[1] <= 8) + { + *type1 = TYP_LONG; + *size1 = EA_8BYTE; + } + else + { + assert(false && "Bad int type."); + } + break; + case SystemVClassificationTypeIntegerReference: + assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES); + *type1 = TYP_REF; + *size1 = EA_GCREF; + break; + case SystemVClassificationTypeSSE: + if (structDesc.eightByteSizes[1] <= 4) + { + *type1 = TYP_FLOAT; + *size1 = EA_4BYTE; + } + else if (structDesc.eightByteSizes[1] <= 8) + { + *type1 = TYP_DOUBLE; + *size1 = EA_8BYTE; + } + else + { + assert(false && "Bat float type."); // Not possible. + } + break; + default: + assert(false && "Bad EightByte classification."); + break; + } + } +} +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Produce code for a GT_JMP node. // The arguments of the caller needs to be transferred to the callee before exiting caller. // The actual jump to callee is generated as part of caller epilog sequence. @@ -5319,36 +6110,94 @@ void CodeGen::genJmpMethod(GenTreePtr jmp) if (!varDsc->lvIsRegArg) continue; - // Register argument - noway_assert(isRegParamType(genActualType(varDsc->TypeGet()))); +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (varDsc->lvType == TYP_STRUCT) + { + CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); + assert(typeHnd != nullptr); - // Is register argument already in the right register? - // If not load it from its stack location. 
- var_types loadType = varDsc->lvaArgType(); - regNumber argReg = varDsc->lvArgReg; // incoming arg register + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + assert(structDesc.passedInRegisters); - if (varDsc->lvRegNum != argReg) - { - assert(genIsValidReg(argReg)); + emitAttr size0 = EA_UNKNOWN; + emitAttr size1 = EA_UNKNOWN; + unsigned __int8 offset0 = 0; + unsigned __int8 offset1 = 0; + var_types type0 = TYP_UNKNOWN; + var_types type1 = TYP_UNKNOWN; + + // Get the eightbyte data + genGetStructTypeSizeOffset(structDesc, &type0, &type1, &size0, &size1, &offset0, &offset1); + + // Move the values into the right registers. + // + if (type0 != TYP_UNKNOWN) + { + getEmitter()->emitIns_R_S(ins_Load(type0), size0, varDsc->lvArgReg, varNum, offset0); + + // Update varDsc->lvArgReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. + // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. + // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block + // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). + regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg); + gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0); + } + + if (type1 != TYP_UNKNOWN) + { + getEmitter()->emitIns_R_S(ins_Load(type1), size1, varDsc->lvOtherArgReg, varNum, offset1); - getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0); + // Update varDsc->lvArgReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. + // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. + // Therefore manually update life of argReg. 
Note that GT_JMP marks the end of the basic block + // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). + regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg); + gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1); + } - // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. - // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. - // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block - // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). - regSet.rsMaskVars |= genRegMask(argReg); - gcInfo.gcMarkRegPtrVal(argReg, loadType); if (varDsc->lvTracked) { - VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum); + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum); } } + else +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + // Register argument + noway_assert(isRegParamType(genActualType(varDsc->TypeGet()))); + // Is register argument already in the right register? + // If not load it from its stack location. + var_types loadType = varDsc->lvaArgType(); + regNumber argReg = varDsc->lvArgReg; // incoming arg register + + if (varDsc->lvRegNum != argReg) + { + assert(genIsValidReg(argReg)); + getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0); + + // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. + // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. + // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block + // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). 
+ regSet.rsMaskVars |= genRegMask(argReg); + gcInfo.gcMarkRegPtrVal(argReg, loadType); + if (varDsc->lvTracked) + { + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum); + } + } + } + +#if FEATURE_VARARG // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg register. if (compiler->info.compIsVarArgs) { regNumber intArgReg; + var_types loadType = varDsc->lvaArgType(); + regNumber argReg = varDsc->lvArgReg; // incoming arg register + if (varTypeIsFloating(loadType)) { intArgReg = compiler->getCallArgIntRegister(argReg); @@ -5368,8 +6217,10 @@ void CodeGen::genJmpMethod(GenTreePtr jmp) firstArgVarNum = varNum; } } +#endif // FEATURE_VARARG } +#if FEATURE_VARARG // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments, // load the remaining arg registers (both int and float) from the corresponding // shadow stack slots. This is for the reason that we don't know the number and type @@ -5409,7 +6260,7 @@ void CodeGen::genJmpMethod(GenTreePtr jmp) getEmitter()->emitEnableGC(); } } - +#endif // FEATURE_VARARG } // produce code for a GT_LEA subnode @@ -6488,13 +7339,122 @@ CodeGen::genMathIntrinsic(GenTreePtr treeNode) genProduceReg(treeNode); } -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +//--------------------------------------------------------------------- +// genPutArgStk - generate code for putting a struct arg on the stack by value. +// In case there are references to heap object in the struct, +// it generates the gcinfo as well. 
+// +// Arguments +// treeNode - the GT_PUTARG_STK node +// +// Return value: +// None +// void CodeGen::genPutArgStk(GenTreePtr treeNode) { +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING assert(treeNode->OperGet() == GT_PUTARG_STK); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING var_types targetType = treeNode->TypeGet(); +#ifdef _TARGET_X86_ noway_assert(targetType != TYP_STRUCT); +#elif defined (FEATURE_UNIX_AMD64_STRUCT_PASSING) + noway_assert(targetType == TYP_STRUCT); + + GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk(); + if (putArgStk->gtNumberReferenceSlots == 0) + { + switch (putArgStk->gtPutArgStkKind) + { + case GenTreePutArgStk::PutArgStkKindRepInstr: + genCodeForPutArgRepMovs(putArgStk); + break; + case GenTreePutArgStk::PutArgStkKindUnroll: + genCodeForPutArgUnroll(putArgStk); + break; + default: + unreached(); + } + } + else + { + // No need to disable GC the way COPYOBJ does. Here the refs are copied in atomic operations always. + + // Consume these registers. + // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). + genConsumePutArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA); + GenTreePtr dstAddr = putArgStk; + GenTreePtr srcAddr = putArgStk->gtOp.gtOp1; + gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet()); + gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet()); + + unsigned slots = putArgStk->gtNumSlots; + + // We are always on the stack we don't need to use the write barrier. + BYTE* gcPtrs = putArgStk->gtGcPtrs; + unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots; + + unsigned i = 0; + unsigned copiedSlots = 0; + while (i < slots) + { + switch (gcPtrs[i]) + { + case TYPE_GC_NONE: + // Let's see if we can use rep movsq instead of a sequence of movsq instructions + // to save cycles and code size. 
+ { + unsigned nonGcSlotCount = 0; + + do + { + nonGcSlotCount++; + i++; + } while (i < slots && gcPtrs[i] == TYPE_GC_NONE); + + // If we have a very small contiguous non-gc region, it's better just to + // emit a sequence of movsq instructions + if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) + { + copiedSlots += nonGcSlotCount; + while (nonGcSlotCount > 0) + { + instGen(INS_movsq); + nonGcSlotCount--; + } + } + else + { + getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount); + copiedSlots += nonGcSlotCount; + instGen(INS_r_movsq); + } + } + break; + default: + // We have a GC pointer + // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction, + // but the logic for emitting a GC info record is not available (it is internal for the emitter only.) + // See emitGCVarLiveUpd function. If we could call it separately, we could do instGen(INS_movsq); and emission of gc info. + + getEmitter()->emitIns_R_AR(ins_Load(TYP_REF), EA_GCREF, REG_RCX, REG_RSI, 0); + getEmitter()->emitIns_S_R(ins_Store(TYP_REF), EA_GCREF, REG_RCX, compiler->lvaOutgoingArgSpaceVar, + ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE)); + getEmitter()->emitIns_R_I(INS_add, EA_8BYTE, REG_RSI, TARGET_POINTER_SIZE); + getEmitter()->emitIns_R_I(INS_add, EA_8BYTE, REG_RDI, TARGET_POINTER_SIZE); + copiedSlots++; + gcPtrCount--; + i++; + } + } + + gcInfo.gcMarkRegSetNpt(RBM_RSI); + gcInfo.gcMarkRegSetNpt(RBM_RDI); + } + return; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); GenTreePtr data = treeNode->gtOp.gtOp1; @@ -6508,7 +7468,9 @@ CodeGen::genPutArgStk(GenTreePtr treeNode) // Decrement SP. 
int argSize = genTypeSize(genActualType(targetType)); inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL)); +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING genStackLevel += argSize; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp? if (data->isContained()) @@ -6522,7 +7484,7 @@ CodeGen::genPutArgStk(GenTreePtr treeNode) getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0); } } -#endif // _TARGET_X86_ +#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) /***************************************************************************** * diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp index 427d778b90..b54657202a 100644 --- a/src/jit/compiler.cpp +++ b/src/jit/compiler.cpp @@ -2992,7 +2992,6 @@ void Compiler::compCompile(void * * methodCodePtr, unsigned compileFlags) { hashBv::Init(this); - VarSetOps::AssignAllowUninitRhs(this, compCurLife, VarSetOps::UninitVal()); /* The temp holding the secret stub argument is used by fgImport() when importing the intrinsic. */ @@ -4042,7 +4041,6 @@ int Compiler::compCompileHelper (CORINFO_MODULE_HANDLE clas unsigned compileFlags, CorInfoInstantiationVerification instVerInfo) { - CORINFO_METHOD_HANDLE methodHnd = info.compMethodHnd; info.compCode = methodInfo->ILCode; @@ -5027,6 +5025,125 @@ START: return result; } +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + +// GetTypeFromClassificationAndSizes: +// Returns the type of the eightbyte accounting for the classification and size of the eightbyte. +// +// args: +// classType: classification type +// size: size of the eightbyte. 
+// +var_types Compiler::GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size) +{ + var_types type = TYP_UNKNOWN; + switch (classType) + { + case SystemVClassificationTypeInteger: + if (size == 1) + { + type = TYP_BYTE; + } + else if (size <= 2) + { + type = TYP_SHORT; + } + else if (size <= 4) + { + type = TYP_INT; + } + else if (size <= 8) + { + type = TYP_LONG; + } + else + { + assert(false && "GetTypeFromClassificationAndSizes Invalid Integer classification type."); + } + break; + case SystemVClassificationTypeIntegerReference: + type = TYP_REF; + break; + case SystemVClassificationTypeSSE: + if (size <= 4) + { + type = TYP_FLOAT; + } + else if (size <= 8) + { + type = TYP_DOUBLE; + } + else + { + assert(false && "GetTypeFromClassificationAndSizes Invalid SSE classification type."); + } + break; + + default: + assert(false && "GetTypeFromClassificationAndSizes Invalid classification type."); + break; + } + + return type; +} + +// getEightByteType: +// Returns the type of the struct description and slot number of the eightbyte. +// +// args: +// structDesc: struct classification description. +// slotNum: eightbyte slot number for the struct. +// +var_types Compiler::getEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc, unsigned slotNum) +{ + var_types eightByteType = TYP_UNDEF; + unsigned len = structDesc.eightByteSizes[slotNum]; + + switch (structDesc.eightByteClassifications[slotNum]) + { + case SystemVClassificationTypeInteger: + // See typelist.h for jit type definition. + // All the types of size < 4 bytes are of jit type TYP_INT. 
+ if (structDesc.eightByteSizes[slotNum] <= 4) + { + eightByteType = TYP_INT; + } + else if (structDesc.eightByteSizes[slotNum] <= 8) + { + eightByteType = TYP_LONG; + } + else + { + assert(false && "getEightByteType Invalid Integer classification type."); + } + break; + case SystemVClassificationTypeIntegerReference: + assert(len == REGSIZE_BYTES); + eightByteType = TYP_REF; + break; + case SystemVClassificationTypeSSE: + if (structDesc.eightByteSizes[slotNum] <= 4) + { + eightByteType = TYP_FLOAT; + } + else if (structDesc.eightByteSizes[slotNum] <= 8) + { + eightByteType = TYP_DOUBLE; + } + else + { + assert(false && "getEightByteType Invalid SSE classification type."); + } + break; + default: + assert(false && "getEightByteType Invalid classification type."); + break; + } + + return eightByteType; +} +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + /*****************************************************************************/ /*****************************************************************************/ diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 520c94a462..bc851dcf1d 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -269,9 +269,12 @@ public: unsigned char lvOverlappingFields :1; // True when we have a struct with possibly overlapping fields unsigned char lvContainsHoles :1; // True when we have a promoted struct that contains holes unsigned char lvCustomLayout :1; // True when this struct has "CustomLayout" -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) unsigned char lvDontPromote:1; // Should struct promoter consider this variable for promotion? - unsigned char lvIsHfaRegArg:1; // Is this argument variable holding a HFA register argument. +#endif + +#ifdef _TARGET_ARM_ + unsigned char lvIsHfaRegArg :1; // Is this argument variable holding a HFA register argument. unsigned char lvHfaTypeIsFloat:1; // Is the HFA type float or double? 
#endif @@ -290,7 +293,7 @@ public: unsigned char lvSIMDType :1; // This is a SIMD struct unsigned char lvUsedInSIMDIntrinsic :1; // This tells lclvar is used for simd intrinsic #endif // FEATURE_SIMD - unsigned char lvRegStruct : 1; // This is a reg-sized non-field-addressed struct. + unsigned char lvRegStruct :1; // This is a reg-sized non-field-addressed struct. union { @@ -305,6 +308,26 @@ public: unsigned char lvFldOffset; unsigned char lvFldOrdinal; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + regNumber lvRegNumForSlot(unsigned slotNum) + { + if (slotNum == 0) + { + return lvArgReg; + } + else if (slotNum == 1) + { + return lvOtherArgReg; + } + else + { + assert(false && "Invalid slotNum!"); + } + + unreached(); + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + private: regNumberSmall _lvRegNum; // Used to store the register this variable is in (or, the low register of a register pair). @@ -314,7 +337,13 @@ private: #if !defined(_TARGET_64BIT_) regNumberSmall _lvOtherReg; // Used for "upper half" of long var. #endif // !defined(_TARGET_64BIT_) + regNumberSmall _lvArgReg; // The register in which this argument is passed. + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + regNumberSmall _lvOtherArgReg; // Used for the second part of the struct passed in a register. 
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + #ifndef LEGACY_BACKEND union { @@ -382,7 +411,7 @@ public: regNumber lvArgReg; regNumber GetArgReg() const -{ + { return (regNumber) _lvArgReg; } @@ -392,6 +421,22 @@ public: assert(_lvArgReg == reg); } +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + __declspec(property(get = GetOtherArgReg, put = SetOtherArgReg)) + regNumber lvOtherArgReg; + + regNumber GetOtherArgReg() const + { + return (regNumber)_lvOtherArgReg; + } + + void SetOtherArgReg(regNumber reg) + { + _lvOtherArgReg = (regNumberSmall)reg; + assert(_lvOtherArgReg == reg); + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + #ifdef FEATURE_SIMD // Is this is a SIMD struct? bool lvIsSIMDType() const @@ -1139,6 +1184,15 @@ struct FuncInfoDsc struct fgArgTabEntry { + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + fgArgTabEntry() + { + otherRegNum = REG_NA; + isStruct = false; // is this a struct arg + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + GenTreePtr node; // Initially points at the Op1 field of 'parent', but if the argument is replaced with an GT_ASG or placeholder // it will point at the actual argument in the gtCallLateArgs list. GenTreePtr parent; // Points at the GT_LIST node in the gtCallArgs for this argument @@ -1165,6 +1219,13 @@ struct fgArgTabEntry bool isBackFilled :1; // True when the argument fills a register slot skipped due to alignment requirements of previous arguments. bool isNonStandard:1; // True if it is an arg that is passed in a reg other than a standard arg reg +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + regNumber otherRegNum; // The (second) register to use when passing this argument. 
+ bool isStruct; // is this a struct arg + + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + void SetIsHfaRegArg(bool hfaRegArg) { isHfaRegArg = hfaRegArg; @@ -1196,10 +1257,10 @@ class fgArgInfo unsigned nextSlotNum; // Updatable slot count value unsigned stkLevel; // Stack depth when we make this call (for x86) - unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs) - bool argsComplete; // marker for state - bool argsSorted; // marker for state - fgArgTabEntryPtr * argTable; // variable sized array of per argument descrption: (i.e. argTable[argTableSize]) + unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs) + bool argsComplete; // marker for state + bool argsSorted; // marker for state + fgArgTabEntryPtr * argTable; // variable sized array of per argument descrption: (i.e. argTable[argTableSize]) private: @@ -1217,11 +1278,24 @@ public: unsigned numRegs, unsigned alignment); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + fgArgTabEntryPtr AddRegArg (unsigned argNum, + GenTreePtr node, + GenTreePtr parent, + regNumber regNum, + unsigned numRegs, + unsigned alignment, + const bool isStruct, + const regNumber otherRegNum = REG_NA, + const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr = nullptr); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + fgArgTabEntryPtr AddStkArg (unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, - unsigned alignment); + unsigned alignment + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct)); void RemorphReset (); fgArgTabEntryPtr RemorphRegArg (unsigned argNum, @@ -1391,7 +1465,9 @@ public: DWORD expensiveDebugCheckLevel; #endif - +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + CORINFO_CLASS_HANDLE GetStructClassHandle(GenTreePtr tree); +#endif // defined(_TARGET_ARM_) || 
defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) #ifdef _TARGET_ARM_ @@ -1403,8 +1479,6 @@ public: // floating-point registers. // - inline CORINFO_CLASS_HANDLE GetHfaClassHandle(GenTreePtr tree); - bool IsHfa(CORINFO_CLASS_HANDLE hClass); bool IsHfa(GenTreePtr tree); @@ -1417,6 +1491,14 @@ public: #endif // _TARGET_ARM_ //------------------------------------------------------------------------- + // The following is used for struct passing on System V system. + // +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + bool IsRegisterPassable(CORINFO_CLASS_HANDLE hClass); + bool IsRegisterPassable(GenTreePtr tree); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + + //------------------------------------------------------------------------- // The following is used for validating format of EH table // @@ -2450,7 +2532,7 @@ public : unsigned char fldOrdinal; var_types fldType; unsigned fldSize; - CORINFO_CLASS_HANDLE fldTypeHnd; + CORINFO_CLASS_HANDLE fldTypeHnd; }; // Info about struct to be promoted. @@ -3006,9 +3088,12 @@ private: bool impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &opcode); void impAbortInline(bool abortThisInlineOnly, bool contextDependent, const char *reason); -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) void impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr op, CORINFO_CLASS_HANDLE hClass); - GenTreePtr impAssignHfaToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass); +#endif + +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + GenTreePtr impAssignStructToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass); #endif // A free list of linked list nodes used to represent to-do stacks of basic blocks. 
@@ -3026,9 +3111,11 @@ private: bool impIsValueType (typeInfo* pTypeInfo); var_types mangleVarArgsType (var_types type); + +#if FEATURE_VARARG regNumber getCallArgIntRegister (regNumber floatReg); regNumber getCallArgFloatRegister (regNumber intReg); - +#endif // FEATURE_VARARG //--------------------------- Inlining------------------------------------- #if defined(DEBUG) || MEASURE_INLINING @@ -4080,10 +4167,9 @@ public: bool fgCastNeeded(GenTreePtr tree, var_types toType); GenTreePtr fgDoNormalizeOnStore(GenTreePtr tree); - GenTreePtr fgMakeTmpArgNode(unsigned tmpVarNum); - - /* The following check for loops that don't execute calls */ + GenTreePtr fgMakeTmpArgNode(unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters)); + // The following check for loops that don't execute calls bool fgLoopCallMarked; void fgLoopCallTest (BasicBlock *srcBB, @@ -4450,7 +4536,14 @@ private: GenTreePtr fgMorphCast (GenTreePtr tree); GenTreePtr fgUnwrapProxy (GenTreePtr objRef); GenTreeCall* fgMorphArgs (GenTreeCall* call); - void fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned argIndex, CORINFO_CLASS_HANDLE copyBlkClass); + + void fgMakeOutgoingStructArgCopy( + GenTreeCall* call, + GenTree* args, + unsigned argIndex, + CORINFO_CLASS_HANDLE copyBlkClass + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structDescPtr)); + void fgFixupStructReturn (GenTreePtr call); GenTreePtr fgMorphLocalVar (GenTreePtr tree); bool fgAddrCouldBeNull (GenTreePtr addr); @@ -4570,11 +4663,11 @@ private: void fgInsertInlineeBlocks (InlineInfo * pInlineInfo); GenTreePtr fgInlinePrependStatements(InlineInfo * inlineInfo); -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) GenTreePtr fgGetStructAsStructPtr(GenTreePtr tree); - GenTreePtr fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd); - void fgAttachHfaInlineeToAsg(GenTreePtr 
tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd); -#endif + GenTreePtr fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd); + void fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd); +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) static fgWalkPreFn fgUpdateInlineReturnExpressionPlaceHolder; #ifdef DEBUG @@ -6275,6 +6368,17 @@ public : void eeSetEHinfo(unsigned EHnumber, const CORINFO_EH_CLAUSE* clause); + // ICorStaticInfo wrapper functions + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#ifdef DEBUG + static void dumpSystemVClassificationType(SystemVClassificationType ct); +#endif // DEBUG + + void eeGetSystemVAmd64PassStructInRegisterDescriptor(/*IN*/ CORINFO_CLASS_HANDLE structHnd, + /*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + // Utility functions #if defined(DEBUG) @@ -8433,6 +8537,11 @@ public: static HelperCallProperties s_helperCallProperties; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + var_types GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size); + var_types getEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc, unsigned slotNum); + void fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument); +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) }; // end of class Compiler // Inline methods of CompAllocator. 
@@ -8466,7 +8575,6 @@ LclVarDsc::LclVarDsc(Compiler* comp) { } - /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp index 1cdc939d16..e4168b0f18 100644 --- a/src/jit/compiler.hpp +++ b/src/jit/compiler.hpp @@ -651,7 +651,10 @@ bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types type, if (type == TYP_STRUCT) { size = info.compCompHnd->getClassSize(typeClass); - +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // Account for the classification of the struct. + result = IsRegisterPassable(typeClass); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING switch(size) { case 1: @@ -664,6 +667,7 @@ bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types type, default: break; } +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING } else { @@ -2268,8 +2272,10 @@ int Compiler::lvaFrameAddress(int varNum, bool * pFPbased) if (lvaDoneFrameLayout > REGALLOC_FRAME_LAYOUT && !varDsc->lvOnFrame) { #ifdef _TARGET_AMD64_ - // On amd64, every param has a stack location. + // On amd64, every param has a stack location, except on Unix-like systems. +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING assert(varDsc->lvIsParam); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #elif defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) // For !LEGACY_BACKEND on x86, a stack parameter that is enregistered will have a stack location. assert(varDsc->lvIsParam && !varDsc->lvIsRegArg); @@ -2589,6 +2595,8 @@ var_types Compiler::mangleVarArgsType(var_types type) return type; } +// For CORECLR there is no vararg on System V systems. +#if FEATURE_VARARG inline regNumber Compiler::getCallArgIntRegister(regNumber floatReg) { #ifdef _TARGET_AMD64_ @@ -2630,10 +2638,11 @@ inline regNumber Compiler::getCallArgFloatRegister(regNumber intReg) } #else // !_TARGET_AMD64_ // How will float args be passed for RyuJIT/x86? 
- NYI("getCallArgIntRegister for RyuJIT/x86"); + NYI("getCallArgFloatRegister for RyuJIT/x86"); return REG_NA; #endif // !_TARGET_AMD64_ } +#endif // FEATURE_VARARG /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp index 90e50ed84a..4c8e2ff30e 100644 --- a/src/jit/ee_il_dll.cpp +++ b/src/jit/ee_il_dll.cpp @@ -281,6 +281,16 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_ // Everything fits into a single 'slot' size // to accommodate irregular sized structs, they are passed byref // TODO-ARM64-Bug?: structs <= 16 bytes get passed in 2 consecutive registers. +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + CORINFO_CLASS_HANDLE argClass; + CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(sig, list, &argClass)); + var_types argType = JITtype2varType(argTypeJit); + if (argType == TYP_STRUCT) + { + unsigned structSize = info.compCompHnd->getClassSize(argClass); + return structSize; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING return sizeof(size_t); #else // !_TARGET_AMD64_ && !_TARGET_ARM64_ @@ -920,6 +930,60 @@ int Compiler::eeGetJitDataOffs(CORINFO_FIELD_HANDLE field) } } + +/***************************************************************************** + * + * ICorStaticInfo wrapper functions + */ + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + +#ifdef DEBUG +void Compiler::dumpSystemVClassificationType(SystemVClassificationType ct) +{ + switch (ct) + { + case SystemVClassificationTypeUnknown: printf("UNKNOWN"); break; + case SystemVClassificationTypeStruct: printf("Struct"); break; + case SystemVClassificationTypeNoClass: printf("NoClass"); break; + case SystemVClassificationTypeMemory: printf("Memory"); break; + case SystemVClassificationTypeInteger: printf("Integer"); break; + case SystemVClassificationTypeIntegerReference: printf("IntegerReference"); break; + case SystemVClassificationTypeSSE: printf("SSE"); break; + default: 
printf("ILLEGAL"); break; + } +} +#endif // DEBUG + +void Compiler::eeGetSystemVAmd64PassStructInRegisterDescriptor(/*IN*/ CORINFO_CLASS_HANDLE structHnd, + /*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr) +{ + bool ok = info.compCompHnd->getSystemVAmd64PassStructInRegisterDescriptor(structHnd, structPassInRegDescPtr); + noway_assert(ok); + +#ifdef DEBUG + if (verbose) + { + printf("**** getSystemVAmd64PassStructInRegisterDescriptor(0x%x (%s), ...) =>\n", dspPtr(structHnd), eeGetClassName(structHnd)); + printf(" passedInRegisters = %s\n", dspBool(structPassInRegDescPtr->passedInRegisters)); + if (structPassInRegDescPtr->passedInRegisters) + { + printf(" eightByteCount = %d\n", structPassInRegDescPtr->eightByteCount); + for (unsigned int i = 0; i < structPassInRegDescPtr->eightByteCount; i++) + { + printf(" eightByte #%d -- classification: ", i); + dumpSystemVClassificationType(structPassInRegDescPtr->eightByteClassifications[i]); + printf(", byteSize: %d, byteOffset: %d\n", + structPassInRegDescPtr->eightByteSizes[i], + structPassInRegDescPtr->eightByteOffsets[i]); + } + } + } +#endif // DEBUG +} + +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + /***************************************************************************** * * Utility functions diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp index 20f8af3fa2..fa9d3597de 100644 --- a/src/jit/emit.cpp +++ b/src/jit/emit.cpp @@ -5653,8 +5653,9 @@ void emitter::emitRecordGCcall(BYTE * codePos, call->cdGCrefRegs = (regMaskSmall)emitThisGCrefRegs; call->cdByrefRegs = (regMaskSmall)emitThisByrefRegs; #if EMIT_TRACK_STACK_DEPTH +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING noway_assert(FitsIn<USHORT>(emitCurStackLvl / ((unsigned)sizeof(unsigned)))); - call->cdArgBaseOffset = (USHORT)(emitCurStackLvl / ((unsigned)sizeof(unsigned))); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #endif // Append the call descriptor to the list */ diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 
6f1c6c8fce..d6de1f2dba 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -3671,7 +3671,8 @@ void emitter::emitIns_C(instruction ins, } else if (ins == INS_pop) { - emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); + emitCurStackLvl -= emitCntStackDepth; + assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS @@ -11010,7 +11011,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** && id->idReg1() == REG_ESP) { assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL); - emitStackPop (dst, /*isCall*/false, /*callInstrSize*/0, (unsigned)(emitGetInsSC(id) / sizeof(void*))); + emitStackPop(dst, /*isCall*/false, /*callInstrSize*/0, (unsigned)(emitGetInsSC(id) / sizeof(void*))); } break; diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp index 84233d82c6..c26f221c3f 100644 --- a/src/jit/flowgraph.cpp +++ b/src/jit/flowgraph.cpp @@ -8148,17 +8148,67 @@ void Compiler::fgAddInternal() // If there is a return value, then create a temp for it. Real returns will store the value in there and // it'll be reloaded by the single return. - + // TODO-ARM-Bug: Deal with multi-register genReturnLocaled structs? + // TODO-ARM64: Does this apply for ARM64 too? +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Create a local temp to store the return if the return type is not void and the + // native return type is not a struct or the native return type is a struct that is returned + // in registers (no RetBuffArg argument.) + // If we fold all returns into a single return statement, create a temp for struct type variables as well. 
+ if (genReturnBB && ((info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT) || + (info.compRetNativeType == TYP_STRUCT && info.compRetBuffArg == BAD_VAR_NUM))) +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if (genReturnBB && (info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT)) +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { genReturnLocal = lvaGrabTemp(true DEBUGARG("Single return block return value")); - lvaTable[genReturnLocal].lvType = genActualType(info.compRetNativeType); +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + var_types retLocalType = TYP_STRUCT; + if (info.compRetNativeType == TYP_STRUCT) + { + // If the native ret type is a struct, make sure the right + // normalized type is assigned to the local variable. + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + assert(info.compMethodInfo->args.retTypeClass != nullptr); + eeGetSystemVAmd64PassStructInRegisterDescriptor(info.compMethodInfo->args.retTypeClass, &structDesc); + if (structDesc.passedInRegisters && structDesc.eightByteCount <= 1) + { + retLocalType = lvaTable[genReturnLocal].lvType = getEightByteType(structDesc, 0); + } + else + { + lvaTable[genReturnLocal].lvType = TYP_STRUCT; + } + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + lvaTable[genReturnLocal].lvType = genActualType(info.compRetNativeType); + } if (varTypeIsFloating(lvaTable[genReturnLocal].lvType)) { this->compFloatingPointUsed = true; } - + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Handle a struct return type for System V Amd64 systems. + if (info.compRetNativeType == TYP_STRUCT) + { + // Handle the normalized return type. 
+ if (retLocalType == TYP_STRUCT) + { + lvaSetStruct(genReturnLocal, info.compMethodInfo->args.retTypeClass, true); + } + else + { + lvaTable[genReturnLocal].lvVerTypeInfo = typeInfo(TI_STRUCT, info.compMethodInfo->args.retTypeClass); + } + + lvaTable[genReturnLocal].lvDontPromote = true; + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (!varTypeIsFloating(info.compRetType)) lvaTable[genReturnLocal].setPrefReg(REG_INTRET, this); #ifdef REG_FLOATRET @@ -8172,7 +8222,6 @@ void Compiler::fgAddInternal() lvaTable[genReturnLocal].lvKeepType = 1; #endif } - else { genReturnLocal = BAD_VAR_NUM; @@ -8442,7 +8491,11 @@ void Compiler::fgAddInternal() //make sure to reload the return value as part of the return (it is saved by the "real return"). if (genReturnLocal != BAD_VAR_NUM) { +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + noway_assert(info.compRetType != TYP_VOID); +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) noway_assert(info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT); +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) GenTreePtr retTemp = gtNewLclvNode(genReturnLocal, lvaTable[genReturnLocal].TypeGet()); //make sure copy prop ignores this node (make sure it always does a reload from the temp). @@ -21424,7 +21477,7 @@ void Compiler::fgInline() #endif // DEBUG } -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) /********************************************************************************* * @@ -21463,16 +21516,16 @@ GenTreePtr Compiler::fgGetStructAsStructPtr(GenTreePtr tree) /*************************************************************************************************** * child - The inlinee of the retExpr node. - * retClsHnd - The HFA class handle of the type of the inlinee. + * retClsHnd - The struct class handle of the type of the inlinee. 
* * Assign the inlinee to a tmp, if it is a call, just assign it to a lclVar, else we can * use a copyblock to do the assignment. */ -GenTreePtr Compiler::fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd) +GenTreePtr Compiler::fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd) { assert(child->gtOper != GT_RET_EXPR && child->gtOper != GT_MKREFANY); - unsigned tmpNum = lvaGrabTemp(false DEBUGARG("RetBuf for HFA inline return candidates.")); + unsigned tmpNum = lvaGrabTemp(false DEBUGARG("RetBuf for struct inline return candidates.")); lvaSetStruct(tmpNum, retClsHnd, false); GenTreePtr dst = gtNewLclvNode(tmpNum, TYP_STRUCT); @@ -21518,7 +21571,7 @@ GenTreePtr Compiler::fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HAN /*************************************************************************************************** * tree - The tree pointer that has one of its child nodes as retExpr. * child - The inlinee child. - * retClsHnd - The HFA class handle of the type of the inlinee. + * retClsHnd - The struct class handle of the type of the inlinee. * * V04 = call() assignments are okay as we codegen it. Everything else needs to be a copy block or * would need a temp. For example, a cast(ldobj) will then be, cast(v05 = ldobj, v05); But it is @@ -21526,7 +21579,7 @@ GenTreePtr Compiler::fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HAN * a lclVar/call. So it is not worthwhile to do pattern matching optimizations like addr(ldobj(op1)) * can just be op1. */ -void Compiler::fgAttachHfaInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd) +void Compiler::fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd) { // We are okay to have: // 1. 
V02 = call(); @@ -21541,13 +21594,13 @@ void Compiler::fgAttachHfaInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINF GenTreePtr dstAddr = fgGetStructAsStructPtr(tree->gtOp.gtOp1); GenTreePtr srcAddr = fgGetStructAsStructPtr((child->gtOper == GT_CALL) - ? fgAssignHfaInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call. + ? fgAssignStructInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call. : child); // Just get the address, if not a call. tree->CopyFrom(gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false), this); } -#endif // _TARGET_ARM_ +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) /***************************************************************************** * Callback to replace the inline return expression place holder (GT_RET_EXPR) @@ -21562,12 +21615,12 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( if (tree->gtOper == GT_RET_EXPR) { -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // We are going to copy the tree from the inlinee, so save the handle now. CORINFO_CLASS_HANDLE retClsHnd = (tree->TypeGet() == TYP_STRUCT) ? tree->gtRetExpr.gtRetClsHnd : NO_CLASS_HANDLE; -#endif // _TARGET_ARM_ +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) do { @@ -21605,32 +21658,36 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( } while (tree->gtOper == GT_RET_EXPR); -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if defined(_TARGET_ARM_) if (retClsHnd != NO_CLASS_HANDLE && comp->IsHfa(retClsHnd)) +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (retClsHnd != NO_CLASS_HANDLE && comp->IsRegisterPassable(retClsHnd)) +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { GenTreePtr parent = data->parent; // See assert below, we only look one level above for an asg parent. 
if (parent->gtOper == GT_ASG) { // Either lhs is a call V05 = call(); or lhs is addr, and asg becomes a copyBlk. - comp->fgAttachHfaInlineeToAsg(parent, tree, retClsHnd); + comp->fgAttachStructInlineeToAsg(parent, tree, retClsHnd); } else { // Just assign the inlinee to a variable to keep it simple. - tree->CopyFrom(comp->fgAssignHfaInlineeToVar(tree, retClsHnd), comp); + tree->CopyFrom(comp->fgAssignStructInlineeToVar(tree, retClsHnd), comp); } } -#endif // _TARGET_ARM_ +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) } -#if defined(DEBUG) && defined(_TARGET_ARM_) +#if defined(DEBUG) && (defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) // Make sure we don't have a tree like so: V05 = (, , , retExpr); // Since we only look one level above for the parent for '=' and // do not check if there is a series of COMMAs. See above. // Importer and FlowGraph will not generate such a tree, so just // leaving an assert in here. This can be fixed by looking ahead - // when we visit GT_ASG similar to fgAttachHfaInlineeToAsg. + // when we visit GT_ASG similar to fgAttachStructInlineeToAsg. 
else if (tree->gtOper == GT_ASG && tree->gtOp.gtOp2->gtOper == GT_COMMA) { @@ -21642,11 +21699,17 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( // empty } +#if defined(_TARGET_ARM_) + noway_assert(comma->gtType != TYP_STRUCT || + comma->gtOper != GT_RET_EXPR || + (!comp->IsHfa(comma->gtRetExpr.gtRetClsHnd))); +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) noway_assert(comma->gtType != TYP_STRUCT || comma->gtOper != GT_RET_EXPR || - !comp->IsHfa(comma->gtRetExpr.gtRetClsHnd)); + (!comp->IsRegisterPassable(comma->gtRetExpr.gtRetClsHnd))); +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) } -#endif // defined(DEBUG) && defined(_TARGET_ARM_) +#endif // defined(DEBUG) && (defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) return WALK_CONTINUE; } diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 284000e55b..3c06925fe4 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -224,7 +224,15 @@ void GenTree::InitNodeSize() GenTree::s_gtNodeSizes[op] = TREE_NODE_SZ_SMALL; } - /* Now set all of the appropriate entries to 'large' */ + // Now set all of the appropriate entries to 'large' + + // On ARM and System V struct returning there + // is code that does GT_ASG-tree.CopyObj call. + // CopyObj is a large node and the GT_ASG is small, which triggers an exception. 
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + GenTree::s_gtNodeSizes[GT_ASG ] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_RETURN ] = TREE_NODE_SZ_LARGE; +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) GenTree::s_gtNodeSizes[GT_CALL ] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_CAST ] = TREE_NODE_SZ_LARGE; @@ -256,6 +264,15 @@ void GenTree::InitNodeSize() GenTree::s_gtNodeSizes[GT_MOD ] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_UMOD ] = TREE_NODE_SZ_LARGE; #endif +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + GenTree::s_gtNodeSizes[GT_PUTARG_STK ] = TREE_NODE_SZ_LARGE; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // In importer for Hfa and register returned structs we rewrite GT_ASG to GT_COPYOBJ/GT_CPYBLK + // Make sure the sizes agree. + assert(GenTree::s_gtNodeSizes[GT_COPYOBJ] <= GenTree::s_gtNodeSizes[GT_ASG]); + assert(GenTree::s_gtNodeSizes[GT_COPYBLK] <= GenTree::s_gtNodeSizes[GT_ASG]); +#endif // !(defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) assert(GenTree::s_gtNodeSizes[GT_RETURN] == GenTree::s_gtNodeSizes[GT_ASG]); @@ -312,7 +329,12 @@ void GenTree::InitNodeSize() static_assert_no_msg(sizeof(GenTreeArgPlace) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeLabel) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreePhiArg) <= TREE_NODE_SZ_SMALL); +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_SMALL); +#else // FEATURE_UNIX_AMD64_STRUCT_PASSING + static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + #ifdef FEATURE_SIMD static_assert_no_msg(sizeof(GenTreeSIMD) <= TREE_NODE_SZ_SMALL); #endif // FEATURE_SIMD @@ -4366,13 +4388,21 @@ void GenTree::InsertAfterSelf(GenTree* node, GenTreeStmt* stmt /* = n // 'parent' must be non-null // // Notes: -// Must not 
be called for GT_LDOBJ (which isn't used for RyuJIT, which is the only context -// in which this method is used) +// For non System V systems with native struct passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING not defined) +// this method must not be called for GT_LDOBJ (which isn't used for RyuJIT, which is the only context +// in which this method is used). +// If FEATURE_UNIX_AMD64_STRUCT_PASSING is defined we can get here with GT_LDOBJ tree. This happens when +// a struct is passed in two registers. The GT_LDOBJ is converted to a GT_LIST with two GT_LCL_FLDs later +// in Lower/LowerXArch. +// GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent) { +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING noway_assert(parent->OperGet() != GT_LDOBJ); +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING + switch (parent->OperGet()) { default: @@ -4380,6 +4410,14 @@ GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent) if (this == parent->gtOp.gtOp1) return &(parent->gtOp.gtOp1); if (this == parent->gtOp.gtOp2) return &(parent->gtOp.gtOp2); break; + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + case GT_LDOBJ: + // Any GT_LDOBJ with a field must be lowered before this point. 
+ noway_assert(parent->AsLdObj()->gtFldTreeList == nullptr); + break; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + case GT_CMPXCHG: if (this == parent->gtCmpXchg.gtOpLocation) return &(parent->gtCmpXchg.gtOpLocation); if (this == parent->gtCmpXchg.gtOpValue) return &(parent->gtCmpXchg.gtOpValue); @@ -5027,7 +5065,7 @@ GenTreePtr Compiler::gtNewInlineCandidateReturnExpr(GenTreePtr inline GenTreePtr node = new(this, GT_RET_EXPR) GenTreeRetExpr(type); node->gtRetExpr.gtInlineCandidate = inlineCandidate; -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if (inlineCandidate->gtType == TYP_STRUCT) { if (inlineCandidate->gtOper == GT_CALL) @@ -5067,7 +5105,13 @@ GenTreeArgList* Compiler::gtNewListNode(GenTreePtr op1, GenTreeArgList* op2) GenTreeArgList* Compiler::gtNewArgList(GenTreePtr op) { - assert((op != NULL) && (op->OperGet() != GT_LIST) && (op->OperGet() != GT_LIST)); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // With structs passed in multiple args we could have the arg + // GT_LIST containing a list of LCL_FLDs + assert((op != NULL) && ((!op->IsList()) || (op->IsListOfLclFlds()))); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING + assert((op != NULL) && (op->OperGet() != GT_LIST)); +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING return new (this, GT_LIST) GenTreeArgList(op); } @@ -5079,8 +5123,15 @@ GenTreeArgList* Compiler::gtNewArgList(GenTreePtr op) GenTreeArgList* Compiler::gtNewArgList(GenTreePtr op1, GenTreePtr op2) { - assert((op1 != NULL) && (op1->OperGet() != GT_LIST) && (op1->OperGet() != GT_LIST)); - assert((op2 != NULL) && (op2->OperGet() != GT_LIST) && (op2->OperGet() != GT_LIST)); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // With structs passed in multiple args we could have the arg + // GT_LIST containing a list of LCL_FLDs + assert((op1 != NULL) && ((!op1->IsList()) || (op1->IsListOfLclFlds()))); + assert((op2 != NULL) && ((!op2->IsList()) || (op2->IsListOfLclFlds()))); +#else // 
!FEATURE_UNIX_AMD64_STRUCT_PASSING + assert((op1 != NULL) && (!op1->IsList())); + assert((op2 != NULL) && (!op2->IsList())); +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING GenTreePtr tree; @@ -5207,9 +5258,11 @@ GenTreePtr Compiler::gtNewAssignNode(GenTreePtr dst, GenTreePtr src DEB // using struct assignment. #ifdef _TARGET_ARM_ assert(isPhiDefn || type != TYP_STRUCT || IsHfa(dst) || IsHfa(src)); -#else +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // You need to use GT_COPYBLK for assigning structs // See impAssignStruct() + assert(isPhiDefn || type != TYP_STRUCT || IsRegisterPassable(dst) || IsRegisterPassable(src)); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING assert(isPhiDefn || type != TYP_STRUCT); #endif @@ -5553,7 +5606,6 @@ GenTreePtr Compiler::gtClone(GenTree * tree, bool complexOK) tree->gtField.gtFldHnd, objp, tree->gtField.gtFldOffset); - } else if (tree->gtOper == GT_ADD) { @@ -8629,6 +8681,51 @@ GenTreePtr Compiler::gtDispLinearTree(GenTreeStmt* curStmt, // get child msg if (tree->IsCall()) { + // If this is a call and the arg (listElem) is a GT_LIST (Unix LCL_FLD for passing a var in multiple registers) + // print the nodes of the nested list and continue to the next argument. + if (listElem->gtOper == GT_LIST) + { + GenTreePtr nextListNested = nullptr; + for (GenTreePtr listNested = listElem; listNested != nullptr; listNested = nextListNested) + { + GenTreePtr listElemNested; + if (listNested->gtOper == GT_LIST) + { + nextListNested = listNested->MoveNext(); + listElemNested = listNested->Current(); + } + else + { + // GT_LIST nodes (under initBlk, others?) 
can have a non-null op2 that's not a GT_LIST + nextListNested = nullptr; + listElemNested = listNested; + } + + indentStack->Push(indentInfo); + if (child == tree->gtCall.gtCallArgs) + { + gtGetArgMsg(tree, listNested, listElemNum, bufp, BufLength); + } + else + { + assert(child == tree->gtCall.gtCallLateArgs); + gtGetLateArgMsg(tree, listNested, listElemNum, bufp, BufLength); + } + nextLinearNode = gtDispLinearTree(curStmt, nextLinearNode, listElemNested, indentStack, bufp); + indentStack->Pop(); + } + + // Skip the GT_LIST nodes, as we do not print them, and the next node to print will occur + // after the list. + while (nextLinearNode->OperGet() == GT_LIST) + { + nextLinearNode = nextLinearNode->gtNext; + } + + listElemNum++; + continue; + } + if (child == tree->gtCall.gtCallArgs) { gtGetArgMsg(tree, listElem, listElemNum, bufp, BufLength); @@ -8643,6 +8740,7 @@ GenTreePtr Compiler::gtDispLinearTree(GenTreeStmt* curStmt, { sprintf_s(bufp, sizeof(buf), "List Item %d", listElemNum); } + indentStack->Push(indentInfo); nextLinearNode = gtDispLinearTree(curStmt, nextLinearNode, listElem, indentStack, bufp); indentStack->Pop(); @@ -10179,6 +10277,7 @@ LNG_ADD_CHKOVF: } } } + lval1 = ltemp; break; case GT_OR : lval1 |= lval2; break; diff --git a/src/jit/gentree.h b/src/jit/gentree.h index f6c850ea5a..1402445da0 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -1027,6 +1027,11 @@ public: return OperIsCopyBlkOp(OperGet()); } + bool OperIsPutArgStk() const + { + return gtOper == GT_PUTARG_STK; + } + bool OperIsAddrMode() const { return OperIsAddrMode(OperGet()); @@ -1125,7 +1130,7 @@ public: static int OperIsSimple(genTreeOps gtOper) { - return (OperKind(gtOper) & GTK_SMPOP ) != 0; + return (OperKind(gtOper) & GTK_SMPOP ) != 0; } static @@ -1294,7 +1299,7 @@ public: static inline bool RequiresNonNullOp2(genTreeOps oper); - + bool IsListOfLclFlds(); #endif // DEBUG inline bool IsZero(); @@ -2277,7 +2282,7 @@ struct GenTreeColon: public GenTreeOp /* gtCall -- 
method call (GT_CALL) */ typedef class fgArgInfo * fgArgInfoPtr; -struct GenTreeCall: public GenTree +struct GenTreeCall final : public GenTree { GenTreePtr gtCallObjp; // The instance argument ('this' pointer) GenTreeArgList* gtCallArgs; // The list of arguments in original evaluation order @@ -2296,6 +2301,14 @@ struct GenTreeCall: public GenTree CORINFO_SIG_INFO* callSig; // Used by tail calls and to register callsites with the EE regMaskTP gtCallRegUsedMask; // mask of registers used to pass parameters +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + + void SetRegisterReturningStructState(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDescIn) + { + structDesc.CopyFrom(structDescIn); + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #define GTF_CALL_M_EXPLICIT_TAILCALL 0x0001 // GT_CALL -- the call is "tail" prefixed and importer has performed tail call checks #define GTF_CALL_M_TAILCALL 0x0002 // GT_CALL -- the call is a tailcall @@ -2438,9 +2451,12 @@ struct GenTreeCall: public GenTree GenTreeCall(var_types type) : GenTree(GT_CALL, type) - {} + { + } #if DEBUGGABLE_GENTREE - GenTreeCall() : GenTree() {} + GenTreeCall() : GenTree() + { + } #endif }; @@ -3024,7 +3040,7 @@ struct GenTreeRetExpr: public GenTree { GenTreePtr gtInlineCandidate; -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) CORINFO_CLASS_HANDLE gtRetClsHnd; #endif @@ -3243,10 +3259,26 @@ struct GenTreePutArgStk: public GenTreeUnOp // Fast tail calls set this to true. // In future if we need to add more such bool fields consider bit fields. 
- GenTreePutArgStk(genTreeOps oper, var_types type, unsigned slotNum, bool _putInIncomingArgArea = false - DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) : - GenTreeUnOp(oper, type DEBUG_ARG(largeNode)), - gtSlotNum(slotNum), putInIncomingArgArea(_putInIncomingArgArea) + GenTreePutArgStk( + genTreeOps oper, + var_types type, + unsigned slotNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots) + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct), + bool _putInIncomingArgArea = false + DEBUG_ARG(GenTreePtr callNode = NULL) + DEBUG_ARG(bool largeNode = false)) + : + GenTreeUnOp(oper, type DEBUG_ARG(largeNode)), + gtSlotNum(slotNum), + putInIncomingArgArea(_putInIncomingArgArea) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + , gtPutArgStkKind(PutArgStkKindInvalid), + gtNumSlots(numSlots), + gtIsStruct(isStruct), + gtNumberReferenceSlots(0), + gtGcPtrs(nullptr) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { #ifdef DEBUG gtCall = callNode; @@ -3254,22 +3286,53 @@ struct GenTreePutArgStk: public GenTreeUnOp } - GenTreePutArgStk(genTreeOps oper, var_types type, GenTreePtr op1, unsigned slotNum, bool _putInIncomingArgArea = false - DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) : - GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)), - gtSlotNum(slotNum), putInIncomingArgArea(_putInIncomingArgArea) + GenTreePutArgStk( + genTreeOps oper, + var_types type, + GenTreePtr op1, + unsigned slotNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots) + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct), + bool _putInIncomingArgArea = false + DEBUG_ARG(GenTreePtr callNode = NULL) + DEBUG_ARG(bool largeNode = false)) + : + GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)), + gtSlotNum(slotNum), + putInIncomingArgArea(_putInIncomingArgArea) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + , gtPutArgStkKind(PutArgStkKindInvalid), + gtNumSlots(numSlots), + gtIsStruct(isStruct), + gtNumberReferenceSlots(0), 
+ gtGcPtrs(nullptr) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { #ifdef DEBUG gtCall = callNode; #endif } -#else // !FEATURE_FASTTAIL_CALL - - GenTreePutArgStk(genTreeOps oper, var_types type, unsigned slotNum - DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) : - GenTreeUnOp(oper, type DEBUG_ARG(largeNode)), - gtSlotNum(slotNum) +#else // !FEATURE_FASTTAILCALL + + GenTreePutArgStk( + genTreeOps oper, + var_types type, + unsigned slotNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots) + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct) + DEBUG_ARG(GenTreePtr callNode = NULL) + DEBUG_ARG(bool largeNode = false)) + : + GenTreeUnOp(oper, type DEBUG_ARG(largeNode)), + gtSlotNum(slotNum) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + , gtPutArgStkKind(PutArgStkKindInvalid), + gtNumSlots(numSlots), + gtIsStruct(isStruct), + gtNumberReferenceSlots(0), + gtGcPtrs(nullptr) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { #ifdef DEBUG gtCall = callNode; @@ -3277,10 +3340,25 @@ struct GenTreePutArgStk: public GenTreeUnOp } - GenTreePutArgStk(genTreeOps oper, var_types type, GenTreePtr op1, unsigned slotNum - DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) : - GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)), - gtSlotNum(slotNum) + GenTreePutArgStk( + genTreeOps oper, + var_types type, + GenTreePtr op1, + unsigned slotNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots) + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct) + DEBUG_ARG(GenTreePtr callNode = NULL) + DEBUG_ARG(bool largeNode = false)) + : + GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)), + gtSlotNum(slotNum) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + , gtPutArgStkKind(PutArgStkKindInvalid), + gtNumSlots(numSlots), + gtIsStruct(isStruct), + gtNumberReferenceSlots(0), + gtGcPtrs(nullptr) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { #ifdef DEBUG gtCall = callNode; @@ -3288,10 +3366,53 @@ struct GenTreePutArgStk: public 
GenTreeUnOp } #endif // FEATURE_FASTTAILCALL +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + //------------------------------------------------------------------------ + // setGcPointers: Sets the number of references and the layout of the struct object returned by the VM. + // + // Arguments: + // numPointers - Number of pointer references. + // pointers - layout of the struct (with pointers marked.) + // + // Return Value: + // None + // + // Notes: + // This data is used in the codegen for GT_PUTARG_STK to decide how to copy the struct to the stack by value. + // If no pointer references are used, block copying instructions are used. + // Otherwise the pointer reference slots are copied atomically in a way that gcinfo is emitted. + // Any non pointer references between the pointer reference slots are copied in block fashion. + // + void setGcPointers(unsigned numPointers, BYTE* pointers) + { + gtNumberReferenceSlots = numPointers; + gtGcPtrs = pointers; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + #ifdef DEBUG GenTreePtr gtCall; // the call node to which this argument belongs #endif +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // Instruction selection: during codegen time, what code sequence we will be using + // to encode this operation. + + enum PutArgStkKind : __int8 + { + PutArgStkKindInvalid, + PutArgStkKindRepInstr, + PutArgStkKindUnroll, + }; + + PutArgStkKind gtPutArgStkKind; + + unsigned gtNumSlots; // Number of slots for the argument to be passed on stack + bool gtIsStruct; // This stack arg is a struct. + unsigned gtNumberReferenceSlots; // Number of reference slots. 
+ BYTE* gtGcPtrs; // gcPointers +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + #if DEBUGGABLE_GENTREE GenTreePutArgStk() : GenTreeUnOp() {} #endif @@ -3325,6 +3446,30 @@ inline GenTreePtr GenTree::MoveNext() return gtOp.gtOp2; } +#ifdef DEBUG +inline bool GenTree::IsListOfLclFlds() + +{ + if (!IsList()) + { + return false; + } + + GenTree* gtListPtr = this; + while (gtListPtr->Current() != nullptr) + { + if (gtListPtr->Current()->OperGet() != GT_LCL_FLD) + { + return false; + } + + gtListPtr = gtListPtr->MoveNext(); + } + + return true; +} +#endif // DEBUG + inline GenTreePtr GenTree::Current() { assert(IsList()); diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index d56ca3ddda..0ee654c837 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -1152,13 +1152,22 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest, BasicBlock * block /* = NULL */ ) { - assert(src->TypeGet() == TYP_STRUCT); - + assert(src->TypeGet() == TYP_STRUCT || (src->gtOper == GT_ADDR && src->TypeGet() == TYP_BYREF)); +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // TODO-ARM-BUG: Does ARM need this? + // TODO-ARM64-BUG: Does ARM64 need this? 
+ assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || + src->gtOper == GT_IND || src->gtOper == GT_LDOBJ || + src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || + src->gtOper == GT_RET_EXPR || src->gtOper == GT_COMMA || + src->gtOper == GT_ADDR || GenTree::OperIsSIMD(src->gtOper)); +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || src->gtOper == GT_IND || src->gtOper == GT_LDOBJ || src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || src->gtOper == GT_RET_EXPR || src->gtOper == GT_COMMA || GenTree::OperIsSIMD(src->gtOper)); +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if (src->gtOper == GT_CALL) { @@ -1187,8 +1196,14 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest, fgLclFldAssign(lcl->gtLclVarCommon.gtLclNum); lcl->gtType = src->gtType; dest = lcl; -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) impMarkLclDstNotPromotable(lcl->gtLclVarCommon.gtLclNum, src, structHnd); +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Not allowed for FEATURE_CORCLR which is the only SKU available for System V OSs. + assert(!src->gtCall.IsVarargs() && "varargs not allowed for System V OSs."); + + // Make the struct non promotable. The eightbytes could contain multiple fields. + lvaTable[lcl->gtLclVarCommon.gtLclNum].lvDontPromote = true; #endif } else @@ -1207,6 +1222,7 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest, { GenTreePtr call = src->gtRetExpr.gtInlineCandidate; noway_assert(call->gtOper == GT_CALL); + if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) { // insert the return value buffer into the argument list as first byref parameter @@ -1274,7 +1290,8 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest, } else if (src->gtOper == GT_COMMA) { - assert(src->gtOp.gtOp2->gtType == TYP_STRUCT); // Second thing is the struct + // Second thing is the struct or it's address. 
+ assert(src->gtOp.gtOp2->gtType == TYP_STRUCT || src->gtOp.gtOp2->gtType == TYP_BYREF); if (pAfterStmt) { * pAfterStmt = fgInsertStmtAfter(block, * pAfterStmt, gtNewStmt(src->gtOp.gtOp1, impCurStmtOffs)); @@ -1287,6 +1304,10 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest, // evaluate the second thing using recursion return impAssignStructPtr(dest, src->gtOp.gtOp2, structHnd, curLevel, pAfterStmt, block); } + else if (src->gtOper == GT_ADDR) + { + // In case of address already in src, use it to copy the struct. + } else { src = gtNewOperNode(GT_ADDR, TYP_BYREF, src); @@ -4528,8 +4549,7 @@ GenTreePtr Compiler::impTransformThis (GenTreePtr thisPtr, GenTreePtr obj = thisPtr; assert(obj->TypeGet() == TYP_BYREF || obj->TypeGet() == TYP_I_IMPL); - obj = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, obj, pConstrainedResolvedToken->hClass - ); + obj = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, obj, pConstrainedResolvedToken->hClass); obj->gtFlags |= GTF_EXCEPT; CorInfoType jitTyp = info.compCompHnd->asCorInfoType(pConstrainedResolvedToken->hClass); @@ -5948,7 +5968,14 @@ var_types Compiler::impImportCall (OPCODE opcode, } } - /* Check for varargs */ + // Check for varargs +#if !FEATURE_VARARG + if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG || + (sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG) + { + BADCODE("Varargs not supported."); + } +#endif // !FEATURE_VARARG if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG || (sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG) @@ -6699,12 +6726,23 @@ bool Compiler::impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO * return false; } -#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) +#if defined(_TARGET_AMD64_) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + assert(!info.compIsVarArgs && "Varargs not supported in CoreCLR on Unix."); + if (IsRegisterPassable(methInfo->args.retTypeClass)) + { + return false; + } + + // The struct 
is not aligned properly or it is bigger than 16 bytes, + // or it is custom layout, or it is not passed in registers for any other reason. + return true; +#elif defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + // Check for TYP_STRUCT argument that can fit into a single register. // We don't need a return buffer if: // i) TYP_STRUCT argument that can fit into a single register and // ii) Power of two sized TYP_STRUCT. unsigned size = info.compCompHnd->getClassSize(methInfo->args.retTypeClass); - return (size > TARGET_POINTER_SIZE) || ((size & (size-1)) != 0); + return (size > TARGET_POINTER_SIZE) || ((size & (size - 1)) != 0); #elif defined(_TARGET_ARM_) // Check for non HFA: in ARM HFAs are returned in registers. if (!info.compIsVarArgs && IsHfa(methInfo->args.retTypeClass)) @@ -6717,8 +6755,6 @@ bool Compiler::impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO * // TODO-ARM64-NYI: HFA/HVA arguments. // Check for TYP_STRUCT argument that is greater than 16 bytes. return info.compCompHnd->getClassSize(methInfo->args.retTypeClass) > 16; -#elif defined(_TARGET_X86_) - return true; #else // _TARGET_* #error Unsupported or unset target architecture #endif // _TARGET_* @@ -6792,7 +6828,6 @@ GenTreePtr Compiler::impFixupStructReturn(GenTreePtr call, CORINFO_CLASS_HANDLE retClsHnd) { assert(call->gtOper == GT_CALL); - if (call->TypeGet() != TYP_STRUCT) { return call; @@ -6826,13 +6861,46 @@ GenTreePtr Compiler::impFixupStructReturn(GenTreePtr call, return call; } - return impAssignHfaToVar(call, retClsHnd); + return impAssignStructToVar(call, retClsHnd); } -#endif +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Not allowed for FEATURE_CORCLR which is the only SKU available for System V OSs. + assert(!call->gtCall.IsVarargs() && "varargs not allowed for System V OSs."); + + // The return is a struct if not normalized to a single eightbyte return type below. + call->gtCall.gtReturnType = TYP_STRUCT; + // Get the classification for the struct. 
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc); + if (structDesc.passedInRegisters) + { + call->gtCall.SetRegisterReturningStructState(structDesc); + + if (structDesc.eightByteCount <= 1) + { + call->gtCall.gtReturnType = getEightByteType(structDesc, 0); + } + else + { + if (!call->gtCall.CanTailCall() && ((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) == 0)) + { + // If we can tail call returning in registers struct or inline a method that returns + // a registers returned struct, then don't assign it to + // a variable back and forth. + return impAssignStructToVar(call, retClsHnd); + } + } + } + else + { + call->gtCall.gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG; + } + + return call; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING unsigned size = info.compCompHnd->getClassSize(retClsHnd); BYTE gcPtr = 0; - // Check for TYP_STRUCT argument that can fit into a single register // change the type on those trees. // TODO-ARM64-NYI: what about structs 9 to 16 bytes that fit in two consecutive registers? @@ -6913,7 +6981,37 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL assert(info.compRetBuffArg == BAD_VAR_NUM); #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING assert(info.compRetNativeType != TYP_STRUCT); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING + assert(!info.compIsVarArgs); // No VarArgs for CoreCLR. + if (info.compRetNativeType == TYP_STRUCT) + { + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc); + + if (structDesc.passedInRegisters) + { + if (op->gtOper == GT_LCL_VAR) + { + // This LCL_VAR is a register return value, it stays as a TYP_STRUCT + unsigned lclNum = op->gtLclVarCommon.gtLclNum; + // Make sure this struct type stays as struct so that we can return it in registers. 
+ lvaTable[lclNum].lvDontPromote = true; + + return op; + } + + if (op->gtOper == GT_CALL) + { + return op; + } + + return impAssignStructToVar(op, retClsHnd); + } + } +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING + #elif defined(_TARGET_ARM_) if (!info.compIsVarArgs && IsHfa(retClsHnd)) { @@ -6941,7 +7039,7 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL return op; } } - return impAssignHfaToVar(op, retClsHnd); + return impAssignStructToVar(op, retClsHnd); } #endif @@ -7003,7 +7101,22 @@ REDO_RETURN_NODE: } else { - assert(info.compRetNativeType == op->gtCall.gtReturnType); +#ifdef DEBUG +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (op->gtType == TYP_STRUCT) + { + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc); + assert(structDesc.eightByteCount < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + assert(getEightByteType(structDesc, 0) == op->gtCall.gtReturnType); + } + else +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING + { + assert(info.compRetNativeType == op->gtCall.gtReturnType); + } +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // DEBUG // Don't change the gtType node just yet, it will get changed later return op; } @@ -7012,8 +7125,19 @@ REDO_RETURN_NODE: { op->gtOp.gtOp2 = impFixupStructReturnType(op->gtOp.gtOp2, retClsHnd); } - - op->gtType = info.compRetNativeType; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (op->gtType == TYP_STRUCT) + { + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc); + assert(structDesc.eightByteCount < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + op->gtType = getEightByteType(structDesc, 0); + } + else +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + { + op->gtType = info.compRetNativeType; + } return op; } @@ -11412,7 +11536,6 @@ DO_LDFTN: } eeGetFieldInfo(&resolvedToken, 
(CORINFO_ACCESS_FLAGS)aflags, &fieldInfo); - // Figure out the type of the member. We always call canAccessField, so you always need this // handle CorInfoType ciType = fieldInfo.fieldType; @@ -11590,7 +11713,6 @@ DO_LDFTN: /* Create the data member node */ op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, NULL, fieldInfo.offset); - op1->gtFlags |= GTF_IND_TLS_REF; // fgMorphField will handle the transformation if (isLoadAddress) @@ -11850,7 +11972,6 @@ FIELD_DONE: /* Create the data member node */ op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, NULL, fieldInfo.offset); - op1->gtFlags |= GTF_IND_TLS_REF; // fgMorphField will handle the transformation break; @@ -12396,7 +12517,11 @@ FIELD_DONE: | | | push the BYREF to this local | |--------------------------------------------------------------------- | UNBOX_ANY | push a GT_LDOBJ of | push the STRUCT | - | | the BYREF | | + | | the BYREF | For Linux when the | + | | | struct is returned in two | + | | | registers create a temp | + | | | which address is passed to | + | | | the unbox_nullable helper. | |--------------------------------------------------------------------- */ @@ -12434,11 +12559,40 @@ FIELD_DONE: impPushOnStack(op1, tiRetVal); oper = GT_LDOBJ; goto LDOBJ; - } - + } + + assert(helper == CORINFO_HELP_UNBOX_NULLABLE && "Make sure the helper is nullable!"); +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (op1->gtType == TYP_STRUCT) + { + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + eeGetSystemVAmd64PassStructInRegisterDescriptor(resolvedToken.hClass, &structDesc); + if (structDesc.passedInRegisters && structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS) + { + // Unbox nullable helper returns a TYP_STRUCT. + // We need to spill it to a temp so than we can take the address of it. + // We need the temp so we can pass its address to the unbox_nullable jit helper function. + // This is needed for 2 register returned nullables. 
+ // The one register ones are normalized. For the bigger than 16 bytes ones there is retbuf already passed in rdi. + + unsigned tmp = lvaGrabTemp(true DEBUGARG("UNBOXing a register returnable nullable")); + lvaTable[tmp].lvDontPromote = true; + lvaSetStruct(tmp, resolvedToken.hClass, true /* unsafe value cls check */); + + op2 = gtNewLclvNode(tmp, TYP_STRUCT); + op1 = impAssignStruct(op2, op1, resolvedToken.hClass, (unsigned)CHECK_SPILL_ALL); + assert(op1->gtType == TYP_VOID); // We must be assigning the return struct to the temp. + + op2 = gtNewLclvNode(tmp, TYP_STRUCT); + op2 = gtNewOperNode(GT_ADDR, TYP_BYREF, op2); + op1 = gtNewOperNode(GT_COMMA, TYP_STRUCT, op1, op2); + } + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + assert(op1->gtType == TYP_STRUCT); tiRetVal = verMakeTypeInfo(resolvedToken.hClass); - assert(tiRetVal.IsValueClass()); + assert(tiRetVal.IsValueClass()); } impPushOnStack(op1, tiRetVal); @@ -12946,8 +13100,7 @@ LDOBJ: // LDOBJ returns a struct // and an inline argument which is the class token of the loaded obj - op1 = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, op1, resolvedToken.hClass - ); + op1 = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, op1, resolvedToken.hClass); op1->gtFlags |= GTF_EXCEPT; CorInfoType jitTyp = info.compCompHnd->asCorInfoType(resolvedToken.hClass); @@ -13231,7 +13384,7 @@ void Compiler::impLoadLoc(unsigned ilLclNum, IL_OFFSET offset) } } -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) /************************************************************************************** * * When assigning a vararg call src to a HFA lcl dest, mark that we cannot promote the @@ -13269,12 +13422,32 @@ void Compiler::impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr src, CORIN } } } +#endif -GenTreePtr Compiler::impAssignHfaToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass) +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +GenTreePtr Compiler::impAssignStructToVar(GenTreePtr op, 
CORINFO_CLASS_HANDLE hClass) { - unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for HFA structs in ARM.")); +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for register returned structs in System V")); +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for HFA structs in ARM")); +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) impAssignTempGen(tmpNum, op, hClass, (unsigned) CHECK_SPILL_NONE); - return gtNewLclvNode(tmpNum, TYP_STRUCT); + GenTreePtr ret = gtNewLclvNode(tmpNum, TYP_STRUCT); +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#ifdef DEBUG + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + eeGetSystemVAmd64PassStructInRegisterDescriptor(hClass, &structDesc); + // If single eightbyte, the return type would have been normalized and there won't be a temp var. + // This code will be called only if the struct return has not been normalized (i.e. 2 eightbytes - max allowed.) + assert(structDesc.passedInRegisters && structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); +#endif // DEBUG + // Mark the var to store the eightbytes on stack non promotable. + // The return value is based on eightbytes, so all the fields need + // to be on stack before loading the eightbyte in the corresponding return register. 
+ lvaTable[tmpNum].lvDontPromote = true; +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + return ret; } #endif @@ -13297,7 +13470,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & Verify(!verIsByRefLike(tiDeclared) || verIsSafeToReturnByRef(tiVal) , "byref return"); - + Verify(tiCompatibleWith(tiVal, tiDeclared.NormaliseForStack(), true), "type mismatch"); expectedStack=1; } @@ -13502,15 +13675,35 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & se.seTypeInfo.GetClassHandle(), (unsigned) CHECK_SPILL_ALL); } -#ifdef _TARGET_ARM_ + // TODO-ARM64-NYI: HFA + // TODO-AMD64-Unix and TODO-ARM once the ARM64 functionality is implemented the + // next ifdefs could be refactored in a single method with the ifdef inside. +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if defined(_TARGET_ARM_) if (IsHfa(retClsHnd)) { // Same as !IsHfa but just don't bother with impAssignStructPtr. +#else // !defined(_TARGET_ARM_) + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc); + if (structDesc.passedInRegisters) + { + // If single eightbyte, the return type would have been normalized and there won't be a temp var. + // This code will be called only if the struct return has not been normalized (i.e. 2 eightbytes - max allowed.) + assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + // Same as !structDesc.passedInRegisters but just don't bother with impAssignStructPtr. +#endif // !defined(_TARGET_ARM_) + if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM) { if (!impInlineInfo->retExpr) { +#if defined(_TARGET_ARM_) impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, TYP_STRUCT); +#else // !defined(_TARGET_ARM_) + // The inlinee compiler has figured out the type of the temp already. Use it here. 
+ impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, lvaTable[lvaInlineeReturnSpillTemp].lvType); +#endif // !defined(_TARGET_ARM_) } } else @@ -13519,7 +13712,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & } } else -#endif +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { assert(iciCall->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG); GenTreePtr dest = gtCloneExpr(iciCall->gtCall.gtCallArgs->gtOp.gtOp1); @@ -13575,8 +13768,9 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & } else if (info.compRetType == TYP_STRUCT) { -#ifndef _TARGET_ARM_ +#if !defined(_TARGET_ARM_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // In ARM HFA native types are maintained as structs. + // The multi register System V AMD64 return structs are also left as structs and not normalized. // TODO-ARM64-NYI: HFA noway_assert(info.compRetNativeType != TYP_STRUCT); #endif diff --git a/src/jit/jit.h b/src/jit/jit.h index 9702da3ec9..2901ffd6eb 100644 --- a/src/jit/jit.h +++ b/src/jit/jit.h @@ -220,6 +220,22 @@ #define INDEBUG_LDISASM_COMMA(x) #endif +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(x) , x +#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x) x +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(x) +#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x) +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + +#if defined(UNIX_AMD64_ABI) +#define UNIX_AMD64_ABI_ONLY_ARG(x) , x +#define UNIX_AMD64_ABI_ONLY(x) x +#else // !defined(UNIX_AMD64_ABI) +#define UNIX_AMD64_ABI_ONLY_ARG(x) +#define UNIX_AMD64_ABI_ONLY(x) +#endif // defined(UNIX_AMD64_ABI) + // To get rid of warning 4701 : local variable may be used without being initialized #define DUMMY_INIT(x) (x) @@ -605,7 +621,11 @@ unsigned int unsigned_abs(int x) inline size_t unsigned_abs(ssize_t x) { +#ifndef 
FEATURE_PAL return ((size_t) abs(x)); +#else // !FEATURE_PAL + return ((size_t) labs(x)); +#endif // !FEATURE_PAL } #endif // _TARGET_64BIT_ diff --git a/src/jit/jitgcinfo.h b/src/jit/jitgcinfo.h index 5c8d10f1b7..4063bafe15 100644 --- a/src/jit/jitgcinfo.h +++ b/src/jit/jitgcinfo.h @@ -253,7 +253,6 @@ public : #endif unsigned short cdArgCnt; - unsigned short cdArgBaseOffset; union { diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index c12f735f68..b9e89f156d 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -103,8 +103,8 @@ void Compiler::lvaInitTypeRef() /* Set compArgsCount and compLocalsCount */ info.compArgsCount = info.compMethodInfo->args.numArgs; - - /* Is there a 'this' pointer */ + + // Is there a 'this' pointer if (!info.compIsStatic) { @@ -133,6 +133,18 @@ void Compiler::lvaInitTypeRef() else #endif { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + eeGetSystemVAmd64PassStructInRegisterDescriptor(info.compMethodInfo->args.retTypeClass, &structDesc); + if (structDesc.eightByteCount > 1) + { + info.compRetNativeType = TYP_STRUCT; + } + else + { + info.compRetNativeType = getEightByteType(structDesc, 0); + } +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING unsigned size = info.compCompHnd->getClassSize(info.compMethodInfo->args.retTypeClass); // Check for TYP_STRUCT argument that can fit into a single register @@ -173,6 +185,7 @@ void Compiler::lvaInitTypeRef() assert(!"Unexpected size when returning struct by value"); break; } +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING } } @@ -191,7 +204,9 @@ void Compiler::lvaInitTypeRef() calling convention is varargs */ if (info.compIsVarArgs) + { info.compArgsCount++; + } // Is there an extra parameter used to pass instantiation info to // shared generic methods and shared generic struct instance methods? 
@@ -356,18 +371,17 @@ void Compiler::lvaInitArgs(InitVarDscInfo * varDscInfo) //---------------------------------------------------------------------- - /* We have set info.compArgsCount in compCompile() */ - + // We have set info.compArgsCount in compCompile() noway_assert(varDscInfo->varNum == info.compArgsCount); assert (varDscInfo->intRegArgNum <= MAX_REG_ARG); - + codeGen->intRegState.rsCalleeRegArgNum = varDscInfo->intRegArgNum; #if !FEATURE_STACK_FP_X87 codeGen->floatRegState.rsCalleeRegArgNum = varDscInfo->floatRegArgNum; #endif // FEATURE_STACK_FP_X87 - /* The total argument size must be aligned. */ + // The total argument size must be aligned. noway_assert((compArgSize % sizeof(void*)) == 0); #ifdef _TARGET_X86_ @@ -440,6 +454,7 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo * varDscInfo) } #endif compArgSize += TARGET_POINTER_SIZE; + varDscInfo->varNum++; varDscInfo->varDsc++; } @@ -449,7 +464,17 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo * varDscInfo) void Compiler::lvaInitRetBuffArg(InitVarDscInfo * varDscInfo) { LclVarDsc * varDsc = varDscInfo->varDsc; - const bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(info.compMethodInfo); + bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(info.compMethodInfo); + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (info.compRetNativeType == TYP_STRUCT) + { + if (IsRegisterPassable(info.compMethodInfo->args.retTypeClass)) + { + hasRetBuffArg = false; + } + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING if (hasRetBuffArg) { @@ -594,7 +619,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) // the type as a float or double. 
argType = hfaType; } - if (isRegParamType(argType)) { compArgSize += varDscInfo->alignReg(argType, cAlign) * REGSIZE_BYTES; @@ -644,19 +668,94 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) } #else // !_TARGET_ARM_ +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + if (argType == TYP_STRUCT) + { + assert(typeHnd != nullptr); + eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + if (structDesc.passedInRegisters) + { + unsigned intRegCount = 0; + unsigned floatRegCount = 0; - varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame + for (unsigned int i = 0; i < structDesc.eightByteCount; i++) + { + switch (structDesc.eightByteClassifications[i]) + { + case SystemVClassificationTypeInteger: + case SystemVClassificationTypeIntegerReference: + intRegCount++; + break; + case SystemVClassificationTypeSSE: + floatRegCount++; + break; + default: + assert(false && "Invalid eightbyte classification type."); + break; + } + } + + if (intRegCount != 0 && !varDscInfo->canEnreg(TYP_INT, intRegCount)) + { + structDesc.passedInRegisters = false; // No register to enregister the eightbytes. + } + + if (floatRegCount != 0 && !varDscInfo->canEnreg(TYP_FLOAT, floatRegCount)) + { + structDesc.passedInRegisters = false; // No register to enregister the eightbytes. + } + } + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + + // The final home for this incoming register might be our local stack frame + // For System V platforms the final home will always be on the local stack frame. 
+ varDsc->lvOnFrame = true; #endif // !_TARGET_ARM_ - if (varDscInfo->canEnreg(argType, cSlotsToEnregister)) + bool canPassArgInRegisters = false; + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (argType == TYP_STRUCT) + { + canPassArgInRegisters = structDesc.passedInRegisters; + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); + } + + if (canPassArgInRegisters) { /* Another register argument */ // Allocate the registers we need. allocRegArg() returns the first argument register number of the set. // For non-HFA structs, we still "try" to enregister the whole thing; it will just max out if splitting // to the stack happens. - unsigned firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots); + unsigned firstAllocatedRegArgNum = 0; + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + unsigned secondAllocatedRegArgNum = 0; + var_types firstEightByteType = TYP_UNDEF; + var_types secondEightByteType = TYP_UNDEF; + varDsc->lvOtherArgReg = REG_NA; + + if (argType == TYP_STRUCT) + { + if (structDesc.eightByteCount >= 1) + { + firstEightByteType = getEightByteType(structDesc, 0); + firstAllocatedRegArgNum = varDscInfo->allocRegArg(firstEightByteType, 1); + } + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots); + } #ifdef _TARGET_ARM_ if (isHfaArg) @@ -668,7 +767,31 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) #endif // _TARGET_ARM_ varDsc->lvIsRegArg = 1; - varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType); + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (argType == TYP_STRUCT) + { + varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType); + + // If there is a second eightbyte, get a register for it too and map the arg to the reg number. 
+ if (structDesc.eightByteCount >= 2) + { + secondEightByteType = getEightByteType(structDesc, 1); + secondAllocatedRegArgNum = varDscInfo->allocRegArg(secondEightByteType, 1); + } + + if (secondEightByteType != TYP_UNDEF) + { + varDsc->lvOtherArgReg = genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType); + varDsc->addPrefReg(genRegMask(varDsc->lvOtherArgReg), this); + } + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) + { + varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType); + } + varDsc->setPrefReg(varDsc->lvArgReg, this); #ifdef _TARGET_ARM_ @@ -682,52 +805,91 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) #ifdef DEBUG if (verbose) { - printf("Arg #%u passed in register ", varDscInfo->varNum); - - bool isFloat = varTypeIsFloating(argType); - unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, argType); + printf("Arg #%u passed in register(s) ", varDscInfo->varNum); + bool isFloat = false; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // In case of one eightbyte struct the type is already normalized earlier. + // The varTypeIsFloating(argType) is good for this case. 
+ if ((argType == TYP_STRUCT) && (structDesc.eightByteCount >= 1)) + { + isFloat = varTypeIsFloating(firstEightByteType); + } + else +#else // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + isFloat = varTypeIsFloating(argType); + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++) +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (argType == TYP_STRUCT) { - if (ix > 0) - printf(","); + // Print both registers, just to be clear + if (firstEightByteType == TYP_UNDEF) + { + printf("firstEightByte: <not used>"); + } + else + { + printf("firstEightByte: %s", getRegName(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType), isFloat)); + } - if (!isFloat && (regArgNum >= varDscInfo->maxIntRegArgNum)) // a struct has been split between registers and stack + if (secondEightByteType == TYP_UNDEF) { - printf(" stack slots:%d", cSlots - ix); - break; + printf(", secondEightByte: <not used>"); } + else + { + printf(", secondEightByte: %s", getRegName(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType), varTypeIsFloating(secondEightByteType))); + } + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, argType); -#ifdef _TARGET_ARM_ - if (isFloat) + for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++) { - // Print register size prefix - if (argType == TYP_DOUBLE) + if (ix > 0) + printf(","); + + if (!isFloat && (regArgNum >= varDscInfo->maxIntRegArgNum)) // a struct has been split between registers and stack + { + printf(" stack slots:%d", cSlots - ix); + break; + } + +#ifdef _TARGET_ARM_ + if (isFloat) { - // Print both registers, just to be clear - printf("%s/%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat), - getRegName(genMapRegArgNumToRegNum(regArgNum + 1, argType), isFloat)); - - // doubles take 2 slots - assert(ix + 1 < cSlots); - ++ix; - ++regArgNum; + // Print register size 
prefix + if (argType == TYP_DOUBLE) + { + // Print both registers, just to be clear + printf("%s/%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat), + getRegName(genMapRegArgNumToRegNum(regArgNum + 1, argType), isFloat)); + + // doubles take 2 slots + assert(ix + 1 < cSlots); + ++ix; + ++regArgNum; + } + else + { + printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat)); + } } else +#endif // _TARGET_ARM_ { printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat)); } } - else -#endif // _TARGET_ARM_ - { - printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat)); - } } printf("\n"); } #endif // DEBUG - } // if canEnreg() + } // end if (canPassArgInRegisters) else { #ifdef _TARGET_ARM_ @@ -739,8 +901,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) #endif } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // The arg size is returning the number of bytes of the argument. For a struct it could return a size not a multiple of + // TARGET_POINTER_SIZE. The stack allocated space should always be multiple of TARGET_POINTER_SIZE, so round it up. + compArgSize += (unsigned)roundUp(argSize, TARGET_POINTER_SIZE); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING compArgSize += argSize; - +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING if (info.compIsVarArgs) { #if defined(_TARGET_X86_) @@ -807,6 +974,7 @@ void Compiler::lvaInitGenericsCtxt(InitVarDscInfo * varDscInfo) varDsc->lvArgReg = genMapRegArgNumToRegNum(varDscInfo->regArgNum(TYP_INT), varDsc->TypeGet()); varDsc->setPrefReg(varDsc->lvArgReg, this); varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame + varDscInfo->intRegArgNum++; #ifdef DEBUG @@ -1180,11 +1348,6 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd, lvaStructPromotionInfo * StructPromotionInfo, bool sortFields) { -#ifdef UNIX_AMD64_ABI - // TODO-Amd64-Unix: For now don't promote structs on Linux. 
- // This should be brought online with the full SystemVStruct passing work. - return; -#endif // UNIX_AMD64_ABI assert(eeIsValueClass(typeHnd)); if (typeHnd != StructPromotionInfo->typeHnd) @@ -2844,14 +3007,21 @@ void Compiler::lvaMarkLclRefs(GenTreePtr tree) } #endif // ASSERTION_PROP + bool allowStructs = false; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // On System V the type of the var could be a TYP_STRUCT. + allowStructs = varDsc->lvType == TYP_STRUCT; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + /* Variables must be used as the same type throughout the method */ - noway_assert(tiVerificationNeeded || - varDsc->lvType == TYP_UNDEF || tree->gtType == TYP_UNKNOWN || - genActualType(varDsc->TypeGet()) == genActualType(tree->gtType) || - (tree->gtType == TYP_BYREF && varDsc->TypeGet() == TYP_I_IMPL) || - (tree->gtType == TYP_I_IMPL && varDsc->TypeGet() == TYP_BYREF) || - (tree->gtFlags & GTF_VAR_CAST) || - varTypeIsFloating(varDsc->TypeGet()) && varTypeIsFloating(tree->gtType)); + noway_assert(tiVerificationNeeded || + varDsc->lvType == TYP_UNDEF || tree->gtType == TYP_UNKNOWN || + allowStructs || + genActualType(varDsc->TypeGet()) == genActualType(tree->gtType) || + (tree->gtType == TYP_BYREF && varDsc->TypeGet() == TYP_I_IMPL) || + (tree->gtType == TYP_I_IMPL && varDsc->TypeGet() == TYP_BYREF) || + (tree->gtFlags & GTF_VAR_CAST) || + varTypeIsFloating(varDsc->TypeGet()) && varTypeIsFloating(tree->gtType)); /* Remember the type of the reference */ @@ -3690,7 +3860,6 @@ void Compiler::lvaFixVirtualFrameOffsets() delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); } #endif //_TARGET_AMD64_ - unsigned lclNum; LclVarDsc * varDsc; for (lclNum = 0, varDsc = lvaTable; @@ -3735,6 +3904,7 @@ void Compiler::lvaFixVirtualFrameOffsets() if (doAssignStkOffs) { varDsc->lvStkOffs += delta; + #if DOUBLE_ALIGN if (genDoubleAlign() && !codeGen->isFramePointerUsed()) { @@ -3886,11 +4056,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() { 
noway_assert(lclNum == info.compThisArg); #ifndef _TARGET_X86_ -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs, &callerArgOffset); -#else // !UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs); -#endif // !UNIX_AMD64_ABI + argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); #endif // _TARGET_X86_ lclNum++; } @@ -3902,11 +4068,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() noway_assert(lclNum == info.compRetBuffArg); noway_assert(lvaTable[lclNum].lvIsRegArg); #ifndef _TARGET_X86_ -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs, &callerArgOffset); -#else // !UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs); -#endif // !UNIX_AMD64_ABI + argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); #endif // _TARGET_X86_ lclNum++; } @@ -3917,20 +4079,12 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) { noway_assert(lclNum == (unsigned)info.compTypeCtxtArg); -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset); -#else // UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs); -#endif // UNIX_AMD64_ABI + argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); } if (info.compIsVarArgs) { -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset); -#else // !UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs); -#endif // !UNIX_AMD64_ABI + argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs 
UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); } #endif // USER_ARGS_COME_LAST @@ -3976,18 +4130,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() if (lvaIsPreSpilled(preSpillLclNum, preSpillMask)) { unsigned argSize = eeGetArgSize(argLst, &info.compMethodInfo->args); -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg( - preSpillLclNum, - argSize, - argOffs, - &callerArgOffset); -#else // !UNIX_AMD64_ABI argOffs = lvaAssignVirtualFrameOffsetToArg( preSpillLclNum, argSize, argOffs); -#endif // !UNIX_AMD64_ABI argLcls++; // Early out if we can. If size is 8 and base reg is 2, then the mask is 0x1100 @@ -4008,18 +4154,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() { if (!lvaIsPreSpilled(stkLclNum, preSpillMask)) { -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg( - stkLclNum, - eeGetArgSize(argLst, &info.compMethodInfo->args), - argOffs, - &callerArgOffset); -#else // !UNIX_AMD64_ABI argOffs = lvaAssignVirtualFrameOffsetToArg( stkLclNum, eeGetArgSize(argLst, &info.compMethodInfo->args), argOffs); -#endif // !UNIX_AMD64_ABI argLcls++; } argLst = info.compCompHnd->getArgNext(argLst); @@ -4029,16 +4167,18 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() #else // !_TARGET_ARM_ for (unsigned i = 0; i < argSigLen; i++) { -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, - eeGetArgSize(argLst, &info.compMethodInfo->args), - argOffs, - &callerArgOffset); -#else // !UNIX_AMD64_ABI + unsigned argumentSize = eeGetArgSize(argLst, &info.compMethodInfo->args); + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // On the stack frame the homed arg always takes a full number of slots + // for proper stack alignment. Make sure the real struct size is properly rounded up. 
+ argumentSize = (unsigned)roundUp(argumentSize, TARGET_POINTER_SIZE); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, - eeGetArgSize(argLst, &info.compMethodInfo->args), - argOffs); -#endif // UNIX_AMD64_ABI + argumentSize, + argOffs + UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); argLst = info.compCompHnd->getArgNext(argLst); } #endif // !_TARGET_ARM_ @@ -4049,26 +4189,19 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) { noway_assert(lclNum == (unsigned)info.compTypeCtxtArg); -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset); -#else // !UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs); -#endif // !UNIX_AMD64_ABI + argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); } if (info.compIsVarArgs) { -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset); -#else // !UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs); -#endif // !UNIX_AMD64_ABI + argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); } #endif // USER_ARGS_COME_LAST } +#ifdef UNIX_AMD64_ABI // // lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an // individual argument, and return the offset for the next argument. @@ -4076,12 +4209,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() // (if any - the RA might decide to spill(home on the stack) register passed arguments, if rarely used.) // The final offset is calculated in lvaFixVirtualFrameOffsets method. It accounts for FP existance, // ret address slot, stack frame padding, alloca instructions, etc. 
+// Note: This is the implementation for UNIX_AMD64 System V platforms. // -#ifdef UNIX_AMD64_ABI -int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs, int * callerArgOffset) -#else // !UNIX_AMD64_ABI -int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs) -#endif // !UNIX_AMD64_ABI +int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs UNIX_AMD64_ABI_ONLY_ARG(int * callerArgOffset)) { noway_assert(lclNum < info.compArgsCount); noway_assert(argSize); @@ -4114,30 +4244,131 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize if (varDsc->lvIsRegArg) { - /* Argument is passed in a register, don't count it - * when updating the current offset on the stack */ - -#ifndef _TARGET_ARM_ - noway_assert(argSize == sizeof(void *)); -#endif + // Argument is passed in a register, don't count it + // when updating the current offset on the stack. -#if defined(_TARGET_X86_) - argOffs += sizeof(void *); -#elif defined(_TARGET_AMD64_) -#ifdef UNIX_AMD64_ABI if (varDsc->lvOnFrame) -#endif { // The offset for args needs to be set only for the stack homed arguments for System V. varDsc->lvStkOffs = argOffs; - argOffs += sizeof(void *); } -#ifdef UNIX_AMD64_ABI - else + else { varDsc->lvStkOffs = 0; } + } + else + { + // For Windows AMD64 there are 4 slots for the register passed arguments on the top of the caller's stack. This is where they are always homed. + // So, they can be accessed with positive offset. + // On System V platforms, if the RA decides to home a register passed arg on the stack, + // it creates a stack location on the callee stack (like any other local var.) In such a case, the register passed, stack homed arguments + // are accessed using negative offsets and the stack passed arguments are accessed using positive offset (from the caller's stack.) 
+ // For System V platforms if there is no frame pointer the caller stack parameter offset should include the callee allocated space. + // If frame register is used, the callee allocated space should not be included for accessing the caller stack parameters. + // The last two requirements are met in lvaFixVirtualFrameOffsets method, which fixes the offsets, based on frame pointer existence, + // existence of alloca instructions, ret address pushed, ets. + + varDsc->lvStkOffs = *callerArgOffset; + // Structs passed on stack could be of size less than TARGET_POINTER_SIZE. + // Make sure they get at least TARGET_POINTER_SIZE on the stack - this is required for alignment. + if (varDsc->lvType == TYP_STRUCT) + { + *callerArgOffset += (int)roundUp(argSize, TARGET_POINTER_SIZE); + } + else + { + *callerArgOffset += TARGET_POINTER_SIZE; + } + } + + // For struct promoted parameters we need to set the offsets for both LclVars. + // + // For a dependent promoted struct we also assign the struct fields stack offset + if (varDsc->lvPromotedStruct()) + { + lvaPromotionType promotionType = lvaGetPromotionType(varDsc); + + if (promotionType == PROMOTION_TYPE_DEPENDENT) + { + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + + assert(fieldVarNum == varDsc->lvFieldLclStart); + lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs; + } + } + // For an independent promoted struct field we also assign the parent struct stack offset + else if (varDsc->lvIsStructField) + { + noway_assert(varDsc->lvParentLcl < lvaCount); + lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs; + } + + if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg) + argOffs += argSize; + + return argOffs; +} + +#else // !UNIX_AMD64_ABI + +// +// lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an +// individual argument, and return the offset for the next argument. 
+// Note: This method only calculates the initial offset of the stack passed/spilled arguments +// (if any - the RA might decide to spill(home on the stack) register passed arguments, if rarely used.) +// The final offset is calculated in lvaFixVirtualFrameOffsets method. It accounts for FP existance, +// ret address slot, stack frame padding, alloca instructions, etc. +// Note: This implementation for all the platforms but UNIX_AMD64 OSs (System V 64 bit.) +int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs UNIX_AMD64_ABI_ONLY_ARG(int * callerArgOffset)) +{ + noway_assert(lclNum < info.compArgsCount); + noway_assert(argSize); + + if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R) + argOffs -= argSize; + + unsigned fieldVarNum = BAD_VAR_NUM; + + noway_assert(lclNum < lvaCount); + LclVarDsc * varDsc = lvaTable + lclNum; + + if (varDsc->lvPromotedStruct()) + { + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + fieldVarNum = varDsc->lvFieldLclStart; + + lvaPromotionType promotionType = lvaGetPromotionType(varDsc); + + if (promotionType == PROMOTION_TYPE_INDEPENDENT) + { + lclNum = fieldVarNum; + noway_assert(lclNum < lvaCount); + varDsc = lvaTable + lclNum; + assert(varDsc->lvIsStructField); + } + } + + noway_assert(varDsc->lvIsParam); + + if (varDsc->lvIsRegArg) + { + /* Argument is passed in a register, don't count it + * when updating the current offset on the stack */ + +#ifndef _TARGET_ARM_ +#if DEBUG + noway_assert(argSize == sizeof(void *)); +#endif // DEBUG #endif + +#if defined(_TARGET_X86_) + argOffs += sizeof(void *); +#elif defined(_TARGET_AMD64_) + // The offset for args needs to be set only for the stack homed arguments for System V. + varDsc->lvStkOffs = argOffs; + // Register arguments also take stack space. + argOffs += sizeof(void *); #elif defined(_TARGET_ARM64_) // Register arguments don't take stack space. 
#elif defined(_TARGET_ARM_) @@ -4181,32 +4412,32 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize case TYP_DOUBLE: case TYP_LONG: + { + // + // Let's assign offsets to arg1, a double in r2. argOffs has to be 4 not 8. + // + // ------- CALLER SP ------- + // r3 + // r2 double -- argOffs = 4, but it doesn't need to be skipped, because there is no skipping. + // r1 VACookie -- argOffs = 0 + // ------------------------- + // + // Consider argOffs as if it accounts for number of prespilled registers before the current register. + // In the above example, for r2, it is r1 that is prespilled, but since r1 is accounted for by argOffs + // being 4, there should have been no skipping. Instead, if we didn't assign r1 to any variable, then + // argOffs would still be 0 which implies it is not accounting for r1, equivalently r1 is skipped. + // + // If prevRegsSize is unaccounted for by a corresponding argOffs, we must have skipped a register. + int prevRegsSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegArg & (regMask - 1)) * TARGET_POINTER_SIZE; + if (argOffs < prevRegsSize) { - // - // Let's assign offsets to arg1, a double in r2. argOffs has to be 4 not 8. - // - // ------- CALLER SP ------- - // r3 - // r2 double -- argOffs = 4, but it doesn't need to be skipped, because there is no skipping. - // r1 VACookie -- argOffs = 0 - // ------------------------- - // - // Consider argOffs as if it accounts for number of prespilled registers before the current register. - // In the above example, for r2, it is r1 that is prespilled, but since r1 is accounted for by argOffs - // being 4, there should have been no skipping. Instead, if we didn't assign r1 to any variable, then - // argOffs would still be 0 which implies it is not accounting for r1, equivalently r1 is skipped. - // - // If prevRegsSize is unaccounted for by a corresponding argOffs, we must have skipped a register. 
- int prevRegsSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegArg & (regMask - 1)) * TARGET_POINTER_SIZE; - if (argOffs < prevRegsSize) - { - // We must align up the argOffset to a multiple of 8 to account for skipped registers. - argOffs = roundUp(argOffs, 2*TARGET_POINTER_SIZE); - } - // We should've skipped only a single register. - assert(argOffs == prevRegsSize); + // We must align up the argOffset to a multiple of 8 to account for skipped registers. + argOffs = roundUp(argOffs, 2 * TARGET_POINTER_SIZE); } - break; + // We should've skipped only a single register. + assert(argOffs == prevRegsSize); + } + break; default: // No alignment of argOffs required @@ -4292,16 +4523,16 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize if (!compIsProfilerHookNeeded()) #endif { - bool cond = (info.compIsVarArgs && - // Does cur stk arg require double alignment? - ((varDsc->lvType == TYP_STRUCT && varDsc->lvStructDoubleAlign) || - (varDsc->lvType == TYP_DOUBLE) || - (varDsc->lvType == TYP_LONG)) - ) || - // Did first reg arg require alignment? - (codeGen->regSet.rsMaskPreSpillAlign & genRegMask(REG_ARG_LAST)); - - noway_assert(cond); + bool cond = (info.compIsVarArgs && + // Does cur stk arg require double alignment? + ((varDsc->lvType == TYP_STRUCT && varDsc->lvStructDoubleAlign) || + (varDsc->lvType == TYP_DOUBLE) || + (varDsc->lvType == TYP_LONG)) + ) || + // Did first reg arg require alignment? 
+ (codeGen->regSet.rsMaskPreSpillAlign & genRegMask(REG_ARG_LAST)); + + noway_assert(cond); noway_assert(sizeofPreSpillRegArgs <= argOffs + TARGET_POINTER_SIZE); // at most one register of alignment } argOffs = sizeofPreSpillRegArgs; @@ -4321,7 +4552,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize case TYP_DOUBLE: case TYP_LONG: // We must align up the argOffset to a multiple of 8 - argOffs = roundUp(argOffsWithoutPreSpillRegArgs, 2*TARGET_POINTER_SIZE) + sizeofPreSpillRegArgs; + argOffs = roundUp(argOffsWithoutPreSpillRegArgs, 2 * TARGET_POINTER_SIZE) + sizeofPreSpillRegArgs; break; default: @@ -4330,21 +4561,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize } #endif // _TARGET_ARM_ -#ifdef UNIX_AMD64_ABI - // For Windows there are 4 slots for the register passed arguments on the top of the caller's stack. This is where they are always homed. - // So, they can be accessed with positive offset. - // On System V platforms, if the RA decides to home a register passed arg on the stack, - // it creates a stack location on the callee stack (like any other local var.) In such a case, the register passed, stack homed arguments - // are accessed using negative offsets and the stack passed arguments are accessed using positive offset (from the caller's stack.) - // For System V platforms if there is no frame pointer the caller stack parameter offset should include the callee allocated space. - // If frame register is used, the callee allocated space should not be included for accessing the caller stack parameters. - // The last two requirements are met in lvaFixVirtualFrameOffsets method, which fixes the offsets, based on frame pointer existence, - // existence of alloca instructions, ret address pushed, ets. 
- varDsc->lvStkOffs = *callerArgOffset; - *callerArgOffset += TARGET_POINTER_SIZE; -#else // !UNIX_AMD64_ABI varDsc->lvStkOffs = argOffs; -#endif // !UNIX_AMD64_ABI } // For struct promoted parameters we need to set the offsets for both LclVars. @@ -4360,31 +4577,31 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize } else #endif // !defined(_TARGET_64BIT_) - if (varDsc->lvPromotedStruct()) - { - lvaPromotionType promotionType = lvaGetPromotionType(varDsc); - - if (promotionType == PROMOTION_TYPE_DEPENDENT) + if (varDsc->lvPromotedStruct()) { - noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + lvaPromotionType promotionType = lvaGetPromotionType(varDsc); - assert(fieldVarNum == varDsc->lvFieldLclStart); - lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs; + if (promotionType == PROMOTION_TYPE_DEPENDENT) + { + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + + assert(fieldVarNum == varDsc->lvFieldLclStart); + lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs; + } } - } // For an independent promoted struct field we also assign the parent struct stack offset - else if (varDsc->lvIsStructField) - { - noway_assert(varDsc->lvParentLcl < lvaCount); - lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs; - } + else if (varDsc->lvIsStructField) + { + noway_assert(varDsc->lvParentLcl < lvaCount); + lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs; + } if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg) argOffs += argSize; return argOffs; } - +#endif // !UNIX_AMD64_ABI /***************************************************************************** * lvaAssignVirtualFrameOffsetsToLocals() : Assign virtual stack offsets to @@ -5261,8 +5478,18 @@ void Compiler::lvaAssignFrameOffsetsToPromotedStructs() { // For promoted struct fields that are params, we will // assign their offsets in lvaAssignVirtualFrameOffsetToArg(). 
+ // This is not true for the System V systems since there is no + // outgoing args space. Assign the dependently promoted fields properly. // - if (varDsc->lvIsStructField && !varDsc->lvIsParam) + if (varDsc->lvIsStructField +#ifndef UNIX_AMD64_ABI + // For System V platforms there is no outgoing args space. + // A register passed struct arg is homed on the stack in a separate local var. + // The offset of these structs is already calculated in lvaAssignVirtualFrameOffsetToArg methos. + // Make sure the code below is not executed for these structs and the offset is not changed. + && !varDsc->lvIsParam +#endif // UNIX_AMD64_ABI + ) { LclVarDsc * parentvarDsc = &lvaTable[varDsc->lvParentLcl]; lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc); diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index bb69d103cf..5882ecfa71 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -1001,9 +1001,39 @@ void Lowering::SpliceInUnary(GenTreePtr parent, GenTreePtr* ppChild, GenTreePtr oldChild->InsertAfterSelf(newNode); } +//------------------------------------------------------------------------ +// NewPutArg: rewrites the tree to put an arg in a register or on the stack. +// +// Arguments: +// call - the call whose arg is being rewritten. +// arg - the arg being rewritten. +// fp - the ArgTabEntry for the argument. +// type - the type of the argument. +// +// Return Value: +// The new tree that was created to put the arg in the right place +// or the incoming arg if the arg tree was not rewritten. +// +// Assumptions: +// call, arg, and fp must be non-null. +// +// Notes: +// For System V systems with native struct passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined) +// this method allocates a single GT_PUTARG_REG for 1 eightbyte structs and a GT_LIST of two GT_PUTARG_REGs +// for two eightbyte structs. +// +// For STK passed structs the method generates GT_PUTARG_STK tree. For System V systems with native struct passing +// (i.e. 
FEATURE_UNIX_AMD64_STRUCT_PASSING defined) this method also sets the GP pointers count and the pointers +// layout object, so the codegen of the GT_PUTARG_STK could use this for optimizing copying to the stack by value. +// (using block copy primitives for non GC pointers and a single TARGET_POINTER_SIZE copy with recording GC info.) +// GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryPtr fp, var_types type) { - GenTreePtr putArg; + assert(call != nullptr); + assert(arg != nullptr); + assert(fp != nullptr); + + GenTreePtr putArg = nullptr; bool updateArgTable = true; #if !defined(_TARGET_64BIT_) @@ -1015,7 +1045,22 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP type = TYP_INT; } #endif // !defined(_TARGET_64BIT_) - if (fp->regNum != REG_STK) + + bool isOnStack = true; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (type == TYP_STRUCT) + { + isOnStack = !fp->structDesc.passedInRegisters; + } + else + { + isOnStack = fp->regNum == REG_STK; + } +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING + isOnStack = fp->regNum == REG_STK; +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING + + if (!isOnStack) { #ifdef FEATURE_SIMD // We can have SIMD types that are handled as TYP_DOUBLE, but which need to be @@ -1025,24 +1070,182 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP type = TYP_LONG; } #endif //FEATURE_SIMD +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (fp->isStruct) + { + // The following code makes sure a register passed struct arg is moved to + // the register before the call is made. + // There are two cases (comments added in the code below.) + // 1. The struct is of size one eightbyte: + // In this case a new tree is created that is GT_PUTARG_REG + // with a op1 the original argument. + // 2. The struct is contained in 2 eightbytes: + // in this case the arg comes as a GT_LIST of two GT_LCL_FLDs - the two eightbytes of the struct. 
+ // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_LIST + // and splices it in the list with the corresponding original GT_LCL_FLD tree as op1. + + assert(fp->structDesc.eightByteCount != 0); + + if (fp->structDesc.eightByteCount == 1) + { + // Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree. + // + // Here the IR for this operation: + // lowering call : + // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0 + // N003(6, 5)[000052] * --XG------ - / --* indir int + // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0 + // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int + // N009(3, 4)[000054] ------ - N----arg0 in rdi + --* lclFld int V02 tmp0[+0](last use) + // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1 + // + // args : + // lowering arg : (13, 11)[000070] -- - XG-- - R-- - *storeIndir int + // + // late : + // lowering arg : N009(3, 4)[000054] ------ - N---- * lclFld int V02 tmp0[+0](last use) + // new node is : (3, 4)[000071] ------------ * putarg_reg int RV + // + // after : + // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0 + // N003(6, 5)[000052] * --XG------ - / --* indir int + // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0 + // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int + // N009(3, 4)[000054] ------ - N---- | / --* lclFld int V02 tmp0[+0](last use) + // (3, 4)[000071] ------------arg0 in rdi + --* putarg_reg int RV + // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1 + // + + putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg); + } + else if (fp->structDesc.eightByteCount == 2) + { + // Case 2 above: Convert the LCL_FLDs to PUTARG_REG + // + // lowering call : + // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1 + // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1 + // N006(1, 1)[000058] ------------ + --* const int 
16 + // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void + // N009(3, 4)[000061] ------ - N----arg0 in rdi + --* lclFld long V03 tmp1[+0] + // N010(3, 4)[000063] ------------arg0 in rsi + --* lclFld long V03 tmp1[+8](last use) + // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2 + // + // args : + // lowering arg : N007(12, 12)[000059] - A--G---- - L - *copyBlk void + // + // late : + // lowering arg : N012(11, 13)[000065] ------------ * <list> struct + // + // after : + // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1 + // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1 + // N006(1, 1)[000058] ------------ + --* const int 16 + // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void + // N009(3, 4)[000061] ------ - N---- | / --* lclFld long V03 tmp1[+0] + // (3, 4)[000072] ------------arg0 in rdi + --* putarg_reg long + // N010(3, 4)[000063] ------------ | / --* lclFld long V03 tmp1[+8](last use) + // (3, 4)[000073] ------------arg0 in rsi + --* putarg_reg long + // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2 + // + + assert(arg->OperGet() == GT_LIST); + GenTreeArgList* argListPtr = arg->AsArgList(); + + for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++) + { + // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD. + GenTreePtr newOper = comp->gtNewOperNode( + GT_PUTARG_REG, + comp->GetTypeFromClassificationAndSizes(fp->structDesc.eightByteClassifications[ctr], fp->structDesc.eightByteSizes[ctr]), + argListPtr->gtOp.gtOp1); + + // CopyCosts + newOper->CopyCosts(argListPtr->gtOp.gtOp1); + + // Splice in the new GT_PUTARG_REG node in the GT_LIST + SpliceInUnary(argListPtr, &argListPtr->gtOp.gtOp1, newOper); + } - putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg); + // Just return arg. The GT_LIST is not replaced. + // Nothing more to do. 
+ return arg; + } + else + { + assert(false && "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes for the CLR. + + } + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg); + } } else { // Mark this one as tail call arg if it is a fast tail call. // This provides the info to put this argument in in-coming arg area slot // instead of in out-going arg area slot. + + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(assert(fp->isStruct == (type == TYP_STRUCT))); // Make sure state is correct + #if FEATURE_FASTTAILCALL - putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, type, arg, fp->slotNum, call->IsFastTailCall() DEBUG_ARG(call)); + putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, + type, + arg, + fp->slotNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->numSlots) + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->isStruct), + call->IsFastTailCall() + DEBUG_ARG(call)); #else - putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, type, arg, fp->slotNum DEBUG_ARG(call)); + putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, + type, + arg, + fp->slotNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->numSlots) + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->isStruct) + DEBUG_ARG(call)); #endif + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // If the ArgTabEntry indicates that this arg is a struct + // get and store the number of slots that are references. + // This is later used in the codegen for PUT_ARG_STK implementation + // for struct to decide whether and how many single eight-byte copies + // to be done (only for reference slots), so gcinfo is emitted. + // For non-reference slots faster/smaller size instructions are used - + // pair copying using XMM registers or rep mov instructions. 
+ if (fp->isStruct) + { + assert(arg->OperGet() == GT_LDOBJ); + + BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[fp->numSlots]; + + unsigned numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtLdObj.gtClass, gcLayout); + + putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout); + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING } + putArg->CopyCosts(arg); if (arg->InReg()) + { putArg->SetInReg(); + } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + else if (fp->isStruct) + { + if (fp->structDesc.passedInRegisters) + { + putArg->SetInReg(); + } + } +#endif JITDUMP("new node is : "); DISPNODE(putArg); @@ -1076,10 +1279,14 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg) // assignments/stores at this level are not really placing an arg // they are setting up temporary locals that will later be placed into // outgoing regs or stack - if (!arg->OperIsAssignment() && + if ( + !arg->OperIsAssignment() && !arg->OperIsStore() && !arg->IsArgPlaceHolderNode() && - !arg->IsNothingNode() && + !arg->IsNothingNode() && +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + !arg->OperIsPutArgStk() && +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING !arg->OperIsCopyBlkOp()) // these are de facto placeholders (apparently) { fgArgTabEntryPtr fp = comp->gtArgEntryByNode(call, arg); @@ -1153,7 +1360,15 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg) #endif // !defined(_TARGET_64BIT_) { putArg = NewPutArg(call, arg, fp, type); - SpliceInUnary(call, ppArg, putArg); + + // In the case of register passable struct (in one or two registers) + // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_LIST with two GT_PUTARG_REGs.) + // If an extra node is returned, splice it in the right place in the tree. + if (arg != putArg) + { + // putArg and arg are equals if arg is GT_LIST (a list of multiple LCL_FLDs to be passed in registers.) 
+ SpliceInUnary(call, ppArg, putArg); + } } } } diff --git a/src/jit/lower.h b/src/jit/lower.h index ae1f73e5b8..6754b7b75d 100644 --- a/src/jit/lower.h +++ b/src/jit/lower.h @@ -134,6 +134,10 @@ private: void TreeNodeInfoInitSIMD(GenTree* tree, LinearScan* lsra); #endif // FEATURE_SIMD +#if defined(_TARGET_XARCH_) + void TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info, unsigned kind); +#endif // defined(_TARGET_XARCH_) + void SpliceInUnary(GenTreePtr parent, GenTreePtr* ppChild, GenTreePtr newNode); void DumpNodeInfoMap(); diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 08c340cbee..a7b4600df9 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -103,7 +103,38 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) } } - +// TreeNodeInfoInitSimple: +// Sets the srcCount and dstCount for all the trees without special handling based on the tree node type. +// +// args: +// tree: The tree on which TreeNodeInfo's srcCount and dstCount are set. +// info: The TreeNodeInfo on which to set the srcCount and dstCount. +// This is the TreeNodeInfo corresponding to the tree parameter. +// kind: The kind flags of the tree node. +// +void Lowering::TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info, unsigned kind) +{ + info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1; + if (kind & (GTK_CONST | GTK_LEAF)) + { + info->srcCount = 0; + } + else if (kind & (GTK_SMPOP)) + { + if (tree->gtGetOp2() != nullptr) + { + info->srcCount = 2; + } + else + { + info->srcCount = 1; + } + } + else + { + unreached(); + } +} /** * Takes care of annotating the register requirements @@ -138,26 +169,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) GenTree* op2; default: - info->dstCount = (tree->TypeGet() == TYP_VOID) ? 
0 : 1; - if (kind & (GTK_CONST|GTK_LEAF)) - { - info->srcCount = 0; - } - else if (kind & (GTK_SMPOP)) - { - if (tree->gtGetOp2() != nullptr) - { - info->srcCount = 2; - } - else - { - info->srcCount = 1; - } - } - else - { - unreached(); - } + TreeNodeInfoInitSimple(tree, info, kind); break; case GT_LCL_FLD: @@ -275,6 +287,24 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) else #endif // !defined(_TARGET_64BIT_) { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (tree->TypeGet() == TYP_STRUCT && + tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) + { +#ifdef DEBUG + GenTreeLclVarCommon* lclVarPtr = tree->gtOp.gtOp1->AsLclVarCommon(); + LclVarDsc* varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]); + assert(varDsc->lvDontPromote); +#endif // DEBUG + // If this is a two eightbyte return, make the var + // contained by the return expression. The code gen will put + // the values in the right registers for return. + info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1; + info->dstCount = 0; + MakeSrcContained(tree, tree->gtOp.gtOp1); + break; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1; info->dstCount = 0; @@ -840,9 +870,10 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) } // First, count reg args - +#if FEATURE_VARARG bool callHasFloatRegArgs = false; - +#endif // !FEATURE_VARARG + for (GenTreePtr list = tree->gtCall.gtCallLateArgs; list; list = list->MoveNext()) { assert(list->IsList()); @@ -859,26 +890,52 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) assert(argNode->gtOper == GT_PUTARG_STK); argNode->gtLsraInfo.srcCount = 1; argNode->gtLsraInfo.dstCount = 0; + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // If the node is a struct and it is put on stack with + // putarg_stk operation, we consume and produce no registers. + // In this case the embedded LdObj node should not produce + // registers too since it is contained. 
+ if (argNode->TypeGet() == TYP_STRUCT) + { + assert(argNode != nullptr && argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_LDOBJ); + argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0; + argNode->gtLsraInfo.srcCount = 0; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING continue; } - var_types argType = argNode->TypeGet(); + regNumber argReg = REG_NA; + regMaskTP argMask = RBM_NONE; + short regCount = 0; + bool isOnStack = true; + if (curArgTabEntry->regNum != REG_STK) + { + isOnStack = false; + var_types argType = argNode->TypeGet(); - callHasFloatRegArgs |= varTypeIsFloating(argType); +#if FEATURE_VARARG + callHasFloatRegArgs |= varTypeIsFloating(argType); +#endif // !FEATURE_VARARG - regNumber argReg = curArgTabEntry->regNum; - short regCount = 1; - // Default case is that we consume one source; modify this later (e.g. for - // promoted structs) - info->srcCount++; + argReg = curArgTabEntry->regNum; + regCount = 1; - regMaskTP argMask = genRegMask(argReg); - argNode = argNode->gtEffectiveVal(); - - if (argNode->TypeGet() == TYP_STRUCT) + // Default case is that we consume one source; modify this later (e.g. for + // promoted structs) + info->srcCount++; + + argMask = genRegMask(argReg); + argNode = argNode->gtEffectiveVal(); + } + + // If the struct arg is wraped in CPYBLK the type of the param will beTYP_VOID. + // Use the curArgTabEntry's isStruct to get whether the param is a struct. 
+ if (argNode->TypeGet() == TYP_STRUCT + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct)) { unsigned originalSize = 0; - bool isPromoted = false; LclVarDsc* varDsc = nullptr; if (argNode->gtOper == GT_LCL_VAR) { @@ -893,20 +950,70 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) { noway_assert(!"GT_LDOBJ not supported for amd64"); } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + else if (argNode->gtOper == GT_PUTARG_REG) + { + originalSize = genTypeSize(argNode->gtType); + } + else if (argNode->gtOper == GT_LIST) + { + originalSize = 0; + + // There could be up to 2 PUTARG_REGs in the list + GenTreeArgList* argListPtr = argNode->AsArgList(); + unsigned iterationNum = 0; + for (; argListPtr; argListPtr = argListPtr->Rest()) + { + GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1; + assert(putArgRegNode->gtOper == GT_PUTARG_REG); + + if (iterationNum == 0) + { + varDsc = compiler->lvaTable + putArgRegNode->gtOp.gtOp1->gtLclVarCommon.gtLclNum; + originalSize = varDsc->lvSize(); + assert(originalSize != 0); + } + else + { + // Need an extra source for every node, but the first in the list. + info->srcCount++; + + // Get the mask for the second putarg_reg + argMask = genRegMask(curArgTabEntry->otherRegNum); + } + + putArgRegNode->gtLsraInfo.setDstCandidates(l, argMask); + putArgRegNode->gtLsraInfo.setSrcCandidates(l, argMask); + + // To avoid redundant moves, have the argument child tree computed in the + // register in which the argument is passed to the call. 
+ putArgRegNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(putArgRegNode)); + iterationNum++; + } + + assert(iterationNum <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING else { noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind"); } - unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES; - regNumber reg = (regNumber)(argReg + 1); - unsigned remainingSlots = slots - 1; - while (remainingSlots > 0 && reg <= REG_ARG_LAST) + unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES; + unsigned remainingSlots = slots; + + if (!isOnStack) { - argMask |= genRegMask(reg); - reg = (regNumber)(reg + 1); - remainingSlots--; - regCount++; + remainingSlots = slots - 1; + + regNumber reg = (regNumber)(argReg + 1); + while (remainingSlots > 0 && reg <= REG_ARG_LAST) + { + argMask |= genRegMask(reg); + reg = (regNumber)(reg + 1); + remainingSlots--; + regCount++; + } } short internalIntCount = 0; @@ -915,9 +1022,21 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) // This TYP_STRUCT argument is also passed in the outgoing argument area // We need a register to address the TYP_STRUCT // And we may need 2 +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + internalIntCount = 1; +#else // FEATURE_UNIX_AMD64_STRUCT_PASSING internalIntCount = 2; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING } argNode->gtLsraInfo.internalIntCount = internalIntCount; + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (argNode->gtOper == GT_PUTARG_REG) + { + argNode->gtLsraInfo.setDstCandidates(l, argMask); + argNode->gtLsraInfo.setSrcCandidates(l, argMask); + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING } else { @@ -931,6 +1050,8 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) { argNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(argNode)); } + +#if FEATURE_VARARG // In the case of a varargs call, the ABI dictates that if we have 
floating point args, // we must pass the enregistered arguments in both the integer and floating point registers. // Since the integer register is not associated with this arg node, we will reserve it as @@ -942,6 +1063,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) tree->gtLsraInfo.setInternalIntCount(tree->gtLsraInfo.internalIntCount + 1); tree->gtLsraInfo.addInternalCandidates(l, genRegMask(targetReg)); } +#endif // FEATURE_VARARG } // Now, count stack args @@ -995,6 +1117,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) args = args->gtOp.gtOp2; } +#if FEATURE_VARARG // If it is a fast tail call, it is already preferenced to use RAX. // Therefore, no need set src candidates on call tgt again. if (tree->gtCall.IsVarargs() && @@ -1007,6 +1130,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) // by Amd64 ABI. ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS)); } +#endif // !FEATURE_VARARG } break; @@ -1020,7 +1144,6 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) info->dstCount = 1; } break; - #ifdef _TARGET_X86_ case GT_LDOBJ: NYI_X86("GT_LDOBJ"); @@ -1218,6 +1341,116 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) } break; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + case GT_PUTARG_STK: + { + if (tree->TypeGet() != TYP_STRUCT) + { + TreeNodeInfoInitSimple(tree, info, kind); + break; + } + + GenTreePutArgStk* putArgStkTree = tree->AsPutArgStk(); + + GenTreePtr dstAddr = tree; + GenTreePtr srcAddr = tree->gtOp.gtOp1; + + assert(srcAddr->OperGet() == GT_LDOBJ); + info->srcCount = srcAddr->gtLsraInfo.dstCount; + + // If this is a stack variable address, + // make the op1 contained, so this way + // there is no unnecessary copying between registers. + // To avoid assertion, increment the parent's source. + // It is recovered below. + if (srcAddr->gtGetOp1()->OperIsLocalAddr()) + { + info->srcCount += 1; + } + + info->dstCount = 0; + + // In case of a CpBlk we could use a helper call. 
In case of putarg_stk we + // can't do that since the helper call could kill some already set up outgoing args. + // TODO-Amd64-Unix: converge the code for putarg_stk with cpyblk/cpyobj. + // The cpyXXXX code is rather complex and this could cause it to be more complex, but + // it might be the right thing to do. + + // This threshold will decide from using the helper or let the JIT decide to inline + // a code sequence of its choice. + ssize_t helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT); + ssize_t size = putArgStkTree->gtNumSlots * TARGET_POINTER_SIZE; + + // TODO-X86-CQ: The helper call either is not supported on x86 or required more work + // (I don't know which). + + // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2. + // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of + // our framework assemblies, so this is the main code generation scheme we'll use. + if (size <= CPBLK_UNROLL_LIMIT && putArgStkTree->gtNumberReferenceSlots == 0) + { + // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg. + // + // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte. + // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude + // RBM_NON_BYTE_REGS from internal candidates. + if ((size & (XMM_REGSIZE_BYTES - 1)) != 0) + { + info->internalIntCount++; + regMaskTP regMask = l->allRegs(TYP_INT); + +#ifdef _TARGET_X86_ + if ((size % 2) != 0) + { + regMask &= ~RBM_NON_BYTE_REGS; + } +#endif + info->setInternalCandidates(l, regMask); + } + + if (size >= XMM_REGSIZE_BYTES) + { + // If we have a buffer larger than XMM_REGSIZE_BYTES, + // reserve an XMM register to use it for a + // series of 16-byte loads and stores. 
+ info->internalFloatCount = 1; + info->addInternalCandidates(l, l->internalFloatRegCandidates()); + } + + if (srcAddr->gtGetOp1()->OperIsLocalAddr()) + { + MakeSrcContained(putArgStkTree, srcAddr->gtGetOp1()); + } + + // If src or dst are on stack, we don't have to generate the address into a register + // because it's just some constant+SP + putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindUnroll; + } + else + { + info->internalIntCount += 3; + info->setInternalCandidates(l, (RBM_RDI | RBM_RCX | RBM_RSI)); + if (srcAddr->gtGetOp1()->OperIsLocalAddr()) + { + MakeSrcContained(putArgStkTree, srcAddr->gtGetOp1()); + } + + putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindRepInstr; + } + + // Always mark the LDOBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree. + MakeSrcContained(putArgStkTree, srcAddr); + + // Balance up the inc above. + if (srcAddr->gtGetOp1()->OperIsLocalAddr()) + { + info->srcCount -= 1; + } + } + + break; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + case GT_COPYBLK: { // Sources are src, dest and size (or class token for CpObj). @@ -2995,6 +3228,6 @@ bool Lowering:: IsContainableImmed(GenTree* parentNode, GenTree* childNode) return true; } -#endif // _TARGET_AMD64_ +#endif // _TARGET_XARCH_ #endif // !LEGACY_BACKEND diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index d8341b1d7f..8f11af9878 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -2671,14 +2671,14 @@ LinearScan::buildInternalRegisterDefsForNode(GenTree *tree, int internalIntCount = tree->gtLsraInfo.internalIntCount; regMaskTP internalCands = tree->gtLsraInfo.getInternalCandidates(this); - // If this is a varArgs call, the internal candidates represent the integer registers that - // floating point arguments must be copied into. These must be handled as fixed regs. 
+ // If the number of internal integer registers required is the same as the number of candidate integer registers in the candidate set, + // then they must be handled as fixed registers. + // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.) bool fixedRegs = false; - if ((internalIntCount != 0) && (tree->OperGet() == GT_CALL)) + regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT)); + if (((int)genCountBits(internalIntCandidates)) == internalIntCount) { - assert(tree->gtCall.IsVarargs()); fixedRegs = true; - assert((int)genCountBits(internalCands) == internalIntCount); } for (count = 0; count < internalIntCount; count++) @@ -3317,6 +3317,50 @@ LinearScan::insertZeroInitRefPositions() } } +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +// ----------------------------------------------------------------------- +// Sets the register state for an argument of type STRUCT for System V systems. +// See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp +// for how state for argument is updated for unix non-structs and Windows AMD64 structs. 
+void +LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) +{ + assert(argDsc->lvType == TYP_STRUCT); + RegState * intRegState = &compiler->codeGen->intRegState; + RegState * floatRegState = &compiler->codeGen->floatRegState; + + if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA)) + { + if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT)) + { + assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS)); + floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg); + } + else + { + assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS)); + intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg); + } + } + + + if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA)) + { + if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT)) + { + assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS)); + floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg); + } + else + { + assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS)); + intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg); + } + } +} + +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + //------------------------------------------------------------------------ // updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate // regState (either compiler->intRegState or compiler->floatRegState), @@ -3339,31 +3383,41 @@ LinearScan::insertZeroInitRefPositions() void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) { - RegState * intRegState = &compiler->codeGen->intRegState; - RegState * floatRegState = &compiler->codeGen->floatRegState; - - // In the case of AMD64 we'll still use the floating point registers - // to model the register usage for argument on vararg calls, so - // we will ignore the varargs condition to determine whether we use - // XMM registers or not for setting up the call. 
- bool isFloat = (isFloatRegType(argDsc->lvType) +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // For System V AMD64 calls the argDsc can have 2 registers (for structs.) + // Handle them here. + if (argDsc->lvType == TYP_STRUCT) + { + unixAmd64UpdateRegStateForArg(argDsc); + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + RegState * intRegState = &compiler->codeGen->intRegState; + RegState * floatRegState = &compiler->codeGen->floatRegState; + // In the case of AMD64 we'll still use the floating point registers + // to model the register usage for argument on vararg calls, so + // we will ignore the varargs condition to determine whether we use + // XMM registers or not for setting up the call. + bool isFloat = (isFloatRegType(argDsc->lvType) #ifndef _TARGET_AMD64_ - && !compiler->info.compIsVarArgs + && !compiler->info.compIsVarArgs #endif - ); + ); #ifdef _TARGET_ARM_ - if (argDsc->lvIsHfaRegArg) isFloat = true; + if (argDsc->lvIsHfaRegArg) isFloat = true; #endif // _TARGET_ARM_ - if (isFloat) - { - JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); - compiler->raUpdateRegStateForArg(floatRegState, argDsc); - } - else - { - JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); - compiler->raUpdateRegStateForArg(intRegState, argDsc); + if (isFloat) + { + JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); + compiler->raUpdateRegStateForArg(floatRegState, argDsc); + } + else + { + JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); + compiler->raUpdateRegStateForArg(intRegState, argDsc); + } } } @@ -3548,7 +3602,9 @@ LinearScan::buildIntervals() // won't have done dataflow on it, but it needs to be marked as live-in so // it will get saved in the prolog. 
if (!compiler->compJmpOpUsed && argDsc->lvRefCnt == 0 && !compiler->opts.compDbgCode) + { continue; + } if (argDsc->lvIsRegArg) updateRegStateForArg(argDsc); diff --git a/src/jit/lsra.h b/src/jit/lsra.h index e57873fb65..cef6669513 100644 --- a/src/jit/lsra.h +++ b/src/jit/lsra.h @@ -574,6 +574,14 @@ private: void buildUpperVectorRestoreRefPositions(GenTree *tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors); #endif //FEATURE_SIMD +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // For AMD64 on SystemV machines. This method + // is called as replacement for raUpdateRegStateForArg + // that is used on Windows. On System V systems a struct can be passed + // partially using registers from the 2 register files. + void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc); +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Update reg state for an incoming register argument void updateRegStateForArg(LclVarDsc* argDsc); @@ -998,7 +1006,6 @@ private: // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers. 
VARSET_TP largeVectorCalleeSaveCandidateVars; #endif // FEATURE_SIMD - }; /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index f3eb506b0d..b000f58969 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -926,6 +926,7 @@ fgArgInfo::fgArgInfo(Compiler * comp, GenTreePtr call, unsigned numArgs) argTableSize = numArgs; // the allocated table size argsComplete = false; argsSorted = false; + if (argTableSize == 0) argTable = NULL; else @@ -1127,7 +1128,6 @@ void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry) argCount++; } - fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum, GenTreePtr node, GenTreePtr parent, @@ -1137,38 +1137,79 @@ fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum, { fgArgTabEntryPtr curArgTabEntry = new(compiler, CMK_fgArgInfo) fgArgTabEntry; - curArgTabEntry->argNum = argNum; - curArgTabEntry->node = node; - curArgTabEntry->parent = parent; - curArgTabEntry->regNum = regNum; - curArgTabEntry->slotNum = 0; - curArgTabEntry->numRegs = numRegs; - curArgTabEntry->numSlots = 0; - curArgTabEntry->alignment = alignment; - curArgTabEntry->lateArgInx = (unsigned) -1; - curArgTabEntry->tmpNum = (unsigned) -1; - curArgTabEntry->isSplit = false; - curArgTabEntry->isTmp = false; - curArgTabEntry->needTmp = false; - curArgTabEntry->needPlace = false; - curArgTabEntry->processed = false; - curArgTabEntry->isHfaRegArg = false; - curArgTabEntry->isBackFilled = false; - curArgTabEntry->isNonStandard = false; + curArgTabEntry->argNum = argNum; + curArgTabEntry->node = node; + curArgTabEntry->parent = parent; + curArgTabEntry->regNum = regNum; + curArgTabEntry->slotNum = 0; + curArgTabEntry->numRegs = numRegs; + curArgTabEntry->numSlots = 0; + curArgTabEntry->alignment = alignment; + curArgTabEntry->lateArgInx = (unsigned)-1; + curArgTabEntry->tmpNum = (unsigned)-1; + curArgTabEntry->isSplit = false; + curArgTabEntry->isTmp = false; + curArgTabEntry->needTmp = false; + 
curArgTabEntry->needPlace = false; + curArgTabEntry->processed = false; + curArgTabEntry->isHfaRegArg = false; + curArgTabEntry->isBackFilled = false; + curArgTabEntry->isNonStandard = false; AddArg(curArgTabEntry); return curArgTabEntry; } +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum, + GenTreePtr node, + GenTreePtr parent, + regNumber regNum, + unsigned numRegs, + unsigned alignment, + const bool isStruct, + const regNumber otherRegNum, + const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr) +{ + fgArgTabEntryPtr curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment); + assert(curArgTabEntry != nullptr); + + // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a + // PlaceHolder node (in case of needed late argument, for example.) + // This requires using of an extra flag. At creation time the state is right, so + // and this assert enforces that. 
+ assert((node->gtType == TYP_STRUCT && isStruct) || (node->gtType != TYP_STRUCT && !isStruct)); + curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct + curArgTabEntry->isStruct = isStruct; // is this a struct arg + + if (isStruct && structDescPtr != nullptr) + { + curArgTabEntry->structDesc.CopyFrom(*structDescPtr); + } + + return curArgTabEntry; +} +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, - unsigned alignment) + unsigned alignment + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct)) { fgArgTabEntryPtr curArgTabEntry = new(compiler, CMK_fgArgInfo) fgArgTabEntry; - nextSlotNum = (unsigned) roundUp(nextSlotNum, alignment); + nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment); + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a + // PlaceHolder node (in case of needed late argument, for example.) + // This reqires using of an extra flag. At creation time the state is right, so + // and this assert enforces that. + assert((node->gtType == TYP_STRUCT && isStruct) || (node->gtType != TYP_STRUCT && !isStruct)); + curArgTabEntry->isStruct = isStruct; // is this a struct arg +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) curArgTabEntry->argNum = argNum; curArgTabEntry->node = node; @@ -1399,9 +1440,24 @@ void fgArgInfo::ArgsComplete() for (unsigned curInx = 0; curInx < argCount; curInx++) { - fgArgTabEntryPtr curArgTabEntry = argTable[curInx]; assert(curArgTabEntry != NULL); + fgArgTabEntryPtr curArgTabEntry = argTable[curInx]; + assert(curArgTabEntry != NULL); GenTreePtr argx = curArgTabEntry->node; + // If this is a struct, mark it for needing a tempVar. 
+ // In the copyblk and store this should have minimal perf impact since + // the local vars where we copy/store to already exist and the logic for temp + // var will not create a new one if it creates a tempVar from another tempVar. + // (Debugging through the code, there was no new copy of data created, neither a new tempVar.) + // The need for this arise from Lower::LowerArg. + // In case of copyblk and store operation, the NewPutArg method will + // not be invoked and the struct will not be loaded to be passed in + // registers or by value on the stack. + if (argx->TypeGet() == TYP_STRUCT FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || curArgTabEntry->isStruct)) + { + curArgTabEntry->needTmp = true; + } + if (curArgTabEntry->regNum == REG_STK) { hasStackArgs = true; @@ -1415,8 +1471,11 @@ void fgArgInfo::ArgsComplete() } else // we have a register argument, next we look for a TYP_STRUCT { - if (argx->TypeGet() == TYP_STRUCT) + if (argx->TypeGet() == TYP_STRUCT + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || curArgTabEntry->isStruct)) + { hasStructRegArg = true; + } } /* If the argument tree contains an assignment (GTF_ASG) then the argument and @@ -1461,7 +1520,6 @@ void fgArgInfo::ArgsComplete() } } - #if FEATURE_FIXED_OUT_ARGS // Like calls, if this argument has a tree that will do an inline throw, // a call to a jit helper, then we need to treat it like a call (but only @@ -1917,7 +1975,11 @@ void fgArgInfo::SortArgs() argsSorted = true; } -GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum) +// This function creates a tmp var ony if needed. +// We need this to be done in order to enforce ordering +// of the evaluation of arguments. There are times this function will not be called for an argument at all. 
+GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters)) { LclVarDsc * varDsc = &lvaTable[tmpVarNum]; assert(varDsc->lvIsTemp); @@ -1926,9 +1988,12 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum) // Create a copy of the temp to go into the late argument list GenTreePtr arg = gtNewLclvNode(tmpVarNum, type); -#ifdef _TARGET_AMD64_ +#if defined(_TARGET_AMD64_) if (type == TYP_STRUCT) { + + +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING switch (lvaLclExactSize(tmpVarNum)) { case 1: type = TYP_BYTE; break; @@ -1953,6 +2018,8 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum) default: break; } +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING + // If we didn't change the type of the struct, it means // its structure doesn't support to be passed directly through a // register, so we need to pass a pointer to the destination where @@ -1960,7 +2027,23 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum) if (type == TYP_STRUCT) { arg->gtFlags |= GTF_DONT_CSE; + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + + // If it is passed in registers, don't get the address of the var. Make it a + // field instead. It will be loaded in registers with putarg_reg tree in lower. 
+ if (passedInRegisters) + { + arg->ChangeOper(GT_LCL_FLD); + arg->gtType = type; + } + else + { + arg = gtNewOperNode(GT_ADDR, TYP_STRUCT, arg); + } +#else // FEATURE_UNIX_AMD64_STRUCT_PASSING arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING } else { @@ -1973,10 +2056,8 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum) arg->gtFlags |= GTF_DONT_CSE; arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg); - // Ldobj the temp to use it as a call argument - arg = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, arg, lvaGetStruct(tmpVarNum) - ); + arg = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, arg, lvaGetStruct(tmpVarNum)); arg->gtFlags |= GTF_EXCEPT; #endif // _TARGET_AMD64_ @@ -2007,7 +2088,7 @@ void fgArgInfo::EvalArgsToTemps() // Only the register arguments need to be replaced with placeholders node // stacked arguments are evaluated and pushed in order // - if (curArgTabEntry->regNum == REG_STK) + if (curArgTabEntry->regNum == REG_STK && !curArgTabEntry->needTmp) continue; #endif @@ -2019,9 +2100,11 @@ void fgArgInfo::EvalArgsToTemps() { // Create a copy of the temp to go into the late argument list tmpVarNum = curArgTabEntry->tmpNum; - defArg = compiler->fgMakeTmpArgNode(tmpVarNum); + defArg = compiler->fgMakeTmpArgNode( + tmpVarNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(argTable[curInx]->structDesc.passedInRegisters)); - /* mark the original node as a late argument */ + // mark the original node as a late argument argx->gtFlags |= GTF_LATE_ARG; } else @@ -2036,7 +2119,7 @@ void fgArgInfo::EvalArgsToTemps() } #endif -#ifdef _TARGET_AMD64_ +#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) noway_assert(argx->gtType != TYP_STRUCT); #endif @@ -2160,11 +2243,11 @@ void fgArgInfo::EvalArgsToTemps() /* For a TYP_STRUCT we also need to record the class handle of the arg */ CORINFO_CLASS_HANDLE clsHnd = NULL; -#ifdef _TARGET_AMD64_ +#if defined(_TARGET_AMD64_) && 
!defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) noway_assert(argx->gtType != TYP_STRUCT); -#else // _TARGET_AMD664_ +#else // _TARGET_AMD64_ if (defArg->gtType == TYP_STRUCT) { @@ -2429,6 +2512,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #endif unsigned argSlots = 0; + unsigned nonRegPassedStructSlots = 0; bool lateArgsComputed = (call->gtCallLateArgs != nullptr); bool callHasRetBuffArg = ((call->gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) != 0); @@ -2606,13 +2690,19 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) (call->gtCallObjp->gtType == TYP_I_IMPL)); /* this is a register argument - put it in the table */ - call->fgArgInfo->AddRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1); + call->fgArgInfo->AddRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1 +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + , false, REG_STK, nullptr +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + ); } else { /* this is a register argument - possibly update it in the table */ call->fgArgInfo->RemorphRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1); } + // this can't be a struct. 
+ assert(argx->gtType != TYP_STRUCT); /* Increment the argument register count and argument index */ if (!varTypeIsFloating(argx->gtType)) @@ -2714,9 +2804,22 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #endif // _TARGET_ARM_ +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + bool nonRegPassableStruct = false; + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + + bool hasStructArgument = false; for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2) { GenTreePtr * parentArgx = &args->gtOp.gtOp1; + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (!hasStructArgument) + { + hasStructArgument = (args->gtOp.gtOp1->TypeGet() == TYP_STRUCT); + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING argx = fgMorphTree(*parentArgx); *parentArgx = argx; flagsSummary |= argx->gtFlags; @@ -2741,7 +2844,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) unsigned size = 0; CORINFO_CLASS_HANDLE copyBlkClass = NULL; - bool isRegArg; + bool isRegArg = false; fgArgTabEntryPtr argEntry = NULL; @@ -2816,14 +2919,20 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } #elif defined(_TARGET_AMD64_) - - passUsingFloatRegs = varTypeIsFloating(argx); - #if defined(UNIX_AMD64_ABI) + if (lateArgsComputed) + { + passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum); + } + else + { + passUsingFloatRegs = varTypeIsFloating(argx); + } bool passUsingIntRegs; passUsingIntRegs = passUsingFloatRegs ? 
false : (intArgRegNum < MAX_REG_ARG); -#endif // UNIX_AMD64_ABI - +#else // !UNIX_AMD64_ABI + passUsingFloatRegs = varTypeIsFloating(argx); +#endif // !UNIX_AMD64_ABI #elif defined(_TARGET_X86_) passUsingFloatRegs = false; @@ -2836,6 +2945,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use var_types structBaseType = TYP_STRUCT; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + unsigned int structFloatRegs = 0; + unsigned int structIntRegs = 0; +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + bool isStructArg = argx->gtType == TYP_STRUCT; + if (lateArgsComputed) { assert(argEntry != NULL); @@ -2870,12 +2985,24 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // stack slots, or both if the argument is split between the registers and the stack. // - if (argx->IsArgPlaceHolderNode() || (argx->gtType != TYP_STRUCT)) + if (argx->IsArgPlaceHolderNode() || (!isStructArg)) { #if defined(_TARGET_AMD64_) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (!isStructArg) + { + size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot' + } + else + { + size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; + eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc); + } +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot' +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #elif defined(_TARGET_ARM64_) - if (argx->gtType == TYP_STRUCT) + if (isStructArg) { // Structs are eith passed in 1 or 2 (64-bit) slots size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; @@ -2891,7 +3018,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) size = 1; // On ARM64, all primitives 
fit in a single (64-bit) 'slot' } #elif defined(_TARGET_ARM_) - if (argx->gtType == TYP_STRUCT) + if (isStructArg) { size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; } @@ -2915,10 +3042,26 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) else // argx->gtType == TYP_STRUCT { /* We handle two opcodes: GT_MKREFANY and GT_LDOBJ */ - if (argx->gtOper == GT_MKREFANY) + if (argx->gtOper == GT_MKREFANY) { + if (argx->TypeGet() == TYP_STRUCT) + { + isStructArg = true; + } #ifdef _TARGET_AMD64_ - size = 1; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (argx->TypeGet() == TYP_STRUCT) + { + size = info.compCompHnd->getClassSize(impGetRefAnyClass()); + unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE); + size = roundupSize / TARGET_POINTER_SIZE; + eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc); + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + size = 1; + } #else size = 2; #endif @@ -2942,22 +3085,42 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) BADCODE("illegal argument tree in fgMorphArgs"); CORINFO_CLASS_HANDLE ldObjClass = argLdobj->gtLdObj.gtClass; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + eeGetSystemVAmd64PassStructInRegisterDescriptor(ldObjClass, &structDesc); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + unsigned originalSize = info.compCompHnd->getClassSize(ldObjClass); + originalSize = (originalSize == 0 ? 
TARGET_POINTER_SIZE : originalSize); unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE); bool passStructByRef = false; #ifndef _TARGET_X86_ +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING // Check for TYP_STRUCT argument with size 1, 2, 4 or 8 bytes // As we can optimize these by turning them into a GT_IND of the correct type - if ((originalSize > TARGET_POINTER_SIZE) || ((originalSize & (originalSize-1)) != 0)) + if ((originalSize > TARGET_POINTER_SIZE) || ((originalSize & (originalSize - 1)) != 0)) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { // Normalize 'size' to the number of pointer sized items // 'size' is the number of register slots that we will use to pass the argument size = roundupSize / TARGET_POINTER_SIZE; #if defined(_TARGET_AMD64_) +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING size = 1; // This must be copied to a temp and passed by address passStructByRef = true; copyBlkClass = ldObjClass; +#else // FEATURE_UNIX_AMD64_STRUCT_PASSING + if (!structDesc.passedInRegisters) + { + passStructByRef = false; + copyBlkClass = NULL; + } + else + { + passStructByRef = true; + copyBlkClass = ldObjClass; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #elif defined(_TARGET_ARM64_) if (size > 2) { @@ -2985,6 +3148,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } #endif // _TARGET_ARM_ } +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING else { // change our GT_LDOBJ into a GT_IND of the correct type @@ -3109,10 +3273,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) size = 1; } -#endif // not _TARGET_X86_ +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // not _TARGET_X86_ // We still have a TYP_STRUCT unless we converted the GT_LDOBJ into a GT_IND above... - if ((structBaseType == TYP_STRUCT) && !passStructByRef) { // if the valuetype size is not a multiple of sizeof(void*), @@ -3158,8 +3322,23 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // // Figure out if the argument will be passed in a register. 
// + bool passedInRegisters = true; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + passedInRegisters = !isStructArg; + if (!passedInRegisters) + { + if (structDesc.passedInRegisters) + { + passedInRegisters = true; + } + else + { + passedInRegisters = false; + } + } - if (isRegParamType(genActualType(argx->TypeGet()))) +#endif + if (passedInRegisters && isRegParamType(genActualType(argx->TypeGet()))) { #ifdef _TARGET_ARM_ if (passUsingFloatRegs) @@ -3192,13 +3371,48 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } #else // _TARGET_ARM_ #if defined(UNIX_AMD64_ABI) - if (passUsingFloatRegs) + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Here a struct can be passed in register following the classifications of its members and size. + // Now make sure there are actually enough registers to do so. + if (isStructArg) { - isRegArg = fltArgRegNum < MAX_FLOAT_REG_ARG; + for (unsigned int i = 0; i < structDesc.eightByteCount; i++) + { + if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeInteger || + structDesc.eightByteClassifications[i] == SystemVClassificationTypeIntegerReference) + { + structIntRegs++; + } + else if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeSSE) + { + structFloatRegs++; + } + } + + if (((nextFltArgRegNum + structFloatRegs) > MAX_FLOAT_REG_ARG) || + ((intArgRegNum + structIntRegs) > MAX_REG_ARG)) + { + isRegArg = false; + nonRegPassableStruct = true; + } + else + { + isRegArg = true; + nonRegPassableStruct = false; + } } else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { - isRegArg = intArgRegNum < MAX_REG_ARG; + if (passUsingFloatRegs) + { + isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG; + } + else + { + isRegArg = intArgRegNum < MAX_REG_ARG; + } } #else // !defined(UNIX_AMD64_ABI) isRegArg = intArgRegNum < maxRegArgs; @@ -3208,6 +3422,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) else { isRegArg = false; + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + 
nonRegPassableStruct = true; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING } } @@ -3245,16 +3463,67 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } #endif // _TARGET_ARM_ - if (isRegArg) { - // fill in or update the argInfo table + regNumber nextRegNum = REG_STK; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + regNumber nextOtherRegNum = REG_STK; + + if (isStructArg) + { + // It is a struct passed in registers. Assign the next available register. + unsigned int curIntReg = intArgRegNum; + unsigned int curFloatReg = nextFltArgRegNum; + for (unsigned int i = 0; i < structDesc.eightByteCount; i++) + { + if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeInteger || + structDesc.eightByteClassifications[i] == SystemVClassificationTypeIntegerReference) + { + if (i == 0) + { + nextRegNum = genMapIntRegArgNumToRegNum(curIntReg); + } + else if (i == 1) + { + nextOtherRegNum = genMapIntRegArgNumToRegNum(curIntReg); + } + else + { + assert(false && "fgMorphArgs Invalid index for int classification."); + } - regNumber nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum) : genMapIntRegArgNumToRegNum(intArgRegNum); + curIntReg++; + } + else if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeSSE) + { + if (i == 0) + { + nextRegNum = genMapFloatRegArgNumToRegNum(curFloatReg); + } + else if (i == 1) + { + nextOtherRegNum = genMapFloatRegArgNumToRegNum(curFloatReg); + } + else + { + assert(false && "fgMorphArgs Invalid index for SSE classification."); + } + curFloatReg++; + } + } + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + // fill in or update the argInfo table + nextRegNum = passUsingFloatRegs ? 
genMapFloatRegArgNumToRegNum(nextFltArgRegNum) : genMapIntRegArgNumToRegNum(intArgRegNum); + } #ifdef _TARGET_AMD64_ - assert(size == 1); +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING + assert(size == 1); +#endif #endif #ifndef LEGACY_BACKEND @@ -3263,14 +3532,18 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // // They should not affect the placement of any other args or stack space required. // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls. - bool nonStandardFound = false; for (int i=0; i<nonStandardArgs.Height(); i++) { hasNonStandardArg = true; if (argx == nonStandardArgs.Index(i).node) { - fgArgTabEntry* argEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nonStandardArgs.Index(i).reg, size, argAlign); + fgArgTabEntry* argEntry = call->fgArgInfo->AddRegArg(argIndex, argx, + args, nonStandardArgs.Index(i).reg, size, argAlign +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + , isStructArg, nextOtherRegNum, &structDesc +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + ); argEntry->isNonStandard = true; argIndex++; nonStandardFound = true; @@ -3283,9 +3556,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) if (!lateArgsComputed) { - /* This is a register argument - put it in the table */ - - fgArgTabEntryPtr newArg = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign); + // This is a register argument - put it in the table + fgArgTabEntryPtr newArg = call->fgArgInfo->AddRegArg( + argIndex, argx, args, nextRegNum, size, argAlign +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + , isStructArg, nextOtherRegNum, &structDesc +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + ); (void)newArg; //prevent "unused variable" error from GCC #ifdef _TARGET_ARM_ newArg->SetIsHfaRegArg(passUsingFloatRegs && isHfaArg); // Note that an HFA is passed in int regs for varargs @@ -3294,7 +3571,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } else { - /* 
This is a register argument - possibly update it in the table */ + // This is a register argument - possibly update it in the table fgArgTabEntryPtr entry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign); if (entry->isNonStandard) { @@ -3306,45 +3583,55 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // Setup the next argRegNum value if (!isBackFilled) { - if (passUsingFloatRegs) +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (isStructArg) { - fltArgRegNum += size; -#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) - argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL); - intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG); -#endif // _TARGET_AMD64_ -#ifdef _TARGET_ARM_ - if (fltArgRegNum > MAX_FLOAT_REG_ARG) - { - // This indicates a partial enregistration of a struct type - assert(argx->gtType == TYP_STRUCT); - unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG); - assert((unsigned char)numRegsPartial == numRegsPartial); - call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial); - fltArgRegNum = MAX_FLOAT_REG_ARG; - } -#endif // _TARGET_ARM_ + intArgRegNum += structIntRegs; + fltArgRegNum += structFloatRegs; } else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { - intArgRegNum += size; + if (passUsingFloatRegs) + { + fltArgRegNum += size; #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) - fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_DOUBLE); - fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG); + argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL); + intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG); #endif // _TARGET_AMD64_ #ifdef _TARGET_ARM_ - if (intArgRegNum > MAX_REG_ARG) - { - // This indicates a partial enregistration of a struct type - assert((argx->gtType == TYP_STRUCT) || argx->OperIsCopyBlkOp() || - (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG))); - unsigned numRegsPartial 
= size - (intArgRegNum - MAX_REG_ARG); - assert((unsigned char)numRegsPartial == numRegsPartial); - call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial); - intArgRegNum = MAX_REG_ARG; - fgPtrArgCntCur += size - numRegsPartial; + if (fltArgRegNum > MAX_FLOAT_REG_ARG) + { + // This indicates a partial enregistration of a struct type + assert(isStructArg); + unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG); + assert((unsigned char)numRegsPartial == numRegsPartial); + call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial); + fltArgRegNum = MAX_FLOAT_REG_ARG; + } +#endif // _TARGET_ARM_ } + else + { + intArgRegNum += size; +#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) + fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_DOUBLE); + fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG); +#endif // _TARGET_AMD64_ +#ifdef _TARGET_ARM_ + if (intArgRegNum > MAX_REG_ARG) + { + // This indicates a partial enregistration of a struct type + assert((isStructArg) || argx->OperIsCopyBlkOp() || + (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG))); + unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG); + assert((unsigned char)numRegsPartial == numRegsPartial); + call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial); + intArgRegNum = MAX_REG_ARG; + fgPtrArgCntCur += size - numRegsPartial; + } #endif // _TARGET_ARM_ + } } } } @@ -3352,27 +3639,28 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) { fgPtrArgCntCur += size; - /* If the register arguments have not been determined then we must fill in the argInfo */ + // If the register arguments have not been determined then we must fill in the argInfo if (!lateArgsComputed) { - /* This is a stack argument - put it in the table */ - call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign); + // This is a stack argument - put it in the table + call->fgArgInfo->AddStkArg(argIndex, argx, args, size, 
argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg)); + } else { - /* This is a stack argument - possibly update it in the table */ + // This is a stack argument - possibly update it in the table call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign); } } - if (copyBlkClass != NULL) { noway_assert(!lateArgsComputed); - fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass); + fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc)); } #ifdef _TARGET_AMD64_ + if (argx->gtOper == GT_MKREFANY) { // 'Lower' the MKREFANY tree and insert it. @@ -3406,10 +3694,15 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } #endif // _TARGET_AMD64_ - argIndex++; - argSlots += size; - +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (nonRegPassableStruct) + { + nonRegPassedStructSlots += size; + } + else +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + argSlots += size; } // end foreach argument loop if (!lateArgsComputed) @@ -3478,18 +3771,17 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // and ignores floating point args (it is overly conservative in that case). if (argSlots <= MAX_REG_ARG) { - preallocatedArgCount = 0; + preallocatedArgCount = nonRegPassedStructSlots; } else { - preallocatedArgCount = argSlots - MAX_REG_ARG; + preallocatedArgCount = argSlots + nonRegPassedStructSlots - MAX_REG_ARG; } #elif defined(_TARGET_AMD64_) preallocatedArgCount = max(4, argSlots); #else #error Unsupported or unset target architecture #endif // _TARGET_* - if (preallocatedArgCount * REGSIZE_BYTES > lvaOutgoingArgSpaceSize) { lvaOutgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES; @@ -3514,39 +3806,242 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // If the register arguments have already been determined // or we have no register arguments then we are done. 
- if (lateArgsComputed || (intArgRegNum == 0 && fltArgRegNum == 0 && !hasNonStandardArg))
+ bool needEvalArgsToTemps = true;
+
+ if (lateArgsComputed || (intArgRegNum == 0 && fltArgRegNum == 0 && !hasNonStandardArg && !hasStructArgument))
{
- return call;
+ needEvalArgsToTemps = false;
}
- // This is the first time that we morph this call AND it has register arguments.
- // Follow into the code below and do the 'defer or eval to temp' analysis.
+ if (needEvalArgsToTemps)
+ {
+ // This is the first time that we morph this call AND it has register arguments.
+ // Follow into the code below and do the 'defer or eval to temp' analysis.
- call->fgArgInfo->SortArgs();
+ call->fgArgInfo->SortArgs();
- call->fgArgInfo->EvalArgsToTemps();
+ call->fgArgInfo->EvalArgsToTemps();
- // We may have updated the arguments
- if (call->gtCallArgs)
- {
- UpdateGT_LISTFlags(call->gtCallArgs);
+ // We may have updated the arguments
+ if (call->gtCallArgs)
+ {
+ UpdateGT_LISTFlags(call->gtCallArgs);
+ }
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Rewrite the struct args to be passed by value on stack or in registers.
+ fgMorphSystemVStructArgs(call, hasStructArgument);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
return call;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+// fgMorphSystemVStructArgs:
+// Rewrite the struct args to be passed by value on stack or in registers.
+//
+// args:
+// call: The call whose arguments need to be morphed.
+// hasStructArgument: Whether this call has struct arguments.
+//
+void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
+{
+ unsigned flagsSummary = 0;
+ GenTreePtr args;
+ GenTreePtr argx;
+
+ if (hasStructArgument)
+ {
+ fgArgInfoPtr allArgInfo = call->fgArgInfo;
+
+ for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2)
+ {
+ // For late arguments the arg tree that is overridden is in the gtCallLateArgs list. 
+ // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
+ // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
+ // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself,
+ // otherwise points to the list in the late args list.
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+ GenTreePtr arg = argx;
+ bool argListCreated = false;
+
+ var_types type = arg->TypeGet();
+
+ if (type == TYP_STRUCT)
+ {
+ // If we have already processed the arg...
+ if (arg->OperGet() == GT_LIST && arg->TypeGet() == TYP_STRUCT)
+ {
+ continue;
+ }
+
+ // If already LDOBJ it is set properly already.
+ if (arg->OperGet() == GT_LDOBJ)
+ {
+ assert(!fgEntryPtr->structDesc.passedInRegisters);
+ continue;
+ }
+
+ assert(
+ arg->OperGet() == GT_ADDR ||
+ arg->OperGet() == GT_LCL_FLD ||
+ arg->OperGet() == GT_LCL_VAR);
+
+ assert(
+ arg->OperGet() == GT_LCL_VAR ||
+ arg->OperGet() == GT_LCL_FLD ||
+ arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD ||
+ arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR);
+
+ GenTreeLclVarCommon* lclCommon = arg->OperGet() == GT_ADDR ? 
+ arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon(); + if (fgEntryPtr->structDesc.passedInRegisters) + { + if (fgEntryPtr->structDesc.eightByteCount == 1) + { + // Change the type and below the code will change the LclVar to a LCL_FLD + type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0], fgEntryPtr->structDesc.eightByteSizes[0]); + } + else if (fgEntryPtr->structDesc.eightByteCount == 2) + { + // Create LCL_FLD for each eightbyte. + argListCreated = true; + + // Second eightbyte. + GenTreeLclFld* newLclField = new(this, GT_LCL_FLD) GenTreeLclFld( + GetTypeFromClassificationAndSizes( + fgEntryPtr->structDesc.eightByteClassifications[1], + fgEntryPtr->structDesc.eightByteSizes[1]), + lclCommon->gtLclNum, + fgEntryPtr->structDesc.eightByteOffsets[1]); + GenTreeArgList* secondNode = gtNewListNode(newLclField, nullptr); + secondNode->gtType = TYP_STRUCT; // Preserve the TYP_STRUCT. It is a special case. + newLclField->gtFieldSeq = FieldSeqStore::NotAField(); + + // First field + arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField(); + arg->gtType = GetTypeFromClassificationAndSizes( + fgEntryPtr->structDesc.eightByteClassifications[0], + fgEntryPtr->structDesc.eightByteSizes[0]); + arg = gtNewListNode(arg, secondNode); + arg->gtType = TYP_STRUCT; // Preserve the TYP_STRUCT. It is a special case. + } + else + { + assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes for the CLR. + } + } + + // If we didn't change the type of the struct, it means + // its classification doesn't support to be passed directly through a + // register, so we need to pass a pointer to the destination where + // where we copied the struct to. + if (!argListCreated) + { + if (fgEntryPtr->structDesc.passedInRegisters) + { + arg->gtType = type; + } + else + { + arg->gtType = TYP_I_IMPL; + + // Make sure this is an addr node. 
+ if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR) + { + arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg); + } + + assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR); + + // Ldobj the temp to use it as a call argument + arg = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, arg, lvaGetStruct(lclCommon->gtLclNum)); + arg->gtFlags |= GTF_EXCEPT; + flagsSummary |= GTF_EXCEPT; + } + } + } + + if (argx != arg) + { + bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0; + fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1); + assert(fgEntryPtr != nullptr); + GenTreePtr argx = fgEntryPtr->node; + GenTreePtr lateList = nullptr; + GenTreePtr lateNode = nullptr; + if (isLateArg) + { + for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) + { + assert(list->IsList()); + + GenTreePtr argNode = list->Current(); + if (argx == argNode) + { + lateList = list; + lateNode = argNode; + break; + } + } + assert(lateList != nullptr && lateNode != nullptr); + } + + fgEntryPtr->node = arg; + if (isLateArg) + { + lateList->gtOp.gtOp1 = arg; + } + else + { + args->gtOp.gtOp1 = arg; + } + } + } + } + + // Update the flags + call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT); +} +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + // Make a copy of a struct variable if necessary, to pass to a callee. 
// returns: tree that computes address of the outgoing arg void -Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned argIndex, CORINFO_CLASS_HANDLE copyBlkClass) +Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, + GenTree* args, + unsigned argIndex, + CORINFO_CLASS_HANDLE copyBlkClass + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)) { GenTree* argx = args->Current(); - noway_assert(argx->gtOper != GT_MKREFANY); - // See if we need to insert a copy at all // Case 1: don't need a copy if it is the last use of a local. We can't determine that all of the time // but if there is only one use and no loops, the use must be last. @@ -3616,8 +4111,6 @@ Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned fgCurrentlyInUseArgTemps->setBit(tmp); - - // TYP_SIMD structs should not be enregistered, since ABI requires it to be // allocated on stack and address of it needs to be passed. if (lclVarIsSIMDType(tmp)) @@ -3648,13 +4141,16 @@ Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned #if FEATURE_FIXED_OUT_ARGS // Do the copy early, and evalute the temp later (see EvalArgsToTemps) + // When on Unix create LCL_FLD for structs passed in more than one registers. 
See fgMakeTmpArgNode GenTreePtr arg = copyBlk; #else // FEATURE_FIXED_OUT_ARGS // Structs are always on the stack, and thus never need temps // so we have to put the copy and temp all into one expression - GenTreePtr arg = fgMakeTmpArgNode(tmp); + GenTreePtr arg = fgMakeTmpArgNode( + tmp + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters)); // Change the expression to "(tmp=val),tmp" arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); @@ -3718,30 +4214,60 @@ void Compiler::fgFixupStructReturn(GenTreePtr call) { bool callHasRetBuffArg = ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) != 0); +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + if (!callHasRetBuffArg && call->TypeGet() == TYP_STRUCT && call->gtCall.gtRetClsHnd != NO_CLASS_HANDLE) + { + eeGetSystemVAmd64PassStructInRegisterDescriptor(GetStructClassHandle(call), &structDesc); + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (!callHasRetBuffArg && call->TypeGet() == TYP_STRUCT) { -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) if (call->gtCall.IsVarargs() || !IsHfa(call)) -#endif +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (!structDesc.passedInRegisters) +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { // Now that we are past the importer, re-type this node so the register predictor does // the right thing call->gtType = genActualType((var_types)call->gtCall.gtReturnType); } +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + else + { + if (structDesc.passedInRegisters && structDesc.eightByteCount <= 1) + { + call->gtType = genActualType(getEightByteType(structDesc, 0)); + } + } +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) } - #ifdef _TARGET_ARM_ // Either we don't have a struct now or if struct, then it is HFA returned in regs. 
assert(call->TypeGet() != TYP_STRUCT || (IsHfa(call) && !callHasRetBuffArg)); #else +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer. + assert((call->TypeGet() != TYP_STRUCT) || + (structDesc.passedInRegisters) || + (callHasRetBuffArg)); +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // No more struct returns assert(call->TypeGet() != TYP_STRUCT); +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) #endif +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // If there is a struct that is returned in registers there might be a retbuf (homing space for the return) and type struct. + assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID) || (call->TypeGet() == TYP_STRUCT)); +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // If it was a struct return, it has been transformed into a call // with a return buffer (that returns TYP_VOID) or into a return // of a primitive/enregisterable type assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID)); +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) } @@ -4698,7 +5224,6 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma ); } #endif - if (fldOffset != 0) { // Generate the "addr" node. @@ -5180,6 +5705,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) } // Get the size of the struct and see if it is 1, 2, 4 or 8 bytes in size + // For Amd64-Unix the call below checks to see if the struct is register passable. 
if (argx->OperGet() == GT_LDOBJ) { #ifdef _TARGET_AMD64_ @@ -5634,6 +6160,13 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL; #endif +#ifdef FEATURE_PAL + if (!canFastTailCall && szFailReason == nullptr) + { + szFailReason = "Non fast tail calls disabled for PAL based systems."; + } +#endif // FEATURE_PAL + if (szFailReason != nullptr) { #ifdef DEBUG @@ -5659,13 +6192,6 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) compCurBB->bbJumpKind = BBJ_RETURN; #endif -#ifdef FEATURE_PAL - if (!canFastTailCall) - { - goto NO_TAIL_CALL; - } -#endif // FEATURE_PAL - // Set this flag before calling fgMorphCall() to prevent inlining this call. call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL; @@ -5847,6 +6373,13 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) // This is a HFA, use float 0. callType = TYP_FLOAT; } +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Return a dummy node, as the return is already removed. + if (callType == TYP_STRUCT) + { + // This is an register-returned struct. Return a 0. + callType = TYP_INT; + } #endif result = gtNewZeroConNode(genActualType(callType)); result = fgMorphTree(result); @@ -5990,7 +6523,6 @@ NO_TAIL_CALL: retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg")); lvaSetStruct(retValTmpNum, structHnd, true); - dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT)); } } @@ -6400,6 +6932,7 @@ ONE_SIMPLE_ASG: if (lclVarTree->TypeGet() == TYP_STRUCT && (lvaTable[lclNum].lvPromoted || lclVarIsSIMDType(lclNum))) { + // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.) goto GENERAL_BLKOP; } @@ -7203,8 +7736,13 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree) { // Spill the (complex) address to a BYREF temp. // Note, at most one address may need to be spilled. 
- addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local")); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + lvaTable[addrSpillTemp].lvType = TYP_I_IMPL; + + tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_I_IMPL), + addrSpill); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING lvaTable[addrSpillTemp].lvType = TYP_BYREF; if (addrSpillIsStackDest) @@ -7214,6 +7752,8 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree) tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill); +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING + #ifndef LEGACY_BACKEND // If we are assigning the address of a LclVar here // liveness does not account for this kind of address taken use. @@ -9529,7 +10069,7 @@ COMPARE: case GT_ADD: -CM_OVF_OP: + CM_OVF_OP : if (tree->gtOverflow()) { tree->gtRequestSetFlags(); @@ -10906,7 +11446,9 @@ ASG_OP: if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0)) { if (tree->gtOverflow() || op1->gtOverflow()) + { break; + } ssize_t imul = op2->gtIntCon.gtIconVal; ssize_t iadd = add->gtIntCon.gtIconVal; @@ -12825,7 +13367,11 @@ void Compiler::fgMorphBlocks() //replace the GT_RETURN node to be a GT_ASG that stores the return value into genReturnLocal. 
if (genReturnLocal != BAD_VAR_NUM) { +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + noway_assert(info.compRetType != TYP_VOID); +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) noway_assert(info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT); +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) noway_assert(block->bbTreeList); GenTreePtr last = block->bbTreeList->gtPrev; @@ -13834,9 +14380,9 @@ void Compiler::fgPromoteStructs() break; } -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if (!varDsc->lvDontPromote) -#endif // _TARGET_ARM_ +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { #ifdef FEATURE_SIMD if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic) @@ -14154,6 +14700,8 @@ void Compiler::fgMarkImplicitByRefArgs() size = info.compCompHnd->getClassSize(typeHnd); } + +#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) #if defined(_TARGET_AMD64_) if (size > REGSIZE_BYTES || (size & (size - 1)) != 0) #elif defined(_TARGET_ARM64_) @@ -14184,6 +14732,7 @@ void Compiler::fgMarkImplicitByRefArgs() varDsc->lvKeepType = 1; #endif // DEBUG } +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING } } diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp index 839f497f4a..89945301f0 100644 --- a/src/jit/regalloc.cpp +++ b/src/jit/regalloc.cpp @@ -667,7 +667,7 @@ void Compiler::raSetupArgMasks(RegState *regState) #endif // LEGACY_BACKEND // The code to set the regState for each arg is outlined for shared use -// by linear scan +// by linear scan. (It is not shared for System V AMD64 platform.) 
regNumber Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) { regNumber inArgReg = argDsc->lvArgReg; diff --git a/src/jit/scopeinfo.cpp b/src/jit/scopeinfo.cpp index a108713792..53a5960967 100644 --- a/src/jit/scopeinfo.cpp +++ b/src/jit/scopeinfo.cpp @@ -909,21 +909,65 @@ void CodeGen::psiBegProlog() psiScope * newScope = psiNewPrologScope(varScope->vsdLVnum, varScope->vsdVarNum); - if (lclVarDsc1->lvIsRegArg) + if (lclVarDsc1->lvIsRegArg) { -#ifdef DEBUG - var_types regType = compiler->mangleVarArgsType(lclVarDsc1->TypeGet()); -#ifdef _TARGET_ARM_ - if (lclVarDsc1->lvIsHfaRegArg) + bool isStructHandled = false; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + if (lclVarDsc1->TypeGet() == TYP_STRUCT) { - regType = lclVarDsc1->GetHfaType(); + CORINFO_CLASS_HANDLE typeHnd = lclVarDsc1->lvVerTypeInfo.GetClassHandle(); + assert(typeHnd != nullptr); + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + assert(structDesc.passedInRegisters); + + for (unsigned nCnt = 0; nCnt < structDesc.eightByteCount; nCnt++) + { + unsigned len = structDesc.eightByteSizes[nCnt]; + var_types regType = TYP_UNDEF; + regNumber regNum = REG_NA; + if (nCnt == 0) + { + regNum = lclVarDsc1->lvArgReg; + } + else if (nCnt == 1) + { + regNum = lclVarDsc1->lvOtherArgReg; + } + else + { + assert(false && "Invalid eightbyte number."); + } + + regType = compiler->getEightByteType(structDesc, nCnt); +#ifdef DEBUG + regType = compiler->mangleVarArgsType(regType); + assert(genMapRegNumToRegArgNum(regNum, regType) != (unsigned)-1); +#endif // DEBUG + + newScope->scRegister = true; + newScope->u1.scRegNum = (regNumberSmall)regNum; + } + + isStructHandled = true; } +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (!isStructHandled) + { +#ifdef DEBUG + var_types regType = compiler->mangleVarArgsType(lclVarDsc1->TypeGet()); +#ifdef _TARGET_ARM_ + if (lclVarDsc1->lvIsHfaRegArg) + { 
+ regType = lclVarDsc1->GetHfaType(); + } #endif // _TARGET_ARM_ - assert(genMapRegNumToRegArgNum(lclVarDsc1->lvArgReg, regType) != (unsigned)-1); + assert(genMapRegNumToRegArgNum(lclVarDsc1->lvArgReg, regType) != (unsigned)-1); #endif // DEBUG - newScope->scRegister = true; - newScope->u1.scRegNum = (regNumberSmall) lclVarDsc1->lvArgReg; + newScope->scRegister = true; + newScope->u1.scRegNum = (regNumberSmall)lclVarDsc1->lvArgReg; + } } else { diff --git a/src/jit/target.h b/src/jit/target.h index f4aad4e153..767eb31d8d 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -19,6 +19,12 @@ #endif #endif +#if (defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX)) +#define FEATURE_VARARG 0 +#else // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX)) +#define FEATURE_VARARG 1 +#endif // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX)) + /*****************************************************************************/ // The following are intended to capture only those #defines that cannot be replaced // with static const members of Target @@ -971,10 +977,28 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_LNGRET REG_EAX #define RBM_LNGRET RBM_EAX +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + #define REG_INTRET_1 REG_RDX + #define RBM_INTRET_1 RBM_RDX + + #define REG_LNGRET_1 REG_RDX + #define RBM_LNGRET_1 RBM_RDX +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + + #define REG_FLOATRET REG_XMM0 #define RBM_FLOATRET RBM_XMM0 + #define REG_DOUBLERET REG_XMM0 #define RBM_DOUBLERET RBM_XMM0 +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +#define REG_FLOATRET_1 REG_XMM1 +#define RBM_FLOATRET_1 RBM_XMM1 + +#define REG_DOUBLERET_1 REG_XMM1 +#define RBM_DOUBLERET_1 RBM_XMM1 +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + #define REG_FPBASE REG_EBP #define RBM_FPBASE RBM_EBP #define STR_FPBASE "rbp" @@ -1872,7 +1896,7 @@ extern const regMaskSmall regMasks[REG_COUNT]; inline regMaskTP genRegMask(regNumber reg) { assert((unsigned)reg < ArrLen(regMasks)); -#if 
defined _TARGET_AMD64_ +#ifdef _TARGET_AMD64_ // shift is faster than a L1 hit on modern x86 // (L1 latency on sandy bridge is 4 cycles for [base] and 5 for [base + index*c] ) // the reason this is AMD-only is because the x86 BE will try to get reg masks for REG_STK |