74 files changed, 6735 insertions, 951 deletions
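This commit enables FEATURE_UNIX_AMD64_STRUCT_PASSING: structs of up to 16 bytes are split into one or two "eightbytes", each classified as INTEGER or SSE per the System V x86_64 ABI, and then passed or returned in GPRs or XMM registers accordingly. The JIT consumes that classification through the SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR introduced in src/inc/corinfo.h below. As a rough standalone sketch of the model, with hypothetical names (EightByteClass, Descriptor, classifyLongDouble are illustrative only, not the VM's actual classifier):

// Minimal sketch only, assuming the System V AMD64 classification rules.
// The real descriptor is SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR;
// the real classifier lives in the VM's type system.
#include <cstdio>

enum class EightByteClass { Integer, SSE };

struct Descriptor
{
    bool           passedInRegisters;
    unsigned       eightByteCount;
    EightByteClass eightByteClassifications[2];
    unsigned       eightByteSizes[2];
    unsigned       eightByteOffsets[2];
};

// A 16-byte struct such as { long l; double d; } splits into two eightbytes:
// eightbyte 0 is INTEGER (passed in a GPR such as RDI, returned in RAX),
// eightbyte 1 is SSE (passed/returned in XMM0).
Descriptor classifyLongDouble()
{
    Descriptor d{};              // zero-initialize all fields
    d.passedInRegisters = true;  // <= 16 bytes and register-classifiable
    d.eightByteCount    = 2;
    d.eightByteClassifications[0] = EightByteClass::Integer;
    d.eightByteSizes[0]   = 8;
    d.eightByteOffsets[0] = 0;
    d.eightByteClassifications[1] = EightByteClass::SSE;
    d.eightByteSizes[1]   = 8;
    d.eightByteOffsets[1] = 8;
    return d;
}

int main()
{
    Descriptor d = classifyLongDouble();
    for (unsigned i = 0; i < d.eightByteCount; i++)
    {
        std::printf("eightbyte %u: %s, size %u, offset %u\n",
                    i,
                    d.eightByteClassifications[i] == EightByteClass::Integer ? "INTEGER" : "SSE",
                    d.eightByteSizes[i],
                    d.eightByteOffsets[i]);
    }
    return 0;
}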
diff --git a/CMakeLists.txt b/CMakeLists.txt index 1be2864ecb..2ac0ebb07a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -345,6 +345,11 @@ endif (WIN32) endif (OVERRIDE_CMAKE_CXX_FLAGS) +if(CLR_CMAKE_PLATFORM_UNIX_TARGET_AMD64) +add_definitions(-DFEATURE_UNIX_AMD64_STRUCT_PASSING_ITF) +add_definitions(-DFEATURE_UNIX_AMD64_STRUCT_PASSING) +endif (CLR_CMAKE_PLATFORM_UNIX_TARGET_AMD64) + OPTION(CMAKE_ENABLE_CODE_COVERAGE "Enable code coverage" OFF) if(CMAKE_ENABLE_CODE_COVERAGE) diff --git a/src/debug/daccess/nidump.cpp b/src/debug/daccess/nidump.cpp index 44569d9874..c90c29f752 100644 --- a/src/debug/daccess/nidump.cpp +++ b/src/debug/daccess/nidump.cpp @@ -5678,7 +5678,12 @@ NativeImageDumper::EnumMnemonics s_MTFlagsLow[] = MTFLAG_ENTRY(HasVariance), MTFLAG_ENTRY(HasDefaultCtor), MTFLAG_ENTRY(HasPreciseInitCctors), +#if defined(FEATURE_HFA) MTFLAG_ENTRY(IsHFA), +#endif // FEATURE_HFA +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF) + MTFLAG_ENTRY(IsRegStructPassed), +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF MTFLAG_ENTRY(UNUSED_ComponentSize_4), MTFLAG_ENTRY(UNUSED_ComponentSize_5), MTFLAG_ENTRY(UNUSED_ComponentSize_6), diff --git a/src/inc/corinfo.h b/src/inc/corinfo.h index e0004a5948..cc2ce720b8 100644 --- a/src/inc/corinfo.h +++ b/src/inc/corinfo.h @@ -190,9 +190,10 @@ TODO: Talk about initializing strutures before use #include <specstrings.h> // For System V on the CLR type system number of registers to pass in and return a struct is the same. -#define SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS 2 -#define SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_RETURN_IN_REGISTERS SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS -#define SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS 16 +// The CLR type system allows only up to 2 eightbytes to be passed in registers. There are no SSEUP classification types. +#define CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS 2 +#define CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_RETURN_IN_REGISTERS 2 +#define CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS 16 // System V struct passing // The Classification types are described in the ABI spec at http://www.x86-64.org/documentation/abi.pdf @@ -212,7 +213,7 @@ enum SystemVClassificationType : unsigned __int8 SystemVClassificationTypeMAX = 7, }; - +// Represents classification information for a struct. struct SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR { SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR() @@ -220,19 +221,40 @@ struct SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR Initialize(); } - bool canPassInRegisters; - unsigned int eightByteCount; - SystemVClassificationType eightByteClassifications[SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS]; - unsigned int eightByteSizes[SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS]; - unsigned int eightByteOffsets[SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS]; + bool passedInRegisters; // Whether the struct is passable/passed in registers (this includes struct returns). + unsigned __int8 eightByteCount; // Number of eightbytes for this struct. + SystemVClassificationType eightByteClassifications[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS]; // The eightbytes type classification. + unsigned __int8 eightByteSizes[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS]; // The size of the eightbytes (an eightbyte could include padding; this is the size of the eightbyte without the padding). 
+ unsigned __int8 eightByteOffsets[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS]; // The start offset of the eightbytes (in bytes). + + + //------------------------------------------------------------------------ + // CopyFrom: Copies a struct classification into this one. + // + // Arguments: + // 'copyFrom' the struct classification to copy from. + // + void CopyFrom(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& copyFrom) + { + passedInRegisters = copyFrom.passedInRegisters; + eightByteCount = copyFrom.eightByteCount; + + for (int i = 0; i < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS; i++) + { + eightByteClassifications[i] = copyFrom.eightByteClassifications[i]; + eightByteSizes[i] = copyFrom.eightByteSizes[i]; + eightByteOffsets[i] = copyFrom.eightByteOffsets[i]; + } + } // Members +private: void Initialize() { - canPassInRegisters = false; + passedInRegisters = false; eightByteCount = 0; - for (int i = 0; i < SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS; i++) + for (int i = 0; i < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS; i++) { eightByteClassifications[i] = SystemVClassificationTypeUnknown; eightByteSizes[i] = 0; diff --git a/src/inc/winwrap.h b/src/inc/winwrap.h index a670a51de0..c0c43eb74c 100644 --- a/src/inc/winwrap.h +++ b/src/inc/winwrap.h @@ -854,9 +854,13 @@ InterlockedCompareExchangePointer ( // Interlockedxxx64 that do not have intrinsics are only supported on Windows Server 2003 // or higher for X86 so define our own portable implementation +#undef InterlockedIncrement64 #define InterlockedIncrement64 __InterlockedIncrement64 +#undef InterlockedDecrement64 #define InterlockedDecrement64 __InterlockedDecrement64 +#undef InterlockedExchange64 #define InterlockedExchange64 __InterlockedExchange64 +#undef InterlockedExchangeAdd64 #define InterlockedExchangeAdd64 __InterlockedExchangeAdd64 __forceinline LONGLONG __InterlockedIncrement64(LONGLONG volatile *Addend) diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 0828a160c9..ea3cce6cc8 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -3648,7 +3648,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, RegState *regState) { #ifdef DEBUG - if (verbose) + if (verbose) printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int"); #endif @@ -3678,6 +3678,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, struct { unsigned varNum; // index into compiler->lvaTable[] for this register argument +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + var_types type; // the Jit type of this regArgTab entry +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register. // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to // argument register number 'x'. Only used when circular = true. @@ -3691,18 +3694,20 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, unsigned varNum; LclVarDsc * varDsc; - for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; - varNum++ , varDsc++) + varNum++, varDsc++) { /* Is this variable a register arg? 
*/ - - if (!varDsc->lvIsParam) + if (!varDsc->lvIsParam) + { continue; + } - if (!varDsc->lvIsRegArg) + if (!varDsc->lvIsRegArg) + { continue; + } // When we have a promoted struct we have two possible LclVars that can represent the incoming argument // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField. @@ -3726,13 +3731,17 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, // For register arguments that are independent promoted structs we put the promoted field varNum in the regArgTab[] if (varDsc->lvPromoted) + { continue; + } } else { // For register arguments that are not independent promoted structs we put the parent struct varNum in the regArgTab[] if (varDsc->lvIsStructField) + { continue; + } } } @@ -3743,19 +3752,89 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, var_types regType = varDsc->TypeGet(); #endif // !_TARGET_ARM_ - if (isFloatRegType(regType) != doingFloat) - continue; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (regType != TYP_STRUCT) +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + // A struct might be passed partially in XMM register for System V calls. + // So a single arg might use both register files. + if (isFloatRegType(regType) != doingFloat) + { + continue; + } + } - /* Bingo - add it to our table */ - - regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType); - noway_assert(regArgNum < regState->rsCalleeRegArgNum); - noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better not be multiple vars representing this argument register) + int slots = 0; - regArgTab[regArgNum].varNum = varNum; - regArgTab[regArgNum].slot = 1; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + if (varDsc->TypeGet() == TYP_STRUCT) + { + CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); + assert(typeHnd != nullptr); + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + if (!structDesc.passedInRegisters) + { + // The var is not passed in registers. + continue; + } - int slots = 1; + unsigned firstRegSlot = 0; + for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++) + { + regNumber regNum = varDsc->lvRegNumForSlot(slotCounter); + + var_types regType = compiler->getEightByteType(structDesc, slotCounter); + + regArgNum = genMapRegNumToRegArgNum(regNum, regType); + + if ((!doingFloat && + ((structDesc.eightByteClassifications[slotCounter] == SystemVClassificationTypeInteger) || + (structDesc.eightByteClassifications[slotCounter] == SystemVClassificationTypeIntegerReference))) || + (doingFloat && structDesc.eightByteClassifications[slotCounter] == SystemVClassificationTypeSSE)) + { + // Store the reg for the first slot. + if (slots == 0) + { + firstRegSlot = regArgNum; + } + + // Bingo - add it to our table + noway_assert(regArgNum < regState->rsCalleeRegArgNum); + noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better not be multiple vars representing this argument register) + regArgTab[regArgNum].varNum = varNum; + regArgTab[regArgNum].slot = (char)(slotCounter + 1); + regArgTab[regArgNum].type = regType; + slots++; + } + } + + if (slots == 0) + { + continue; // Nothing to do for this regState set. 
+ } + + regArgNum = firstRegSlot; + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + // Bingo - add it to our table + regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType); + noway_assert(regArgNum < regState->rsCalleeRegArgNum); + // we better not have added it already (there better not be multiple vars representing this argument register) + noway_assert(regArgTab[regArgNum].slot == 0); + + // Set the register type. +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + regArgTab[regArgNum].type = regType; +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + + regArgTab[regArgNum].varNum = varNum; + regArgTab[regArgNum].slot = 1; + + slots = 1; + } #ifdef _TARGET_ARM_ int lclSize = compiler->lvaLclSize(varNum); @@ -3778,9 +3857,23 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, for (int i = 0; i < slots; i ++) { +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // For structs passed in registers on System V systems, + // get the regType from the table for each slot. + if (regType == TYP_STRUCT) + { + regType = regArgTab[regArgNum + i].type; + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType); - assert((i > 0) || (regNum == varDsc->lvArgReg)); + // lvArgReg could be INT or FLOAT reg. So the following assertion doesn't hold. + // The type of the register depends on the classification of the first eightbyte + // of the struct. For information on classification refer to the System V x86_64 ABI at: + // http://www.x86-64.org/documentation/abi.pdf +#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + assert((i > 0) || (regNum == varDsc->lvArgReg)); +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // Is the arg dead on entry to the method ? if ((regArgMaskLive & genRegMask(regNum)) == 0) @@ -3831,8 +3924,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, /* If it goes on the stack or in a register that doesn't hold * an argument anymore -> CANNOT form a circular dependency */ - if ( varDsc->lvIsInReg() && - (genRegMask(regNum) & regArgMaskLive) ) + if (varDsc->lvIsInReg() && + (genRegMask(regNum) & regArgMaskLive)) { /* will trash another argument -> possible dependency * We may need several passes after the table is constructed @@ -3841,22 +3934,33 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, /* Maybe the argument stays in the register (IDEAL) */ if ((i == 0) && (varDsc->lvRegNum == regNum)) + { goto NON_DEP; + } +#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if ((i == 1) && (varDsc->TypeGet() == TYP_STRUCT) && + (varDsc->lvOtherReg == regNum)) + { + goto NON_DEP; + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && - (varDsc->lvOtherReg == regNum)) + (varDsc->lvOtherReg == regNum)) + { goto NON_DEP; + } if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) && - (REG_NEXT(varDsc->lvRegNum) == regNum)) + (REG_NEXT(varDsc->lvRegNum) == regNum)) + { goto NON_DEP; - + } regArgTab[regArgNum+i].circular = true; } else { NON_DEP: - regArgTab[regArgNum+i].circular = false; /* mark the argument register as free */ @@ -3870,7 +3974,6 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */ bool change = true; - if (regArgMaskLive) { /* Possible circular dependencies still exist; the previous pass was not enough @@ -3882,15 +3985,20 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, for 
(argNum = 0; argNum < regState->rsCalleeRegArgNum; argNum++) { - /* If we already marked the argument as non-circular then continue */ + // If we already marked the argument as non-circular then continue if (!regArgTab[argNum].circular) + { continue; + } if (regArgTab[argNum].slot == 0) // Not a register argument + { continue; + } - varNum = regArgTab[argNum].varNum; noway_assert(varNum < compiler->lvaCount); + varNum = regArgTab[argNum].varNum; + noway_assert(varNum < compiler->lvaCount); varDsc = compiler->lvaTable + varNum; noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg); @@ -3899,11 +4007,19 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, noway_assert(!regArgTab[argNum].stackArg); regNumber regNum = genMapRegArgNumToRegNum(argNum, varDsc->TypeGet()); + regNumber destRegNum; if (regArgTab[argNum].slot == 1) { destRegNum = varDsc->lvRegNum; } +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + else + { + assert(regArgTab[argNum].slot == 2); + destRegNum = varDsc->lvOtherReg; + } +#else // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG) { @@ -3915,7 +4031,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, assert(varDsc->TypeGet() == TYP_DOUBLE); destRegNum = REG_NEXT(varDsc->lvRegNum); } - +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if (genRegMask(destRegNum) & regArgMaskLive) { /* we are trashing a live argument register - record it */ @@ -3949,33 +4065,47 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, } #endif - // TODO-AMD64-Bug? - homing of float argument registers with circular dependencies. -#ifdef _TARGET_AMD64_ - NYI_IF((regArgMaskLive & RBM_FLTARG_REGS) != 0, "Homing of float argument registers with circular dependencies not implemented"); -#endif // _TARGET_AMD64_ + // LSRA allocates registers to incoming parameters in order and will not overwrite + // a register still holding a live parameter. +#ifndef LEGACY_BACKEND + noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) && "Homing of float argument registers with circular dependencies not implemented."); +#endif // LEGACY_BACKEND /* Now move the arguments to their locations. * First consider ones that go on the stack since they may * free some registers. */ regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start - for (argNum = 0; argNum < regState->rsCalleeRegArgNum; argNum++) { emitAttr size; - /* If the arg is dead on entry to the method, skip it */ + // If this is the wrong register file, just continue. +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (regArgTab[argNum].type == TYP_UNDEF) + { + // This could happen if the reg in regArgTab[argNum] is of the other register file - + // for System V register passed structs where the first reg is GPR and the second an XMM reg. + // The next register file processing will process it. 
+ continue; + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // If the arg is dead on entry to the method, skip it if (regArgTab[argNum].processed) + { continue; + } if (regArgTab[argNum].slot == 0) // Not a register argument + { continue; + } varNum = regArgTab[argNum].varNum; noway_assert(varNum < compiler->lvaCount); varDsc = compiler->lvaTable + varNum; - /* If not a stack arg go to the next one */ + // If not a stack arg go to the next one #ifndef _TARGET_64BIT_ if (varDsc->lvType == TYP_LONG) @@ -3993,7 +4123,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, #endif // !_TARGET_64BIT_ { if (!regArgTab[argNum].stackArg) + { continue; + } } #if defined(_TARGET_ARM_) @@ -4021,10 +4153,15 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, { size = EA_SIZE(varDsc->lvSize()); #if defined(_TARGET_AMD64_) - storeType = (var_types) ((size <= 4) ? TYP_INT : TYP_I_IMPL); +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING + storeType = (var_types)((size <= 4) ? TYP_INT : TYP_I_IMPL); // Must be 1, 2, 4, or 8, or else it wouldn't be passed in a register noway_assert(EA_SIZE_IN_BYTES(size) <= 8); assert((EA_SIZE_IN_BYTES(size) & (EA_SIZE_IN_BYTES(size) - 1)) == 0); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING + storeType = regArgTab[argNum].type; + size = emitActualTypeSize(storeType); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #elif defined(_TARGET_ARM64_) // Must be <= 16 bytes or else it wouldn't be passed in registers noway_assert(EA_SIZE_IN_BYTES(size) <= 16); @@ -4060,7 +4197,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType); - /* Stack argument - if the ref count is 0 don't care about it */ + // Stack argument - if the ref count is 0 don't care about it if (!varDsc->lvOnFrame) { @@ -4084,6 +4221,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, regArgTab[argNum].processed = true; regArgMaskLive &= ~genRegMask(srcRegNum); + #if defined(_TARGET_ARM_) if (storeType == TYP_DOUBLE) { @@ -4094,7 +4232,6 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, } /* Process any circular dependencies */ - if (regArgMaskLive) { unsigned begReg, destReg, srcReg; @@ -4105,21 +4242,39 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, if (doingFloat) { +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) #if defined(_TARGET_ARM_) insCopy = INS_vmov; - +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + insCopy = INS_mov; +#else +#error Error. Wrong architecture. +#endif // Compute xtraReg here when we have a float argument assert(xtraReg == REG_NA); regMaskTP fpAvailMask; fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive; +#if defined(_TARGET_ARM_) fpAvailMask &= RBM_DBL_REGS; +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + fpAvailMask &= RBM_ALLFLOAT; +#else +#error Error. Wrong architecture. +#endif + if (fpAvailMask == RBM_NONE) { fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive; +#if defined(_TARGET_ARM_) fpAvailMask &= RBM_DBL_REGS; +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + fpAvailMask &= RBM_ALLFLOAT; +#else +#error Error. Wrong architecture. 
+#endif } assert(fpAvailMask != RBM_NONE); @@ -4135,23 +4290,30 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, for (argNum = 0; argNum < regState->rsCalleeRegArgNum; argNum++) { - /* If not a circular dependency then continue */ - + // If not a circular dependency then continue if (!regArgTab[argNum].circular) + { continue; + } - /* If already processed the dependency then continue */ + // If already processed the dependency then continue if (regArgTab[argNum].processed) + { continue; + } if (regArgTab[argNum].slot == 0) // Not a register argument + { continue; - + } + destReg = begReg = argNum; - srcReg = regArgTab[argNum].trashBy; noway_assert(srcReg < regState->rsCalleeRegArgNum); + srcReg = regArgTab[argNum].trashBy; + noway_assert(srcReg < regState->rsCalleeRegArgNum); - varNumDest = regArgTab[destReg].varNum; noway_assert(varNumDest < compiler->lvaCount); + varNumDest = regArgTab[destReg].varNum; + noway_assert(varNumDest < compiler->lvaCount); varDscDest = compiler->lvaTable + varNumDest; noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg); @@ -4376,6 +4538,18 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, varDsc = compiler->lvaTable + varNum; regNumber regNum = genMapRegArgNumToRegNum(argNum, varDsc->TypeGet()); + // If this is the wrong register file, just continue. +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (regArgTab[argNum].type == TYP_UNDEF) + { + // This could happen if the reg in regArgTab[argNum] is of the other register file - + // for System V register passed structs where the first reg is GPR and the second an XMM reg. + // The next register file processing will process it. + regArgMaskLive &= ~genRegMask(regNum); + continue; + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg); #ifndef _WIN64 //Right now we think that incoming arguments are not pointer sized. When we eventually @@ -4506,7 +4680,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, } #endif } - + noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop } } @@ -6729,12 +6903,14 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, regNumber argReg = varDsc->lvArgReg; getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0); +#if FEATURE_VARARG if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType)) { regNumber intArgReg = compiler->getCallArgIntRegister(argReg); instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG); inst_RV_RV(ins, argReg, intArgReg, loadType); } +#endif // FEATURE_VARARG } // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using. 
@@ -8495,6 +8671,7 @@ void CodeGen::genFnProlog() #endif // !LEGACY_BACKEND RegState *regState; + FOREACH_REGISTER_FILE(regState) { if (regState->rsCalleeRegArgMaskLiveIn) @@ -10789,8 +10966,8 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize) //------------------------------------------------------------------------ // ARM-specific methods used by both the classic and RyuJIT //------------------------------------------------------------------------ -#ifdef _TARGET_ARM_ -CORINFO_CLASS_HANDLE Compiler::GetHfaClassHandle(GenTreePtr tree) +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +CORINFO_CLASS_HANDLE Compiler::GetStructClassHandle(GenTreePtr tree) { if (tree->TypeGet() == TYP_STRUCT) { @@ -10809,7 +10986,7 @@ CORINFO_CLASS_HANDLE Compiler::GetHfaClassHandle(GenTreePtr tree) case GT_RETURN: assert(tree->gtOp.gtOp1->gtOper == GT_LCL_VAR); - return GetHfaClassHandle(tree->gtOp.gtOp1); + return GetStructClassHandle(tree->gtOp.gtOp1); case GT_LDOBJ: return tree->gtLdObj.gtClass; @@ -10823,15 +11000,35 @@ CORINFO_CLASS_HANDLE Compiler::GetHfaClassHandle(GenTreePtr tree) case GT_ASG: assert(tree->gtOp.gtOp1->gtOper == GT_LCL_VAR || tree->gtOp.gtOp1->gtOper == GT_LCL_FLD); - return GetHfaClassHandle(tree->gtOp.gtOp1); - + return GetStructClassHandle(tree->gtOp.gtOp1); default: - unreached(); + return NO_CLASS_HANDLE; } } return NO_CLASS_HANDLE; } +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +bool Compiler::IsRegisterPassable(CORINFO_CLASS_HANDLE hClass) +{ + if (hClass == NO_CLASS_HANDLE) + { + return false; + } + + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + eeGetSystemVAmd64PassStructInRegisterDescriptor(hClass, &structDesc); + return structDesc.passedInRegisters; +} +bool Compiler::IsRegisterPassable(GenTreePtr tree) +{ + return IsRegisterPassable(GetStructClassHandle(tree)); +} +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + +#ifdef _TARGET_ARM_ bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass) { return varTypeIsFloating(GetHfaType(hClass)); @@ -10839,12 +11036,12 @@ bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass) bool Compiler::IsHfa(GenTreePtr tree) { - return IsHfa(GetHfaClassHandle(tree)); + return IsHfa(GetStructClassHandle(tree)); } var_types Compiler::GetHfaType(GenTreePtr tree) { - return (tree->TypeGet() == TYP_STRUCT) ? GetHfaType(GetHfaClassHandle(tree)) : TYP_UNDEF; + return (tree->TypeGet() == TYP_STRUCT) ? 
GetHfaType(GetStructClassHandle(tree)) : TYP_UNDEF; } unsigned Compiler::GetHfaSlots(GenTreePtr tree) diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp index e37322d3b4..0914f7d7d6 100644 --- a/src/jit/codegenlegacy.cpp +++ b/src/jit/codegenlegacy.cpp @@ -12870,7 +12870,7 @@ void CodeGen::genCodeForBBlist() genStackLevel = 0; #if FEATURE_STACK_FP_X87 genResetFPstkLevel(); -#endif //FEATURE_STACK_FP_X87 +#endif // FEATURE_STACK_FP_X87 #if !FEATURE_FIXED_OUT_ARGS /* Check for inserted throw blocks and adjust genStackLevel */ diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h index 57eac7ced4..6a030eb926 100644 --- a/src/jit/codegenlinear.h +++ b/src/jit/codegenlinear.h @@ -103,6 +103,10 @@ void genConsumeBlockOp(GenTreeBlkOp* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + void genConsumePutArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + void genConsumeRegs(GenTree* tree); void genConsumeOperands(GenTreeOp* tree); @@ -126,6 +130,11 @@ void genCodeForCpBlkUnroll (GenTreeCpBlk* cpBlkNode); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + void genCodeForPutArgRepMovs(GenTreePutArgStk* putArgStkNode); + void genCodeForPutArgUnroll(GenTreePutArgStk* putArgStkNode); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + void genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset); void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset); @@ -150,6 +159,18 @@ void genJmpMethod(GenTreePtr jmp); +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + void genGetStructTypeSizeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc, + var_types* type0, + var_types* type1, + emitAttr* size0, + emitAttr* size1, + unsigned __int8* offset0, + unsigned __int8* offset1); + + bool genStoreRegisterReturnInLclVar(GenTreePtr treeNode); +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + void genLclHeap(GenTreePtr tree); bool genIsRegCandidateLocal (GenTreePtr tree) diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 076ba7c262..7064862c4c 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -785,7 +785,6 @@ void CodeGen::genCodeForBBlist() #endif /* Both stacks should always be empty on exit from a basic block */ - noway_assert(genStackLevel == 0); #ifdef _TARGET_AMD64_ @@ -1571,6 +1570,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED)) { assert(!isRegCandidate); + emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0); genProduceReg(treeNode); @@ -1618,85 +1618,98 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_STORE_LCL_FLD: { - noway_assert(targetType != TYP_STRUCT); - noway_assert(!treeNode->InReg()); - assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (!genStoreRegisterReturnInLclVar(treeNode)) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + { + noway_assert(targetType != TYP_STRUCT); + noway_assert(!treeNode->InReg()); + assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); #ifdef FEATURE_SIMD - // storing of TYP_SIMD12 (i.e. 
Vector3) field - if (treeNode->TypeGet() == TYP_SIMD12) - { - genStoreLclFldTypeSIMD12(treeNode); - break; - } + // storing of TYP_SIMD12 (i.e. Vector3) field + if (treeNode->TypeGet() == TYP_SIMD12) + { + genStoreLclFldTypeSIMD12(treeNode); + break; + } #endif - GenTreePtr op1 = treeNode->gtOp.gtOp1; - genConsumeRegs(op1); - emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1); + GenTreePtr op1 = treeNode->gtOp.gtOp1; + genConsumeRegs(op1); + emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1); + } } break; case GT_STORE_LCL_VAR: { - noway_assert(targetType != TYP_STRUCT); - assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (!genStoreRegisterReturnInLclVar(treeNode)) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + { + noway_assert(targetType != TYP_STRUCT); + assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); - unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum; - LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); + unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum; + LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); - // Ensure that lclVar nodes are typed correctly. - assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet())); + // Ensure that lclVar nodes are typed correctly. + assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet())); #if !defined(_TARGET_64BIT_) - if (treeNode->TypeGet() == TYP_LONG) - { - genStoreLongLclVar(treeNode); - break; - } + if (treeNode->TypeGet() == TYP_LONG) + { + genStoreLongLclVar(treeNode); + break; + } #endif // !defined(_TARGET_64BIT_) - GenTreePtr op1 = treeNode->gtOp.gtOp1; - genConsumeRegs(op1); - if (treeNode->gtRegNum == REG_NA) - { - // stack store - emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), emitTypeSize(treeNode), treeNode); - varDsc->lvRegNum = REG_STK; - } - else - { - bool containedOp1 = op1->isContained(); - // Look for the case where we have a constant zero which we've marked for reuse, - // but which isn't actually in the register we want. In that case, it's better to create - // zero in the target register, because an xor is smaller than a copy. Note that we could - // potentially handle this in the register allocator, but we can't always catch it there - // because the target may not have a register allocated for it yet. - if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) && op1->IsZero()) + GenTreePtr op1 = treeNode->gtOp.gtOp1; + genConsumeRegs(op1); + + if (treeNode->gtRegNum == REG_NA) { - op1->gtRegNum = REG_NA; - op1->ResetReuseRegVal(); - containedOp1 = true; + // stack store + emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), emitTypeSize(treeNode), treeNode); + varDsc->lvRegNum = REG_STK; } - if (containedOp1) + else { - // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register - // must be a constant. However, in the future we might want to support a contained memory op. - // This is a bit tricky because we have to decide it's contained before register allocation, - // and this would be a case where, once that's done, we need to mark that node as always - // requiring a register - which we always assume now anyway, but once we "optimize" that - // we'll have to take cases like this into account. 
- assert((op1->gtRegNum == REG_NA) && op1->OperIsConst()); - genSetRegToConst(treeNode->gtRegNum, targetType, op1); + bool containedOp1 = op1->isContained(); + // Look for the case where we have a constant zero which we've marked for reuse, + // but which isn't actually in the register we want. In that case, it's better to create + // zero in the target register, because an xor is smaller than a copy. Note that we could + // potentially handle this in the register allocator, but we can't always catch it there + // because the target may not have a register allocated for it yet. + if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) && op1->IsZero()) + { + op1->gtRegNum = REG_NA; + op1->ResetReuseRegVal(); + containedOp1 = true; + } + if (containedOp1) + { + // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register + // must be a constant. However, in the future we might want to support a contained memory op. + // This is a bit tricky because we have to decide it's contained before register allocation, + // and this would be a case where, once that's done, we need to mark that node as always + // requiring a register - which we always assume now anyway, but once we "optimize" that + // we'll have to take cases like this into account. + assert((op1->gtRegNum == REG_NA) && op1->OperIsConst()); + genSetRegToConst(treeNode->gtRegNum, targetType, op1); + } + else if (op1->gtRegNum != treeNode->gtRegNum) + { + assert(op1->gtRegNum != REG_NA); + emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1); + } } - else if (op1->gtRegNum != treeNode->gtRegNum) + if (treeNode->gtRegNum != REG_NA) { - assert(op1->gtRegNum != REG_NA); - emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1); + genProduceReg(treeNode); } } - if (treeNode->gtRegNum != REG_NA) - genProduceReg(treeNode); } break; @@ -1717,6 +1730,15 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) GenTreePtr op1 = treeNode->gtOp.gtOp1; if (targetType == TYP_VOID) { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (compiler->info.compRetBuffArg != BAD_VAR_NUM) + { + // System V AMD64 spec requires that when a struct is returned by a hidden + // argument the RAX should contain the value of the hidden retbuf arg. + emit->emitIns_R_S(INS_mov, EA_BYREF, REG_RAX, compiler->info.compRetBuffArg, 0); + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + assert(op1 == nullptr); } #if !defined(_TARGET_64BIT_) @@ -1742,53 +1764,233 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) #endif // !defined(_TARGET_64BIT_) else { - assert(op1 != nullptr); - noway_assert(op1->gtRegNum != REG_NA); - - // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has - // consumed a reg for the operand. This is because the variable - // is dead after return. But we are issuing more instructions - // like "profiler leave callback" after this consumption. So - // if you are issuing more instructions after this point, - // remember to keep the variable live up until the new method - // exit point where it is actually dead. - genConsumeReg(op1); - - regNumber retReg = varTypeIsFloating(treeNode) ? 
REG_FLOATRET : REG_INTRET; -#ifdef _TARGET_X86_ - if (varTypeIsFloating(treeNode)) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (treeNode->TypeGet() == TYP_STRUCT && + treeNode->gtOp.gtOp1->OperGet() == GT_LCL_VAR) { - if (genIsRegCandidateLocal(op1) && !compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegister) + GenTreeLclVarCommon* lclVarPtr = treeNode->gtOp.gtOp1->AsLclVarCommon(); + LclVarDsc* varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]); + assert(varDsc->lvDontPromote); + + CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); + assert(typeHnd != nullptr); + + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + assert(structDesc.passedInRegisters); + assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + + regNumber retReg0 = REG_NA; + emitAttr size0 = EA_UNKNOWN; + unsigned offset0 = structDesc.eightByteOffsets[0]; + regNumber retReg1 = REG_NA; + emitAttr size1 = EA_UNKNOWN; + unsigned offset1 = structDesc.eightByteOffsets[1]; + + bool firstIntUsed = false; + bool firstFloatUsed = false; + + var_types type0 = TYP_UNKNOWN; + var_types type1 = TYP_UNKNOWN; + + // Set the first eightbyte data + switch (structDesc.eightByteClassifications[0]) { - // Store local variable to its home location, if necessary. - if ((op1->gtFlags & GTF_REG_VAL) != 0) + case SystemVClassificationTypeInteger: + if (structDesc.eightByteSizes[0] <= 4) + { + retReg0 = REG_INTRET; + size0 = EA_4BYTE; + type0 = TYP_INT; + firstIntUsed = true; + } + else if (structDesc.eightByteSizes[0] <= 8) + { + retReg0 = REG_LNGRET; + size0 = EA_8BYTE; + type0 = TYP_LONG; + firstIntUsed = true; + } + else + { + assert(false && "Bad int type."); + } + break; + case SystemVClassificationTypeIntegerReference: + assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES); + retReg0 = REG_LNGRET; + size0 = EA_GCREF; + type0 = TYP_REF; + firstIntUsed = true; + break; + case SystemVClassificationTypeSSE: + if (structDesc.eightByteSizes[0] <= 4) + { + retReg0 = REG_FLOATRET; + size0 = EA_4BYTE; + type0 = TYP_FLOAT; + firstFloatUsed = true; + } + else if (structDesc.eightByteSizes[0] <= 8) + { + retReg0 = REG_DOUBLERET; + size0 = EA_8BYTE; + type0 = TYP_DOUBLE; + firstFloatUsed = true; + } + else { - op1->gtFlags &= ~GTF_REG_VAL; - inst_TT_RV(ins_Store(op1->gtType, compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)), op1, op1->gtRegNum); + assert(false && "Bad float type."); // Not possible. } - // Now, load it to the fp stack. - getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0); + break; + default: + assert(false && "Bad EightByte classification."); + break; } - else + + // Set the second eight byte data + switch (structDesc.eightByteClassifications[1]) { - // Spill the value, which should be in a register, then load it to the fp stack. - // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet). - op1->gtFlags |= GTF_SPILL; - regSet.rsSpillTree(op1->gtRegNum, op1); - op1->gtFlags |= GTF_SPILLED; - op1->gtFlags &= ~GTF_SPILL; - - TempDsc* t = regSet.rsUnspillInPlace(op1); - inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0); - op1->gtFlags &= ~GTF_SPILLED; - compiler->tmpRlsTemp(t); + case SystemVClassificationTypeInteger: + if (structDesc.eightByteSizes[1] <= 4) + { + if (firstIntUsed) + { + retReg1 = REG_INTRET_1; + } + else + { + retReg1 = REG_INTRET; + } + type1 = TYP_INT; + size1 = EA_4BYTE; + } + else if (structDesc.eightByteSizes[1] <= 8) + { + if (firstIntUsed) + { + retReg1 = REG_LNGRET_1; + } + else + { + retReg1 = REG_LNGRET; + } + type1 = TYP_LONG; + size1 = EA_8BYTE; + } + else + { + assert(false && "Bad int type."); + } + break; + case SystemVClassificationTypeIntegerReference: + assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES); + if (firstIntUsed) + { + retReg1 = REG_LNGRET_1; + } + else + { + retReg1 = REG_LNGRET; + } + type1 = TYP_REF; + size1 = EA_GCREF; + break; + case SystemVClassificationTypeSSE: + if (structDesc.eightByteSizes[1] <= 4) + { + if (firstFloatUsed) + { + retReg1 = REG_FLOATRET_1; + } + else + { + retReg1 = REG_FLOATRET; + } + type1 = TYP_FLOAT; + size1 = EA_4BYTE; + } + else if (structDesc.eightByteSizes[1] <= 8) + { + if (firstFloatUsed) + { + retReg1 = REG_DOUBLERET_1; + } + else + { + retReg1 = REG_DOUBLERET; + } + type1 = TYP_DOUBLE; + size1 = EA_8BYTE; + } + else + { + assert(false && "Bad float type."); // Not possible. + } + break; + default: + assert(false && "Bad EightByte classification."); + break; } + + // Move the values into the return registers. + // + emit->emitIns_R_S(ins_Load(type0), size0, retReg0, lclVarPtr->gtLclNum, offset0); + emit->emitIns_R_S(ins_Load(type1), size1, retReg1, lclVarPtr->gtLclNum, offset1); } else -#endif // _TARGET_X86_ - if (op1->gtRegNum != retReg) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { - inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType); + assert(op1 != nullptr); + noway_assert(op1->gtRegNum != REG_NA); + + // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has + // consumed a reg for the operand. This is because the variable + // is dead after return. But we are issuing more instructions + // like "profiler leave callback" after this consumption. So + // if you are issuing more instructions after this point, + // remember to keep the variable live up until the new method + // exit point where it is actually dead. + genConsumeReg(op1); + + regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET; +#ifdef _TARGET_X86_ + if (varTypeIsFloating(treeNode)) + { + if (genIsRegCandidateLocal(op1) && !compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegister) + { + // Store local variable to its home location, if necessary. + if ((op1->gtFlags & GTF_REG_VAL) != 0) + { + op1->gtFlags &= ~GTF_REG_VAL; + inst_TT_RV(ins_Store(op1->gtType, compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)), op1, op1->gtRegNum); + } + // Now, load it to the fp stack. + getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0); + } + else + { + // Spill the value, which should be in a register, then load it to the fp stack. + // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet). + op1->gtFlags |= GTF_SPILL; + regSet.rsSpillTree(op1->gtRegNum, op1); + op1->gtFlags |= GTF_SPILLED; + op1->gtFlags &= ~GTF_SPILL; + + TempDsc* t = regSet.rsUnspillInPlace(op1); + inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0); + op1->gtFlags &= ~GTF_SPILLED; + compiler->tmpRlsTemp(t); + } + } + else +#endif // _TARGET_X86_ + { + if (op1->gtRegNum != retReg) + { + inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType); + } + } } } @@ -2468,6 +2670,14 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) genPutArgStk(treeNode); #else // !_TARGET_X86_ { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + + if (targetType == TYP_STRUCT) + { + genPutArgStk(treeNode); + break; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING noway_assert(targetType != TYP_STRUCT); assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); @@ -2536,8 +2746,9 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_PUTARG_REG: { +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING noway_assert(targetType != TYP_STRUCT); - +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING // commas show up here commonly, as part of a nullchk operation GenTree *op1 = treeNode->gtOp.gtOp1; // If child node is not already in the register we need, move it @@ -2546,8 +2757,8 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) { inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType); } + genProduceReg(treeNode); } - genProduceReg(treeNode); break; case GT_CALL: @@ -2767,6 +2978,198 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) } } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +// This method handles storing a struct value that is returned in two registers +// to the local variable's home stack location. +// It returns true if this is a struct and storing of the returned +// register value is handled. It returns false otherwise. +bool +CodeGen::genStoreRegisterReturnInLclVar(GenTreePtr treeNode) +{ + if (treeNode->TypeGet() == TYP_STRUCT) + { + noway_assert(!treeNode->InReg()); + + GenTreeLclVarCommon* lclVarPtr = treeNode->AsLclVarCommon(); + + LclVarDsc * varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]); + + CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); + assert(typeHnd != nullptr); + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + + assert(structDesc.passedInRegisters); + assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + + GenTreePtr op1 = treeNode->gtOp.gtOp1; + genConsumeRegs(op1); + + regNumber retReg0 = REG_NA; + emitAttr size0 = EA_UNKNOWN; + unsigned offset0 = structDesc.eightByteOffsets[0]; + regNumber retReg1 = REG_NA; + emitAttr size1 = EA_UNKNOWN; + unsigned offset1 = structDesc.eightByteOffsets[1]; + + bool firstIntUsed = false; + bool firstFloatUsed = false; + + var_types type0 = TYP_UNKNOWN; + var_types type1 = TYP_UNKNOWN; + + // Set the first eightbyte data + switch (structDesc.eightByteClassifications[0]) + { + case SystemVClassificationTypeInteger: + if (structDesc.eightByteSizes[0] <= 4) + { + retReg0 = REG_INTRET; + size0 = EA_4BYTE; + type0 = TYP_INT; + firstIntUsed = true; + } + else if (structDesc.eightByteSizes[0] <= 8) + { + retReg0 = REG_LNGRET; + size0 = EA_8BYTE; + type0 = TYP_LONG; + firstIntUsed = true; + } + else + { + assert(false && "Bad int type."); + } + break; + case SystemVClassificationTypeIntegerReference: + assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES); + retReg0 = REG_LNGRET; + size0 = EA_GCREF; + type0 = TYP_REF; + firstIntUsed = true; + break; + case SystemVClassificationTypeSSE: + if (structDesc.eightByteSizes[0] <= 4) + { + retReg0 = REG_FLOATRET; + size0 = EA_4BYTE; + type0 = TYP_FLOAT; + firstFloatUsed = true; + } + else if (structDesc.eightByteSizes[0] <= 8) + { + retReg0 = REG_DOUBLERET; + size0 = EA_8BYTE; + type0 = TYP_DOUBLE; + firstFloatUsed = true; + } + else + { + assert(false && "Bad float type."); // Not possible. + } + break; + default: + assert(false && "Bad EightByte classification."); + break; + } + + // Set the second eight byte data + switch (structDesc.eightByteClassifications[1]) + { + case SystemVClassificationTypeInteger: + if (structDesc.eightByteSizes[1] <= 4) + { + if (firstIntUsed) + { + retReg1 = REG_INTRET_1; + } + else + { + retReg1 = REG_INTRET; + } + type1 = TYP_INT; + size1 = EA_4BYTE; + } + else if (structDesc.eightByteSizes[1] <= 8) + { + if (firstIntUsed) + { + retReg1 = REG_LNGRET_1; + } + else + { + retReg1 = REG_LNGRET; + } + type1 = TYP_LONG; + size1 = EA_8BYTE; + } + else + { + assert(false && "Bad int type."); + } + break; + case SystemVClassificationTypeIntegerReference: + assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES); + if (firstIntUsed) + { + retReg1 = REG_LNGRET_1; + } + else + { + retReg1 = REG_LNGRET; + } + type1 = TYP_REF; + size1 = EA_GCREF; + break; + case SystemVClassificationTypeSSE: + if (structDesc.eightByteSizes[1] <= 4) + { + if (firstFloatUsed) + { + retReg1 = REG_FLOATRET_1; + } + else + { + retReg1 = REG_FLOATRET; + } + type1 = TYP_FLOAT; + size1 = EA_4BYTE; + } + else if (structDesc.eightByteSizes[1] <= 8) + { + if (firstFloatUsed) + { + retReg1 = REG_DOUBLERET_1; + } + else + { + retReg1 = REG_DOUBLERET; + } + type1 = TYP_DOUBLE; + size1 = EA_8BYTE; + } + else + { + assert(false && "Bad float type."); // Not possible. + } + break; + default: + assert(false && "Bad EightByte classification."); + break; + } + + // Store the values from the return registers into the local variable's stack slots. + // + + getEmitter()->emitIns_S_R(ins_Store(type0), size0, retReg0, lclVarPtr->gtLclNum, offset0); + getEmitter()->emitIns_S_R(ins_Store(type1), size1, retReg1, lclVarPtr->gtLclNum, offset1); + + return true; + } + + return false; +} +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING // Generate code for division (or mod) by power of two // or negative powers of two. (meaning -1 * a power of two, not 2^(-1)) @@ -3366,40 +3769,55 @@ void CodeGen::genCodeForInitBlk(GenTreeInitBlk* initBlkNode) // Generate code for a load from some address + offset -// base: tree node which can be either a local address or arbitrary node -// offset: distance from the base from which to load -void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset) +// baseNode: tree node which can be either a local address or arbitrary node +// offset: distance from the baseNode from which to load +void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset) { emitter *emit = getEmitter(); - if (base->OperIsLocalAddr()) { - if (base->gtOper == GT_LCL_FLD_ADDR) - offset += base->gtLclFld.gtLclOffs; - emit->emitIns_R_S(ins, size, dst, base->gtLclVarCommon.gtLclNum, offset); + if (baseNode->OperIsLocalAddr()) { + if (baseNode->gtOper == GT_LCL_FLD_ADDR) + offset += baseNode->gtLclFld.gtLclOffs; + emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset); } else { - emit->emitIns_R_AR(ins, size, dst, base->gtRegNum, offset); + emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset); } } // Generate code for a store to some address + offset -// base: tree node which can be either a local address or arbitrary node -// offset: distance from the base from which to load -void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset) +// baseNode: tree node which can be either a local address or arbitrary node +// offset: distance from the baseNode from which to load +void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset) { emitter *emit = getEmitter(); - if (base->OperIsLocalAddr()) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (baseNode->OperGet() == GT_PUTARG_STK) { - if (base->gtOper == GT_LCL_FLD_ADDR) - offset += base->gtLclFld.gtLclOffs; - emit->emitIns_S_R(ins, size, src, base->gtLclVarCommon.gtLclNum, offset); + GenTreePutArgStk* putArgStkNode = baseNode->AsPutArgStk(); + assert(putArgStkNode->gtOp.gtOp1->isContained()); + assert(putArgStkNode->gtOp.gtOp1->gtOp.gtOper == GT_LDOBJ); + + emit->emitIns_S_R(ins, size, src, compiler->lvaOutgoingArgSpaceVar, + (putArgStkNode->gtSlotNum * TARGET_POINTER_SIZE) + offset); } else +#endif // #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING { - emit->emitIns_AR_R(ins, size, src, base->gtRegNum, offset); + + if (baseNode->OperIsLocalAddr()) + { + if (baseNode->gtOper == GT_LCL_FLD_ADDR) + offset += baseNode->gtLclFld.gtLclOffs; + emit->emitIns_S_R(ins, size, src, baseNode->gtLclVarCommon.gtLclNum, offset); + } + else + { + emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset); + } } } @@ -3523,6 +3941,126 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeCpBlk* cpBlkNode) instGen(INS_r_movsb); } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +// Generates PutArg code by performing a loop unroll + // + // 
TODO-Amd64-Unix: Try to share code with copyblk. +// The difference for now is thethe putarg_stk contains it's children, while cpyblk not. +// This creates differences in code. After some significant refactoring it could be reused. +void CodeGen::genCodeForPutArgUnroll(GenTreePutArgStk* putArgNode) +{ + // Make sure we got the arguments of the cpblk operation in the right registers + GenTreePtr dstAddr = putArgNode; + GenTreePtr srcAddr = putArgNode->gtOp.gtOp1; + + size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE; + assert(size <= CPBLK_UNROLL_LIMIT); + + emitter *emit = getEmitter(); + + assert(srcAddr->isContained()); + assert(srcAddr->gtOper == GT_LDOBJ); + + if (!srcAddr->gtOp.gtOp1->isContained()) + { + genConsumeReg(srcAddr->gtOp.gtOp1); + } + + unsigned offset = 0; + + // If the size of this struct is larger than 16 bytes + // let's use SSE2 to be able to do 16 byte at a time + // loads and stores. + if (size >= XMM_REGSIZE_BYTES) + { + assert(putArgNode->gtRsvdRegs != RBM_NONE); + regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT); + assert(genIsValidFloatReg(xmmReg)); + size_t slots = size / XMM_REGSIZE_BYTES; + + while (slots-- > 0) + { + // Load + genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr->gtOp.gtOp1, offset); // Load the address of the child of the LdObj node. + // Store + genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset); + offset += XMM_REGSIZE_BYTES; + } + } + + // Fill the remainder (15 bytes or less) if there's one. + if ((size & 0xf) != 0) + { + // Grab the integer temp register to emit the remaining loads and stores. + regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT); + + if ((size & 8) != 0) + { +#ifdef _TARGET_X86_ + // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs. + for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4) + { + genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset); + genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset); + } +#else // !_TARGET_X86_ + genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset); + genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset); + offset += 8; +#endif // !_TARGET_X86_ + } + if ((size & 4) != 0) + { + genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset); + genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset); + offset += 4; + } + if ((size & 2) != 0) + { + genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset); + genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset); + offset += 2; + } + if ((size & 1) != 0) + { + genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset); + genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset); + } + } +} + +// Generate code for CpBlk by using rep movs +// Preconditions: +// The size argument of the PutArgStk (for structs) is a constant and is between +// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes. +void CodeGen::genCodeForPutArgRepMovs(GenTreePutArgStk* putArgNode) +{ + + // Make sure we got the arguments of the cpblk operation in the right registers + GenTreePtr dstAddr = putArgNode; + GenTreePtr srcAddr = putArgNode->gtOp.gtOp1; +#ifdef DEBUG + size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE; +#endif // DEBUG + + // Validate state. 
+ assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI)); + +#ifdef DEBUG + assert(srcAddr->isContained()); + +#ifdef _TARGET_AMD64_ + assert(size > CPBLK_UNROLL_LIMIT); +#else + assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT); +#endif + +#endif // DEBUG + genConsumePutArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX); + instGen(INS_r_movsb); +} +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + // Generate code for CpObj nodes wich copy structs that have interleaved // GC pointers. // This will generate a sequence of movsq instructions for the cases of non-gc members @@ -3686,7 +4224,7 @@ void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode) { #ifdef _TARGET_AMD64_ // Make sure we got the arguments of the cpblk operation in the right registers - GenTreePtr blockSize = cpBlkNode->Size(); + GenTreePtr blockSize = cpBlkNode->Size(); GenTreePtr dstAddr = cpBlkNode->Dest(); GenTreePtr srcAddr = cpBlkNode->Source(); @@ -3705,7 +4243,7 @@ void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode) genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN); #else // !_TARGET_AMD64_ - NYI_X86("Helper call for CpBlk"); + noway_assert(false && "Helper call for CpBlk is not needed."); #endif // !_TARGET_AMD64_ } @@ -4558,7 +5096,9 @@ regNumber CodeGen::genConsumeReg(GenTree *tree) // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar genUpdateLife(tree); +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING assert(tree->gtRegNum != REG_NA); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING // there are three cases where consuming a reg means clearing the bit in the live mask // 1. it was not produced by a local @@ -4678,6 +5218,82 @@ void CodeGen::genConsumeOperands(GenTreeOp* tree) } } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +void CodeGen::genConsumePutArgStk(GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg) +{ + // The putArgNode children are always contained. We should not consume any registers. + + GenTree* dst = putArgNode; + +#ifdef DEBUG + // Get the GT_ADDR node, which is GT_LCL_VAR_ADDR (asserted below.) + GenTree* src = putArgNode->gtOp.gtOp1; + assert(src->OperGet() == GT_LDOBJ); + src = src->gtOp.gtOp1; +#else // !DEBUG + // Get the GT_ADDR node, which is GT_LCL_VAR_ADDR (asserted below.) + GenTree* src = putArgNode->gtOp.gtOp1->gtOp.gtOp1; +#endif // !DEBUG + + size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE; + GenTree* op1; + GenTree* op2; + + regNumber reg1, reg2, reg3; + op1 = dst; + reg1 = dstReg; + op2 = src; + reg2 = srcReg; + reg3 = sizeReg; + + if (reg2 != REG_NA && op2->gtRegNum != REG_NA) + { + genConsumeReg(op2); + } + + if ((reg1 != REG_NA) && (op1->gtRegNum != reg1)) + { +#if FEATURE_FIXED_OUT_ARGS + // Generate LEA instruction to load the stack of the outgoing var + SlotNum offset in RDI. + LclVarDsc * varDsc = &compiler->lvaTable[compiler->lvaOutgoingArgSpaceVar]; + int offset = varDsc->lvStkOffs + putArgNode->gtSlotNum * TARGET_POINTER_SIZE; + // Outgoing area always on top of the stack (relative to rsp.) + getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, reg1, REG_SPBASE, offset); +#else // !FEATURE_FIXED_OUT_ARGS + NYI_X86("Stack args for x86/RyuJIT"); +#endif // !FEATURE_FIXED_OUT_ARGS + + } + + if (op2->gtRegNum != reg2) + { + if (src->OperIsLocalAddr()) + { + // The OperLocalAddr is always contained. + assert(src->isContained()); + GenTreeLclVarCommon* lclNode = src->AsLclVarCommon(); + + // Generate LEA instruction to load the LclVar address in RSI. 
+ LclVarDsc * varLclDsc = &compiler->lvaTable[lclNode->gtLclNum]; + int offset = varLclDsc->lvStkOffs; + + // Outgoing area is always on top of the stack (relative to rsp.) + getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, reg2, (isFramePointerUsed() ? getFramePointerReg() : REG_SPBASE), offset); + } + else + { + assert(src->gtRegNum != REG_NA); + getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, reg2, src->gtRegNum); + } + } + + if ((reg3 != REG_NA)) + { + inst_RV_IV(INS_mov, reg3, size, EA_8BYTE); + } +} +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + void CodeGen::genConsumeBlockOp(GenTreeBlkOp* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg) { // We have to consume the registers, and perform any copies, in the actual execution order. @@ -4827,7 +5443,6 @@ void CodeGen::genTransferRegGCState(regNumber dst, regNumber src) } } - // generates an ip-relative call or indirect call via reg ('call reg') // pass in 'addr' for a relative call or 'base' for an indirect register call // methHnd - optional, only used for pretty printing @@ -4843,9 +5458,9 @@ void CodeGen::genEmitCall(int callType, bool isJump, bool isNoGC) { -#ifndef _TARGET_X86_ ssize_t argSize = 0; -#endif // !_TARGET_X86_ +#if !defined(_TARGET_X86_) ssize_t argSize = 0; +#endif // !defined(_TARGET_X86_) getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) @@ -4867,14 +5482,14 @@ void CodeGen::genEmitCall(int callType, CORINFO_METHOD_HANDLE methHnd, INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) - GenTreeIndir* indir + GenTreeIndir* indir X86_ARG(ssize_t argSize), emitAttr retSize, IL_OFFSETX ilOffset) { -#ifndef _TARGET_X86_ ssize_t argSize = 0; -#endif // !_TARGET_X86_ +#if !defined(_TARGET_X86_) ssize_t argSize = 0; +#endif // !defined(_TARGET_X86_) genConsumeAddress(indir->Addr()); getEmitter()->emitIns_Call(emitter::EmitCallType(callType), @@ -4920,13 +5535,49 @@ void CodeGen::genCallInstruction(GenTreePtr node) if (curArgTabEntry->regNum == REG_STK) continue; - regNumber argReg = curArgTabEntry->regNum; - genConsumeReg(argNode); - if (argNode->gtRegNum != argReg) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // Deal with multi-register passed struct args. + if (argNode->OperGet() == GT_LIST) { - inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum); + GenTreeArgList* argListPtr = argNode->AsArgList(); + unsigned iterationNum = 0; + for (; argListPtr; argListPtr = argListPtr->Rest(), iterationNum++) + { + GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1; + assert(putArgRegNode->gtOper == GT_PUTARG_REG); + regNumber argReg = REG_NA; + if (iterationNum == 0) + { + argReg = curArgTabEntry->regNum; + } + else if (iterationNum == 1) + { + argReg = curArgTabEntry->otherRegNum; + } + else + { + assert(false); // Illegal state.
+ } + + genConsumeReg(putArgRegNode); + if (putArgRegNode->gtRegNum != argReg) + { + inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg, putArgRegNode->gtRegNum); + } + } + } + else +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + { + regNumber argReg = curArgTabEntry->regNum; + genConsumeReg(argNode); + if (argNode->gtRegNum != argReg) + { + inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum); + } } +#if FEATURE_VARARG // In the case of a varargs call, // the ABI dictates that if we have floating point args, // we must pass the enregistered arguments in both the @@ -4937,9 +5588,10 @@ void CodeGen::genCallInstruction(GenTreePtr node) instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG); inst_RV_RV(ins, argNode->gtRegNum, targetReg); } +#endif // FEATURE_VARARG } -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // The call will pop its arguments. // for each putarg_stk: ssize_t stackArgBytes = 0; @@ -4949,16 +5601,31 @@ void CodeGen::genCallInstruction(GenTreePtr node) GenTreePtr arg = args->gtOp.gtOp1; if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG)) { +#if defined(_TARGET_X86_) assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG)); if (arg->OperGet() == GT_LONG) { assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK)); } +#endif // defined(_TARGET_X86_) + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (genActualType(arg->TypeGet()) == TYP_STRUCT) + { + if (arg->OperGet() == GT_PUTARG_STK) + { + GenTreeLdObj* ldObj = arg->gtGetOp1()->AsLdObj(); + stackArgBytes = compiler->info.compCompHnd->getClassSize(ldObj->gtClass); + } + } + else +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + stackArgBytes += genTypeSize(genActualType(arg->TypeGet())); } args = args->gtOp.gtOp2; } -#endif // _TARGET_X86_ +#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // Insert a null check on "this" pointer if asked. if (call->NeedsNullCheck()) @@ -5056,9 +5723,9 @@ void CodeGen::genCallInstruction(GenTreePtr node) methHnd, INDEBUG_LDISASM_COMMA(sigInfo) (void*) target->AsIndir()->Base()->AsIntConCommon()->IconValue(), -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) stackArgBytes, -#endif // _TARGET_X86_ +#endif // defined(_TARGET_X86_) retSize, ilOffset); } @@ -5070,9 +5737,9 @@ void CodeGen::genCallInstruction(GenTreePtr node) methHnd, INDEBUG_LDISASM_COMMA(sigInfo) target->AsIndir(), -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) stackArgBytes, -#endif // _TARGET_X86_ +#endif // defined(_TARGET_X86_) retSize, ilOffset); } @@ -5086,9 +5753,9 @@ void CodeGen::genCallInstruction(GenTreePtr node) methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr, //addr -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) stackArgBytes, -#endif // _TARGET_X86_ +#endif // defined(_TARGET_X86_) retSize, ilOffset, genConsumeReg(target)); @@ -5153,9 +5820,9 @@ void CodeGen::genCallInstruction(GenTreePtr node) methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) stackArgBytes, -#endif // _TARGET_X86_ +#endif // defined(_TARGET_X86_) retSize, ilOffset); } @@ -5168,10 +5835,10 @@ void CodeGen::genCallInstruction(GenTreePtr node) genPendingCallLabel = nullptr; } -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) // The call will pop its arguments.
genStackLevel -= stackArgBytes; -#endif // _TARGET_X86_ +#endif // defined(_TARGET_X86_) // Update GC info: // All Callee arg registers are trashed and no longer contain any GC pointers. @@ -5218,6 +5885,130 @@ } } +//------------------------------------------------------------------------ +// genGetStructTypeSizeOffset: Gets the type, size and offset of the eightbytes of a struct for System V systems. +// +// Arguments: +// 'structDesc' struct description +// 'type0' returns the type of the first eightbyte. +// 'type1' returns the type of the second eightbyte. +// 'size0' returns the size of the first eightbyte. +// 'size1' returns the size of the second eightbyte. +// 'offset0' returns the offset of the first eightbyte. +// 'offset1' returns the offset of the second eightbyte. +// + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +void CodeGen::genGetStructTypeSizeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc, + var_types* type0, var_types* type1, emitAttr* size0, emitAttr* size1, + unsigned __int8* offset0, unsigned __int8* offset1) +{ + *size0 = EA_UNKNOWN; + *offset0 = structDesc.eightByteOffsets[0]; + *size1 = EA_UNKNOWN; + *offset1 = structDesc.eightByteOffsets[1]; + + *type0 = TYP_UNKNOWN; + *type1 = TYP_UNKNOWN; + + // Set the first eightbyte data + if (structDesc.eightByteCount >= 1) + { + switch (structDesc.eightByteClassifications[0]) + { + case SystemVClassificationTypeInteger: + if (structDesc.eightByteSizes[0] <= 4) + { + *size0 = EA_4BYTE; + *type0 = TYP_INT; + } + else if (structDesc.eightByteSizes[0] <= 8) + { + *size0 = EA_8BYTE; + *type0 = TYP_LONG; + } + else + { + assert(false && "Bad int type."); + } + break; + case SystemVClassificationTypeIntegerReference: + assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES); + *size0 = EA_GCREF; + *type0 = TYP_REF; + break; + case SystemVClassificationTypeSSE: + if (structDesc.eightByteSizes[0] <= 4) + { + *size0 = EA_4BYTE; + *type0 = TYP_FLOAT; + } + else if (structDesc.eightByteSizes[0] <= 8) + { + *size0 = EA_8BYTE; + *type0 = TYP_DOUBLE; + } + else + { + assert(false && "Bad float type."); // Not possible. + } + break; + default: + assert(false && "Bad EightByte classification."); + break; + } + } + + // Set the second eightbyte data + if (structDesc.eightByteCount == 2) + { + switch (structDesc.eightByteClassifications[1]) + { + case SystemVClassificationTypeInteger: + if (structDesc.eightByteSizes[1] <= 4) + { + *type1 = TYP_INT; + *size1 = EA_4BYTE; + } + else if (structDesc.eightByteSizes[1] <= 8) + { + *type1 = TYP_LONG; + *size1 = EA_8BYTE; + } + else + { + assert(false && "Bad int type."); + } + break; + case SystemVClassificationTypeIntegerReference: + assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES); + *type1 = TYP_REF; + *size1 = EA_GCREF; + break; + case SystemVClassificationTypeSSE: + if (structDesc.eightByteSizes[1] <= 4) + { + *type1 = TYP_FLOAT; + *size1 = EA_4BYTE; + } + else if (structDesc.eightByteSizes[1] <= 8) + { + *type1 = TYP_DOUBLE; + *size1 = EA_8BYTE; + } + else + { + assert(false && "Bad float type."); // Not possible. + } + break; + default: + assert(false && "Bad EightByte classification."); + break; + } + } +} +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Produce code for a GT_JMP node. // The arguments of the caller need to be transferred to the callee before exiting the caller. // The actual jump to callee is generated as part of caller epilog sequence.
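// Illustrative example (editorial note, not part of the patch): how a concrete
// struct maps onto the eightbyte descriptor consumed by genGetStructTypeSizeOffset
// above. Per the System V ABI classification, a hypothetical
//
//     struct Example { double d; int a; int b; };   // 16 bytes
//
// yields eightByteCount = 2, eightByteClassifications = { SSE, Integer },
// eightByteSizes = { 8, 8 } and eightByteOffsets = { 0, 8 }. The function above
// then reports type0 = TYP_DOUBLE (EA_8BYTE, offset 0) and type1 = TYP_LONG
// (EA_8BYTE, offset 8), so the value travels in one XMM and one integer register.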
@@ -5319,36 +6110,94 @@ void CodeGen::genJmpMethod(GenTreePtr jmp) if (!varDsc->lvIsRegArg) continue; - // Register argument - noway_assert(isRegParamType(genActualType(varDsc->TypeGet()))); +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (varDsc->lvType == TYP_STRUCT) + { + CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); + assert(typeHnd != nullptr); - // Is register argument already in the right register? - // If not load it from its stack location. - var_types loadType = varDsc->lvaArgType(); - regNumber argReg = varDsc->lvArgReg; // incoming arg register + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + assert(structDesc.passedInRegisters); - if (varDsc->lvRegNum != argReg) - { - assert(genIsValidReg(argReg)); + emitAttr size0 = EA_UNKNOWN; + emitAttr size1 = EA_UNKNOWN; + unsigned __int8 offset0 = 0; + unsigned __int8 offset1 = 0; + var_types type0 = TYP_UNKNOWN; + var_types type1 = TYP_UNKNOWN; + + // Get the eightbyte data + genGetStructTypeSizeOffset(structDesc, &type0, &type1, &size0, &size1, &offset0, &offset1); + + // Move the values into the right registers. + // + if (type0 != TYP_UNKNOWN) + { + getEmitter()->emitIns_R_S(ins_Load(type0), size0, varDsc->lvArgReg, varNum, offset0); + + // Update varDsc->lvArgReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. + // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. + // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block + // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). + regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg); + gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0); + } + + if (type1 != TYP_UNKNOWN) + { + getEmitter()->emitIns_R_S(ins_Load(type1), size1, varDsc->lvOtherArgReg, varNum, offset1); - getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0); + // Update varDsc->lvArgReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. + // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. + // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block + // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). + regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg); + gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1); + } - // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. - // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. - // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block - // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). - regSet.rsMaskVars |= genRegMask(argReg); - gcInfo.gcMarkRegPtrVal(argReg, loadType); if (varDsc->lvTracked) { - VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum); + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum); } } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + // Register argument + noway_assert(isRegParamType(genActualType(varDsc->TypeGet()))); + // Is register argument already in the right register? + // If not load it from its stack location.
+ var_types loadType = varDsc->lvaArgType(); + regNumber argReg = varDsc->lvArgReg; // incoming arg register + + if (varDsc->lvRegNum != argReg) + { + assert(genIsValidReg(argReg)); + getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0); + + // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. + // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. + // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block + // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). + regSet.rsMaskVars |= genRegMask(argReg); + gcInfo.gcMarkRegPtrVal(argReg, loadType); + if (varDsc->lvTracked) + { + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum); + } + } + } + +#if FEATURE_VARARG // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg register. if (compiler->info.compIsVarArgs) { regNumber intArgReg; + var_types loadType = varDsc->lvaArgType(); + regNumber argReg = varDsc->lvArgReg; // incoming arg register + if (varTypeIsFloating(loadType)) { intArgReg = compiler->getCallArgIntRegister(argReg); @@ -5368,8 +6217,10 @@ void CodeGen::genJmpMethod(GenTreePtr jmp) firstArgVarNum = varNum; } } +#endif // FEATURE_VARARG } +#if FEATURE_VARARG // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments, // load the remaining arg registers (both int and float) from the corresponding // shadow stack slots. This is for the reason that we don't know the number and type @@ -5409,7 +6260,7 @@ void CodeGen::genJmpMethod(GenTreePtr jmp) getEmitter()->emitEnableGC(); } } - +#endif // FEATURE_VARARG } // produce code for a GT_LEA subnode @@ -6488,13 +7339,122 @@ CodeGen::genMathIntrinsic(GenTreePtr treeNode) genProduceReg(treeNode); } -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +//--------------------------------------------------------------------- +// genPutArgStk - generate code for putting a struct arg on the stack by value. +// In case there are references to heap objects in the struct, +// it generates the gcinfo as well. +// +// Arguments +// treeNode - the GT_PUTARG_STK node +// +// Return value: +// None +// void CodeGen::genPutArgStk(GenTreePtr treeNode) { +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING assert(treeNode->OperGet() == GT_PUTARG_STK); +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING var_types targetType = treeNode->TypeGet(); +#ifdef _TARGET_X86_ noway_assert(targetType != TYP_STRUCT); +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + noway_assert(targetType == TYP_STRUCT); + + GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk(); + if (putArgStk->gtNumberReferenceSlots == 0) + { + switch (putArgStk->gtPutArgStkKind) + { + case GenTreePutArgStk::PutArgStkKindRepInstr: + genCodeForPutArgRepMovs(putArgStk); + break; + case GenTreePutArgStk::PutArgStkKindUnroll: + genCodeForPutArgUnroll(putArgStk); + break; + default: + unreached(); + } + } + else + { + // No need to disable GC the way COPYOBJ does. Here the refs are always copied with atomic operations. + + // Consume these registers. + // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ genConsumePutArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA); + GenTreePtr dstAddr = putArgStk; + GenTreePtr srcAddr = putArgStk->gtOp.gtOp1; + gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet()); + gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet()); + + unsigned slots = putArgStk->gtNumSlots; + + // Since we are always writing to the stack, we don't need to use a write barrier. + BYTE* gcPtrs = putArgStk->gtGcPtrs; + unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots; + + unsigned i = 0; + unsigned copiedSlots = 0; + while (i < slots) + { + switch (gcPtrs[i]) + { + case TYPE_GC_NONE: + // Let's see if we can use rep movsq instead of a sequence of movsq instructions + // to save cycles and code size. + { + unsigned nonGcSlotCount = 0; + + do + { + nonGcSlotCount++; + i++; + } while (i < slots && gcPtrs[i] == TYPE_GC_NONE); + + // If we have a very small contiguous non-gc region, it's better just to + // emit a sequence of movsq instructions + if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) + { + copiedSlots += nonGcSlotCount; + while (nonGcSlotCount > 0) + { + instGen(INS_movsq); + nonGcSlotCount--; + } + } + else + { + getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount); + copiedSlots += nonGcSlotCount; + instGen(INS_r_movsq); + } + } + break; + default: + // We have a GC pointer + // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use the movsq instruction, + // but the logic for emitting a GC info record is not available (it is internal for the emitter only.) + // See emitGCVarLiveUpd function. If we could call it separately, we could do instGen(INS_movsq); and emission of gc info. + + getEmitter()->emitIns_R_AR(ins_Load(TYP_REF), EA_GCREF, REG_RCX, REG_RSI, 0); + getEmitter()->emitIns_S_R(ins_Store(TYP_REF), EA_GCREF, REG_RCX, compiler->lvaOutgoingArgSpaceVar, + ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE)); + getEmitter()->emitIns_R_I(INS_add, EA_8BYTE, REG_RSI, TARGET_POINTER_SIZE); + getEmitter()->emitIns_R_I(INS_add, EA_8BYTE, REG_RDI, TARGET_POINTER_SIZE); + copiedSlots++; + gcPtrCount--; + i++; + } + } + + gcInfo.gcMarkRegSetNpt(RBM_RSI); + gcInfo.gcMarkRegSetNpt(RBM_RDI); + } + return; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); GenTreePtr data = treeNode->gtOp.gtOp1; @@ -6508,7 +7468,9 @@ CodeGen::genPutArgStk(GenTreePtr treeNode) // Decrement SP. int argSize = genTypeSize(genActualType(targetType)); inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL)); +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING genStackLevel += argSize; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
if (data->isContained()) @@ -6522,7 +7484,7 @@ CodeGen::genPutArgStk(GenTreePtr treeNode) getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0); } } -#endif // _TARGET_X86_ +#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) /***************************************************************************** * diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp index 427d778b90..b54657202a 100644 --- a/src/jit/compiler.cpp +++ b/src/jit/compiler.cpp @@ -2992,7 +2992,6 @@ void Compiler::compCompile(void * * methodCodePtr, unsigned compileFlags) { hashBv::Init(this); - VarSetOps::AssignAllowUninitRhs(this, compCurLife, VarSetOps::UninitVal()); /* The temp holding the secret stub argument is used by fgImport() when importing the intrinsic. */ @@ -4042,7 +4041,6 @@ int Compiler::compCompileHelper (CORINFO_MODULE_HANDLE clas unsigned compileFlags, CorInfoInstantiationVerification instVerInfo) { - CORINFO_METHOD_HANDLE methodHnd = info.compMethodHnd; info.compCode = methodInfo->ILCode; @@ -5027,6 +5025,125 @@ START: return result; } +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + +// GetTypeFromClassificationAndSizes: +// Returns the type of the eightbyte accounting for the classification and size of the eightbyte. +// +// args: +// classType: classification type +// size: size of the eightbyte. +// +var_types Compiler::GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size) +{ + var_types type = TYP_UNKNOWN; + switch (classType) + { + case SystemVClassificationTypeInteger: + if (size == 1) + { + type = TYP_BYTE; + } + else if (size <= 2) + { + type = TYP_SHORT; + } + else if (size <= 4) + { + type = TYP_INT; + } + else if (size <= 8) + { + type = TYP_LONG; + } + else + { + assert(false && "GetTypeFromClassificationAndSizes Invalid Integer classification type."); + } + break; + case SystemVClassificationTypeIntegerReference: + type = TYP_REF; + break; + case SystemVClassificationTypeSSE: + if (size <= 4) + { + type = TYP_FLOAT; + } + else if (size <= 8) + { + type = TYP_DOUBLE; + } + else + { + assert(false && "GetTypeFromClassificationAndSizes Invalid SSE classification type."); + } + break; + + default: + assert(false && "GetTypeFromClassificationAndSizes Invalid classification type."); + break; + } + + return type; +} + +// getEightByteType: +// Returns the JIT type of the eightbyte at the given slot number of a struct classification description. +// +// args: +// structDesc: struct classification description. +// slotNum: eightbyte slot number for the struct. +// +var_types Compiler::getEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc, unsigned slotNum) +{ + var_types eightByteType = TYP_UNDEF; + unsigned len = structDesc.eightByteSizes[slotNum]; + + switch (structDesc.eightByteClassifications[slotNum]) + { + case SystemVClassificationTypeInteger: + // See typelist.h for jit type definition. + // All types of size <= 4 bytes are of jit type TYP_INT.
+ if (structDesc.eightByteSizes[slotNum] <= 4) + { + eightByteType = TYP_INT; + } + else if (structDesc.eightByteSizes[slotNum] <= 8) + { + eightByteType = TYP_LONG; + } + else + { + assert(false && "getEightByteType Invalid Integer classification type."); + } + break; + case SystemVClassificationTypeIntegerReference: + assert(len == REGSIZE_BYTES); + eightByteType = TYP_REF; + break; + case SystemVClassificationTypeSSE: + if (structDesc.eightByteSizes[slotNum] <= 4) + { + eightByteType = TYP_FLOAT; + } + else if (structDesc.eightByteSizes[slotNum] <= 8) + { + eightByteType = TYP_DOUBLE; + } + else + { + assert(false && "getEightByteType Invalid SSE classification type."); + } + break; + default: + assert(false && "getEightByteType Invalid classification type."); + break; + } + + return eightByteType; +} +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + /*****************************************************************************/ /*****************************************************************************/ diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 520c94a462..bc851dcf1d 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -269,9 +269,12 @@ public: unsigned char lvOverlappingFields :1; // True when we have a struct with possibly overlapping fields unsigned char lvContainsHoles :1; // True when we have a promoted struct that contains holes unsigned char lvCustomLayout :1; // True when this struct has "CustomLayout" -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) unsigned char lvDontPromote:1; // Should struct promoter consider this variable for promotion? - unsigned char lvIsHfaRegArg:1; // Is this argument variable holding a HFA register argument. +#endif + +#ifdef _TARGET_ARM_ + unsigned char lvIsHfaRegArg :1; // Is this argument variable holding a HFA register argument. unsigned char lvHfaTypeIsFloat:1; // Is the HFA type float or double? #endif @@ -290,7 +293,7 @@ public: unsigned char lvSIMDType :1; // This is a SIMD struct unsigned char lvUsedInSIMDIntrinsic :1; // This tells lclvar is used for simd intrinsic #endif // FEATURE_SIMD - unsigned char lvRegStruct : 1; // This is a reg-sized non-field-addressed struct. + unsigned char lvRegStruct :1; // This is a reg-sized non-field-addressed struct. union { @@ -305,6 +308,26 @@ public: unsigned char lvFldOffset; unsigned char lvFldOrdinal; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + regNumber lvRegNumForSlot(unsigned slotNum) + { + if (slotNum == 0) + { + return lvArgReg; + } + else if (slotNum == 1) + { + return lvOtherArgReg; + } + else + { + assert(false && "Invalid slotNum!"); + } + + unreached(); + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + private: regNumberSmall _lvRegNum; // Used to store the register this variable is in (or, the low register of a register pair). @@ -314,7 +337,13 @@ private: #if !defined(_TARGET_64BIT_) regNumberSmall _lvOtherReg; // Used for "upper half" of long var. #endif // !defined(_TARGET_64BIT_) + regNumberSmall _lvArgReg; // The register in which this argument is passed. + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + regNumberSmall _lvOtherArgReg; // Used for the second part of the struct passed in a register. 
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + #ifndef LEGACY_BACKEND union { @@ -382,7 +411,7 @@ public: regNumber lvArgReg; regNumber GetArgReg() const -{ + { return (regNumber) _lvArgReg; } @@ -392,6 +421,22 @@ public: assert(_lvArgReg == reg); } +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + __declspec(property(get = GetOtherArgReg, put = SetOtherArgReg)) + regNumber lvOtherArgReg; + + regNumber GetOtherArgReg() const + { + return (regNumber)_lvOtherArgReg; + } + + void SetOtherArgReg(regNumber reg) + { + _lvOtherArgReg = (regNumberSmall)reg; + assert(_lvOtherArgReg == reg); + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + #ifdef FEATURE_SIMD // Is this is a SIMD struct? bool lvIsSIMDType() const @@ -1139,6 +1184,15 @@ struct FuncInfoDsc struct fgArgTabEntry { + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + fgArgTabEntry() + { + otherRegNum = REG_NA; + isStruct = false; // is this a struct arg + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + GenTreePtr node; // Initially points at the Op1 field of 'parent', but if the argument is replaced with a GT_ASG or placeholder // it will point at the actual argument in the gtCallLateArgs list. GenTreePtr parent; // Points at the GT_LIST node in the gtCallArgs for this argument @@ -1165,6 +1219,13 @@ struct fgArgTabEntry bool isBackFilled :1; // True when the argument fills a register slot skipped due to alignment requirements of previous arguments. bool isNonStandard:1; // True if it is an arg that is passed in a reg other than a standard arg reg +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + regNumber otherRegNum; // The (second) register to use when passing this argument. + bool isStruct; // is this a struct arg + + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + void SetIsHfaRegArg(bool hfaRegArg) { isHfaRegArg = hfaRegArg; @@ -1196,10 +1257,10 @@ class fgArgInfo unsigned nextSlotNum; // Updatable slot count value unsigned stkLevel; // Stack depth when we make this call (for x86) - unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs) - bool argsComplete; // marker for state - bool argsSorted; // marker for state - fgArgTabEntryPtr * argTable; // variable sized array of per argument descrption: (i.e. argTable[argTableSize]) + unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs) + bool argsComplete; // marker for state + bool argsSorted; // marker for state + fgArgTabEntryPtr * argTable; // variable sized array of per argument description: (i.e.
argTable[argTableSize]) private: @@ -1217,11 +1278,24 @@ public: unsigned numRegs, unsigned alignment); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + fgArgTabEntryPtr AddRegArg (unsigned argNum, + GenTreePtr node, + GenTreePtr parent, + regNumber regNum, + unsigned numRegs, + unsigned alignment, + const bool isStruct, + const regNumber otherRegNum = REG_NA, + const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr = nullptr); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + fgArgTabEntryPtr AddStkArg (unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, - unsigned alignment); + unsigned alignment + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct)); void RemorphReset (); fgArgTabEntryPtr RemorphRegArg (unsigned argNum, @@ -1391,7 +1465,9 @@ public: DWORD expensiveDebugCheckLevel; #endif - +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + CORINFO_CLASS_HANDLE GetStructClassHandle(GenTreePtr tree); +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) #ifdef _TARGET_ARM_ @@ -1403,8 +1479,6 @@ public: // floating-point registers. // - inline CORINFO_CLASS_HANDLE GetHfaClassHandle(GenTreePtr tree); - bool IsHfa(CORINFO_CLASS_HANDLE hClass); bool IsHfa(GenTreePtr tree); @@ -1417,6 +1491,14 @@ public: #endif // _TARGET_ARM_ //------------------------------------------------------------------------- + // The following is used for struct passing on System V systems. + // +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + bool IsRegisterPassable(CORINFO_CLASS_HANDLE hClass); + bool IsRegisterPassable(GenTreePtr tree); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + + //------------------------------------------------------------------------- // The following is used for validating format of EH table // @@ -2450,7 +2532,7 @@ public : unsigned char fldOrdinal; var_types fldType; unsigned fldSize; - CORINFO_CLASS_HANDLE fldTypeHnd; + CORINFO_CLASS_HANDLE fldTypeHnd; }; // Info about struct to be promoted. @@ -3006,9 +3088,12 @@ private: bool impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &opcode); void impAbortInline(bool abortThisInlineOnly, bool contextDependent, const char *reason); -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) void impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr op, CORINFO_CLASS_HANDLE hClass); - GenTreePtr impAssignHfaToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass); +#endif + +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + GenTreePtr impAssignStructToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass); #endif // A free list of linked list nodes used to represent to-do stacks of basic blocks.
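// Illustrative sketch (editorial note; the call site and variable names are
// invented, only the AddRegArg overload above is from the change): registering
// a struct argument that classifies into two eightbytes.
//
//     SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
//     eeGetSystemVAmd64PassStructInRegisterDescriptor(clsHnd, &structDesc);
//     if (structDesc.passedInRegisters && (structDesc.eightByteCount == 2))
//     {
//         // The first eightbyte goes in regNum, the second in otherRegNum; the
//         // descriptor is stored on the fgArgTabEntry for Lower/codegen to use.
//         call->fgArgInfo->AddRegArg(argNum, node, parent, regNum, 2, 1,
//                                    /* isStruct */ true, otherRegNum, &structDesc);
//     }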
@@ -3026,9 +3111,11 @@ private: bool impIsValueType (typeInfo* pTypeInfo); var_types mangleVarArgsType (var_types type); + +#if FEATURE_VARARG regNumber getCallArgIntRegister (regNumber floatReg); regNumber getCallArgFloatRegister (regNumber intReg); - +#endif // FEATURE_VARARG //--------------------------- Inlining------------------------------------- #if defined(DEBUG) || MEASURE_INLINING @@ -4080,10 +4167,9 @@ public: bool fgCastNeeded(GenTreePtr tree, var_types toType); GenTreePtr fgDoNormalizeOnStore(GenTreePtr tree); - GenTreePtr fgMakeTmpArgNode(unsigned tmpVarNum); - - /* The following check for loops that don't execute calls */ + GenTreePtr fgMakeTmpArgNode(unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters)); + // The following check for loops that don't execute calls bool fgLoopCallMarked; void fgLoopCallTest (BasicBlock *srcBB, @@ -4450,7 +4536,14 @@ private: GenTreePtr fgMorphCast (GenTreePtr tree); GenTreePtr fgUnwrapProxy (GenTreePtr objRef); GenTreeCall* fgMorphArgs (GenTreeCall* call); - void fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned argIndex, CORINFO_CLASS_HANDLE copyBlkClass); + + void fgMakeOutgoingStructArgCopy( + GenTreeCall* call, + GenTree* args, + unsigned argIndex, + CORINFO_CLASS_HANDLE copyBlkClass + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structDescPtr)); + void fgFixupStructReturn (GenTreePtr call); GenTreePtr fgMorphLocalVar (GenTreePtr tree); bool fgAddrCouldBeNull (GenTreePtr addr); @@ -4570,11 +4663,11 @@ private: void fgInsertInlineeBlocks (InlineInfo * pInlineInfo); GenTreePtr fgInlinePrependStatements(InlineInfo * inlineInfo); -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) GenTreePtr fgGetStructAsStructPtr(GenTreePtr tree); - GenTreePtr fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd); - void fgAttachHfaInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd); -#endif + GenTreePtr fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd); + void fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd); +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) static fgWalkPreFn fgUpdateInlineReturnExpressionPlaceHolder; #ifdef DEBUG @@ -6275,6 +6368,17 @@ public : void eeSetEHinfo(unsigned EHnumber, const CORINFO_EH_CLAUSE* clause); + // ICorStaticInfo wrapper functions + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#ifdef DEBUG + static void dumpSystemVClassificationType(SystemVClassificationType ct); +#endif // DEBUG + + void eeGetSystemVAmd64PassStructInRegisterDescriptor(/*IN*/ CORINFO_CLASS_HANDLE structHnd, + /*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + // Utility functions #if defined(DEBUG) @@ -8433,6 +8537,11 @@ public: static HelperCallProperties s_helperCallProperties; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + var_types GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size); + var_types getEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc, unsigned slotNum); + void fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument); +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) }; // end of class Compiler // Inline methods of CompAllocator. 
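// Illustrative examples (editorial note) of the classification-to-JIT-type
// mapping that the GetTypeFromClassificationAndSizes and getEightByteType
// helpers declared above implement:
//
//     GetTypeFromClassificationAndSizes(SystemVClassificationTypeInteger,          1) -> TYP_BYTE
//     GetTypeFromClassificationAndSizes(SystemVClassificationTypeInteger,          4) -> TYP_INT
//     GetTypeFromClassificationAndSizes(SystemVClassificationTypeSSE,              8) -> TYP_DOUBLE
//     GetTypeFromClassificationAndSizes(SystemVClassificationTypeIntegerReference, 8) -> TYP_REF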
@@ -8466,7 +8575,6 @@ LclVarDsc::LclVarDsc(Compiler* comp) { } - /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp index 1cdc939d16..e4168b0f18 100644 --- a/src/jit/compiler.hpp +++ b/src/jit/compiler.hpp @@ -651,7 +651,10 @@ bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types type, if (type == TYP_STRUCT) { size = info.compCompHnd->getClassSize(typeClass); - +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // Account for the classification of the struct. + result = IsRegisterPassable(typeClass); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING switch(size) { case 1: @@ -664,6 +667,7 @@ bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types type, default: break; } +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING } else { @@ -2268,8 +2272,10 @@ int Compiler::lvaFrameAddress(int varNum, bool * pFPbased) if (lvaDoneFrameLayout > REGALLOC_FRAME_LAYOUT && !varDsc->lvOnFrame) { #ifdef _TARGET_AMD64_ - // On amd64, every param has a stack location. + // On amd64, every param has a stack location, except on Unix-like systems. +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING assert(varDsc->lvIsParam); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #elif defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) // For !LEGACY_BACKEND on x86, a stack parameter that is enregistered will have a stack location. assert(varDsc->lvIsParam && !varDsc->lvIsRegArg); @@ -2589,6 +2595,8 @@ var_types Compiler::mangleVarArgsType(var_types type) return type; } +// For CORECLR there is no vararg on System V systems. +#if FEATURE_VARARG inline regNumber Compiler::getCallArgIntRegister(regNumber floatReg) { #ifdef _TARGET_AMD64_ @@ -2630,10 +2638,11 @@ inline regNumber Compiler::getCallArgFloatRegister(regNumber intReg) } #else // !_TARGET_AMD64_ // How will float args be passed for RyuJIT/x86? - NYI("getCallArgIntRegister for RyuJIT/x86"); + NYI("getCallArgFloatRegister for RyuJIT/x86"); return REG_NA; #endif // !_TARGET_AMD64_ } +#endif // FEATURE_VARARG /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp index 90e50ed84a..4c8e2ff30e 100644 --- a/src/jit/ee_il_dll.cpp +++ b/src/jit/ee_il_dll.cpp @@ -281,6 +281,16 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_ // Everything fits into a single 'slot' size // to accommodate irregular sized structs, they are passed byref // TODO-ARM64-Bug?: structs <= 16 bytes get passed in 2 consecutive registers. 
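+// Editorial note (illustrative, not from the original change): with the System V
+// path below, a 24-byte struct argument reports its actual class size (three
+// stack slots), whereas on Windows AMD64 such an irregular struct is passed
+// byref and eeGetArgSize still returns sizeof(size_t).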
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + CORINFO_CLASS_HANDLE argClass; + CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(sig, list, &argClass)); + var_types argType = JITtype2varType(argTypeJit); + if (argType == TYP_STRUCT) + { + unsigned structSize = info.compCompHnd->getClassSize(argClass); + return structSize; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING return sizeof(size_t); #else // !_TARGET_AMD64_ && !_TARGET_ARM64_ @@ -920,6 +930,60 @@ int Compiler::eeGetJitDataOffs(CORINFO_FIELD_HANDLE field) } } + +/***************************************************************************** + * + * ICorStaticInfo wrapper functions + */ + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + +#ifdef DEBUG +void Compiler::dumpSystemVClassificationType(SystemVClassificationType ct) +{ + switch (ct) + { + case SystemVClassificationTypeUnknown: printf("UNKNOWN"); break; + case SystemVClassificationTypeStruct: printf("Struct"); break; + case SystemVClassificationTypeNoClass: printf("NoClass"); break; + case SystemVClassificationTypeMemory: printf("Memory"); break; + case SystemVClassificationTypeInteger: printf("Integer"); break; + case SystemVClassificationTypeIntegerReference: printf("IntegerReference"); break; + case SystemVClassificationTypeSSE: printf("SSE"); break; + default: printf("ILLEGAL"); break; + } +} +#endif // DEBUG + +void Compiler::eeGetSystemVAmd64PassStructInRegisterDescriptor(/*IN*/ CORINFO_CLASS_HANDLE structHnd, + /*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr) +{ + bool ok = info.compCompHnd->getSystemVAmd64PassStructInRegisterDescriptor(structHnd, structPassInRegDescPtr); + noway_assert(ok); + +#ifdef DEBUG + if (verbose) + { + printf("**** getSystemVAmd64PassStructInRegisterDescriptor(0x%x (%s), ...) 
=>\n", dspPtr(structHnd), eeGetClassName(structHnd)); + printf(" passedInRegisters = %s\n", dspBool(structPassInRegDescPtr->passedInRegisters)); + if (structPassInRegDescPtr->passedInRegisters) + { + printf(" eightByteCount = %d\n", structPassInRegDescPtr->eightByteCount); + for (unsigned int i = 0; i < structPassInRegDescPtr->eightByteCount; i++) + { + printf(" eightByte #%d -- classification: ", i); + dumpSystemVClassificationType(structPassInRegDescPtr->eightByteClassifications[i]); + printf(", byteSize: %d, byteOffset: %d\n", + structPassInRegDescPtr->eightByteSizes[i], + structPassInRegDescPtr->eightByteOffsets[i]); + } + } + } +#endif // DEBUG +} + +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + /***************************************************************************** * * Utility functions diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp index 20f8af3fa2..fa9d3597de 100644 --- a/src/jit/emit.cpp +++ b/src/jit/emit.cpp @@ -5653,8 +5653,9 @@ void emitter::emitRecordGCcall(BYTE * codePos, call->cdGCrefRegs = (regMaskSmall)emitThisGCrefRegs; call->cdByrefRegs = (regMaskSmall)emitThisByrefRegs; #if EMIT_TRACK_STACK_DEPTH +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING noway_assert(FitsIn<USHORT>(emitCurStackLvl / ((unsigned)sizeof(unsigned)))); - call->cdArgBaseOffset = (USHORT)(emitCurStackLvl / ((unsigned)sizeof(unsigned))); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #endif // Append the call descriptor to the list */ diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 6f1c6c8fce..d6de1f2dba 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -3671,7 +3671,8 @@ void emitter::emitIns_C(instruction ins, } else if (ins == INS_pop) { - emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); + emitCurStackLvl -= emitCntStackDepth; + assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS @@ -11010,7 +11011,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** && id->idReg1() == REG_ESP) { assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL); - emitStackPop (dst, /*isCall*/false, /*callInstrSize*/0, (unsigned)(emitGetInsSC(id) / sizeof(void*))); + emitStackPop(dst, /*isCall*/false, /*callInstrSize*/0, (unsigned)(emitGetInsSC(id) / sizeof(void*))); } break; diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp index 84233d82c6..c26f221c3f 100644 --- a/src/jit/flowgraph.cpp +++ b/src/jit/flowgraph.cpp @@ -8148,17 +8148,67 @@ void Compiler::fgAddInternal() // If there is a return value, then create a temp for it. Real returns will store the value in there and // it'll be reloaded by the single return. - + // TODO-ARM-Bug: Deal with multi-register genReturnLocaled structs? + // TODO-ARM64: Does this apply for ARM64 too? +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Create a local temp to store the return if the return type is not void and the + // native return type is not a struct or the native return type is a struct that is returned + // in registers (no RetBuffArg argument.) + // If we fold all returns into a single return statement, create a temp for struct type variables as well. 
+ if (genReturnBB && ((info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT) || + (info.compRetNativeType == TYP_STRUCT && info.compRetBuffArg == BAD_VAR_NUM))) +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if (genReturnBB && (info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT)) +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { genReturnLocal = lvaGrabTemp(true DEBUGARG("Single return block return value")); - lvaTable[genReturnLocal].lvType = genActualType(info.compRetNativeType); +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + var_types retLocalType = TYP_STRUCT; + if (info.compRetNativeType == TYP_STRUCT) + { + // If the native ret type is a struct, make sure the right + // normalized type is assigned to the local variable. + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + assert(info.compMethodInfo->args.retTypeClass != nullptr); + eeGetSystemVAmd64PassStructInRegisterDescriptor(info.compMethodInfo->args.retTypeClass, &structDesc); + if (structDesc.passedInRegisters && structDesc.eightByteCount <= 1) + { + retLocalType = lvaTable[genReturnLocal].lvType = getEightByteType(structDesc, 0); + } + else + { + lvaTable[genReturnLocal].lvType = TYP_STRUCT; + } + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + lvaTable[genReturnLocal].lvType = genActualType(info.compRetNativeType); + } if (varTypeIsFloating(lvaTable[genReturnLocal].lvType)) { this->compFloatingPointUsed = true; } - + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Handle a struct return type for System V Amd64 systems. + if (info.compRetNativeType == TYP_STRUCT) + { + // Handle the normalized return type. + if (retLocalType == TYP_STRUCT) + { + lvaSetStruct(genReturnLocal, info.compMethodInfo->args.retTypeClass, true); + } + else + { + lvaTable[genReturnLocal].lvVerTypeInfo = typeInfo(TI_STRUCT, info.compMethodInfo->args.retTypeClass); + } + + lvaTable[genReturnLocal].lvDontPromote = true; + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (!varTypeIsFloating(info.compRetType)) lvaTable[genReturnLocal].setPrefReg(REG_INTRET, this); #ifdef REG_FLOATRET @@ -8172,7 +8222,6 @@ void Compiler::fgAddInternal() lvaTable[genReturnLocal].lvKeepType = 1; #endif } - else { genReturnLocal = BAD_VAR_NUM; @@ -8442,7 +8491,11 @@ void Compiler::fgAddInternal() //make sure to reload the return value as part of the return (it is saved by the "real return"). if (genReturnLocal != BAD_VAR_NUM) { +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + noway_assert(info.compRetType != TYP_VOID); +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) noway_assert(info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT); +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) GenTreePtr retTemp = gtNewLclvNode(genReturnLocal, lvaTable[genReturnLocal].TypeGet()); //make sure copy prop ignores this node (make sure it always does a reload from the temp). @@ -21424,7 +21477,7 @@ void Compiler::fgInline() #endif // DEBUG } -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) /********************************************************************************* * @@ -21463,16 +21516,16 @@ GenTreePtr Compiler::fgGetStructAsStructPtr(GenTreePtr tree) /*************************************************************************************************** * child - The inlinee of the retExpr node. - * retClsHnd - The HFA class handle of the type of the inlinee. 
+ * retClsHnd - The struct class handle of the type of the inlinee. * * Assign the inlinee to a tmp, if it is a call, just assign it to a lclVar, else we can * use a copyblock to do the assignment. */ -GenTreePtr Compiler::fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd) +GenTreePtr Compiler::fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd) { assert(child->gtOper != GT_RET_EXPR && child->gtOper != GT_MKREFANY); - unsigned tmpNum = lvaGrabTemp(false DEBUGARG("RetBuf for HFA inline return candidates.")); + unsigned tmpNum = lvaGrabTemp(false DEBUGARG("RetBuf for struct inline return candidates.")); lvaSetStruct(tmpNum, retClsHnd, false); GenTreePtr dst = gtNewLclvNode(tmpNum, TYP_STRUCT); @@ -21518,7 +21571,7 @@ GenTreePtr Compiler::fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HAN /*************************************************************************************************** * tree - The tree pointer that has one of its child nodes as retExpr. * child - The inlinee child. - * retClsHnd - The HFA class handle of the type of the inlinee. + * retClsHnd - The struct class handle of the type of the inlinee. * * V04 = call() assignments are okay as we codegen it. Everything else needs to be a copy block or * would need a temp. For example, a cast(ldobj) will then be, cast(v05 = ldobj, v05); But it is @@ -21526,7 +21579,7 @@ GenTreePtr Compiler::fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HAN * a lclVar/call. So it is not worthwhile to do pattern matching optimizations like addr(ldobj(op1)) * can just be op1. */ -void Compiler::fgAttachHfaInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd) +void Compiler::fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd) { // We are okay to have: // 1. V02 = call(); @@ -21541,13 +21594,13 @@ void Compiler::fgAttachHfaInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINF GenTreePtr dstAddr = fgGetStructAsStructPtr(tree->gtOp.gtOp1); GenTreePtr srcAddr = fgGetStructAsStructPtr((child->gtOper == GT_CALL) - ? fgAssignHfaInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call. + ? fgAssignStructInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call. : child); // Just get the address, if not a call. tree->CopyFrom(gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false), this); } -#endif // _TARGET_ARM_ +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) /***************************************************************************** * Callback to replace the inline return expression place holder (GT_RET_EXPR) @@ -21562,12 +21615,12 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( if (tree->gtOper == GT_RET_EXPR) { -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // We are going to copy the tree from the inlinee, so save the handle now. CORINFO_CLASS_HANDLE retClsHnd = (tree->TypeGet() == TYP_STRUCT) ? 
tree->gtRetExpr.gtRetClsHnd : NO_CLASS_HANDLE; -#endif // _TARGET_ARM_ +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) do { @@ -21605,32 +21658,36 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( } while (tree->gtOper == GT_RET_EXPR); -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if defined(_TARGET_ARM_) if (retClsHnd != NO_CLASS_HANDLE && comp->IsHfa(retClsHnd)) +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (retClsHnd != NO_CLASS_HANDLE && comp->IsRegisterPassable(retClsHnd)) +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { GenTreePtr parent = data->parent; // See assert below, we only look one level above for an asg parent. if (parent->gtOper == GT_ASG) { // Either lhs is a call V05 = call(); or lhs is addr, and asg becomes a copyBlk. - comp->fgAttachHfaInlineeToAsg(parent, tree, retClsHnd); + comp->fgAttachStructInlineeToAsg(parent, tree, retClsHnd); } else { // Just assign the inlinee to a variable to keep it simple. - tree->CopyFrom(comp->fgAssignHfaInlineeToVar(tree, retClsHnd), comp); + tree->CopyFrom(comp->fgAssignStructInlineeToVar(tree, retClsHnd), comp); } } -#endif // _TARGET_ARM_ +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) } -#if defined(DEBUG) && defined(_TARGET_ARM_) +#if defined(DEBUG) && (defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) // Make sure we don't have a tree like so: V05 = (, , , retExpr); // Since we only look one level above for the parent for '=' and // do not check if there is a series of COMMAs. See above. // Importer and FlowGraph will not generate such a tree, so just // leaving an assert in here. This can be fixed by looking ahead - // when we visit GT_ASG similar to fgAttachHfaInlineeToAsg. + // when we visit GT_ASG similar to fgAttachStructInlineeToAsg. else if (tree->gtOper == GT_ASG && tree->gtOp.gtOp2->gtOper == GT_COMMA) { @@ -21642,11 +21699,17 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( // empty } +#if defined(_TARGET_ARM_) + noway_assert(comma->gtType != TYP_STRUCT || + comma->gtOper != GT_RET_EXPR || + (!comp->IsHfa(comma->gtRetExpr.gtRetClsHnd))); +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) noway_assert(comma->gtType != TYP_STRUCT || comma->gtOper != GT_RET_EXPR || - !comp->IsHfa(comma->gtRetExpr.gtRetClsHnd)); + (!comp->IsRegisterPassable(comma->gtRetExpr.gtRetClsHnd))); +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) } -#endif // defined(DEBUG) && defined(_TARGET_ARM_) +#endif // defined(DEBUG) && (defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) return WALK_CONTINUE; } diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 284000e55b..3c06925fe4 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -224,7 +224,15 @@ void GenTree::InitNodeSize() GenTree::s_gtNodeSizes[op] = TREE_NODE_SZ_SMALL; } - /* Now set all of the appropriate entries to 'large' */ + // Now set all of the appropriate entries to 'large' + + // On ARM, and on System V systems for struct returns, there + // is code that copies a CopyObj node over a GT_ASG node (tree.CopyFrom). + // CopyObj is a large node and GT_ASG is small, and copying a large node over a small one triggers an exception.
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + GenTree::s_gtNodeSizes[GT_ASG ] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_RETURN ] = TREE_NODE_SZ_LARGE; +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) GenTree::s_gtNodeSizes[GT_CALL ] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_CAST ] = TREE_NODE_SZ_LARGE; @@ -256,6 +264,15 @@ void GenTree::InitNodeSize() GenTree::s_gtNodeSizes[GT_MOD ] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_UMOD ] = TREE_NODE_SZ_LARGE; #endif +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + GenTree::s_gtNodeSizes[GT_PUTARG_STK ] = TREE_NODE_SZ_LARGE; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // In the importer, for HFA and register-returned structs, we rewrite GT_ASG to GT_COPYOBJ/GT_COPYBLK. + // Make sure the sizes agree. + assert(GenTree::s_gtNodeSizes[GT_COPYOBJ] <= GenTree::s_gtNodeSizes[GT_ASG]); + assert(GenTree::s_gtNodeSizes[GT_COPYBLK] <= GenTree::s_gtNodeSizes[GT_ASG]); +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) assert(GenTree::s_gtNodeSizes[GT_RETURN] == GenTree::s_gtNodeSizes[GT_ASG]); @@ -312,7 +329,12 @@ void GenTree::InitNodeSize() static_assert_no_msg(sizeof(GenTreeArgPlace) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeLabel) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreePhiArg) <= TREE_NODE_SZ_SMALL); +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_SMALL); +#else // FEATURE_UNIX_AMD64_STRUCT_PASSING + static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + #ifdef FEATURE_SIMD static_assert_no_msg(sizeof(GenTreeSIMD) <= TREE_NODE_SZ_SMALL); #endif // FEATURE_SIMD @@ -4366,13 +4388,21 @@ void GenTree::InsertAfterSelf(GenTree* node, GenTreeStmt* stmt /* = n // 'parent' must be non-null // // Notes: -// Must not be called for GT_LDOBJ (which isn't used for RyuJIT, which is the only context -// in which this method is used) +// For non-System V systems with native struct passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING not defined) +// this method must not be called for GT_LDOBJ (which isn't used for RyuJIT, which is the only context +// in which this method is used). +// If FEATURE_UNIX_AMD64_STRUCT_PASSING is defined we can get here with a GT_LDOBJ tree. This happens when +// a struct is passed in two registers. The GT_LDOBJ is converted to a GT_LIST with two GT_LCL_FLDs later +// in Lower/LowerXArch. +// GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent) { +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING noway_assert(parent->OperGet() != GT_LDOBJ); +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING + switch (parent->OperGet()) { default: @@ -4380,6 +4410,14 @@ GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent) if (this == parent->gtOp.gtOp1) return &(parent->gtOp.gtOp1); if (this == parent->gtOp.gtOp2) return &(parent->gtOp.gtOp2); break; + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + case GT_LDOBJ: + // Any GT_LDOBJ with a field list must be lowered before this point.
+ noway_assert(parent->AsLdObj()->gtFldTreeList == nullptr); + break; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + case GT_CMPXCHG: if (this == parent->gtCmpXchg.gtOpLocation) return &(parent->gtCmpXchg.gtOpLocation); if (this == parent->gtCmpXchg.gtOpValue) return &(parent->gtCmpXchg.gtOpValue); @@ -5027,7 +5065,7 @@ GenTreePtr Compiler::gtNewInlineCandidateReturnExpr(GenTreePtr inline GenTreePtr node = new(this, GT_RET_EXPR) GenTreeRetExpr(type); node->gtRetExpr.gtInlineCandidate = inlineCandidate; -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if (inlineCandidate->gtType == TYP_STRUCT) { if (inlineCandidate->gtOper == GT_CALL) @@ -5067,7 +5105,13 @@ GenTreeArgList* Compiler::gtNewListNode(GenTreePtr op1, GenTreeArgList* op2) GenTreeArgList* Compiler::gtNewArgList(GenTreePtr op) { - assert((op != NULL) && (op->OperGet() != GT_LIST) && (op->OperGet() != GT_LIST)); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // With structs passed in multiple args we could have the arg + // GT_LIST containing a list of LCL_FLDs + assert((op != NULL) && ((!op->IsList()) || (op->IsListOfLclFlds()))); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING + assert((op != NULL) && (op->OperGet() != GT_LIST)); +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING return new (this, GT_LIST) GenTreeArgList(op); } @@ -5079,8 +5123,15 @@ GenTreeArgList* Compiler::gtNewArgList(GenTreePtr op) GenTreeArgList* Compiler::gtNewArgList(GenTreePtr op1, GenTreePtr op2) { - assert((op1 != NULL) && (op1->OperGet() != GT_LIST) && (op1->OperGet() != GT_LIST)); - assert((op2 != NULL) && (op2->OperGet() != GT_LIST) && (op2->OperGet() != GT_LIST)); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // With structs passed in multiple args we could have the arg + // GT_LIST containing a list of LCL_FLDs + assert((op1 != NULL) && ((!op1->IsList()) || (op1->IsListOfLclFlds()))); + assert((op2 != NULL) && ((!op2->IsList()) || (op2->IsListOfLclFlds()))); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING + assert((op1 != NULL) && (!op1->IsList())); + assert((op2 != NULL) && (!op2->IsList())); +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING GenTreePtr tree; @@ -5207,9 +5258,11 @@ GenTreePtr Compiler::gtNewAssignNode(GenTreePtr dst, GenTreePtr src DEB // using struct assignment. #ifdef _TARGET_ARM_ assert(isPhiDefn || type != TYP_STRUCT || IsHfa(dst) || IsHfa(src)); -#else +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // You need to use GT_COPYBLK for assigning structs // See impAssignStruct() + assert(isPhiDefn || type != TYP_STRUCT || IsRegisterPassable(dst) || IsRegisterPassable(src)); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING assert(isPhiDefn || type != TYP_STRUCT); #endif @@ -5553,7 +5606,6 @@ GenTreePtr Compiler::gtClone(GenTree * tree, bool complexOK) tree->gtField.gtFldHnd, objp, tree->gtField.gtFldOffset); - } else if (tree->gtOper == GT_ADD) { @@ -8629,6 +8681,51 @@ GenTreePtr Compiler::gtDispLinearTree(GenTreeStmt* curStmt, // get child msg if (tree->IsCall()) { + // If this is a call and the arg (listElem) is a GT_LIST (Unix LCL_FLD for passing a var in multiple registers) + // print the nodes of the nested list and continue to the next argument. 
+ if (listElem->gtOper == GT_LIST) + { + GenTreePtr nextListNested = nullptr; + for (GenTreePtr listNested = listElem; listNested != nullptr; listNested = nextListNested) + { + GenTreePtr listElemNested; + if (listNested->gtOper == GT_LIST) + { + nextListNested = listNested->MoveNext(); + listElemNested = listNested->Current(); + } + else + { + // GT_LIST nodes (under initBlk, others?) can have a non-null op2 that's not a GT_LIST + nextListNested = nullptr; + listElemNested = listNested; + } + + indentStack->Push(indentInfo); + if (child == tree->gtCall.gtCallArgs) + { + gtGetArgMsg(tree, listNested, listElemNum, bufp, BufLength); + } + else + { + assert(child == tree->gtCall.gtCallLateArgs); + gtGetLateArgMsg(tree, listNested, listElemNum, bufp, BufLength); + } + nextLinearNode = gtDispLinearTree(curStmt, nextLinearNode, listElemNested, indentStack, bufp); + indentStack->Pop(); + } + + // Skip the GT_LIST nodes, as we do not print them, and the next node to print will occur + // after the list. + while (nextLinearNode->OperGet() == GT_LIST) + { + nextLinearNode = nextLinearNode->gtNext; + } + + listElemNum++; + continue; + } + if (child == tree->gtCall.gtCallArgs) { gtGetArgMsg(tree, listElem, listElemNum, bufp, BufLength); @@ -8643,6 +8740,7 @@ GenTreePtr Compiler::gtDispLinearTree(GenTreeStmt* curStmt, { sprintf_s(bufp, sizeof(buf), "List Item %d", listElemNum); } + indentStack->Push(indentInfo); nextLinearNode = gtDispLinearTree(curStmt, nextLinearNode, listElem, indentStack, bufp); indentStack->Pop(); @@ -10179,6 +10277,7 @@ LNG_ADD_CHKOVF: } } } + lval1 = ltemp; break; case GT_OR : lval1 |= lval2; break; diff --git a/src/jit/gentree.h b/src/jit/gentree.h index f6c850ea5a..1402445da0 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -1027,6 +1027,11 @@ public: return OperIsCopyBlkOp(OperGet()); } + bool OperIsPutArgStk() const + { + return gtOper == GT_PUTARG_STK; + } + bool OperIsAddrMode() const { return OperIsAddrMode(OperGet()); @@ -1125,7 +1130,7 @@ public: static int OperIsSimple(genTreeOps gtOper) { - return (OperKind(gtOper) & GTK_SMPOP ) != 0; + return (OperKind(gtOper) & GTK_SMPOP ) != 0; } static @@ -1294,7 +1299,7 @@ public: static inline bool RequiresNonNullOp2(genTreeOps oper); - + bool IsListOfLclFlds(); #endif // DEBUG inline bool IsZero(); @@ -2277,7 +2282,7 @@ struct GenTreeColon: public GenTreeOp /* gtCall -- method call (GT_CALL) */ typedef class fgArgInfo * fgArgInfoPtr; -struct GenTreeCall: public GenTree +struct GenTreeCall final : public GenTree { GenTreePtr gtCallObjp; // The instance argument ('this' pointer) GenTreeArgList* gtCallArgs; // The list of arguments in original evaluation order @@ -2296,6 +2301,14 @@ struct GenTreeCall: public GenTree CORINFO_SIG_INFO* callSig; // Used by tail calls and to register callsites with the EE regMaskTP gtCallRegUsedMask; // mask of registers used to pass parameters +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + + void SetRegisterReturningStructState(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDescIn) + { + structDesc.CopyFrom(structDescIn); + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #define GTF_CALL_M_EXPLICIT_TAILCALL 0x0001 // GT_CALL -- the call is "tail" prefixed and importer has performed tail call checks #define GTF_CALL_M_TAILCALL 0x0002 // GT_CALL -- the call is a tailcall @@ -2438,9 +2451,12 @@ struct GenTreeCall: public GenTree GenTreeCall(var_types type) : GenTree(GT_CALL, type) - {} + { + } #if 
DEBUGGABLE_GENTREE - GenTreeCall() : GenTree() {} + GenTreeCall() : GenTree() + { + } #endif }; @@ -3024,7 +3040,7 @@ struct GenTreeRetExpr: public GenTree { GenTreePtr gtInlineCandidate; -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) CORINFO_CLASS_HANDLE gtRetClsHnd; #endif @@ -3243,10 +3259,26 @@ struct GenTreePutArgStk: public GenTreeUnOp // Fast tail calls set this to true. // In future if we need to add more such bool fields consider bit fields. - GenTreePutArgStk(genTreeOps oper, var_types type, unsigned slotNum, bool _putInIncomingArgArea = false - DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) : - GenTreeUnOp(oper, type DEBUG_ARG(largeNode)), - gtSlotNum(slotNum), putInIncomingArgArea(_putInIncomingArgArea) + GenTreePutArgStk( + genTreeOps oper, + var_types type, + unsigned slotNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots) + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct), + bool _putInIncomingArgArea = false + DEBUG_ARG(GenTreePtr callNode = NULL) + DEBUG_ARG(bool largeNode = false)) + : + GenTreeUnOp(oper, type DEBUG_ARG(largeNode)), + gtSlotNum(slotNum), + putInIncomingArgArea(_putInIncomingArgArea) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + , gtPutArgStkKind(PutArgStkKindInvalid), + gtNumSlots(numSlots), + gtIsStruct(isStruct), + gtNumberReferenceSlots(0), + gtGcPtrs(nullptr) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { #ifdef DEBUG gtCall = callNode; @@ -3254,22 +3286,53 @@ struct GenTreePutArgStk: public GenTreeUnOp } - GenTreePutArgStk(genTreeOps oper, var_types type, GenTreePtr op1, unsigned slotNum, bool _putInIncomingArgArea = false - DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) : - GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)), - gtSlotNum(slotNum), putInIncomingArgArea(_putInIncomingArgArea) + GenTreePutArgStk( + genTreeOps oper, + var_types type, + GenTreePtr op1, + unsigned slotNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots) + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct), + bool _putInIncomingArgArea = false + DEBUG_ARG(GenTreePtr callNode = NULL) + DEBUG_ARG(bool largeNode = false)) + : + GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)), + gtSlotNum(slotNum), + putInIncomingArgArea(_putInIncomingArgArea) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + , gtPutArgStkKind(PutArgStkKindInvalid), + gtNumSlots(numSlots), + gtIsStruct(isStruct), + gtNumberReferenceSlots(0), + gtGcPtrs(nullptr) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { #ifdef DEBUG gtCall = callNode; #endif } -#else // !FEATURE_FASTTAIL_CALL - - GenTreePutArgStk(genTreeOps oper, var_types type, unsigned slotNum - DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) : - GenTreeUnOp(oper, type DEBUG_ARG(largeNode)), - gtSlotNum(slotNum) +#else // !FEATURE_FASTTAILCALL + + GenTreePutArgStk( + genTreeOps oper, + var_types type, + unsigned slotNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots) + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct) + DEBUG_ARG(GenTreePtr callNode = NULL) + DEBUG_ARG(bool largeNode = false)) + : + GenTreeUnOp(oper, type DEBUG_ARG(largeNode)), + gtSlotNum(slotNum) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + , gtPutArgStkKind(PutArgStkKindInvalid), + gtNumSlots(numSlots), + gtIsStruct(isStruct), + gtNumberReferenceSlots(0), + gtGcPtrs(nullptr) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { #ifdef DEBUG gtCall = callNode; @@ -3277,10 +3340,25 @@ struct GenTreePutArgStk: 
public GenTreeUnOp
 }
- GenTreePutArgStk(genTreeOps oper, var_types type, GenTreePtr op1, unsigned slotNum
- DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) :
- GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)),
- gtSlotNum(slotNum)
+ GenTreePutArgStk(
+ genTreeOps oper,
+ var_types type,
+ GenTreePtr op1,
+ unsigned slotNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct)
+ DEBUG_ARG(GenTreePtr callNode = NULL)
+ DEBUG_ARG(bool largeNode = false))
+ :
+ GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)),
+ gtSlotNum(slotNum)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid),
+ gtNumSlots(numSlots),
+ gtIsStruct(isStruct),
+ gtNumberReferenceSlots(0),
+ gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
 {
 #ifdef DEBUG
 gtCall = callNode;
@@ -3288,10 +3366,53 @@ struct GenTreePutArgStk: public GenTreeUnOp
 }
 #endif // FEATURE_FASTTAILCALL
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ //------------------------------------------------------------------------
+ // setGcPointers: Sets the number of references and the layout of the struct object returned by the VM.
+ //
+ // Arguments:
+ // numPointers - Number of pointer references.
+ // pointers - layout of the struct (with pointers marked).
+ //
+ // Return Value:
+ // None
+ //
+ // Notes:
+ // This data is used in the codegen for GT_PUTARG_STK to decide how to copy the struct to the stack by value.
+ // If the struct contains no pointer references, block copy instructions are used.
+ // Otherwise the pointer reference slots are copied one by one with instructions for which GC info is emitted.
+ // Any non-pointer slots between the pointer reference slots are copied in block fashion.
+ //
+ void setGcPointers(unsigned numPointers, BYTE* pointers)
+ {
+ gtNumberReferenceSlots = numPointers;
+ gtGcPtrs = pointers;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
 #ifdef DEBUG
 GenTreePtr gtCall; // the call node to which this argument belongs
 #endif
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Instruction selection: during codegen time, what code sequence we will be using
+ // to encode this operation.
+
+ enum PutArgStkKind : __int8
+ {
+ PutArgStkKindInvalid,
+ PutArgStkKindRepInstr,
+ PutArgStkKindUnroll,
+ };
+
+ PutArgStkKind gtPutArgStkKind;
+
+ unsigned gtNumSlots; // Number of slots for the argument to be passed on stack
+ bool gtIsStruct; // This stack arg is a struct.
+ unsigned gtNumberReferenceSlots; // Number of reference slots.
+ BYTE* gtGcPtrs; // GC pointer layout of the struct
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
 #if DEBUGGABLE_GENTREE
 GenTreePutArgStk() : GenTreeUnOp() {}
 #endif
@@ -3325,6 +3446,30 @@ inline GenTreePtr GenTree::MoveNext()
 return gtOp.gtOp2;
 }
+#ifdef DEBUG
+inline bool GenTree::IsListOfLclFlds()
+{
+ if (!IsList())
+ {
+ return false;
+ }
+
+ GenTree* gtListPtr = this;
+ while (gtListPtr->Current() != nullptr)
+ {
+ if (gtListPtr->Current()->OperGet() != GT_LCL_FLD)
+ {
+ return false;
+ }
+
+ gtListPtr = gtListPtr->MoveNext();
+ }
+
+ return true;
+}
+#endif // DEBUG
+
 inline GenTreePtr GenTree::Current()
 {
 assert(IsList());
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index d56ca3ddda..0ee654c837 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -1152,13 +1152,22 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
 BasicBlock * block /* = NULL */
 )
 {
- assert(src->TypeGet() == TYP_STRUCT);
-
+ assert(src->TypeGet() == TYP_STRUCT || (src->gtOper == GT_ADDR && src->TypeGet() == TYP_BYREF));
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // TODO-ARM-BUG: Does ARM need this?
+ // TODO-ARM64-BUG: Does ARM64 need this?
+ assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD ||
+ src->gtOper == GT_IND || src->gtOper == GT_LDOBJ ||
+ src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY ||
+ src->gtOper == GT_RET_EXPR || src->gtOper == GT_COMMA ||
+ src->gtOper == GT_ADDR || GenTree::OperIsSIMD(src->gtOper));
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
 assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD ||
 src->gtOper == GT_IND || src->gtOper == GT_LDOBJ ||
 src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY ||
 src->gtOper == GT_RET_EXPR || src->gtOper == GT_COMMA ||
 GenTree::OperIsSIMD(src->gtOper));
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
 if (src->gtOper == GT_CALL)
 {
@@ -1187,8 +1196,14 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
 fgLclFldAssign(lcl->gtLclVarCommon.gtLclNum);
 lcl->gtType = src->gtType;
 dest = lcl;
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_)
 impMarkLclDstNotPromotable(lcl->gtLclVarCommon.gtLclNum, src, structHnd);
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Not allowed for FEATURE_CORECLR which is the only SKU available for System V OSs.
+ assert(!src->gtCall.IsVarargs() && "varargs not allowed for System V OSs.");
+
+ // Make the struct non-promotable. The eightbytes could contain multiple fields.
+ lvaTable[lcl->gtLclVarCommon.gtLclNum].lvDontPromote = true;
 #endif
 }
 else
@@ -1207,6 +1222,7 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
 {
 GenTreePtr call = src->gtRetExpr.gtInlineCandidate;
 noway_assert(call->gtOper == GT_CALL);
+
 if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG)
 {
 // insert the return value buffer into the argument list as first byref parameter
@@ -1274,7 +1290,8 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
 }
 else if (src->gtOper == GT_COMMA)
 {
- assert(src->gtOp.gtOp2->gtType == TYP_STRUCT); // Second thing is the struct
+ // The second thing is the struct or its address.
+ assert(src->gtOp.gtOp2->gtType == TYP_STRUCT || src->gtOp.gtOp2->gtType == TYP_BYREF);
 if (pAfterStmt)
 {
 * pAfterStmt = fgInsertStmtAfter(block, * pAfterStmt, gtNewStmt(src->gtOp.gtOp1, impCurStmtOffs));
 }
@@ -1287,6 +1304,10 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
 // evaluate the second thing using recursion
 return impAssignStructPtr(dest, src->gtOp.gtOp2, structHnd, curLevel, pAfterStmt, block);
 }
+ else if (src->gtOper == GT_ADDR)
+ {
+ // The address is already in src; use it to copy the struct.
+ }
 else
 {
 src = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
@@ -4528,8 +4549,7 @@ GenTreePtr Compiler::impTransformThis (GenTreePtr thisPtr,
 GenTreePtr obj = thisPtr;
 assert(obj->TypeGet() == TYP_BYREF || obj->TypeGet() == TYP_I_IMPL);
- obj = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, obj, pConstrainedResolvedToken->hClass
- );
+ obj = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, obj, pConstrainedResolvedToken->hClass);
 obj->gtFlags |= GTF_EXCEPT;
 CorInfoType jitTyp = info.compCompHnd->asCorInfoType(pConstrainedResolvedToken->hClass);
@@ -5948,7 +5968,14 @@ var_types Compiler::impImportCall (OPCODE opcode,
 }
 }
- /* Check for varargs */
+ // Check for varargs
+#if !FEATURE_VARARG
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG ||
+ (sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
+ {
+ BADCODE("Varargs not supported.");
+ }
+#endif // !FEATURE_VARARG
 if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG ||
 (sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
@@ -6699,12 +6726,23 @@ bool Compiler::impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO *
 return false;
 }
-#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+#if defined(_TARGET_AMD64_) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ assert(!info.compIsVarArgs && "Varargs not supported in CoreCLR on Unix.");
+ if (IsRegisterPassable(methInfo->args.retTypeClass))
+ {
+ return false;
+ }
+
+ // The struct is not aligned properly, or it is bigger than 16 bytes,
+ // or it has a custom layout, or it is not passed in registers for any other reason.
+ return true;
+#elif defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+
+ // Check for TYP_STRUCT argument that can fit into a single register.
 // We don't need a return buffer if:
 // i) TYP_STRUCT argument that can fit into a single register and
 // ii) Power of two sized TYP_STRUCT.
 unsigned size = info.compCompHnd->getClassSize(methInfo->args.retTypeClass);
- return (size > TARGET_POINTER_SIZE) || ((size & (size-1)) != 0);
+ return (size > TARGET_POINTER_SIZE) || ((size & (size - 1)) != 0);
 #elif defined(_TARGET_ARM_)
 // Check for non HFA: in ARM HFAs are returned in registers.
 if (!info.compIsVarArgs && IsHfa(methInfo->args.retTypeClass))
@@ -6717,8 +6755,6 @@ bool Compiler::impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO *
 // TODO-ARM64-NYI: HFA/HVA arguments.
 // Check for TYP_STRUCT argument that is greater than 16 bytes.
 return info.compCompHnd->getClassSize(methInfo->args.retTypeClass) > 16;
-#elif defined(_TARGET_X86_)
- return true;
 #else // _TARGET_*
 #error Unsupported or unset target architecture
 #endif // _TARGET_*
@@ -6792,7 +6828,6 @@ GenTreePtr Compiler::impFixupStructReturn(GenTreePtr call,
 CORINFO_CLASS_HANDLE retClsHnd)
 {
 assert(call->gtOper == GT_CALL);
-
 if (call->TypeGet() != TYP_STRUCT)
 {
 return call;
 }
@@ -6826,13 +6861,46 @@ GenTreePtr Compiler::impFixupStructReturn(GenTreePtr call,
 return call;
 }
- return impAssignHfaToVar(call, retClsHnd);
+ return impAssignStructToVar(call, retClsHnd);
 }
-#endif
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Not allowed for FEATURE_CORECLR which is the only SKU available for System V OSs.
+ assert(!call->gtCall.IsVarargs() && "varargs not allowed for System V OSs.");
+
+ // The return is a struct if not normalized to a single eightbyte return type below.
+ call->gtCall.gtReturnType = TYP_STRUCT;
+ // Get the classification for the struct.
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+ if (structDesc.passedInRegisters)
+ {
+ call->gtCall.SetRegisterReturningStructState(structDesc);
+
+ if (structDesc.eightByteCount <= 1)
+ {
+ call->gtCall.gtReturnType = getEightByteType(structDesc, 0);
+ }
+ else
+ {
+ if (!call->gtCall.CanTailCall() && ((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) == 0))
+ {
+ // Spill the register-returned struct to a temp variable, unless the call
+ // can be a tail call or is an inline candidate; for those we must not copy
+ // the return value back and forth through a variable.
+ return impAssignStructToVar(call, retClsHnd);
+ }
+ }
+ }
+ else
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG;
+ }
+
+ return call;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
 unsigned size = info.compCompHnd->getClassSize(retClsHnd);
 BYTE gcPtr = 0;
-
 // Check for TYP_STRUCT argument that can fit into a single register
 // change the type on those trees.
 // TODO-ARM64-NYI: what about structs 9 to 16 bytes that fit in two consecutive registers?
@@ -6913,7 +6981,37 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL
 assert(info.compRetBuffArg == BAD_VAR_NUM);
 #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
 assert(info.compRetNativeType != TYP_STRUCT);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(!info.compIsVarArgs); // No VarArgs for CoreCLR.
+ if (info.compRetNativeType == TYP_STRUCT)
+ {
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+
+ if (structDesc.passedInRegisters)
+ {
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ // This LCL_VAR is a register return value; it stays a TYP_STRUCT.
+ unsigned lclNum = op->gtLclVarCommon.gtLclNum;
+ // Make sure this struct type stays as struct so that we can return it in registers.
+ lvaTable[lclNum].lvDontPromote = true;
+
+ return op;
+ }
+
+ if (op->gtOper == GT_CALL)
+ {
+ return op;
+ }
+
+ return impAssignStructToVar(op, retClsHnd);
+ }
+ }
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
 #elif defined(_TARGET_ARM_)
 if (!info.compIsVarArgs && IsHfa(retClsHnd))
 {
@@ -6941,7 +7039,7 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL
 return op;
 }
 }
- return impAssignHfaToVar(op, retClsHnd);
+ return impAssignStructToVar(op, retClsHnd);
 }
 #endif
@@ -7003,7 +7101,22 @@ REDO_RETURN_NODE:
 }
 else
 {
- assert(info.compRetNativeType == op->gtCall.gtReturnType);
+#ifdef DEBUG
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (op->gtType == TYP_STRUCT)
+ {
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+ assert(structDesc.eightByteCount < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ assert(getEightByteType(structDesc, 0) == op->gtCall.gtReturnType);
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ assert(info.compRetNativeType == op->gtCall.gtReturnType);
+ }
+#endif // DEBUG
 // Don't change the gtType node just yet, it will get changed later
 return op;
 }
@@ -7012,8 +7125,19 @@ REDO_RETURN_NODE:
 {
 op->gtOp.gtOp2 = impFixupStructReturnType(op->gtOp.gtOp2, retClsHnd);
 }
-
- op->gtType = info.compRetNativeType;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (op->gtType == TYP_STRUCT)
+ {
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+ assert(structDesc.eightByteCount < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ op->gtType = getEightByteType(structDesc, 0);
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ op->gtType = info.compRetNativeType;
+ }
 return op;
 }
@@ -11412,7 +11536,6 @@ DO_LDFTN:
 }
 eeGetFieldInfo(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo);
-
 // Figure out the type of the member. We always call canAccessField, so you always need this
 // handle
 CorInfoType ciType = fieldInfo.fieldType;
@@ -11590,7 +11713,6 @@ DO_LDFTN:
 /* Create the data member node */
 op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, NULL, fieldInfo.offset);
-
 op1->gtFlags |= GTF_IND_TLS_REF; // fgMorphField will handle the transformation
 if (isLoadAddress)
@@ -11850,7 +11972,6 @@ FIELD_DONE:
 /* Create the data member node */
 op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, NULL, fieldInfo.offset);
-
 op1->gtFlags |= GTF_IND_TLS_REF; // fgMorphField will handle the transformation
 break;
@@ -12396,7 +12517,11 @@ FIELD_DONE:
 | | | push the BYREF to this local |
 |---------------------------------------------------------------------
 | UNBOX_ANY | push a GT_LDOBJ of | push the STRUCT |
- | | the BYREF | |
+ | | the BYREF | For Linux when the |
+ | | | struct is returned in two |
+ | | | registers create a temp |
+ | | | whose address is passed to |
+ | | | the unbox_nullable helper.
| |---------------------------------------------------------------------
 */
@@ -12434,11 +12559,40 @@ FIELD_DONE:
 impPushOnStack(op1, tiRetVal);
 oper = GT_LDOBJ;
 goto LDOBJ;
- }
-
+ }
+
+ assert(helper == CORINFO_HELP_UNBOX_NULLABLE && "Make sure the helper is nullable!");
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (op1->gtType == TYP_STRUCT)
+ {
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(resolvedToken.hClass, &structDesc);
+ if (structDesc.passedInRegisters && structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS)
+ {
+ // The unbox nullable helper returns a TYP_STRUCT.
+ // We need to spill it to a temp so that we can take the address of it.
+ // We need the temp so we can pass its address to the unbox_nullable jit helper function.
+ // This is needed for nullables returned in 2 registers.
+ // The single-register cases are normalized earlier. For structs bigger than 16 bytes a retbuf is already passed in rdi.
+
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("UNBOXing a register returnable nullable"));
+ lvaTable[tmp].lvDontPromote = true;
+ lvaSetStruct(tmp, resolvedToken.hClass, true /* unsafe value cls check */);
+
+ op2 = gtNewLclvNode(tmp, TYP_STRUCT);
+ op1 = impAssignStruct(op2, op1, resolvedToken.hClass, (unsigned)CHECK_SPILL_ALL);
+ assert(op1->gtType == TYP_VOID); // We must be assigning the return struct to the temp.
+
+ op2 = gtNewLclvNode(tmp, TYP_STRUCT);
+ op2 = gtNewOperNode(GT_ADDR, TYP_BYREF, op2);
+ op1 = gtNewOperNode(GT_COMMA, TYP_STRUCT, op1, op2);
+ }
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
 assert(op1->gtType == TYP_STRUCT);
 tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
- assert(tiRetVal.IsValueClass());
+ assert(tiRetVal.IsValueClass());
 }
 impPushOnStack(op1, tiRetVal);
@@ -12946,8 +13100,7 @@ LDOBJ:
 // LDOBJ returns a struct
 // and an inline argument which is the class token of the loaded obj
- op1 = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, op1, resolvedToken.hClass
- );
+ op1 = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, op1, resolvedToken.hClass);
 op1->gtFlags |= GTF_EXCEPT;
 CorInfoType jitTyp = info.compCompHnd->asCorInfoType(resolvedToken.hClass);
@@ -13231,7 +13384,7 @@ void Compiler::impLoadLoc(unsigned ilLclNum, IL_OFFSET offset)
 }
 }
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_)
 /**************************************************************************************
 *
 * When assigning a vararg call src to a HFA lcl dest, mark that we cannot promote the
@@ -13269,12 +13422,32 @@ void Compiler::impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr src, CORIN
 }
 }
 }
+#endif
-GenTreePtr Compiler::impAssignHfaToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass)
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+GenTreePtr Compiler::impAssignStructToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass)
 {
- unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for HFA structs in ARM."));
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for register returned structs in System V"));
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for HFA structs in ARM"));
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
 impAssignTempGen(tmpNum, op, hClass, (unsigned) CHECK_SPILL_NONE);
- return gtNewLclvNode(tmpNum, TYP_STRUCT);
+ GenTreePtr ret = gtNewLclvNode(tmpNum, TYP_STRUCT);
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#ifdef DEBUG
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(hClass, &structDesc);
+ // If single eightbyte, the return type would have been normalized and there won't be a temp var.
+ // This code will be called only if the struct return has not been normalized (i.e. 2 eightbytes, the max allowed.)
+ assert(structDesc.passedInRegisters && structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+#endif // DEBUG
+ // Mark the var that stores the eightbytes on the stack as non-promotable.
+ // The return value is based on eightbytes, so all the fields need
+ // to be on stack before loading the eightbyte in the corresponding return register.
+ lvaTable[tmpNum].lvDontPromote = true;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ return ret;
 }
 #endif
@@ -13297,7 +13470,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
 Verify(!verIsByRefLike(tiDeclared) ||
 verIsSafeToReturnByRef(tiVal)
 , "byref return");
-
+
 Verify(tiCompatibleWith(tiVal, tiDeclared.NormaliseForStack(), true), "type mismatch");
 expectedStack=1;
 }
@@ -13502,15 +13675,35 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
 se.seTypeInfo.GetClassHandle(),
 (unsigned) CHECK_SPILL_ALL);
 }
-#ifdef _TARGET_ARM_
+ // TODO-ARM64-NYI: HFA
+ // TODO-AMD64-Unix and TODO-ARM: once the ARM64 functionality is implemented, the
+ // ifdefs below could be refactored into a single method with the ifdef inside.
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if defined(_TARGET_ARM_)
 if (IsHfa(retClsHnd))
 {
 // Same as !IsHfa but just don't bother with impAssignStructPtr.
+#else // !defined(_TARGET_ARM_)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+ if (structDesc.passedInRegisters)
+ {
+ // If single eightbyte, the return type would have been normalized and there won't be a temp var.
+ // This code will be called only if the struct return has not been normalized (i.e. 2 eightbytes, the max allowed.)
+ assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ // Same as !structDesc.passedInRegisters but just don't bother with impAssignStructPtr.
+#endif // !defined(_TARGET_ARM_)
+
 if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
 {
 if (!impInlineInfo->retExpr)
 {
+#if defined(_TARGET_ARM_)
 impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, TYP_STRUCT);
+#else // !defined(_TARGET_ARM_)
+ // The inlinee compiler has figured out the type of the temp already. Use it here.
+ impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, lvaTable[lvaInlineeReturnSpillTemp].lvType);
+#endif // !defined(_TARGET_ARM_)
 }
 }
 else
@@ -13519,7 +13712,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
 }
 }
 else
-#endif
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
 {
 assert(iciCall->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG);
 GenTreePtr dest = gtCloneExpr(iciCall->gtCall.gtCallArgs->gtOp.gtOp1);
@@ -13575,8 +13768,9 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
 }
 else if (info.compRetType == TYP_STRUCT)
 {
-#ifndef _TARGET_ARM_
+#if !defined(_TARGET_ARM_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
 // In ARM HFA native types are maintained as structs.
+ // The multi register System V AMD64 return structs are also left as structs and not normalized. // TODO-ARM64-NYI: HFA noway_assert(info.compRetNativeType != TYP_STRUCT); #endif diff --git a/src/jit/jit.h b/src/jit/jit.h index 9702da3ec9..2901ffd6eb 100644 --- a/src/jit/jit.h +++ b/src/jit/jit.h @@ -220,6 +220,22 @@ #define INDEBUG_LDISASM_COMMA(x) #endif +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(x) , x +#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x) x +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(x) +#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x) +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + +#if defined(UNIX_AMD64_ABI) +#define UNIX_AMD64_ABI_ONLY_ARG(x) , x +#define UNIX_AMD64_ABI_ONLY(x) x +#else // !defined(UNIX_AMD64_ABI) +#define UNIX_AMD64_ABI_ONLY_ARG(x) +#define UNIX_AMD64_ABI_ONLY(x) +#endif // defined(UNIX_AMD64_ABI) + // To get rid of warning 4701 : local variable may be used without being initialized #define DUMMY_INIT(x) (x) @@ -605,7 +621,11 @@ unsigned int unsigned_abs(int x) inline size_t unsigned_abs(ssize_t x) { +#ifndef FEATURE_PAL return ((size_t) abs(x)); +#else // !FEATURE_PAL + return ((size_t) labs(x)); +#endif // !FEATURE_PAL } #endif // _TARGET_64BIT_ diff --git a/src/jit/jitgcinfo.h b/src/jit/jitgcinfo.h index 5c8d10f1b7..4063bafe15 100644 --- a/src/jit/jitgcinfo.h +++ b/src/jit/jitgcinfo.h @@ -253,7 +253,6 @@ public : #endif unsigned short cdArgCnt; - unsigned short cdArgBaseOffset; union { diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index c12f735f68..b9e89f156d 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -103,8 +103,8 @@ void Compiler::lvaInitTypeRef() /* Set compArgsCount and compLocalsCount */ info.compArgsCount = info.compMethodInfo->args.numArgs; - - /* Is there a 'this' pointer */ + + // Is there a 'this' pointer if (!info.compIsStatic) { @@ -133,6 +133,18 @@ void Compiler::lvaInitTypeRef() else #endif { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + eeGetSystemVAmd64PassStructInRegisterDescriptor(info.compMethodInfo->args.retTypeClass, &structDesc); + if (structDesc.eightByteCount > 1) + { + info.compRetNativeType = TYP_STRUCT; + } + else + { + info.compRetNativeType = getEightByteType(structDesc, 0); + } +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING unsigned size = info.compCompHnd->getClassSize(info.compMethodInfo->args.retTypeClass); // Check for TYP_STRUCT argument that can fit into a single register @@ -173,6 +185,7 @@ void Compiler::lvaInitTypeRef() assert(!"Unexpected size when returning struct by value"); break; } +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING } } @@ -191,7 +204,9 @@ void Compiler::lvaInitTypeRef() calling convention is varargs */ if (info.compIsVarArgs) + { info.compArgsCount++; + } // Is there an extra parameter used to pass instantiation info to // shared generic methods and shared generic struct instance methods? 
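As an aside, the return-type normalization introduced in the lvaInitTypeRef hunk above (a single eightbyte folds to a scalar type, two eightbytes stay TYP_STRUCT) can be illustrated with a small standalone sketch. The enum values and the eightByteToVarType mapping below are simplified, hypothetical stand-ins for the JIT's SystemVClassificationType and getEightByteType, not the actual implementations:

    #include <cassert>

    // Hypothetical stand-ins for the JIT's types; the exact width-to-type
    // mapping here is an illustrative assumption, not the real getEightByteType.
    enum Classification { ClsInteger, ClsIntegerReference, ClsSSE };
    enum VarType { TYP_BYTE, TYP_SHORT, TYP_INT, TYP_LONG, TYP_REF, TYP_FLOAT, TYP_DOUBLE };

    VarType eightByteToVarType(Classification cls, unsigned sizeInBytes)
    {
        if (cls == ClsIntegerReference)
        {
            assert(sizeInBytes == 8); // object references are pointer sized
            return TYP_REF;
        }
        if (cls == ClsSSE)
        {
            return (sizeInBytes <= 4) ? TYP_FLOAT : TYP_DOUBLE;
        }
        // ClsInteger: pick the smallest integral type covering the eightbyte.
        switch (sizeInBytes)
        {
        case 1:  return TYP_BYTE;
        case 2:  return TYP_SHORT;
        case 3:
        case 4:  return TYP_INT;
        default: return TYP_LONG; // 5..8 bytes occupy a full register
        }
    }

    int main()
    {
        // struct { float x; }   -> one 4-byte SSE eightbyte     -> returned in xmm0 as a float
        assert(eightByteToVarType(ClsSSE, 4) == TYP_FLOAT);
        // struct { int a, b; }  -> one 8-byte INTEGER eightbyte -> returned in rax as a long
        assert(eightByteToVarType(ClsInteger, 8) == TYP_LONG);
        return 0;
    }

A struct with two eightbytes (e.g. a double field followed by a long field) is described by both classifications and keeps TYP_STRUCT, which is why the diff only normalizes when eightByteCount is at most 1.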
@@ -356,18 +371,17 @@ void Compiler::lvaInitArgs(InitVarDscInfo * varDscInfo) //---------------------------------------------------------------------- - /* We have set info.compArgsCount in compCompile() */ - + // We have set info.compArgsCount in compCompile() noway_assert(varDscInfo->varNum == info.compArgsCount); assert (varDscInfo->intRegArgNum <= MAX_REG_ARG); - + codeGen->intRegState.rsCalleeRegArgNum = varDscInfo->intRegArgNum; #if !FEATURE_STACK_FP_X87 codeGen->floatRegState.rsCalleeRegArgNum = varDscInfo->floatRegArgNum; #endif // FEATURE_STACK_FP_X87 - /* The total argument size must be aligned. */ + // The total argument size must be aligned. noway_assert((compArgSize % sizeof(void*)) == 0); #ifdef _TARGET_X86_ @@ -440,6 +454,7 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo * varDscInfo) } #endif compArgSize += TARGET_POINTER_SIZE; + varDscInfo->varNum++; varDscInfo->varDsc++; } @@ -449,7 +464,17 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo * varDscInfo) void Compiler::lvaInitRetBuffArg(InitVarDscInfo * varDscInfo) { LclVarDsc * varDsc = varDscInfo->varDsc; - const bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(info.compMethodInfo); + bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(info.compMethodInfo); + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (info.compRetNativeType == TYP_STRUCT) + { + if (IsRegisterPassable(info.compMethodInfo->args.retTypeClass)) + { + hasRetBuffArg = false; + } + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING if (hasRetBuffArg) { @@ -594,7 +619,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) // the type as a float or double. argType = hfaType; } - if (isRegParamType(argType)) { compArgSize += varDscInfo->alignReg(argType, cAlign) * REGSIZE_BYTES; @@ -644,19 +668,94 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) } #else // !_TARGET_ARM_ +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + if (argType == TYP_STRUCT) + { + assert(typeHnd != nullptr); + eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + if (structDesc.passedInRegisters) + { + unsigned intRegCount = 0; + unsigned floatRegCount = 0; - varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame + for (unsigned int i = 0; i < structDesc.eightByteCount; i++) + { + switch (structDesc.eightByteClassifications[i]) + { + case SystemVClassificationTypeInteger: + case SystemVClassificationTypeIntegerReference: + intRegCount++; + break; + case SystemVClassificationTypeSSE: + floatRegCount++; + break; + default: + assert(false && "Invalid eightbyte classification type."); + break; + } + } + + if (intRegCount != 0 && !varDscInfo->canEnreg(TYP_INT, intRegCount)) + { + structDesc.passedInRegisters = false; // No register to enregister the eightbytes. + } + + if (floatRegCount != 0 && !varDscInfo->canEnreg(TYP_FLOAT, floatRegCount)) + { + structDesc.passedInRegisters = false; // No register to enregister the eightbytes. + } + } + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + + // The final home for this incoming register might be our local stack frame + // For System V platforms the final home will always be on the local stack frame. 
+ varDsc->lvOnFrame = true;
 #endif // !_TARGET_ARM_
- if (varDscInfo->canEnreg(argType, cSlotsToEnregister))
+ bool canPassArgInRegisters = false;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (argType == TYP_STRUCT)
+ {
+ canPassArgInRegisters = structDesc.passedInRegisters;
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister);
+ }
+
+ if (canPassArgInRegisters)
 {
 /* Another register argument */
 // Allocate the registers we need. allocRegArg() returns the first argument register number of the set.
 // For non-HFA structs, we still "try" to enregister the whole thing; it will just max out if splitting
 // to the stack happens.
- unsigned firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots);
+ unsigned firstAllocatedRegArgNum = 0;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned secondAllocatedRegArgNum = 0;
+ var_types firstEightByteType = TYP_UNDEF;
+ var_types secondEightByteType = TYP_UNDEF;
+ varDsc->lvOtherArgReg = REG_NA;
+
+ if (argType == TYP_STRUCT)
+ {
+ if (structDesc.eightByteCount >= 1)
+ {
+ firstEightByteType = getEightByteType(structDesc, 0);
+ firstAllocatedRegArgNum = varDscInfo->allocRegArg(firstEightByteType, 1);
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots);
+ }
 #ifdef _TARGET_ARM_
 if (isHfaArg)
@@ -668,7 +767,31 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo)
 #endif // _TARGET_ARM_
 varDsc->lvIsRegArg = 1;
- varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (argType == TYP_STRUCT)
+ {
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType);
+
+ // If there is a second eightbyte, get a register for it too and map the arg to the reg number.
+ if (structDesc.eightByteCount >= 2)
+ {
+ secondEightByteType = getEightByteType(structDesc, 1);
+ secondAllocatedRegArgNum = varDscInfo->allocRegArg(secondEightByteType, 1);
+ }
+
+ if (secondEightByteType != TYP_UNDEF)
+ {
+ varDsc->lvOtherArgReg = genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType);
+ varDsc->addPrefReg(genRegMask(varDsc->lvOtherArgReg), this);
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType);
+ }
+
 varDsc->setPrefReg(varDsc->lvArgReg, this);
 #ifdef _TARGET_ARM_
@@ -682,52 +805,91 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo)
 #ifdef DEBUG
 if (verbose)
 {
- printf("Arg #%u passed in register ", varDscInfo->varNum);
-
- bool isFloat = varTypeIsFloating(argType);
- unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, argType);
+ printf("Arg #%u passed in register(s) ", varDscInfo->varNum);
+ bool isFloat = false;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // In case of one eightbyte struct the type is already normalized earlier.
+ // The varTypeIsFloating(argType) is good for this case.
+ if ((argType == TYP_STRUCT) && (structDesc.eightByteCount >= 1))
+ {
+ isFloat = varTypeIsFloating(firstEightByteType);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ isFloat = varTypeIsFloating(argType);
+ }
- for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++)
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (argType == TYP_STRUCT)
 {
- if (ix > 0)
- printf(",");
+ // Print both registers, just to be clear
+ if (firstEightByteType == TYP_UNDEF)
+ {
+ printf("firstEightByte: <not used>");
+ }
+ else
+ {
+ printf("firstEightByte: %s", getRegName(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType), isFloat));
+ }
- if (!isFloat && (regArgNum >= varDscInfo->maxIntRegArgNum)) // a struct has been split between registers and stack
+ if (secondEightByteType == TYP_UNDEF)
 {
- printf(" stack slots:%d", cSlots - ix);
- break;
+ printf(", secondEightByte: <not used>");
 }
+ else
+ {
+ printf(", secondEightByte: %s", getRegName(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType), varTypeIsFloating(secondEightByteType)));
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, argType);
-#ifdef _TARGET_ARM_
- if (isFloat)
+ for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++)
 {
- // Print register size prefix
- if (argType == TYP_DOUBLE)
+ if (ix > 0)
+ printf(",");
+
+ if (!isFloat && (regArgNum >= varDscInfo->maxIntRegArgNum)) // a struct has been split between registers and stack
+ {
+ printf(" stack slots:%d", cSlots - ix);
+ break;
+ }
+
+#ifdef _TARGET_ARM_
+ if (isFloat)
 {
- // Print both registers, just to be clear
- printf("%s/%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat),
- getRegName(genMapRegArgNumToRegNum(regArgNum + 1, argType), isFloat));
-
- // doubles take 2 slots
- assert(ix + 1 < cSlots);
- ++ix;
- ++regArgNum;
+ // Print register size prefix
+ if (argType == TYP_DOUBLE)
+ {
+ // Print both registers, just to be clear
+ printf("%s/%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat),
+ getRegName(genMapRegArgNumToRegNum(regArgNum + 1, argType), isFloat));
+
+ // doubles take 2 slots
+ assert(ix + 1 < cSlots);
+ ++ix;
+ ++regArgNum;
+ }
+ else
+ {
+ printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat));
+ }
 }
 else
+#endif // _TARGET_ARM_
 {
 printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat));
 }
 }
- else
-#endif // _TARGET_ARM_
- {
- printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat));
- }
 }
 printf("\n");
 }
 #endif // DEBUG
- } // if canEnreg()
+ } // end if (canPassArgInRegisters)
 else
 {
 #ifdef _TARGET_ARM_
@@ -739,8 +901,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo)
 #endif
 }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // The arg size is the number of bytes of the argument. For a struct it could be a size that is not a multiple of
+ // TARGET_POINTER_SIZE. The stack allocated space should always be a multiple of TARGET_POINTER_SIZE, so round it up.
+ compArgSize += (unsigned)roundUp(argSize, TARGET_POINTER_SIZE); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING compArgSize += argSize; - +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING if (info.compIsVarArgs) { #if defined(_TARGET_X86_) @@ -807,6 +974,7 @@ void Compiler::lvaInitGenericsCtxt(InitVarDscInfo * varDscInfo) varDsc->lvArgReg = genMapRegArgNumToRegNum(varDscInfo->regArgNum(TYP_INT), varDsc->TypeGet()); varDsc->setPrefReg(varDsc->lvArgReg, this); varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame + varDscInfo->intRegArgNum++; #ifdef DEBUG @@ -1180,11 +1348,6 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd, lvaStructPromotionInfo * StructPromotionInfo, bool sortFields) { -#ifdef UNIX_AMD64_ABI - // TODO-Amd64-Unix: For now don't promote structs on Linux. - // This should be brought online with the full SystemVStruct passing work. - return; -#endif // UNIX_AMD64_ABI assert(eeIsValueClass(typeHnd)); if (typeHnd != StructPromotionInfo->typeHnd) @@ -2844,14 +3007,21 @@ void Compiler::lvaMarkLclRefs(GenTreePtr tree) } #endif // ASSERTION_PROP + bool allowStructs = false; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // On System V the type of the var could be a TYP_STRUCT. + allowStructs = varDsc->lvType == TYP_STRUCT; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + /* Variables must be used as the same type throughout the method */ - noway_assert(tiVerificationNeeded || - varDsc->lvType == TYP_UNDEF || tree->gtType == TYP_UNKNOWN || - genActualType(varDsc->TypeGet()) == genActualType(tree->gtType) || - (tree->gtType == TYP_BYREF && varDsc->TypeGet() == TYP_I_IMPL) || - (tree->gtType == TYP_I_IMPL && varDsc->TypeGet() == TYP_BYREF) || - (tree->gtFlags & GTF_VAR_CAST) || - varTypeIsFloating(varDsc->TypeGet()) && varTypeIsFloating(tree->gtType)); + noway_assert(tiVerificationNeeded || + varDsc->lvType == TYP_UNDEF || tree->gtType == TYP_UNKNOWN || + allowStructs || + genActualType(varDsc->TypeGet()) == genActualType(tree->gtType) || + (tree->gtType == TYP_BYREF && varDsc->TypeGet() == TYP_I_IMPL) || + (tree->gtType == TYP_I_IMPL && varDsc->TypeGet() == TYP_BYREF) || + (tree->gtFlags & GTF_VAR_CAST) || + varTypeIsFloating(varDsc->TypeGet()) && varTypeIsFloating(tree->gtType)); /* Remember the type of the reference */ @@ -3690,7 +3860,6 @@ void Compiler::lvaFixVirtualFrameOffsets() delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); } #endif //_TARGET_AMD64_ - unsigned lclNum; LclVarDsc * varDsc; for (lclNum = 0, varDsc = lvaTable; @@ -3735,6 +3904,7 @@ void Compiler::lvaFixVirtualFrameOffsets() if (doAssignStkOffs) { varDsc->lvStkOffs += delta; + #if DOUBLE_ALIGN if (genDoubleAlign() && !codeGen->isFramePointerUsed()) { @@ -3886,11 +4056,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() { noway_assert(lclNum == info.compThisArg); #ifndef _TARGET_X86_ -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs, &callerArgOffset); -#else // !UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs); -#endif // !UNIX_AMD64_ABI + argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); #endif // _TARGET_X86_ lclNum++; } @@ -3902,11 +4068,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() noway_assert(lclNum == info.compRetBuffArg); noway_assert(lvaTable[lclNum].lvIsRegArg); #ifndef _TARGET_X86_ -#ifdef UNIX_AMD64_ABI - argOffs = 
lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs, &callerArgOffset); -#else // !UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs); -#endif // !UNIX_AMD64_ABI + argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); #endif // _TARGET_X86_ lclNum++; } @@ -3917,20 +4079,12 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) { noway_assert(lclNum == (unsigned)info.compTypeCtxtArg); -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset); -#else // UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs); -#endif // UNIX_AMD64_ABI + argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); } if (info.compIsVarArgs) { -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset); -#else // !UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs); -#endif // !UNIX_AMD64_ABI + argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); } #endif // USER_ARGS_COME_LAST @@ -3976,18 +4130,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() if (lvaIsPreSpilled(preSpillLclNum, preSpillMask)) { unsigned argSize = eeGetArgSize(argLst, &info.compMethodInfo->args); -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg( - preSpillLclNum, - argSize, - argOffs, - &callerArgOffset); -#else // !UNIX_AMD64_ABI argOffs = lvaAssignVirtualFrameOffsetToArg( preSpillLclNum, argSize, argOffs); -#endif // !UNIX_AMD64_ABI argLcls++; // Early out if we can. If size is 8 and base reg is 2, then the mask is 0x1100 @@ -4008,18 +4154,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() { if (!lvaIsPreSpilled(stkLclNum, preSpillMask)) { -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg( - stkLclNum, - eeGetArgSize(argLst, &info.compMethodInfo->args), - argOffs, - &callerArgOffset); -#else // !UNIX_AMD64_ABI argOffs = lvaAssignVirtualFrameOffsetToArg( stkLclNum, eeGetArgSize(argLst, &info.compMethodInfo->args), argOffs); -#endif // !UNIX_AMD64_ABI argLcls++; } argLst = info.compCompHnd->getArgNext(argLst); @@ -4029,16 +4167,18 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() #else // !_TARGET_ARM_ for (unsigned i = 0; i < argSigLen; i++) { -#ifdef UNIX_AMD64_ABI - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, - eeGetArgSize(argLst, &info.compMethodInfo->args), - argOffs, - &callerArgOffset); -#else // !UNIX_AMD64_ABI + unsigned argumentSize = eeGetArgSize(argLst, &info.compMethodInfo->args); + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // On the stack frame the homed arg always takes a full number of slots + // for proper stack alignment. Make sure the real struct size is properly rounded up. 
+ argumentSize = (unsigned)roundUp(argumentSize, TARGET_POINTER_SIZE);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
 argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++,
- eeGetArgSize(argLst, &info.compMethodInfo->args),
- argOffs);
-#endif // UNIX_AMD64_ABI
+ argumentSize,
+ argOffs
+ UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
 argLst = info.compCompHnd->getArgNext(argLst);
 }
 #endif // !_TARGET_ARM_
@@ -4049,26 +4189,19 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
 if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
 {
 noway_assert(lclNum == (unsigned)info.compTypeCtxtArg);
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset);
-#else // !UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs);
-#endif // !UNIX_AMD64_ABI
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
 }
 if (info.compIsVarArgs)
 {
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset);
-#else // !UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs);
-#endif // !UNIX_AMD64_ABI
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
 }
 #endif // USER_ARGS_COME_LAST
 }
+#ifdef UNIX_AMD64_ABI
 //
 // lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an
 // individual argument, and return the offset for the next argument.
@@ -4076,12 +4209,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
 // (if any - the RA might decide to spill(home on the stack) register passed arguments, if rarely used.)
 // The final offset is calculated in lvaFixVirtualFrameOffsets method. It accounts for FP existance,
 // ret address slot, stack frame padding, alloca instructions, etc.
+// Note: This is the implementation for UNIX_AMD64 System V platforms.
 //
-#ifdef UNIX_AMD64_ABI
-int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs, int * callerArgOffset)
-#else // !UNIX_AMD64_ABI
-int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs)
-#endif // !UNIX_AMD64_ABI
+int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs UNIX_AMD64_ABI_ONLY_ARG(int * callerArgOffset))
 {
 noway_assert(lclNum < info.compArgsCount);
 noway_assert(argSize);
@@ -4114,30 +4244,131 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
 if (varDsc->lvIsRegArg)
 {
- /* Argument is passed in a register, don't count it
- * when updating the current offset on the stack */
-
-#ifndef _TARGET_ARM_
- noway_assert(argSize == sizeof(void *));
-#endif
+ // Argument is passed in a register, don't count it
+ // when updating the current offset on the stack.
-#if defined(_TARGET_X86_)
- argOffs += sizeof(void *);
-#elif defined(_TARGET_AMD64_)
-#ifdef UNIX_AMD64_ABI
 if (varDsc->lvOnFrame)
-#endif
 {
 // The offset for args needs to be set only for the stack homed arguments for System V.
 varDsc->lvStkOffs = argOffs;
- argOffs += sizeof(void *);
 }
-#ifdef UNIX_AMD64_ABI
- else
+ else
 {
 varDsc->lvStkOffs = 0;
 }
+ }
+ else
+ {
+ // For Windows AMD64 there are 4 slots for the register passed arguments on the top of the caller's stack. This is where they are always homed.
+ // So, they can be accessed with positive offset.
+ // On System V platforms, if the RA decides to home a register passed arg on the stack,
+ // it creates a stack location on the callee stack (like any other local var). In such a case, the register passed, stack homed arguments
+ // are accessed using negative offsets and the stack passed arguments are accessed using positive offsets (from the caller's stack).
+ // For System V platforms if there is no frame pointer the caller stack parameter offset should include the callee allocated space.
+ // If a frame register is used, the callee allocated space should not be included for accessing the caller stack parameters.
+ // The last two requirements are met in the lvaFixVirtualFrameOffsets method, which fixes the offsets based on frame pointer existence,
+ // existence of alloca instructions, ret address pushed, etc.
+
+ varDsc->lvStkOffs = *callerArgOffset;
+ // Structs passed on stack could be of size less than TARGET_POINTER_SIZE.
+ // Make sure they get at least TARGET_POINTER_SIZE on the stack - this is required for alignment.
+ if (varDsc->lvType == TYP_STRUCT)
+ {
+ *callerArgOffset += (int)roundUp(argSize, TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ *callerArgOffset += TARGET_POINTER_SIZE;
+ }
+ }
+
+ // For struct promoted parameters we need to set the offsets for both LclVars.
+ //
+ // For a dependent promoted struct we also assign the struct fields stack offset
+ if (varDsc->lvPromotedStruct())
+ {
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ assert(fieldVarNum == varDsc->lvFieldLclStart);
+ lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ }
+ }
+ // For an independent promoted struct field we also assign the parent struct stack offset
+ else if (varDsc->lvIsStructField)
+ {
+ noway_assert(varDsc->lvParentLcl < lvaCount);
+ lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs;
+ }
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
+ argOffs += argSize;
+
+ return argOffs;
+}
+
+#else // !UNIX_AMD64_ABI
+
+//
+// lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an
+// individual argument, and return the offset for the next argument.
+// Note: This method only calculates the initial offset of the stack passed/spilled arguments
+// (if any - the RA might decide to spill(home on the stack) register passed arguments, if rarely used.)
+// The final offset is calculated in the lvaFixVirtualFrameOffsets method. It accounts for FP existence,
+// ret address slot, stack frame padding, alloca instructions, etc.
+// Note: This is the implementation for all platforms except the UNIX_AMD64 OSs (System V 64 bit).
+int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs UNIX_AMD64_ABI_ONLY_ARG(int * callerArgOffset))
+{
+ noway_assert(lclNum < info.compArgsCount);
+ noway_assert(argSize);
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R)
+ argOffs -= argSize;
+
+ unsigned fieldVarNum = BAD_VAR_NUM;
+
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc * varDsc = lvaTable + lclNum;
+
+ if (varDsc->lvPromotedStruct())
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+ fieldVarNum = varDsc->lvFieldLclStart;
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_INDEPENDENT)
+ {
+ lclNum = fieldVarNum;
+ noway_assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+ assert(varDsc->lvIsStructField);
+ }
+ }
+
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegArg)
+ {
+ /* Argument is passed in a register, don't count it
+ * when updating the current offset on the stack */
+
+#ifndef _TARGET_ARM_
+#if DEBUG
+ noway_assert(argSize == sizeof(void *));
+#endif // DEBUG
+#endif
+
+#if defined(_TARGET_X86_)
+ argOffs += sizeof(void *);
+#elif defined(_TARGET_AMD64_)
+ // On Windows AMD64, register passed arguments have a home slot on the caller's stack; record its offset.
+ varDsc->lvStkOffs = argOffs;
+ // Register arguments also take stack space.
+ argOffs += sizeof(void *);
 #elif defined(_TARGET_ARM64_)
 // Register arguments don't take stack space.
 #elif defined(_TARGET_ARM_)
@@ -4181,32 +4412,32 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
 case TYP_DOUBLE:
 case TYP_LONG:
+ {
+ //
+ // Let's assign offsets to arg1, a double in r2. argOffs has to be 4 not 8.
+ //
+ // ------- CALLER SP -------
+ // r3
+ // r2 double -- argOffs = 4, but it doesn't need to be skipped, because there is no skipping.
+ // r1 VACookie -- argOffs = 0
+ // -------------------------
+ //
+ // Consider argOffs as if it accounts for number of prespilled registers before the current register.
+ // In the above example, for r2, it is r1 that is prespilled, but since r1 is accounted for by argOffs
+ // being 4, there should have been no skipping. Instead, if we didn't assign r1 to any variable, then
+ // argOffs would still be 0 which implies it is not accounting for r1, equivalently r1 is skipped.
+ //
+ // If prevRegsSize is unaccounted for by a corresponding argOffs, we must have skipped a register.
+ int prevRegsSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegArg & (regMask - 1)) * TARGET_POINTER_SIZE;
+ if (argOffs < prevRegsSize)
 {
- //
- // Let's assign offsets to arg1, a double in r2. argOffs has to be 4 not 8.
- //
- // ------- CALLER SP -------
- // r3
- // r2 double -- argOffs = 4, but it doesn't need to be skipped, because there is no skipping.
- // r1 VACookie -- argOffs = 0
- // -------------------------
- //
- // Consider argOffs as if it accounts for number of prespilled registers before the current register.
- // In the above example, for r2, it is r1 that is prespilled, but since r1 is accounted for by argOffs
- // being 4, there should have been no skipping. Instead, if we didn't assign r1 to any variable, then
- // argOffs would still be 0 which implies it is not accounting for r1, equivalently r1 is skipped.
- //
- // If prevRegsSize is unaccounted for by a corresponding argOffs, we must have skipped a register.
- int prevRegsSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegArg & (regMask - 1)) * TARGET_POINTER_SIZE; - if (argOffs < prevRegsSize) - { - // We must align up the argOffset to a multiple of 8 to account for skipped registers. - argOffs = roundUp(argOffs, 2*TARGET_POINTER_SIZE); - } - // We should've skipped only a single register. - assert(argOffs == prevRegsSize); + // We must align up the argOffset to a multiple of 8 to account for skipped registers. + argOffs = roundUp(argOffs, 2 * TARGET_POINTER_SIZE); } - break; + // We should've skipped only a single register. + assert(argOffs == prevRegsSize); + } + break; default: // No alignment of argOffs required @@ -4292,16 +4523,16 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize if (!compIsProfilerHookNeeded()) #endif { - bool cond = (info.compIsVarArgs && - // Does cur stk arg require double alignment? - ((varDsc->lvType == TYP_STRUCT && varDsc->lvStructDoubleAlign) || - (varDsc->lvType == TYP_DOUBLE) || - (varDsc->lvType == TYP_LONG)) - ) || - // Did first reg arg require alignment? - (codeGen->regSet.rsMaskPreSpillAlign & genRegMask(REG_ARG_LAST)); - - noway_assert(cond); + bool cond = (info.compIsVarArgs && + // Does cur stk arg require double alignment? + ((varDsc->lvType == TYP_STRUCT && varDsc->lvStructDoubleAlign) || + (varDsc->lvType == TYP_DOUBLE) || + (varDsc->lvType == TYP_LONG)) + ) || + // Did first reg arg require alignment? + (codeGen->regSet.rsMaskPreSpillAlign & genRegMask(REG_ARG_LAST)); + + noway_assert(cond); noway_assert(sizeofPreSpillRegArgs <= argOffs + TARGET_POINTER_SIZE); // at most one register of alignment } argOffs = sizeofPreSpillRegArgs; @@ -4321,7 +4552,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize case TYP_DOUBLE: case TYP_LONG: // We must align up the argOffset to a multiple of 8 - argOffs = roundUp(argOffsWithoutPreSpillRegArgs, 2*TARGET_POINTER_SIZE) + sizeofPreSpillRegArgs; + argOffs = roundUp(argOffsWithoutPreSpillRegArgs, 2 * TARGET_POINTER_SIZE) + sizeofPreSpillRegArgs; break; default: @@ -4330,21 +4561,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize } #endif // _TARGET_ARM_ -#ifdef UNIX_AMD64_ABI - // For Windows there are 4 slots for the register passed arguments on the top of the caller's stack. This is where they are always homed. - // So, they can be accessed with positive offset. - // On System V platforms, if the RA decides to home a register passed arg on the stack, - // it creates a stack location on the callee stack (like any other local var.) In such a case, the register passed, stack homed arguments - // are accessed using negative offsets and the stack passed arguments are accessed using positive offset (from the caller's stack.) - // For System V platforms if there is no frame pointer the caller stack parameter offset should include the callee allocated space. - // If frame register is used, the callee allocated space should not be included for accessing the caller stack parameters. - // The last two requirements are met in lvaFixVirtualFrameOffsets method, which fixes the offsets, based on frame pointer existence, - // existence of alloca instructions, ret address pushed, ets. - varDsc->lvStkOffs = *callerArgOffset; - *callerArgOffset += TARGET_POINTER_SIZE; -#else // !UNIX_AMD64_ABI varDsc->lvStkOffs = argOffs; -#endif // !UNIX_AMD64_ABI } // For struct promoted parameters we need to set the offsets for both LclVars. 
@@ -4360,31 +4577,31 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
         }
         else
 #endif // !defined(_TARGET_64BIT_)
-        if (varDsc->lvPromotedStruct())
-        {
-            lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
-
-            if (promotionType == PROMOTION_TYPE_DEPENDENT)
+        if (varDsc->lvPromotedStruct())
         {
-            noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+            lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
 
-            assert(fieldVarNum == varDsc->lvFieldLclStart);
-            lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+            if (promotionType == PROMOTION_TYPE_DEPENDENT)
+            {
+                noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+                assert(fieldVarNum == varDsc->lvFieldLclStart);
+                lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+            }
         }
-        }
         // For an independent promoted struct field we also assign the parent struct stack offset
-        else if (varDsc->lvIsStructField)
-        {
-            noway_assert(varDsc->lvParentLcl < lvaCount);
-            lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs;
-        }
+        else if (varDsc->lvIsStructField)
+        {
+            noway_assert(varDsc->lvParentLcl < lvaCount);
+            lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs;
+        }
 
     if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
         argOffs += argSize;
 
     return argOffs;
 }
-
+#endif // !UNIX_AMD64_ABI
 
 /*****************************************************************************
  * lvaAssignVirtualFrameOffsetsToLocals() : Assign virtual stack offsets to
@@ -5261,8 +5478,18 @@ void Compiler::lvaAssignFrameOffsetsToPromotedStructs()
         {
             // For promoted struct fields that are params, we will
             // assign their offsets in lvaAssignVirtualFrameOffsetToArg().
+            // This is not the case for System V systems, since there is no
+            // outgoing args space. Assign the dependently promoted fields properly.
             //
-            if (varDsc->lvIsStructField && !varDsc->lvIsParam)
+            if (varDsc->lvIsStructField
+#ifndef UNIX_AMD64_ABI
+                // For System V platforms there is no outgoing args space.
+                // A register passed struct arg is homed on the stack in a separate local var.
+                // The offset of these structs is already calculated in the lvaAssignVirtualFrameOffsetToArg method.
+                // Make sure the code below is not executed for these structs and the offset is not changed.
+                && !varDsc->lvIsParam
+#endif // UNIX_AMD64_ABI
+                )
             {
                 LclVarDsc * parentvarDsc = &lvaTable[varDsc->lvParentLcl];
                 lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc);
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index bb69d103cf..5882ecfa71 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -1001,9 +1001,39 @@ void Lowering::SpliceInUnary(GenTreePtr parent, GenTreePtr* ppChild, GenTreePtr
     oldChild->InsertAfterSelf(newNode);
 }
 
+//------------------------------------------------------------------------
+// NewPutArg: rewrites the tree to put an arg in a register or on the stack.
+//
+// Arguments:
+//    call - the call whose arg is being rewritten.
+//    arg  - the arg being rewritten.
+//    fp   - the ArgTabEntry for the argument.
+//    type - the type of the argument.
+//
+// Return Value:
+//    The new tree that was created to put the arg in the right place
+//    or the incoming arg if the arg tree was not rewritten.
+//
+// Assumptions:
+//    call, arg, and fp must be non-null.
+//
+// Notes:
+//    For System V systems with native struct passing (i.e.
FEATURE_UNIX_AMD64_STRUCT_PASSING defined)
+//    this method allocates a single GT_PUTARG_REG for one-eightbyte structs and a GT_LIST of two GT_PUTARG_REGs
+//    for two-eightbyte structs.
+//
+//    For structs passed on the stack, the method generates a GT_PUTARG_STK tree. For System V systems with native
+//    struct passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined) this method also sets the GC pointers count
+//    and the pointers layout object, so the codegen of GT_PUTARG_STK can use this to optimize copying to the stack
+//    by value (using block copy primitives for non-GC pointers and a single TARGET_POINTER_SIZE copy with GC info
+//    recording.)
+//
 GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryPtr fp, var_types type)
 {
-    GenTreePtr putArg;
+    assert(call != nullptr);
+    assert(arg != nullptr);
+    assert(fp != nullptr);
+
+    GenTreePtr putArg = nullptr;
     bool updateArgTable = true;
 
 #if !defined(_TARGET_64BIT_)
@@ -1015,7 +1045,22 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
         type = TYP_INT;
     }
 #endif // !defined(_TARGET_64BIT_)
-    if (fp->regNum != REG_STK)
+
+    bool isOnStack = true;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+    if (type == TYP_STRUCT)
+    {
+        isOnStack = !fp->structDesc.passedInRegisters;
+    }
+    else
+    {
+        isOnStack = fp->regNum == REG_STK;
+    }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+    isOnStack = fp->regNum == REG_STK;
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+    if (!isOnStack)
     {
 #ifdef FEATURE_SIMD
         // We can have SIMD types that are handled as TYP_DOUBLE, but which need to be
@@ -1025,24 +1070,182 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
             type = TYP_LONG;
         }
 #endif //FEATURE_SIMD
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+        if (fp->isStruct)
+        {
+            // The following code makes sure a register passed struct arg is moved to
+            // its registers before the call is made.
+            // There are two cases (comments added in the code below.)
+            // 1. The struct is of size one eightbyte:
+            //    In this case a new tree is created that is a GT_PUTARG_REG
+            //    with the original argument as op1.
+            // 2. The struct is contained in 2 eightbytes:
+            //    In this case the arg comes as a GT_LIST of two GT_LCL_FLDs - the two eightbytes of the struct.
+            //    The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_LIST
+            //    and splices it into the list with the corresponding original GT_LCL_FLD tree as op1.
+
+            assert(fp->structDesc.eightByteCount != 0);
+
+            if (fp->structDesc.eightByteCount == 1)
+            {
+                // Case 1 above: Create a GT_PUTARG_REG node with the original tree as op1.
+ // + // Here the IR for this operation: + // lowering call : + // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0 + // N003(6, 5)[000052] * --XG------ - / --* indir int + // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0 + // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int + // N009(3, 4)[000054] ------ - N----arg0 in rdi + --* lclFld int V02 tmp0[+0](last use) + // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1 + // + // args : + // lowering arg : (13, 11)[000070] -- - XG-- - R-- - *storeIndir int + // + // late : + // lowering arg : N009(3, 4)[000054] ------ - N---- * lclFld int V02 tmp0[+0](last use) + // new node is : (3, 4)[000071] ------------ * putarg_reg int RV + // + // after : + // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0 + // N003(6, 5)[000052] * --XG------ - / --* indir int + // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0 + // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int + // N009(3, 4)[000054] ------ - N---- | / --* lclFld int V02 tmp0[+0](last use) + // (3, 4)[000071] ------------arg0 in rdi + --* putarg_reg int RV + // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1 + // + + putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg); + } + else if (fp->structDesc.eightByteCount == 2) + { + // Case 2 above: Convert the LCL_FLDs to PUTARG_REG + // + // lowering call : + // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1 + // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1 + // N006(1, 1)[000058] ------------ + --* const int 16 + // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void + // N009(3, 4)[000061] ------ - N----arg0 in rdi + --* lclFld long V03 tmp1[+0] + // N010(3, 4)[000063] ------------arg0 in rsi + --* lclFld long V03 tmp1[+8](last use) + // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2 + // + // args : + // lowering arg : N007(12, 12)[000059] - A--G---- - L - *copyBlk void + // + // late : + // lowering arg : N012(11, 13)[000065] ------------ * <list> struct + // + // after : + // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1 + // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1 + // N006(1, 1)[000058] ------------ + --* const int 16 + // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void + // N009(3, 4)[000061] ------ - N---- | / --* lclFld long V03 tmp1[+0] + // (3, 4)[000072] ------------arg0 in rdi + --* putarg_reg long + // N010(3, 4)[000063] ------------ | / --* lclFld long V03 tmp1[+8](last use) + // (3, 4)[000073] ------------arg0 in rsi + --* putarg_reg long + // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2 + // + + assert(arg->OperGet() == GT_LIST); + GenTreeArgList* argListPtr = arg->AsArgList(); + + for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++) + { + // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD. + GenTreePtr newOper = comp->gtNewOperNode( + GT_PUTARG_REG, + comp->GetTypeFromClassificationAndSizes(fp->structDesc.eightByteClassifications[ctr], fp->structDesc.eightByteSizes[ctr]), + argListPtr->gtOp.gtOp1); + + // CopyCosts + newOper->CopyCosts(argListPtr->gtOp.gtOp1); + + // Splice in the new GT_PUTARG_REG node in the GT_LIST + SpliceInUnary(argListPtr, &argListPtr->gtOp.gtOp1, newOper); + } - putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg); + // Just return arg. 
The GT_LIST is not replaced. + // Nothing more to do. + return arg; + } + else + { + assert(false && "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes for the CLR. + + } + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg); + } } else { // Mark this one as tail call arg if it is a fast tail call. // This provides the info to put this argument in in-coming arg area slot // instead of in out-going arg area slot. + + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(assert(fp->isStruct == (type == TYP_STRUCT))); // Make sure state is correct + #if FEATURE_FASTTAILCALL - putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, type, arg, fp->slotNum, call->IsFastTailCall() DEBUG_ARG(call)); + putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, + type, + arg, + fp->slotNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->numSlots) + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->isStruct), + call->IsFastTailCall() + DEBUG_ARG(call)); #else - putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, type, arg, fp->slotNum DEBUG_ARG(call)); + putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, + type, + arg, + fp->slotNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->numSlots) + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->isStruct) + DEBUG_ARG(call)); #endif + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // If the ArgTabEntry indicates that this arg is a struct + // get and store the number of slots that are references. + // This is later used in the codegen for PUT_ARG_STK implementation + // for struct to decide whether and how many single eight-byte copies + // to be done (only for reference slots), so gcinfo is emitted. + // For non-reference slots faster/smaller size instructions are used - + // pair copying using XMM registers or rep mov instructions. 
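The gcLayout bookkeeping described above can be illustrated with a small standalone sketch. The shapes here are assumptions only: the real layout comes from getClassGClayout (used in the code below) and the real copy decisions live in the PUT_ARG_STK codegen; everything in this sketch is a stand-in:

    #include <cstdio>

    int main()
    {
        // Hypothetical 4-slot struct: { long; object; long; object }.
        // Non-zero entries mark slots holding GC pointers, mirroring the
        // BYTE array that getClassGClayout fills in.
        unsigned char gcLayout[] = { 0, 1, 0, 1 };
        const unsigned numSlots = sizeof(gcLayout) / sizeof(gcLayout[0]);

        for (unsigned slot = 0; slot < numSlots; slot++)
        {
            if (gcLayout[slot] != 0)
            {
                // Reference slot: one pointer-sized copy, reported to GC info.
                printf("slot %u: single pointer-size copy, GC reported\n", slot);
            }
            else
            {
                // Non-reference slot: candidate for XMM pair copy or rep movs.
                printf("slot %u: bulk copy\n", slot);
            }
        }
        return 0;
    }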
+ if (fp->isStruct) + { + assert(arg->OperGet() == GT_LDOBJ); + + BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[fp->numSlots]; + + unsigned numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtLdObj.gtClass, gcLayout); + + putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout); + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING } + putArg->CopyCosts(arg); if (arg->InReg()) + { putArg->SetInReg(); + } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + else if (fp->isStruct) + { + if (fp->structDesc.passedInRegisters) + { + putArg->SetInReg(); + } + } +#endif JITDUMP("new node is : "); DISPNODE(putArg); @@ -1076,10 +1279,14 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg) // assignments/stores at this level are not really placing an arg // they are setting up temporary locals that will later be placed into // outgoing regs or stack - if (!arg->OperIsAssignment() && + if ( + !arg->OperIsAssignment() && !arg->OperIsStore() && !arg->IsArgPlaceHolderNode() && - !arg->IsNothingNode() && + !arg->IsNothingNode() && +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + !arg->OperIsPutArgStk() && +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING !arg->OperIsCopyBlkOp()) // these are de facto placeholders (apparently) { fgArgTabEntryPtr fp = comp->gtArgEntryByNode(call, arg); @@ -1153,7 +1360,15 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg) #endif // !defined(_TARGET_64BIT_) { putArg = NewPutArg(call, arg, fp, type); - SpliceInUnary(call, ppArg, putArg); + + // In the case of register passable struct (in one or two registers) + // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_LIST with two GT_PUTARG_REGs.) + // If an extra node is returned, splice it in the right place in the tree. + if (arg != putArg) + { + // putArg and arg are equals if arg is GT_LIST (a list of multiple LCL_FLDs to be passed in registers.) + SpliceInUnary(call, ppArg, putArg); + } } } } diff --git a/src/jit/lower.h b/src/jit/lower.h index ae1f73e5b8..6754b7b75d 100644 --- a/src/jit/lower.h +++ b/src/jit/lower.h @@ -134,6 +134,10 @@ private: void TreeNodeInfoInitSIMD(GenTree* tree, LinearScan* lsra); #endif // FEATURE_SIMD +#if defined(_TARGET_XARCH_) + void TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info, unsigned kind); +#endif // defined(_TARGET_XARCH_) + void SpliceInUnary(GenTreePtr parent, GenTreePtr* ppChild, GenTreePtr newNode); void DumpNodeInfoMap(); diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 08c340cbee..a7b4600df9 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -103,7 +103,38 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) } } - +// TreeNodeInfoInitSimple: +// Sets the srcCount and dstCount for all the trees without special handling based on the tree node type. +// +// args: +// tree: The tree on which TreeNodeInfo's srcCount and dstCount are set. +// info: The TreeNodeInfo on which to set the srcCount and dstCount. +// This is the TreeNodeInfo corresponding to the tree parameter. +// kind: The kind flags of the tree node. +// +void Lowering::TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info, unsigned kind) +{ + info->dstCount = (tree->TypeGet() == TYP_VOID) ? 
0 : 1; + if (kind & (GTK_CONST | GTK_LEAF)) + { + info->srcCount = 0; + } + else if (kind & (GTK_SMPOP)) + { + if (tree->gtGetOp2() != nullptr) + { + info->srcCount = 2; + } + else + { + info->srcCount = 1; + } + } + else + { + unreached(); + } +} /** * Takes care of annotating the register requirements @@ -138,26 +169,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) GenTree* op2; default: - info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1; - if (kind & (GTK_CONST|GTK_LEAF)) - { - info->srcCount = 0; - } - else if (kind & (GTK_SMPOP)) - { - if (tree->gtGetOp2() != nullptr) - { - info->srcCount = 2; - } - else - { - info->srcCount = 1; - } - } - else - { - unreached(); - } + TreeNodeInfoInitSimple(tree, info, kind); break; case GT_LCL_FLD: @@ -275,6 +287,24 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) else #endif // !defined(_TARGET_64BIT_) { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (tree->TypeGet() == TYP_STRUCT && + tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) + { +#ifdef DEBUG + GenTreeLclVarCommon* lclVarPtr = tree->gtOp.gtOp1->AsLclVarCommon(); + LclVarDsc* varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]); + assert(varDsc->lvDontPromote); +#endif // DEBUG + // If this is a two eightbyte return, make the var + // contained by the return expression. The code gen will put + // the values in the right registers for return. + info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1; + info->dstCount = 0; + MakeSrcContained(tree, tree->gtOp.gtOp1); + break; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1; info->dstCount = 0; @@ -840,9 +870,10 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) } // First, count reg args - +#if FEATURE_VARARG bool callHasFloatRegArgs = false; - +#endif // !FEATURE_VARARG + for (GenTreePtr list = tree->gtCall.gtCallLateArgs; list; list = list->MoveNext()) { assert(list->IsList()); @@ -859,26 +890,52 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) assert(argNode->gtOper == GT_PUTARG_STK); argNode->gtLsraInfo.srcCount = 1; argNode->gtLsraInfo.dstCount = 0; + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // If the node is a struct and it is put on stack with + // putarg_stk operation, we consume and produce no registers. + // In this case the embedded LdObj node should not produce + // registers too since it is contained. + if (argNode->TypeGet() == TYP_STRUCT) + { + assert(argNode != nullptr && argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_LDOBJ); + argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0; + argNode->gtLsraInfo.srcCount = 0; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING continue; } - var_types argType = argNode->TypeGet(); + regNumber argReg = REG_NA; + regMaskTP argMask = RBM_NONE; + short regCount = 0; + bool isOnStack = true; + if (curArgTabEntry->regNum != REG_STK) + { + isOnStack = false; + var_types argType = argNode->TypeGet(); - callHasFloatRegArgs |= varTypeIsFloating(argType); +#if FEATURE_VARARG + callHasFloatRegArgs |= varTypeIsFloating(argType); +#endif // !FEATURE_VARARG - regNumber argReg = curArgTabEntry->regNum; - short regCount = 1; - // Default case is that we consume one source; modify this later (e.g. for - // promoted structs) - info->srcCount++; + argReg = curArgTabEntry->regNum; + regCount = 1; - regMaskTP argMask = genRegMask(argReg); - argNode = argNode->gtEffectiveVal(); - - if (argNode->TypeGet() == TYP_STRUCT) + // Default case is that we consume one source; modify this later (e.g. 
for
+                // promoted structs)
+                info->srcCount++;
+
+                argMask = genRegMask(argReg);
+                argNode = argNode->gtEffectiveVal();
+            }
+
+            // If the struct arg is wrapped in a CPYBLK, the type of the param will be TYP_VOID.
+            // Use the curArgTabEntry's isStruct to get whether the param is a struct.
+            if (argNode->TypeGet() == TYP_STRUCT
+                FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
             {
                 unsigned originalSize = 0;
-                bool isPromoted = false;
                 LclVarDsc* varDsc = nullptr;
                 if (argNode->gtOper == GT_LCL_VAR)
                 {
@@ -893,20 +950,70 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
                 {
                     noway_assert(!"GT_LDOBJ not supported for amd64");
                 }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+                else if (argNode->gtOper == GT_PUTARG_REG)
+                {
+                    originalSize = genTypeSize(argNode->gtType);
+                }
+                else if (argNode->gtOper == GT_LIST)
+                {
+                    originalSize = 0;
+
+                    // There could be up to 2 PUTARG_REGs in the list
+                    GenTreeArgList* argListPtr = argNode->AsArgList();
+                    unsigned iterationNum = 0;
+                    for (; argListPtr; argListPtr = argListPtr->Rest())
+                    {
+                        GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+                        assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+
+                        if (iterationNum == 0)
+                        {
+                            varDsc = compiler->lvaTable + putArgRegNode->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+                            originalSize = varDsc->lvSize();
+                            assert(originalSize != 0);
+                        }
+                        else
+                        {
+                            // Need an extra source for every node but the first in the list.
+                            info->srcCount++;
+
+                            // Get the mask for the second putarg_reg
+                            argMask = genRegMask(curArgTabEntry->otherRegNum);
+                        }
+
+                        putArgRegNode->gtLsraInfo.setDstCandidates(l, argMask);
+                        putArgRegNode->gtLsraInfo.setSrcCandidates(l, argMask);
+
+                        // To avoid redundant moves, have the argument child tree computed in the
+                        // register in which the argument is passed to the call.
+ putArgRegNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(putArgRegNode)); + iterationNum++; + } + + assert(iterationNum <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING else { noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind"); } - unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES; - regNumber reg = (regNumber)(argReg + 1); - unsigned remainingSlots = slots - 1; - while (remainingSlots > 0 && reg <= REG_ARG_LAST) + unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES; + unsigned remainingSlots = slots; + + if (!isOnStack) { - argMask |= genRegMask(reg); - reg = (regNumber)(reg + 1); - remainingSlots--; - regCount++; + remainingSlots = slots - 1; + + regNumber reg = (regNumber)(argReg + 1); + while (remainingSlots > 0 && reg <= REG_ARG_LAST) + { + argMask |= genRegMask(reg); + reg = (regNumber)(reg + 1); + remainingSlots--; + regCount++; + } } short internalIntCount = 0; @@ -915,9 +1022,21 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) // This TYP_STRUCT argument is also passed in the outgoing argument area // We need a register to address the TYP_STRUCT // And we may need 2 +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + internalIntCount = 1; +#else // FEATURE_UNIX_AMD64_STRUCT_PASSING internalIntCount = 2; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING } argNode->gtLsraInfo.internalIntCount = internalIntCount; + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (argNode->gtOper == GT_PUTARG_REG) + { + argNode->gtLsraInfo.setDstCandidates(l, argMask); + argNode->gtLsraInfo.setSrcCandidates(l, argMask); + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING } else { @@ -931,6 +1050,8 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) { argNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(argNode)); } + +#if FEATURE_VARARG // In the case of a varargs call, the ABI dictates that if we have floating point args, // we must pass the enregistered arguments in both the integer and floating point registers. // Since the integer register is not associated with this arg node, we will reserve it as @@ -942,6 +1063,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) tree->gtLsraInfo.setInternalIntCount(tree->gtLsraInfo.internalIntCount + 1); tree->gtLsraInfo.addInternalCandidates(l, genRegMask(targetReg)); } +#endif // FEATURE_VARARG } // Now, count stack args @@ -995,6 +1117,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) args = args->gtOp.gtOp2; } +#if FEATURE_VARARG // If it is a fast tail call, it is already preferenced to use RAX. // Therefore, no need set src candidates on call tgt again. if (tree->gtCall.IsVarargs() && @@ -1007,6 +1130,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) // by Amd64 ABI. 
ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS)); } +#endif // !FEATURE_VARARG } break; @@ -1020,7 +1144,6 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) info->dstCount = 1; } break; - #ifdef _TARGET_X86_ case GT_LDOBJ: NYI_X86("GT_LDOBJ"); @@ -1218,6 +1341,116 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) } break; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + case GT_PUTARG_STK: + { + if (tree->TypeGet() != TYP_STRUCT) + { + TreeNodeInfoInitSimple(tree, info, kind); + break; + } + + GenTreePutArgStk* putArgStkTree = tree->AsPutArgStk(); + + GenTreePtr dstAddr = tree; + GenTreePtr srcAddr = tree->gtOp.gtOp1; + + assert(srcAddr->OperGet() == GT_LDOBJ); + info->srcCount = srcAddr->gtLsraInfo.dstCount; + + // If this is a stack variable address, + // make the op1 contained, so this way + // there is no unnecessary copying between registers. + // To avoid assertion, increment the parent's source. + // It is recovered below. + if (srcAddr->gtGetOp1()->OperIsLocalAddr()) + { + info->srcCount += 1; + } + + info->dstCount = 0; + + // In case of a CpBlk we could use a helper call. In case of putarg_stk we + // can't do that since the helper call could kill some already set up outgoing args. + // TODO-Amd64-Unix: converge the code for putarg_stk with cpyblk/cpyobj. + // The cpyXXXX code is rather complex and this could cause it to be more complex, but + // it might be the right thing to do. + + // This threshold will decide from using the helper or let the JIT decide to inline + // a code sequence of its choice. + ssize_t helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT); + ssize_t size = putArgStkTree->gtNumSlots * TARGET_POINTER_SIZE; + + // TODO-X86-CQ: The helper call either is not supported on x86 or required more work + // (I don't know which). + + // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2. + // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of + // our framework assemblies, so this is the main code generation scheme we'll use. + if (size <= CPBLK_UNROLL_LIMIT && putArgStkTree->gtNumberReferenceSlots == 0) + { + // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg. + // + // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte. + // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude + // RBM_NON_BYTE_REGS from internal candidates. + if ((size & (XMM_REGSIZE_BYTES - 1)) != 0) + { + info->internalIntCount++; + regMaskTP regMask = l->allRegs(TYP_INT); + +#ifdef _TARGET_X86_ + if ((size % 2) != 0) + { + regMask &= ~RBM_NON_BYTE_REGS; + } +#endif + info->setInternalCandidates(l, regMask); + } + + if (size >= XMM_REGSIZE_BYTES) + { + // If we have a buffer larger than XMM_REGSIZE_BYTES, + // reserve an XMM register to use it for a + // series of 16-byte loads and stores. 
+ info->internalFloatCount = 1; + info->addInternalCandidates(l, l->internalFloatRegCandidates()); + } + + if (srcAddr->gtGetOp1()->OperIsLocalAddr()) + { + MakeSrcContained(putArgStkTree, srcAddr->gtGetOp1()); + } + + // If src or dst are on stack, we don't have to generate the address into a register + // because it's just some constant+SP + putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindUnroll; + } + else + { + info->internalIntCount += 3; + info->setInternalCandidates(l, (RBM_RDI | RBM_RCX | RBM_RSI)); + if (srcAddr->gtGetOp1()->OperIsLocalAddr()) + { + MakeSrcContained(putArgStkTree, srcAddr->gtGetOp1()); + } + + putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindRepInstr; + } + + // Always mark the LDOBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree. + MakeSrcContained(putArgStkTree, srcAddr); + + // Balance up the inc above. + if (srcAddr->gtGetOp1()->OperIsLocalAddr()) + { + info->srcCount -= 1; + } + } + + break; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + case GT_COPYBLK: { // Sources are src, dest and size (or class token for CpObj). @@ -2995,6 +3228,6 @@ bool Lowering:: IsContainableImmed(GenTree* parentNode, GenTree* childNode) return true; } -#endif // _TARGET_AMD64_ +#endif // _TARGET_XARCH_ #endif // !LEGACY_BACKEND diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index d8341b1d7f..8f11af9878 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -2671,14 +2671,14 @@ LinearScan::buildInternalRegisterDefsForNode(GenTree *tree, int internalIntCount = tree->gtLsraInfo.internalIntCount; regMaskTP internalCands = tree->gtLsraInfo.getInternalCandidates(this); - // If this is a varArgs call, the internal candidates represent the integer registers that - // floating point arguments must be copied into. These must be handled as fixed regs. + // If the number of internal integer registers required is the same as the number of candidate integer registers in the candidate set, + // then they must be handled as fixed registers. + // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.) bool fixedRegs = false; - if ((internalIntCount != 0) && (tree->OperGet() == GT_CALL)) + regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT)); + if (((int)genCountBits(internalIntCandidates)) == internalIntCount) { - assert(tree->gtCall.IsVarargs()); fixedRegs = true; - assert((int)genCountBits(internalCands) == internalIntCount); } for (count = 0; count < internalIntCount; count++) @@ -3317,6 +3317,50 @@ LinearScan::insertZeroInitRefPositions() } } +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +// ----------------------------------------------------------------------- +// Sets the register state for an argument of type STRUCT for System V systems. +// See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp +// for how state for argument is updated for unix non-structs and Windows AMD64 structs. 
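A hedged sketch of the masking idea the function below implements: each of a struct arg's (up to) two registers is routed to either the integer or the float live-in mask. The masks and register numbering here are simplified stand-ins, not the JIT's actual register tables:

    #include <cstdio>

    typedef unsigned regMask;

    static regMask maskOf(int reg) { return 1u << reg; }

    int main()
    {
        const regMask kAllFloat = 0xFF00;   // pretend XMM0-XMM7 occupy bits 8-15
        regMask intLiveIn = 0, fltLiveIn = 0;

        // struct { void* p; double d; } passed in an int reg (bit 0, RDI-like)
        // and a float reg (bit 8, XMM0-like).
        int argRegs[2] = { 0, 8 };
        for (int i = 0; i < 2; i++)
        {
            regMask m = maskOf(argRegs[i]);
            if (m & kAllFloat)
                fltLiveIn |= m;             // goes to the float reg state
            else
                intLiveIn |= m;             // goes to the int reg state
        }

        printf("int live-in mask 0x%x, float live-in mask 0x%x\n", intLiveIn, fltLiveIn);
        return 0;
    }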
+void +LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) +{ + assert(argDsc->lvType == TYP_STRUCT); + RegState * intRegState = &compiler->codeGen->intRegState; + RegState * floatRegState = &compiler->codeGen->floatRegState; + + if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA)) + { + if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT)) + { + assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS)); + floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg); + } + else + { + assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS)); + intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg); + } + } + + + if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA)) + { + if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT)) + { + assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS)); + floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg); + } + else + { + assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS)); + intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg); + } + } +} + +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + //------------------------------------------------------------------------ // updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate // regState (either compiler->intRegState or compiler->floatRegState), @@ -3339,31 +3383,41 @@ LinearScan::insertZeroInitRefPositions() void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) { - RegState * intRegState = &compiler->codeGen->intRegState; - RegState * floatRegState = &compiler->codeGen->floatRegState; - - // In the case of AMD64 we'll still use the floating point registers - // to model the register usage for argument on vararg calls, so - // we will ignore the varargs condition to determine whether we use - // XMM registers or not for setting up the call. - bool isFloat = (isFloatRegType(argDsc->lvType) +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // For System V AMD64 calls the argDsc can have 2 registers (for structs.) + // Handle them here. + if (argDsc->lvType == TYP_STRUCT) + { + unixAmd64UpdateRegStateForArg(argDsc); + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + RegState * intRegState = &compiler->codeGen->intRegState; + RegState * floatRegState = &compiler->codeGen->floatRegState; + // In the case of AMD64 we'll still use the floating point registers + // to model the register usage for argument on vararg calls, so + // we will ignore the varargs condition to determine whether we use + // XMM registers or not for setting up the call. 
+ bool isFloat = (isFloatRegType(argDsc->lvType) #ifndef _TARGET_AMD64_ - && !compiler->info.compIsVarArgs + && !compiler->info.compIsVarArgs #endif - ); + ); #ifdef _TARGET_ARM_ - if (argDsc->lvIsHfaRegArg) isFloat = true; + if (argDsc->lvIsHfaRegArg) isFloat = true; #endif // _TARGET_ARM_ - if (isFloat) - { - JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); - compiler->raUpdateRegStateForArg(floatRegState, argDsc); - } - else - { - JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); - compiler->raUpdateRegStateForArg(intRegState, argDsc); + if (isFloat) + { + JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); + compiler->raUpdateRegStateForArg(floatRegState, argDsc); + } + else + { + JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); + compiler->raUpdateRegStateForArg(intRegState, argDsc); + } } } @@ -3548,7 +3602,9 @@ LinearScan::buildIntervals() // won't have done dataflow on it, but it needs to be marked as live-in so // it will get saved in the prolog. if (!compiler->compJmpOpUsed && argDsc->lvRefCnt == 0 && !compiler->opts.compDbgCode) + { continue; + } if (argDsc->lvIsRegArg) updateRegStateForArg(argDsc); diff --git a/src/jit/lsra.h b/src/jit/lsra.h index e57873fb65..cef6669513 100644 --- a/src/jit/lsra.h +++ b/src/jit/lsra.h @@ -574,6 +574,14 @@ private: void buildUpperVectorRestoreRefPositions(GenTree *tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors); #endif //FEATURE_SIMD +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // For AMD64 on SystemV machines. This method + // is called as replacement for raUpdateRegStateForArg + // that is used on Windows. On System V systems a struct can be passed + // partially using registers from the 2 register files. + void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc); +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Update reg state for an incoming register argument void updateRegStateForArg(LclVarDsc* argDsc); @@ -998,7 +1006,6 @@ private: // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers. 
VARSET_TP largeVectorCalleeSaveCandidateVars;
 #endif // FEATURE_SIMD
-
 };
 
 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index f3eb506b0d..b000f58969 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -926,6 +926,7 @@ fgArgInfo::fgArgInfo(Compiler * comp, GenTreePtr call, unsigned numArgs)
     argTableSize = numArgs; // the allocated table size
     argsComplete = false;
     argsSorted   = false;
+
     if (argTableSize == 0)
         argTable = NULL;
     else
@@ -1127,7 +1128,6 @@ void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
     argCount++;
 }
 
-
 fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum,
                                       GenTreePtr node,
                                       GenTreePtr parent,
@@ -1137,38 +1137,79 @@ fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum,
 {
     fgArgTabEntryPtr curArgTabEntry = new(compiler, CMK_fgArgInfo) fgArgTabEntry;
 
-    curArgTabEntry->argNum = argNum;
-    curArgTabEntry->node = node;
-    curArgTabEntry->parent = parent;
-    curArgTabEntry->regNum = regNum;
-    curArgTabEntry->slotNum = 0;
-    curArgTabEntry->numRegs = numRegs;
-    curArgTabEntry->numSlots = 0;
-    curArgTabEntry->alignment = alignment;
-    curArgTabEntry->lateArgInx = (unsigned) -1;
-    curArgTabEntry->tmpNum = (unsigned) -1;
-    curArgTabEntry->isSplit = false;
-    curArgTabEntry->isTmp = false;
-    curArgTabEntry->needTmp = false;
-    curArgTabEntry->needPlace = false;
-    curArgTabEntry->processed = false;
-    curArgTabEntry->isHfaRegArg = false;
-    curArgTabEntry->isBackFilled = false;
-    curArgTabEntry->isNonStandard = false;
+    curArgTabEntry->argNum        = argNum;
+    curArgTabEntry->node          = node;
+    curArgTabEntry->parent        = parent;
+    curArgTabEntry->regNum        = regNum;
+    curArgTabEntry->slotNum       = 0;
+    curArgTabEntry->numRegs       = numRegs;
+    curArgTabEntry->numSlots      = 0;
+    curArgTabEntry->alignment     = alignment;
+    curArgTabEntry->lateArgInx    = (unsigned)-1;
+    curArgTabEntry->tmpNum        = (unsigned)-1;
+    curArgTabEntry->isSplit       = false;
+    curArgTabEntry->isTmp         = false;
+    curArgTabEntry->needTmp       = false;
+    curArgTabEntry->needPlace     = false;
+    curArgTabEntry->processed     = false;
+    curArgTabEntry->isHfaRegArg   = false;
+    curArgTabEntry->isBackFilled  = false;
+    curArgTabEntry->isNonStandard = false;
 
     AddArg(curArgTabEntry);
     return curArgTabEntry;
 }
 
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum,
+                                      GenTreePtr node,
+                                      GenTreePtr parent,
+                                      regNumber regNum,
+                                      unsigned numRegs,
+                                      unsigned alignment,
+                                      const bool isStruct,
+                                      const regNumber otherRegNum,
+                                      const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
+{
+    fgArgTabEntryPtr curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
+    assert(curArgTabEntry != nullptr);
+
+    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
+    // PlaceHolder node (in case of a needed late argument, for example.)
+    // This requires the use of an extra flag. At creation time the state is correct,
+    // and this assert enforces that.
+    assert((node->gtType == TYP_STRUCT && isStruct) || (node->gtType != TYP_STRUCT && !isStruct));
+    curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
+    curArgTabEntry->isStruct    = isStruct;    // is this a struct arg
+
+    if (isStruct && structDescPtr != nullptr)
+    {
+        curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
+    }
+
+    return curArgTabEntry;
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
 fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum,
                                       GenTreePtr node,
                                       GenTreePtr parent,
                                       unsigned numSlots,
-                                      unsigned alignment)
+                                      unsigned alignment
+                                      FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
 {
     fgArgTabEntryPtr curArgTabEntry = new(compiler, CMK_fgArgInfo) fgArgTabEntry;
 
-    nextSlotNum = (unsigned) roundUp(nextSlotNum, alignment);
+    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
+    // PlaceHolder node (in case of a needed late argument, for example.)
+    // This requires the use of an extra flag. At creation time the state is correct,
+    // and this assert enforces that.
+    assert((node->gtType == TYP_STRUCT && isStruct) || (node->gtType != TYP_STRUCT && !isStruct));
+    curArgTabEntry->isStruct = isStruct; // is this a struct arg
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
 
     curArgTabEntry->argNum = argNum;
     curArgTabEntry->node = node;
@@ -1399,9 +1440,24 @@ void fgArgInfo::ArgsComplete()
 
     for (unsigned curInx = 0; curInx < argCount; curInx++)
     {
-        fgArgTabEntryPtr curArgTabEntry = argTable[curInx]; assert(curArgTabEntry != NULL);
+        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+        assert(curArgTabEntry != NULL);
         GenTreePtr argx = curArgTabEntry->node;
 
+        // If this is a struct, mark it as needing a tempVar.
+        // In the copyblk and store cases this should have minimal perf impact since
+        // the local vars we copy/store to already exist, and the temp var logic
+        // will not create a new one when making a tempVar from another tempVar.
+        // (Stepping through the code showed that no new copy of the data, and no new tempVar, was created.)
+        // The need for this arises from Lower::LowerArg.
+        // In the case of a copyblk or store operation, the NewPutArg method will
+        // not be invoked and the struct will not be loaded to be passed in
+        // registers or by value on the stack.
+        if (argx->TypeGet() == TYP_STRUCT FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || curArgTabEntry->isStruct))
+        {
+            curArgTabEntry->needTmp = true;
+        }
+
         if (curArgTabEntry->regNum == REG_STK)
         {
             hasStackArgs = true;
@@ -1415,8 +1471,11 @@ void fgArgInfo::ArgsComplete()
         }
         else // we have a register argument, next we look for a TYP_STRUCT
         {
-            if (argx->TypeGet() == TYP_STRUCT)
+            if (argx->TypeGet() == TYP_STRUCT
+                FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || curArgTabEntry->isStruct))
+            {
                 hasStructRegArg = true;
+            }
         }
 
         /* If the argument tree contains an assignment (GTF_ASG) then the argument and
@@ -1461,7 +1520,6 @@ void fgArgInfo::ArgsComplete()
             }
         }
 
-
 #if FEATURE_FIXED_OUT_ARGS
         // Like calls, if this argument has a tree that will do an inline throw,
         // a call to a jit helper, then we need to treat it like a call (but only
@@ -1917,7 +1975,11 @@ void fgArgInfo::SortArgs()
     argsSorted = true;
 }
 
-GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum)
+// This function creates a tmp var only if needed.
+// We need this to be done in order to enforce ordering
+// of the evaluation of arguments.
There are times this function will not be called for an argument at all. +GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters)) { LclVarDsc * varDsc = &lvaTable[tmpVarNum]; assert(varDsc->lvIsTemp); @@ -1926,9 +1988,12 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum) // Create a copy of the temp to go into the late argument list GenTreePtr arg = gtNewLclvNode(tmpVarNum, type); -#ifdef _TARGET_AMD64_ +#if defined(_TARGET_AMD64_) if (type == TYP_STRUCT) { + + +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING switch (lvaLclExactSize(tmpVarNum)) { case 1: type = TYP_BYTE; break; @@ -1953,6 +2018,8 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum) default: break; } +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING + // If we didn't change the type of the struct, it means // its structure doesn't support to be passed directly through a // register, so we need to pass a pointer to the destination where @@ -1960,7 +2027,23 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum) if (type == TYP_STRUCT) { arg->gtFlags |= GTF_DONT_CSE; + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + + // If it is passed in registers, don't get the address of the var. Make it a + // field instead. It will be loaded in registers with putarg_reg tree in lower. + if (passedInRegisters) + { + arg->ChangeOper(GT_LCL_FLD); + arg->gtType = type; + } + else + { + arg = gtNewOperNode(GT_ADDR, TYP_STRUCT, arg); + } +#else // FEATURE_UNIX_AMD64_STRUCT_PASSING arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING } else { @@ -1973,10 +2056,8 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum) arg->gtFlags |= GTF_DONT_CSE; arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg); - // Ldobj the temp to use it as a call argument - arg = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, arg, lvaGetStruct(tmpVarNum) - ); + arg = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, arg, lvaGetStruct(tmpVarNum)); arg->gtFlags |= GTF_EXCEPT; #endif // _TARGET_AMD64_ @@ -2007,7 +2088,7 @@ void fgArgInfo::EvalArgsToTemps() // Only the register arguments need to be replaced with placeholders node // stacked arguments are evaluated and pushed in order // - if (curArgTabEntry->regNum == REG_STK) + if (curArgTabEntry->regNum == REG_STK && !curArgTabEntry->needTmp) continue; #endif @@ -2019,9 +2100,11 @@ void fgArgInfo::EvalArgsToTemps() { // Create a copy of the temp to go into the late argument list tmpVarNum = curArgTabEntry->tmpNum; - defArg = compiler->fgMakeTmpArgNode(tmpVarNum); + defArg = compiler->fgMakeTmpArgNode( + tmpVarNum + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(argTable[curInx]->structDesc.passedInRegisters)); - /* mark the original node as a late argument */ + // mark the original node as a late argument argx->gtFlags |= GTF_LATE_ARG; } else @@ -2036,7 +2119,7 @@ void fgArgInfo::EvalArgsToTemps() } #endif -#ifdef _TARGET_AMD64_ +#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) noway_assert(argx->gtType != TYP_STRUCT); #endif @@ -2160,11 +2243,11 @@ void fgArgInfo::EvalArgsToTemps() /* For a TYP_STRUCT we also need to record the class handle of the arg */ CORINFO_CLASS_HANDLE clsHnd = NULL; -#ifdef _TARGET_AMD64_ +#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) noway_assert(argx->gtType != TYP_STRUCT); -#else // _TARGET_AMD664_ +#else // _TARGET_AMD64_ if (defArg->gtType == TYP_STRUCT) { @@ -2429,6 +2512,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* 
callNode) #endif unsigned argSlots = 0; + unsigned nonRegPassedStructSlots = 0; bool lateArgsComputed = (call->gtCallLateArgs != nullptr); bool callHasRetBuffArg = ((call->gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) != 0); @@ -2606,13 +2690,19 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) (call->gtCallObjp->gtType == TYP_I_IMPL)); /* this is a register argument - put it in the table */ - call->fgArgInfo->AddRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1); + call->fgArgInfo->AddRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1 +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + , false, REG_STK, nullptr +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + ); } else { /* this is a register argument - possibly update it in the table */ call->fgArgInfo->RemorphRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1); } + // this can't be a struct. + assert(argx->gtType != TYP_STRUCT); /* Increment the argument register count and argument index */ if (!varTypeIsFloating(argx->gtType)) @@ -2714,9 +2804,22 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #endif // _TARGET_ARM_ +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + bool nonRegPassableStruct = false; + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + + bool hasStructArgument = false; for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2) { GenTreePtr * parentArgx = &args->gtOp.gtOp1; + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (!hasStructArgument) + { + hasStructArgument = (args->gtOp.gtOp1->TypeGet() == TYP_STRUCT); + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING argx = fgMorphTree(*parentArgx); *parentArgx = argx; flagsSummary |= argx->gtFlags; @@ -2741,7 +2844,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) unsigned size = 0; CORINFO_CLASS_HANDLE copyBlkClass = NULL; - bool isRegArg; + bool isRegArg = false; fgArgTabEntryPtr argEntry = NULL; @@ -2816,14 +2919,20 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } #elif defined(_TARGET_AMD64_) - - passUsingFloatRegs = varTypeIsFloating(argx); - #if defined(UNIX_AMD64_ABI) + if (lateArgsComputed) + { + passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum); + } + else + { + passUsingFloatRegs = varTypeIsFloating(argx); + } bool passUsingIntRegs; passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG); -#endif // UNIX_AMD64_ABI - +#else // !UNIX_AMD64_ABI + passUsingFloatRegs = varTypeIsFloating(argx); +#endif // !UNIX_AMD64_ABI #elif defined(_TARGET_X86_) passUsingFloatRegs = false; @@ -2836,6 +2945,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use var_types structBaseType = TYP_STRUCT; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + unsigned int structFloatRegs = 0; + unsigned int structIntRegs = 0; +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + bool isStructArg = argx->gtType == TYP_STRUCT; + if (lateArgsComputed) { assert(argEntry != NULL); @@ -2870,12 +2985,24 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // stack slots, or both if the argument is split between the registers and the stack. 
// - if (argx->IsArgPlaceHolderNode() || (argx->gtType != TYP_STRUCT)) + if (argx->IsArgPlaceHolderNode() || (!isStructArg)) { #if defined(_TARGET_AMD64_) +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (!isStructArg) + { + size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot' + } + else + { + size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; + eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc); + } +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot' +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #elif defined(_TARGET_ARM64_) - if (argx->gtType == TYP_STRUCT) + if (isStructArg) { // Structs are eith passed in 1 or 2 (64-bit) slots size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; @@ -2891,7 +3018,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) size = 1; // On ARM64, all primitives fit in a single (64-bit) 'slot' } #elif defined(_TARGET_ARM_) - if (argx->gtType == TYP_STRUCT) + if (isStructArg) { size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; } @@ -2915,10 +3042,26 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) else // argx->gtType == TYP_STRUCT { /* We handle two opcodes: GT_MKREFANY and GT_LDOBJ */ - if (argx->gtOper == GT_MKREFANY) + if (argx->gtOper == GT_MKREFANY) { + if (argx->TypeGet() == TYP_STRUCT) + { + isStructArg = true; + } #ifdef _TARGET_AMD64_ - size = 1; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (argx->TypeGet() == TYP_STRUCT) + { + size = info.compCompHnd->getClassSize(impGetRefAnyClass()); + unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE); + size = roundupSize / TARGET_POINTER_SIZE; + eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc); + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + size = 1; + } #else size = 2; #endif @@ -2942,22 +3085,42 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) BADCODE("illegal argument tree in fgMorphArgs"); CORINFO_CLASS_HANDLE ldObjClass = argLdobj->gtLdObj.gtClass; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + eeGetSystemVAmd64PassStructInRegisterDescriptor(ldObjClass, &structDesc); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + unsigned originalSize = info.compCompHnd->getClassSize(ldObjClass); + originalSize = (originalSize == 0 ? 
TARGET_POINTER_SIZE : originalSize); unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE); bool passStructByRef = false; #ifndef _TARGET_X86_ +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING // Check for TYP_STRUCT argument with size 1, 2, 4 or 8 bytes // As we can optimize these by turning them into a GT_IND of the correct type - if ((originalSize > TARGET_POINTER_SIZE) || ((originalSize & (originalSize-1)) != 0)) + if ((originalSize > TARGET_POINTER_SIZE) || ((originalSize & (originalSize - 1)) != 0)) +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { // Normalize 'size' to the number of pointer sized items // 'size' is the number of register slots that we will use to pass the argument size = roundupSize / TARGET_POINTER_SIZE; #if defined(_TARGET_AMD64_) +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING size = 1; // This must be copied to a temp and passed by address passStructByRef = true; copyBlkClass = ldObjClass; +#else // FEATURE_UNIX_AMD64_STRUCT_PASSING + if (!structDesc.passedInRegisters) + { + passStructByRef = false; + copyBlkClass = NULL; + } + else + { + passStructByRef = true; + copyBlkClass = ldObjClass; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #elif defined(_TARGET_ARM64_) if (size > 2) { @@ -2985,6 +3148,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } #endif // _TARGET_ARM_ } +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING else { // change our GT_LDOBJ into a GT_IND of the correct type @@ -3109,10 +3273,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) size = 1; } -#endif // not _TARGET_X86_ +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // not _TARGET_X86_ // We still have a TYP_STRUCT unless we converted the GT_LDOBJ into a GT_IND above... - if ((structBaseType == TYP_STRUCT) && !passStructByRef) { // if the valuetype size is not a multiple of sizeof(void*), @@ -3158,8 +3322,23 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // // Figure out if the argument will be passed in a register. // + bool passedInRegisters = true; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + passedInRegisters = !isStructArg; + if (!passedInRegisters) + { + if (structDesc.passedInRegisters) + { + passedInRegisters = true; + } + else + { + passedInRegisters = false; + } + } - if (isRegParamType(genActualType(argx->TypeGet()))) +#endif + if (passedInRegisters && isRegParamType(genActualType(argx->TypeGet()))) { #ifdef _TARGET_ARM_ if (passUsingFloatRegs) @@ -3192,13 +3371,48 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } #else // _TARGET_ARM_ #if defined(UNIX_AMD64_ABI) - if (passUsingFloatRegs) + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Here a struct can be passed in register following the classifications of its members and size. + // Now make sure there are actually enough registers to do so. 
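To make the register-budget check that follows concrete, here is a small sketch under stated assumptions (the enum and limits are local stand-ins for SystemVClassificationType and the MAX_REG_ARG / MAX_FLOAT_REG_ARG constants). The rule is all-or-nothing: if either register file lacks room for the struct's eightbytes, the whole struct goes to the stack:

    #include <cstdio>

    enum Classification { ClsInteger, ClsIntegerReference, ClsSSE };

    int main()
    {
        const unsigned kMaxIntRegs = 6, kMaxFloatRegs = 8; // System V AMD64 limits
        unsigned intArgRegNum = 5, fltArgRegNum = 0;       // five int regs already taken

        // struct { void* p; double d; } classifies as one integer eightbyte
        // and one SSE eightbyte.
        Classification eightBytes[] = { ClsIntegerReference, ClsSSE };

        unsigned structIntRegs = 0, structFloatRegs = 0;
        for (unsigned i = 0; i < 2; i++)
        {
            if (eightBytes[i] == ClsInteger || eightBytes[i] == ClsIntegerReference)
                structIntRegs++;
            else if (eightBytes[i] == ClsSSE)
                structFloatRegs++;
        }

        // All-or-nothing: both register files must have room.
        bool isRegArg = (intArgRegNum + structIntRegs <= kMaxIntRegs) &&
                        (fltArgRegNum + structFloatRegs <= kMaxFloatRegs);
        printf("struct passed in registers: %s\n", isRegArg ? "yes" : "no");
        return 0;
    }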
+ if (isStructArg) { - isRegArg = fltArgRegNum < MAX_FLOAT_REG_ARG; + for (unsigned int i = 0; i < structDesc.eightByteCount; i++) + { + if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeInteger || + structDesc.eightByteClassifications[i] == SystemVClassificationTypeIntegerReference) + { + structIntRegs++; + } + else if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeSSE) + { + structFloatRegs++; + } + } + + if (((nextFltArgRegNum + structFloatRegs) > MAX_FLOAT_REG_ARG) || + ((intArgRegNum + structIntRegs) > MAX_REG_ARG)) + { + isRegArg = false; + nonRegPassableStruct = true; + } + else + { + isRegArg = true; + nonRegPassableStruct = false; + } } else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { - isRegArg = intArgRegNum < MAX_REG_ARG; + if (passUsingFloatRegs) + { + isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG; + } + else + { + isRegArg = intArgRegNum < MAX_REG_ARG; + } } #else // !defined(UNIX_AMD64_ABI) isRegArg = intArgRegNum < maxRegArgs; @@ -3208,6 +3422,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) else { isRegArg = false; + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + nonRegPassableStruct = true; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING } } @@ -3245,16 +3463,67 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } #endif // _TARGET_ARM_ - if (isRegArg) { - // fill in or update the argInfo table + regNumber nextRegNum = REG_STK; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + regNumber nextOtherRegNum = REG_STK; + + if (isStructArg) + { + // It is a struct passed in registers. Assign the next available register. + unsigned int curIntReg = intArgRegNum; + unsigned int curFloatReg = nextFltArgRegNum; + for (unsigned int i = 0; i < structDesc.eightByteCount; i++) + { + if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeInteger || + structDesc.eightByteClassifications[i] == SystemVClassificationTypeIntegerReference) + { + if (i == 0) + { + nextRegNum = genMapIntRegArgNumToRegNum(curIntReg); + } + else if (i == 1) + { + nextOtherRegNum = genMapIntRegArgNumToRegNum(curIntReg); + } + else + { + assert(false && "fgMorphArgs Invalid index for int classification."); + } - regNumber nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum) : genMapIntRegArgNumToRegNum(intArgRegNum); + curIntReg++; + } + else if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeSSE) + { + if (i == 0) + { + nextRegNum = genMapFloatRegArgNumToRegNum(curFloatReg); + } + else if (i == 1) + { + nextOtherRegNum = genMapFloatRegArgNumToRegNum(curFloatReg); + } + else + { + assert(false && "fgMorphArgs Invalid index for SSE classification."); + } + curFloatReg++; + } + } + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + // fill in or update the argInfo table + nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum) : genMapIntRegArgNumToRegNum(intArgRegNum); + } #ifdef _TARGET_AMD64_ - assert(size == 1); +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING + assert(size == 1); +#endif #endif #ifndef LEGACY_BACKEND @@ -3263,14 +3532,18 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // // They should not affect the placement of any other args or stack space required. // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls. 
- bool nonStandardFound = false; for (int i=0; i<nonStandardArgs.Height(); i++) { hasNonStandardArg = true; if (argx == nonStandardArgs.Index(i).node) { - fgArgTabEntry* argEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nonStandardArgs.Index(i).reg, size, argAlign); + fgArgTabEntry* argEntry = call->fgArgInfo->AddRegArg(argIndex, argx, + args, nonStandardArgs.Index(i).reg, size, argAlign +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + , isStructArg, nextOtherRegNum, &structDesc +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + ); argEntry->isNonStandard = true; argIndex++; nonStandardFound = true; @@ -3283,9 +3556,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) if (!lateArgsComputed) { - /* This is a register argument - put it in the table */ - - fgArgTabEntryPtr newArg = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign); + // This is a register argument - put it in the table + fgArgTabEntryPtr newArg = call->fgArgInfo->AddRegArg( + argIndex, argx, args, nextRegNum, size, argAlign +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + , isStructArg, nextOtherRegNum, &structDesc +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + ); (void)newArg; //prevent "unused variable" error from GCC #ifdef _TARGET_ARM_ newArg->SetIsHfaRegArg(passUsingFloatRegs && isHfaArg); // Note that an HFA is passed in int regs for varargs @@ -3294,7 +3571,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } else { - /* This is a register argument - possibly update it in the table */ + // This is a register argument - possibly update it in the table fgArgTabEntryPtr entry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign); if (entry->isNonStandard) { @@ -3306,45 +3583,55 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // Setup the next argRegNum value if (!isBackFilled) { - if (passUsingFloatRegs) +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (isStructArg) { - fltArgRegNum += size; -#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) - argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL); - intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG); -#endif // _TARGET_AMD64_ -#ifdef _TARGET_ARM_ - if (fltArgRegNum > MAX_FLOAT_REG_ARG) - { - // This indicates a partial enregistration of a struct type - assert(argx->gtType == TYP_STRUCT); - unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG); - assert((unsigned char)numRegsPartial == numRegsPartial); - call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial); - fltArgRegNum = MAX_FLOAT_REG_ARG; - } -#endif // _TARGET_ARM_ + intArgRegNum += structIntRegs; + fltArgRegNum += structFloatRegs; } else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { - intArgRegNum += size; + if (passUsingFloatRegs) + { + fltArgRegNum += size; #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) - fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_DOUBLE); - fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG); + argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL); + intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG); #endif // _TARGET_AMD64_ #ifdef _TARGET_ARM_ - if (intArgRegNum > MAX_REG_ARG) - { - // This indicates a partial enregistration of a struct type - assert((argx->gtType == TYP_STRUCT) || argx->OperIsCopyBlkOp() || - (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG))); - unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG); - assert((unsigned 
char)numRegsPartial == numRegsPartial); - call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial); - intArgRegNum = MAX_REG_ARG; - fgPtrArgCntCur += size - numRegsPartial; + if (fltArgRegNum > MAX_FLOAT_REG_ARG) + { + // This indicates a partial enregistration of a struct type + assert(isStructArg); + unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG); + assert((unsigned char)numRegsPartial == numRegsPartial); + call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial); + fltArgRegNum = MAX_FLOAT_REG_ARG; + } +#endif // _TARGET_ARM_ } + else + { + intArgRegNum += size; +#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) + fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_DOUBLE); + fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG); +#endif // _TARGET_AMD64_ +#ifdef _TARGET_ARM_ + if (intArgRegNum > MAX_REG_ARG) + { + // This indicates a partial enregistration of a struct type + assert((isStructArg) || argx->OperIsCopyBlkOp() || + (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG))); + unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG); + assert((unsigned char)numRegsPartial == numRegsPartial); + call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial); + intArgRegNum = MAX_REG_ARG; + fgPtrArgCntCur += size - numRegsPartial; + } #endif // _TARGET_ARM_ + } } } } @@ -3352,27 +3639,28 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) { fgPtrArgCntCur += size; - /* If the register arguments have not been determined then we must fill in the argInfo */ + // If the register arguments have not been determined then we must fill in the argInfo if (!lateArgsComputed) { - /* This is a stack argument - put it in the table */ - call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign); + // This is a stack argument - put it in the table + call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg)); + } else { - /* This is a stack argument - possibly update it in the table */ + // This is a stack argument - possibly update it in the table call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign); } } - if (copyBlkClass != NULL) { noway_assert(!lateArgsComputed); - fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass); + fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc)); } #ifdef _TARGET_AMD64_ + if (argx->gtOper == GT_MKREFANY) { // 'Lower' the MKREFANY tree and insert it. @@ -3406,10 +3694,15 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } #endif // _TARGET_AMD64_ - argIndex++; - argSlots += size; - +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (nonRegPassableStruct) + { + nonRegPassedStructSlots += size; + } + else +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + argSlots += size; } // end foreach argument loop if (!lateArgsComputed) @@ -3478,18 +3771,17 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // and ignores floating point args (it is overly conservative in that case). 
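The hunk that follows adjusts the preallocated outgoing argument area: slots of structs forced to the stack are accumulated in nonRegPassedStructSlots and added on top of the register-overflow slots. A sketch of the arithmetic, assuming MAX_REG_ARG register-argument slots and REGSIZE_BYTES bytes per slot (the helper itself is illustrative):

// Illustrative computation of the preallocated outgoing argument space.
unsigned PreallocatedOutgoingArgBytes(unsigned argSlots,
                                      unsigned nonRegPassedStructSlots,
                                      unsigned maxRegArg,     // e.g. 6
                                      unsigned regSizeBytes)  // e.g. 8
{
    unsigned preallocatedArgCount;
    if (argSlots <= maxRegArg)
    {
        // Everything else fits in registers; only stack-passed structs need space.
        preallocatedArgCount = nonRegPassedStructSlots;
    }
    else
    {
        // Register overflow plus the stack-passed struct slots.
        preallocatedArgCount = argSlots + nonRegPassedStructSlots - maxRegArg;
    }
    return preallocatedArgCount * regSizeBytes;
}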
if (argSlots <= MAX_REG_ARG) { - preallocatedArgCount = 0; + preallocatedArgCount = nonRegPassedStructSlots; } else { - preallocatedArgCount = argSlots - MAX_REG_ARG; + preallocatedArgCount = argSlots + nonRegPassedStructSlots - MAX_REG_ARG; } #elif defined(_TARGET_AMD64_) preallocatedArgCount = max(4, argSlots); #else #error Unsupported or unset target architecture #endif // _TARGET_* - if (preallocatedArgCount * REGSIZE_BYTES > lvaOutgoingArgSpaceSize) { lvaOutgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES; @@ -3514,39 +3806,242 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // If the register arguments have already been determined // or we have no register arguments then we are done. - if (lateArgsComputed || (intArgRegNum == 0 && fltArgRegNum == 0 && !hasNonStandardArg)) + bool needEvalArgsToTemps = true; + + if (lateArgsComputed || (intArgRegNum == 0 && fltArgRegNum == 0 && !hasNonStandardArg && !hasStructArgument)) { - return call; + needEvalArgsToTemps = false; } - // This is the first time that we morph this call AND it has register arguments. - // Follow into the code below and do the 'defer or eval to temp' analysis. + if (needEvalArgsToTemps) + { + // This is the first time that we morph this call AND it has register arguments. + // Follow into the code below and do the 'defer or eval to temp' analysis. - call->fgArgInfo->SortArgs(); + call->fgArgInfo->SortArgs(); - call->fgArgInfo->EvalArgsToTemps(); + call->fgArgInfo->EvalArgsToTemps(); - // We may have updated the arguments - if (call->gtCallArgs) - { - UpdateGT_LISTFlags(call->gtCallArgs); + // We may have updated the arguments + if (call->gtCallArgs) + { + UpdateGT_LISTFlags(call->gtCallArgs); + } } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // Rewrite the struct args to be passed by value on stack or in registers. + fgMorphSystemVStructArgs(call, hasStructArgument); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + return call; } #ifdef _PREFAST_ #pragma warning(pop) #endif +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +// fgMorphSystemVStructArgs: +// Rewrite the struct args to be passed by value on stack or in registers. +// +// Arguments: +// call: The call whose arguments need to be morphed. +// hasStructArgument: Whether this call has struct arguments. +// +void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument) +{ + unsigned flagsSummary = 0; + GenTreePtr args; + GenTreePtr argx; + + if (hasStructArgument) + { + fgArgInfoPtr allArgInfo = call->fgArgInfo; + + for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2) + { + // For late arguments the arg tree that is overridden is in the gtCallLateArgs list. + // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.) + // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping + // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself, + // otherwise it points to the node in the late args list.
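The rewrite in fgMorphSystemVStructArgs below relies on mapping an eightbyte's classification and payload size to a scalar JIT type (GetTypeFromClassificationAndSizes in the diff). A hedged sketch of what such a mapping plausibly does; the enum values and names are stand-ins, not the JIT's actual definitions, and the real mapping also covers 1- and 2-byte integer eightbytes:

enum class JitType { Int, Long, Ref, Float, Double, Unknown };
enum class SvClass { Integer, IntegerReference, SSE };

// Map one eightbyte to the scalar type used to load/store it.
JitType TypeFromEightByte(SvClass cls, unsigned sizeBytes)
{
    switch (cls)
    {
    case SvClass::IntegerReference:
        return JitType::Ref;                                    // GC reference, always 8 bytes
    case SvClass::Integer:
        return (sizeBytes <= 4) ? JitType::Int : JitType::Long; // payload size without padding
    case SvClass::SSE:
        return (sizeBytes <= 4) ? JitType::Float : JitType::Double;
    }
    return JitType::Unknown;
}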
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0; + fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1); + assert(fgEntryPtr != nullptr); + GenTreePtr argx = fgEntryPtr->node; + GenTreePtr lateList = nullptr; + GenTreePtr lateNode = nullptr; + + if (isLateArg) + { + for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) + { + assert(list->IsList()); + + GenTreePtr argNode = list->Current(); + if (argx == argNode) + { + lateList = list; + lateNode = argNode; + break; + } + } + assert(lateList != nullptr && lateNode != nullptr); + } + GenTreePtr arg = argx; + bool argListCreated = false; + + var_types type = arg->TypeGet(); + + if (type == TYP_STRUCT) + { + // If we have already processed the arg... + if (arg->OperGet() == GT_LIST && arg->TypeGet() == TYP_STRUCT) + { + continue; + } + + // If the arg is already a GT_LDOBJ, it is set up properly. + if (arg->OperGet() == GT_LDOBJ) + { + assert(!fgEntryPtr->structDesc.passedInRegisters); + continue; + } + + assert( + arg->OperGet() == GT_ADDR || + arg->OperGet() == GT_LCL_FLD || + arg->OperGet() == GT_LCL_VAR); + + assert( + arg->OperGet() == GT_LCL_VAR || + arg->OperGet() == GT_LCL_FLD || + arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || + arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR); + + GenTreeLclVarCommon* lclCommon = arg->OperGet() == GT_ADDR ? + arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon(); + if (fgEntryPtr->structDesc.passedInRegisters) + { + if (fgEntryPtr->structDesc.eightByteCount == 1) + { + // Change the type; the code below will change the LclVar to a LCL_FLD. + type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0], fgEntryPtr->structDesc.eightByteSizes[0]); + } + else if (fgEntryPtr->structDesc.eightByteCount == 2) + { + // Create LCL_FLD for each eightbyte. + argListCreated = true; + + // Second eightbyte. + GenTreeLclFld* newLclField = new(this, GT_LCL_FLD) GenTreeLclFld( + GetTypeFromClassificationAndSizes( + fgEntryPtr->structDesc.eightByteClassifications[1], + fgEntryPtr->structDesc.eightByteSizes[1]), + lclCommon->gtLclNum, + fgEntryPtr->structDesc.eightByteOffsets[1]); + GenTreeArgList* secondNode = gtNewListNode(newLclField, nullptr); + secondNode->gtType = TYP_STRUCT; // Preserve the TYP_STRUCT. It is a special case. + newLclField->gtFieldSeq = FieldSeqStore::NotAField(); + + // First eightbyte. + arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField(); + arg->gtType = GetTypeFromClassificationAndSizes( + fgEntryPtr->structDesc.eightByteClassifications[0], + fgEntryPtr->structDesc.eightByteSizes[0]); + arg = gtNewListNode(arg, secondNode); + arg->gtType = TYP_STRUCT; // Preserve the TYP_STRUCT. It is a special case. + } + else + { + assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes for the CLR. + } + } + + // If we didn't change the type of the struct, it means + // its classification doesn't allow it to be passed directly in a + // register, so we need to pass a pointer to the destination + // where we copied the struct to. + if (!argListCreated) + { + if (fgEntryPtr->structDesc.passedInRegisters) + { + arg->gtType = type; + } + else + { + arg->gtType = TYP_I_IMPL; + + // Make sure this is an addr node.
+ if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR) + { + arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg); + } + + assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR); + + // Ldobj the temp to use it as a call argument + arg = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, arg, lvaGetStruct(lclCommon->gtLclNum)); + arg->gtFlags |= GTF_EXCEPT; + flagsSummary |= GTF_EXCEPT; + } + } + } + + if (argx != arg) + { + bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0; + fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1); + assert(fgEntryPtr != nullptr); + GenTreePtr argx = fgEntryPtr->node; + GenTreePtr lateList = nullptr; + GenTreePtr lateNode = nullptr; + if (isLateArg) + { + for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) + { + assert(list->IsList()); + + GenTreePtr argNode = list->Current(); + if (argx == argNode) + { + lateList = list; + lateNode = argNode; + break; + } + } + assert(lateList != nullptr && lateNode != nullptr); + } + + fgEntryPtr->node = arg; + if (isLateArg) + { + lateList->gtOp.gtOp1 = arg; + } + else + { + args->gtOp.gtOp1 = arg; + } + } + } + } + + // Update the flags + call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT); +} +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + // Make a copy of a struct variable if necessary, to pass to a callee. // returns: tree that computes address of the outgoing arg void -Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned argIndex, CORINFO_CLASS_HANDLE copyBlkClass) +Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, + GenTree* args, + unsigned argIndex, + CORINFO_CLASS_HANDLE copyBlkClass + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)) { GenTree* argx = args->Current(); - noway_assert(argx->gtOper != GT_MKREFANY); - // See if we need to insert a copy at all // Case 1: don't need a copy if it is the last use of a local. We can't determine that all of the time // but if there is only one use and no loops, the use must be last. @@ -3616,8 +4111,6 @@ Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned fgCurrentlyInUseArgTemps->setBit(tmp); - - // TYP_SIMD structs should not be enregistered, since ABI requires it to be // allocated on stack and address of it needs to be passed. if (lclVarIsSIMDType(tmp)) { @@ -3648,13 +4141,16 @@ Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned #if FEATURE_FIXED_OUT_ARGS // Do the copy early, and evaluate the temp later (see EvalArgsToTemps) + // When on Unix, create a LCL_FLD for structs passed in more than one register.
See fgMakeTmpArgNode GenTreePtr arg = copyBlk; #else // FEATURE_FIXED_OUT_ARGS // Structs are always on the stack, and thus never need temps // so we have to put the copy and temp all into one expression - GenTreePtr arg = fgMakeTmpArgNode(tmp); + GenTreePtr arg = fgMakeTmpArgNode( + tmp + FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters)); // Change the expression to "(tmp=val),tmp" arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); @@ -3718,30 +4214,60 @@ void Compiler::fgFixupStructReturn(GenTreePtr call) { bool callHasRetBuffArg = ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) != 0); +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + if (!callHasRetBuffArg && call->TypeGet() == TYP_STRUCT && call->gtCall.gtRetClsHnd != NO_CLASS_HANDLE) + { + eeGetSystemVAmd64PassStructInRegisterDescriptor(GetStructClassHandle(call), &structDesc); + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (!callHasRetBuffArg && call->TypeGet() == TYP_STRUCT) { -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) if (call->gtCall.IsVarargs() || !IsHfa(call)) -#endif +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (!structDesc.passedInRegisters) +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { // Now that we are past the importer, re-type this node so the register predictor does // the right thing call->gtType = genActualType((var_types)call->gtCall.gtReturnType); } +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + else + { + if (structDesc.passedInRegisters && structDesc.eightByteCount <= 1) + { + call->gtType = genActualType(getEightByteType(structDesc, 0)); + } + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) } - #ifdef _TARGET_ARM_ // Either we don't have a struct now or if struct, then it is HFA returned in regs. assert(call->TypeGet() != TYP_STRUCT || (IsHfa(call) && !callHasRetBuffArg)); #else +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Either we don't have a struct now, or if we do, it is returned in registers or via a return buffer. + assert((call->TypeGet() != TYP_STRUCT) || + (structDesc.passedInRegisters) || + (callHasRetBuffArg)); +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // No more struct returns assert(call->TypeGet() != TYP_STRUCT); +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) #endif +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // When a struct is returned in registers, there might still be a retbuf (homing space for the return) and the call keeps type TYP_STRUCT. + assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID) || (call->TypeGet() == TYP_STRUCT)); +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // If it was a struct return, it has been transformed into a call // with a return buffer (that returns TYP_VOID) or into a return // of a primitive/enregisterable type assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID)); +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) } @@ -4698,7 +5224,6 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma ); } #endif - if (fldOffset != 0) { // Generate the "addr" node. @@ -5180,6 +5705,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) } // Get the size of the struct and see if it is 1, 2, 4 or 8 bytes in size + // For Amd64-Unix the call below checks to see if the struct is register passable.
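For orientation: a register-returned struct uses RAX then RDX for its integer eightbytes and XMM0 then XMM1 for its SSE eightbytes, assigned in eightbyte order. This is the System V rule that fgFixupStructReturn above depends on, and that calldescrworkeramd64.S materializes later in this diff. A small sketch of the assignment, with illustrative names, assuming at most two eightbytes as in the CLR:

#include <cassert>

// Return the names of the registers carrying each eightbyte of a
// register-returned struct under the System V AMD64 convention.
void StructReturnRegisters(int eightByteCount, const bool isSSE[2],
                           const char* regs[2])
{
    assert(eightByteCount >= 1 && eightByteCount <= 2);
    const char* intRegs[] = { "rax", "rdx" };   // integer return registers, in order
    const char* sseRegs[] = { "xmm0", "xmm1" }; // SSE return registers, in order
    int intIdx = 0, sseIdx = 0;
    for (int i = 0; i < eightByteCount; i++)
    {
        regs[i] = isSSE[i] ? sseRegs[sseIdx++] : intRegs[intIdx++];
    }
}

// Example: a { long; double } struct returns its first eightbyte in rax and
// its second in xmm0, matching the "first integer, second SSE" case in the
// CallDescrWorker return path later in this diff.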
if (argx->OperGet() == GT_LDOBJ) { #ifdef _TARGET_AMD64_ @@ -5634,6 +6160,13 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL; #endif +#ifdef FEATURE_PAL + if (!canFastTailCall && szFailReason == nullptr) + { + szFailReason = "Non fast tail calls disabled for PAL based systems."; + } +#endif // FEATURE_PAL + if (szFailReason != nullptr) { #ifdef DEBUG @@ -5659,13 +6192,6 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) compCurBB->bbJumpKind = BBJ_RETURN; #endif -#ifdef FEATURE_PAL - if (!canFastTailCall) - { - goto NO_TAIL_CALL; - } -#endif // FEATURE_PAL - // Set this flag before calling fgMorphCall() to prevent inlining this call. call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL; @@ -5847,6 +6373,13 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) // This is a HFA, use float 0. callType = TYP_FLOAT; } +#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Return a dummy node, as the return is already removed. + if (callType == TYP_STRUCT) + { + // This is a register-returned struct. Return a 0. + callType = TYP_INT; + } #endif result = gtNewZeroConNode(genActualType(callType)); result = fgMorphTree(result); @@ -5990,7 +6523,6 @@ NO_TAIL_CALL: retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg")); lvaSetStruct(retValTmpNum, structHnd, true); - dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT)); } } @@ -6400,6 +6932,7 @@ ONE_SIMPLE_ASG: if (lclVarTree->TypeGet() == TYP_STRUCT && (lvaTable[lclNum].lvPromoted || lclVarIsSIMDType(lclNum))) { + // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.) goto GENERAL_BLKOP; } @@ -7203,8 +7736,13 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree) { // Spill the (complex) address to a BYREF temp. // Note, at most one address may need to be spilled. - addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local")); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + lvaTable[addrSpillTemp].lvType = TYP_I_IMPL; + + tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_I_IMPL), + addrSpill); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING lvaTable[addrSpillTemp].lvType = TYP_BYREF; if (addrSpillIsStackDest) { @@ -7214,6 +7752,8 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree) tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill); +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING + #ifndef LEGACY_BACKEND // If we are assigning the address of a LclVar here // liveness does not account for this kind of address taken use. @@ -9529,7 +10069,7 @@ COMPARE: case GT_ADD: -CM_OVF_OP: + CM_OVF_OP : if (tree->gtOverflow()) { tree->gtRequestSetFlags(); @@ -10906,7 +11446,9 @@ ASG_OP: if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0)) { if (tree->gtOverflow() || op1->gtOverflow()) + { break; + } ssize_t imul = op2->gtIntCon.gtIconVal; ssize_t iadd = add->gtIntCon.gtIconVal; @@ -12825,7 +13367,11 @@ void Compiler::fgMorphBlocks() //replace the GT_RETURN node to be a GT_ASG that stores the return value into genReturnLocal.
if (genReturnLocal != BAD_VAR_NUM) { +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + noway_assert(info.compRetType != TYP_VOID); +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) noway_assert(info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT); +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) noway_assert(block->bbTreeList); GenTreePtr last = block->bbTreeList->gtPrev; @@ -13834,9 +14380,9 @@ void Compiler::fgPromoteStructs() break; } -#ifdef _TARGET_ARM_ +#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if (!varDsc->lvDontPromote) -#endif // _TARGET_ARM_ +#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { #ifdef FEATURE_SIMD if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic) @@ -14154,6 +14700,8 @@ void Compiler::fgMarkImplicitByRefArgs() size = info.compCompHnd->getClassSize(typeHnd); } + +#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) #if defined(_TARGET_AMD64_) if (size > REGSIZE_BYTES || (size & (size - 1)) != 0) #elif defined(_TARGET_ARM64_) @@ -14184,6 +14732,7 @@ void Compiler::fgMarkImplicitByRefArgs() varDsc->lvKeepType = 1; #endif // DEBUG } +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING } } diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp index 839f497f4a..89945301f0 100644 --- a/src/jit/regalloc.cpp +++ b/src/jit/regalloc.cpp @@ -667,7 +667,7 @@ void Compiler::raSetupArgMasks(RegState *regState) #endif // LEGACY_BACKEND // The code to set the regState for each arg is outlined for shared use -// by linear scan +// by linear scan. (It is not shared for the System V AMD64 platform.) regNumber Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) { regNumber inArgReg = argDsc->lvArgReg; diff --git a/src/jit/scopeinfo.cpp b/src/jit/scopeinfo.cpp index a108713792..53a5960967 100644 --- a/src/jit/scopeinfo.cpp +++ b/src/jit/scopeinfo.cpp @@ -909,21 +909,65 @@ void CodeGen::psiBegProlog() psiScope * newScope = psiNewPrologScope(varScope->vsdLVnum, varScope->vsdVarNum); - if (lclVarDsc1->lvIsRegArg) + if (lclVarDsc1->lvIsRegArg) { -#ifdef DEBUG - var_types regType = compiler->mangleVarArgsType(lclVarDsc1->TypeGet()); -#ifdef _TARGET_ARM_ - if (lclVarDsc1->lvIsHfaRegArg) + bool isStructHandled = false; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + if (lclVarDsc1->TypeGet() == TYP_STRUCT) { - regType = lclVarDsc1->GetHfaType(); + CORINFO_CLASS_HANDLE typeHnd = lclVarDsc1->lvVerTypeInfo.GetClassHandle(); + assert(typeHnd != nullptr); + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + assert(structDesc.passedInRegisters); + + for (unsigned nCnt = 0; nCnt < structDesc.eightByteCount; nCnt++) + { + unsigned len = structDesc.eightByteSizes[nCnt]; + var_types regType = TYP_UNDEF; + regNumber regNum = REG_NA; + if (nCnt == 0) + { + regNum = lclVarDsc1->lvArgReg; + } + else if (nCnt == 1) + { + regNum = lclVarDsc1->lvOtherArgReg; + } + else + { + assert(false && "Invalid eightbyte number."); + } + + regType = compiler->getEightByteType(structDesc, nCnt); +#ifdef DEBUG + regType = compiler->mangleVarArgsType(regType); + assert(genMapRegNumToRegArgNum(regNum, regType) != (unsigned)-1); +#endif // DEBUG + + newScope->scRegister = true; + newScope->u1.scRegNum = (regNumberSmall)regNum; + } + + isStructHandled = true; } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (!isStructHandled) + { +#ifdef DEBUG + var_types regType = compiler->mangleVarArgsType(lclVarDsc1->TypeGet());
+#ifdef _TARGET_ARM_ + if (lclVarDsc1->lvIsHfaRegArg) + { + regType = lclVarDsc1->GetHfaType(); + } #endif // _TARGET_ARM_ - assert(genMapRegNumToRegArgNum(lclVarDsc1->lvArgReg, regType) != (unsigned)-1); + assert(genMapRegNumToRegArgNum(lclVarDsc1->lvArgReg, regType) != (unsigned)-1); #endif // DEBUG - newScope->scRegister = true; - newScope->u1.scRegNum = (regNumberSmall) lclVarDsc1->lvArgReg; + newScope->scRegister = true; + newScope->u1.scRegNum = (regNumberSmall)lclVarDsc1->lvArgReg; + } } else { diff --git a/src/jit/target.h b/src/jit/target.h index f4aad4e153..767eb31d8d 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -19,6 +19,12 @@ #endif #endif +#if (defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX)) +#define FEATURE_VARARG 0 +#else // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX)) +#define FEATURE_VARARG 1 +#endif // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX)) + /*****************************************************************************/ // The following are intended to capture only those #defines that cannot be replaced // with static const members of Target @@ -971,10 +977,28 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_LNGRET REG_EAX #define RBM_LNGRET RBM_EAX +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + #define REG_INTRET_1 REG_RDX + #define RBM_INTRET_1 RBM_RDX + + #define REG_LNGRET_1 REG_RDX + #define RBM_LNGRET_1 RBM_RDX +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + + #define REG_FLOATRET REG_XMM0 #define RBM_FLOATRET RBM_XMM0 + #define REG_DOUBLERET REG_XMM0 #define RBM_DOUBLERET RBM_XMM0 +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +#define REG_FLOATRET_1 REG_XMM1 +#define RBM_FLOATRET_1 RBM_XMM1 + +#define REG_DOUBLERET_1 REG_XMM1 +#define RBM_DOUBLERET_1 RBM_XMM1 +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + #define REG_FPBASE REG_EBP #define RBM_FPBASE RBM_EBP #define STR_FPBASE "rbp" @@ -1872,7 +1896,7 @@ extern const regMaskSmall regMasks[REG_COUNT]; inline regMaskTP genRegMask(regNumber reg) { assert((unsigned)reg < ArrLen(regMasks)); -#if defined _TARGET_AMD64_ +#ifdef _TARGET_AMD64_ // shift is faster than a L1 hit on modern x86 // (L1 latency on sandy bridge is 4 cycles for [base] and 5 for [base + index*c] ) // the reason this is AMD-only is because the x86 BE will try to get reg masks for REG_STK diff --git a/src/pal/src/cruntime/printfcpp.cpp b/src/pal/src/cruntime/printfcpp.cpp index 87cd8a8aff..8adf3470c2 100644 --- a/src/pal/src/cruntime/printfcpp.cpp +++ b/src/pal/src/cruntime/printfcpp.cpp @@ -2306,7 +2306,7 @@ int CoreVfprintf(CPalThread *pthrCurrent, PAL_FILE *stream, const char *format, if (!Length) { ASSERT("WideCharToMultiByte failed. 
Error is %d\n", - GetLastError()); + GetLastError()); PERF_EXIT(vfprintf); va_end(ap); return -1; diff --git a/src/vm/amd64/calldescrworkeramd64.S b/src/vm/amd64/calldescrworkeramd64.S index efee6f325a..ca4fd703c6 100644 --- a/src/vm/amd64/calldescrworkeramd64.S +++ b/src/vm/amd64/calldescrworkeramd64.S @@ -108,11 +108,43 @@ LOCAL_LABEL(NoFloatArguments): je LOCAL_LABEL(ReturnsFloat) cmp ecx, 8 je LOCAL_LABEL(ReturnsDouble) - // unexpected + +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Struct with two integer eightbytes + cmp ecx, 16 + jne LOCAL_LABEL(NotTwoIntegerEightbytes) + mov qword ptr [rbx+CallDescrData__returnValue], rax + mov qword ptr [rbx+CallDescrData__returnValue + 8], rdx + jmp LOCAL_LABEL(Epilog) + +LOCAL_LABEL(NotTwoIntegerEightbytes): + // Struct with the first eightbyte SSE and the second one integer + cmp ecx, 16 + 1 + jne LOCAL_LABEL(NotFirstSSESecondIntegerEightbyte) + movsd real8 ptr [rbx+CallDescrData__returnValue], xmm0 + mov qword ptr [rbx+CallDescrData__returnValue + 8], rax + jmp LOCAL_LABEL(Epilog) + +LOCAL_LABEL(NotFirstSSESecondIntegerEightbyte): + // Struct with the first eightbyte integer and the second one SSE + cmp ecx, 16 + 2 + jne LOCAL_LABEL(NotFirstIntegerSecondSSEEightbyte) + mov qword ptr [rbx+CallDescrData__returnValue], rax + movsd real8 ptr [rbx+CallDescrData__returnValue + 8], xmm0 + jmp LOCAL_LABEL(Epilog) + +LOCAL_LABEL(NotFirstIntegerSecondSSEEightbyte): + // Struct with two SSE eightbytes + cmp ecx, 16 + 3 + jne LOCAL_LABEL(Epilog) // unexpected + movsd real8 ptr [rbx+CallDescrData__returnValue], xmm0 + movsd real8 ptr [rbx+CallDescrData__returnValue + 8], xmm1 +#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING + jmp LOCAL_LABEL(Epilog) LOCAL_LABEL(ReturnsInt): - mov [rbx+CallDescrData__returnValue], rax + mov qword ptr [rbx+CallDescrData__returnValue], rax LOCAL_LABEL(Epilog): lea rsp, [rbp - 8] // deallocate arguments diff --git a/src/vm/amd64/cgenamd64.cpp b/src/vm/amd64/cgenamd64.cpp index e9c1ad468b..51738684ad 100644 --- a/src/vm/amd64/cgenamd64.cpp +++ b/src/vm/amd64/cgenamd64.cpp @@ -323,8 +323,16 @@ void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD) UpdateRegDisplayFromCalleeSavedRegisters(pRD, &(m_Args->Regs)); +#ifdef UNIX_AMD64_ABI + pRD->pCurrentContextPointers->Rsi = NULL; + pRD->pCurrentContextPointers->Rdi = NULL; +#endif pRD->pCurrentContextPointers->Rcx = NULL; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + pRD->pCurrentContextPointers->Rdx = (PULONG64)&m_Args->Rdx; +#else // FEATURE_UNIX_AMD64_STRUCT_PASSING pRD->pCurrentContextPointers->Rdx = NULL; +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING pRD->pCurrentContextPointers->R8 = NULL; pRD->pCurrentContextPointers->R9 = NULL; pRD->pCurrentContextPointers->R10 = NULL; diff --git a/src/vm/amd64/cgencpu.h b/src/vm/amd64/cgencpu.h index 39b8ba91de..de64b1600b 100644 --- a/src/vm/amd64/cgencpu.h +++ b/src/vm/amd64/cgencpu.h @@ -66,14 +66,15 @@ EXTERN_C void FastCallFinalizeWorker(Object *obj, PCODE funcPtr); #define CACHE_LINE_SIZE 64 // Current AMD64 processors have 64-byte cache lines as per AMD64 optmization manual #define LOG2SLOT LOG2_PTRSIZE -#define ENREGISTERED_RETURNTYPE_MAXSIZE 8 // bytes #define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 8 // bytes #define ENREGISTERED_PARAMTYPE_MAXSIZE 8 // bytes #ifdef UNIX_AMD64_ABI -#define CALLDESCR_ARGREGS 1 // CallDescrWorker has ArgumentRegister parameter -#define CALLDESCR_FPARGREGS 1 // CallDescrWorker has FloatArgumentRegisters parameter +#define ENREGISTERED_RETURNTYPE_MAXSIZE 
16 // bytes +#define CALLDESCR_ARGREGS 1 // CallDescrWorker has ArgumentRegister parameter +#define CALLDESCR_FPARGREGS 1 // CallDescrWorker has FloatArgumentRegisters parameter #else +#define ENREGISTERED_RETURNTYPE_MAXSIZE 8 // bytes #define COM_STUBS_SEPARATE_FP_LOCATIONS #define CALLDESCR_REGTYPEMAP 1 #endif @@ -265,9 +266,11 @@ struct CalleeSavedRegistersPointers { #ifdef UNIX_AMD64_ABI +#define NUM_FLOAT_ARGUMENT_REGISTERS 8 + typedef DPTR(struct FloatArgumentRegisters) PTR_FloatArgumentRegisters; struct FloatArgumentRegisters { - M128A d[8]; // xmm0-xmm7 + M128A d[NUM_FLOAT_ARGUMENT_REGISTERS]; // xmm0-xmm7 }; #endif @@ -475,11 +478,23 @@ struct DECLSPEC_ALIGN(8) UMEntryThunkCode struct HijackArgs { +#ifndef PLATFORM_UNIX union { ULONG64 Rax; ULONG64 ReturnValue; }; +#else // PLATFORM_UNIX + union + { + struct + { + ULONG64 Rax; + ULONG64 Rdx; + }; + ULONG64 ReturnValue[2]; + }; +#endif // PLATFORM_UNIX CalleeSavedRegisters Regs; union { diff --git a/src/vm/amd64/unixasmhelpers.S b/src/vm/amd64/unixasmhelpers.S index 21a8f63232..058a69a382 100644 --- a/src/vm/amd64/unixasmhelpers.S +++ b/src/vm/amd64/unixasmhelpers.S @@ -184,12 +184,13 @@ NESTED_ENTRY OnHijackScalarTripThread, _TEXT, NoHandler PUSH_CALLEE_SAVED_REGISTERS + push_register rdx // Push rax again - this is where integer/pointer return values are returned push_register rax mov rdi, rsp - alloc_stack 0x20 + alloc_stack 0x28 // First float return register movdqa [rsp], xmm0 @@ -202,14 +203,55 @@ NESTED_ENTRY OnHijackScalarTripThread, _TEXT, NoHandler movdqa xmm0, [rsp] movdqa xmm1, [rsp+0x10] - free_stack 0x20 + free_stack 0x28 pop_register rax + pop_register rdx POP_CALLEE_SAVED_REGISTERS ret NESTED_END OnHijackScalarTripThread, _TEXT +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +//------------------------------------------------ +// OnHijackStructInRegsTripThread +// +NESTED_ENTRY OnHijackStructInRegsTripThread, _TEXT, NoHandler + + // Make room for the real return address (rip) + push_register rax + + PUSH_CALLEE_SAVED_REGISTERS + + push_register rdx + // Push rax again - this is where part of the struct gets returned + push_register rax + + mov rdi, rsp + + alloc_stack 0x28 + + // First float return register + movdqa [rsp], xmm0 + // Second float return register + movdqa [rsp+0x10], xmm1 + + END_PROLOGUE + + call C_FUNC(OnHijackStructInRegsWorker) + + movdqa xmm0, [rsp] + movdqa xmm1, [rsp+0x10] + free_stack 0x28 + pop_register rax + pop_register rdx + + POP_CALLEE_SAVED_REGISTERS + ret + +NESTED_END OnHijackStructInRegsTripThread, _TEXT +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + //------------------------------------------------ // OnHijackObjectTripThread // @@ -220,16 +262,22 @@ NESTED_ENTRY OnHijackObjectTripThread, _TEXT, NoHandler PUSH_CALLEE_SAVED_REGISTERS + push_register rdx // Push rax again - this is where integer/pointer return values are returned push_register rax mov rdi, rsp + // align stack + alloc_stack 0x8 + END_PROLOGUE call C_FUNC(OnHijackObjectWorker) + free_stack 0x8 pop_register rax + pop_register rdx POP_CALLEE_SAVED_REGISTERS ret @@ -246,16 +294,22 @@ NESTED_ENTRY OnHijackInteriorPointerTripThread, _TEXT, NoHandler PUSH_CALLEE_SAVED_REGISTERS + push_register rdx // Push rax again - this is where integer/pointer return values are returned push_register rax mov rdi, rsp + // align stack + alloc_stack 0x8 + END_PROLOGUE call C_FUNC(OnHijackInteriorPointerWorker) + free_stack 0x8 pop_register rax + pop_register rdx POP_CALLEE_SAVED_REGISTERS ret diff --git a/src/vm/argdestination.h 
b/src/vm/argdestination.h new file mode 100644 index 0000000000..5896414f35 --- /dev/null +++ b/src/vm/argdestination.h @@ -0,0 +1,217 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// +// + +#ifndef __ARGDESTINATION_H__ +#define __ARGDESTINATION_H__ + +// The ArgDestination class represents a destination location of an argument. +class ArgDestination +{ + // Base address to which the m_offset is applied to get the actual argument location. + PTR_VOID m_base; + // Offset of the argument relative to the m_base. On AMD64 on Unix, it can have a special + // value that represents a struct that contains both general purpose and floating point fields + // passed in registers. + int m_offset; + // For structs passed in registers, this member points to an ArgLocDesc that contains + // details on the layout of the struct in general purpose and floating point registers. + ArgLocDesc* m_argLocDescForStructInRegs; + +public: + + // Construct the ArgDestination + ArgDestination(PTR_VOID base, int offset, ArgLocDesc* argLocDescForStructInRegs) + : m_base(base), + m_offset(offset), + m_argLocDescForStructInRegs(argLocDescForStructInRegs) + { + LIMITED_METHOD_CONTRACT; +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + _ASSERTE((argLocDescForStructInRegs != NULL) || (offset != TransitionBlock::StructInRegsOffset)); +#else + _ASSERTE(argLocDescForStructInRegs == NULL); +#endif + } + + // Get argument destination address for arguments that are not structs passed in registers. + PTR_VOID GetDestinationAddress() + { + LIMITED_METHOD_CONTRACT; + return dac_cast<PTR_VOID>(dac_cast<TADDR>(m_base) + m_offset); + } + +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + + // Returns true if the ArgDestination represents a struct passed in registers. + bool IsStructPassedInRegs() + { + LIMITED_METHOD_CONTRACT; + return m_offset == TransitionBlock::StructInRegsOffset; + } + + // Get destination address for floating point fields of a struct passed in registers. + PTR_VOID GetStructFloatRegDestinationAddress() + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(IsStructPassedInRegs()); + int offset = TransitionBlock::GetOffsetOfFloatArgumentRegisters() + m_argLocDescForStructInRegs->m_idxFloatReg * 8; + return dac_cast<PTR_VOID>(dac_cast<TADDR>(m_base) + offset); + } + + // Get destination address for non-floating point fields of a struct passed in registers. + PTR_VOID GetStructGenRegDestinationAddress() + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(IsStructPassedInRegs()); + int offset = TransitionBlock::GetOffsetOfArgumentRegisters() + m_argLocDescForStructInRegs->m_idxGenReg * 8; + return dac_cast<PTR_VOID>(dac_cast<TADDR>(m_base) + offset); + } + +#ifndef DACCESS_COMPILE + // Zero struct argument stored in registers described by the current ArgDestination. + // Arguments: + // fieldBytes - size of the structure + void ZeroStructInRegisters(int fieldBytes) + { + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_FORBID_FAULT; + STATIC_CONTRACT_MODE_COOPERATIVE; + + // To zero the struct, we create a zero-filled array of large enough size and + // then copy it to the registers. It is implemented this way to keep the complexity + // of dealing with the eightbyte classification in a single function. + // This function is used rarely and so the overhead of reading the zeros from + // the stack is negligible.
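Before the method bodies below, a hypothetical caller-side sketch of how this new ArgDestination API is meant to be used. The struct Point and the call site are invented for illustration, but IsStructPassedInRegs, CopyStructToRegisters, and GetDestinationAddress are the methods introduced by this file:

#include <cstring>

// Hypothetical example type: classifies as one INTEGER eightbyte (x)
// followed by one SSE eightbyte (y).
struct Point { long long x; double y; };

void PassPointArg(ArgDestination& dest, Point& value)
{
    if (dest.IsStructPassedInRegs())
    {
        // x lands in the next general purpose argument register and y in the
        // next XMM register, per the eightbyte classification held in the
        // ArgLocDesc this destination was constructed with.
        dest.CopyStructToRegisters(&value, sizeof(Point), 0);
    }
    else
    {
        // Stack destination: a plain copy to the computed address suffices.
        memcpy(dest.GetDestinationAddress(), &value, sizeof(Point));
    }
}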
+ long long zeros[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS] = {}; + _ASSERTE(sizeof(zeros) >= fieldBytes); + + CopyStructToRegisters(zeros, fieldBytes, 0); + } + + // Copy struct argument into registers described by the current ArgDestination. + // Arguments: + // src = source data of the structure + // fieldBytes - size of the structure + // destOffset - nonzero when copying values into Nullable<T>, it is the offset + // of the T value inside of the Nullable<T> + void CopyStructToRegisters(void *src, int fieldBytes, int destOffset) + { + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_FORBID_FAULT; + STATIC_CONTRACT_MODE_COOPERATIVE; + + _ASSERTE(IsStructPassedInRegs()); + + BYTE* genRegDest = (BYTE*)GetStructGenRegDestinationAddress() + destOffset; + BYTE* floatRegDest = (BYTE*)GetStructFloatRegDestinationAddress(); + INDEBUG(int remainingBytes = fieldBytes;) + + EEClass* eeClass = m_argLocDescForStructInRegs->m_eeClass; + _ASSERTE(eeClass != NULL); + + // We start at the first eightByte that the destOffset didn't skip completely. + for (int i = destOffset / 8; i < eeClass->GetNumberEightBytes(); i++) + { + int eightByteSize = eeClass->GetEightByteSize(i); + SystemVClassificationType eightByteClassification = eeClass->GetEightByteClassification(i); + + // Adjust the size of the first eightByte by the destOffset + eightByteSize -= (destOffset & 7); + destOffset = 0; + + _ASSERTE(remainingBytes >= eightByteSize); + + if (eightByteClassification == SystemVClassificationTypeSSE) + { + if (eightByteSize == 8) + { + *(UINT64*)floatRegDest = *(UINT64*)src; + } + else + { + _ASSERTE(eightByteSize == 4); + *(UINT32*)floatRegDest = *(UINT32*)src; + } + floatRegDest += 8; + } + else + { + if (eightByteSize == 8) + { + _ASSERTE((eightByteClassification == SystemVClassificationTypeInteger) || + (eightByteClassification == SystemVClassificationTypeIntegerReference)); + + _ASSERTE(IS_ALIGNED((SIZE_T)genRegDest, 8)); + *(UINT64*)genRegDest = *(UINT64*)src; + } + else + { + _ASSERTE(eightByteClassification == SystemVClassificationTypeInteger); + memcpyNoGCRefs(genRegDest, src, eightByteSize); + } + + genRegDest += eightByteSize; + } + + src = (BYTE*)src + eightByteSize; + INDEBUG(remainingBytes -= eightByteSize;) + } + + _ASSERTE(remainingBytes == 0); + } + +#endif //DACCESS_COMPILE + + // Report managed object pointers in the struct in registers + // Arguments: + // fn - promotion function to apply to each managed object pointer + // sc - scan context to pass to the promotion function + // fieldBytes - size of the structure + void ReportPointersFromStructInRegisters(promote_func *fn, ScanContext *sc, int fieldBytes) + { + LIMITED_METHOD_CONTRACT; + + _ASSERTE(IsStructPassedInRegs()); + + TADDR genRegDest = dac_cast<TADDR>(GetStructGenRegDestinationAddress()); + INDEBUG(int remainingBytes = fieldBytes;) + + EEClass* eeClass = m_argLocDescForStructInRegs->m_eeClass; + _ASSERTE(eeClass != NULL); + + for (int i = 0; i < eeClass->GetNumberEightBytes(); i++) + { + int eightByteSize = eeClass->GetEightByteSize(i); + SystemVClassificationType eightByteClassification = eeClass->GetEightByteClassification(i); + + _ASSERTE(remainingBytes >= eightByteSize); + + if (eightByteClassification != SystemVClassificationTypeSSE) + { + if (eightByteClassification == SystemVClassificationTypeIntegerReference) + { + _ASSERTE(eightByteSize == 8); + _ASSERTE(IS_ALIGNED((SIZE_T)genRegDest, 8)); + + (*fn)(dac_cast<PTR_PTR_Object>(genRegDest), sc, 0); + } + + genRegDest += eightByteSize; + } 
+ + INDEBUG(remainingBytes -= eightByteSize;) + } + + _ASSERTE(remainingBytes == 0); + } + +#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING + +}; + +#endif // __ARGDESTINATION_H__ diff --git a/src/vm/arm/stubs.cpp b/src/vm/arm/stubs.cpp index 368e6cf810..342c73b0d0 100644 --- a/src/vm/arm/stubs.cpp +++ b/src/vm/arm/stubs.cpp @@ -1052,7 +1052,7 @@ void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expe // nop - insert padding _stub._entryPoint[n++] = 0xbf00; - + _ASSERTE(n == DispatchStub::entryPointLen); // Make sure that the data members below are aligned diff --git a/src/vm/callhelpers.cpp b/src/vm/callhelpers.cpp index a910c0ea30..137dbb8656 100644 --- a/src/vm/callhelpers.cpp +++ b/src/vm/callhelpers.cpp @@ -401,7 +401,7 @@ ARG_SLOT MethodDescCallSite::CallTargetWorker(const ARG_SLOT *pArguments) // Record this call if required g_IBCLogger.LogMethodDescAccess(m_pMD); - // + // // All types must already be loaded. This macro also sets up a FAULT_FORBID region which is // also required for critical calls since we cannot inject any failure points between the // caller of MethodDesc::CallDescr and the actual transition to managed code. @@ -537,9 +537,12 @@ ARG_SLOT MethodDescCallSite::CallTargetWorker(const ARG_SLOT *pArguments) // have at least one such argument we point the call worker at the floating point area of the // frame (we leave it null otherwise since the worker can perform a useful optimization if it // knows no floating point registers need to be set up). - if ((ofs < 0) && (pFloatArgumentRegisters == NULL)) + if (TransitionBlock::HasFloatRegister(ofs, m_argIt.GetArgLocDescForStructInRegs()) && + (pFloatArgumentRegisters == NULL)) + { pFloatArgumentRegisters = (FloatArgumentRegisters*)(pTransitionBlock + TransitionBlock::GetOffsetOfFloatArgumentRegisters()); + } #endif #if CHECK_APP_DOMAIN_LEAKS @@ -553,6 +556,9 @@ ARG_SLOT MethodDescCallSite::CallTargetWorker(const ARG_SLOT *pArguments) } #endif // CHECK_APP_DOMAIN_LEAKS +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + _ASSERTE(ofs != TransitionBlock::StructInRegsOffset); +#endif PVOID pDest = pTransitionBlock + ofs; UINT32 stackSize = m_argIt.GetArgSize(); diff --git a/src/vm/callingconvention.h b/src/vm/callingconvention.h index 244a3df878..490ae3ce87 100644 --- a/src/vm/callingconvention.h +++ b/src/vm/callingconvention.h @@ -42,6 +42,12 @@ struct ArgLocDesc int m_idxStack; // First stack slot used (or -1) int m_cStack; // Count of stack slots used (or 0) +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + + EEClass* m_eeClass; // For structs passed in register, it points to the EEClass of the struct + +#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING + #if defined(_TARGET_ARM_) BOOL m_fRequires64BitAlignment; // True if the argument should always be aligned (in registers or on the stack #endif @@ -63,6 +69,9 @@ struct ArgLocDesc #if defined(_TARGET_ARM_) m_fRequires64BitAlignment = FALSE; #endif +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + m_eeClass = NULL; +#endif } }; @@ -138,9 +147,13 @@ struct TransitionBlock { LIMITED_METHOD_CONTRACT; +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + return offset >= sizeof(TransitionBlock); +#else int ofsArgRegs = GetOffsetOfArgumentRegisters(); return offset >= (int) (ofsArgRegs + ARGUMENTREGISTERS_SIZE); +#endif } static BOOL IsArgumentRegisterOffset(int offset) @@ -156,14 +169,45 @@ struct TransitionBlock static UINT 
GetArgumentIndexFromOffset(int offset) { LIMITED_METHOD_CONTRACT; + +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + _ASSERTE(offset != TransitionBlock::StructInRegsOffset); +#endif return (offset - GetOffsetOfArgumentRegisters()) / sizeof(TADDR); } + + static UINT GetStackArgumentIndexFromOffset(int offset) + { + LIMITED_METHOD_CONTRACT; + + return (offset - TransitionBlock::GetOffsetOfArgs()) / STACK_ELEM_SIZE; + } + #endif #ifdef CALLDESCR_FPARGREGS static BOOL IsFloatArgumentRegisterOffset(int offset) { LIMITED_METHOD_CONTRACT; +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + return (offset != TransitionBlock::StructInRegsOffset) && (offset < 0); +#else + return offset < 0; +#endif + } + + // Check if an argument uses a floating point register, that is, whether it is + // either a floating point argument or a struct passed in registers that + // has a floating point member. + static BOOL HasFloatRegister(int offset, ArgLocDesc* argLocDescForStructInRegs) + { + LIMITED_METHOD_CONTRACT; + #if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (offset == TransitionBlock::StructInRegsOffset) + { + return argLocDescForStructInRegs->m_cFloatReg > 0; + } + #endif return offset < 0; } @@ -172,7 +216,7 @@ struct TransitionBlock LIMITED_METHOD_CONTRACT; return -GetNegSpaceSize(); } -#endif +#endif // CALLDESCR_FPARGREGS static int GetOffsetOfCalleeSavedRegisters() { @@ -194,6 +238,11 @@ struct TransitionBlock } static const int InvalidOffset = -1; +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Special offset value to represent a struct passed in registers. Such a struct can span both + // general purpose and floating point registers, so it can have two different offsets. + static const int StructInRegsOffset = -2; +#endif }; //----------------------------------------------------------------------- @@ -340,11 +389,16 @@ public: { LIMITED_METHOD_CONTRACT; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // No arguments are passed by reference on AMD64 on Unix + return FALSE; +#else // If the size is bigger than ENREGISTERED_PARAM_TYPE_MAXSIZE, or if the size is NOT a power of 2, then // the argument is passed by reference. return (size > ENREGISTERED_PARAMTYPE_MAXSIZE) || ((size & (size-1)) != 0); +#endif } -#endif +#endif // _TARGET_AMD64_ // This overload should be used for varargs only. static BOOL IsVarArgPassedByRef(size_t size) { LIMITED_METHOD_CONTRACT; #ifdef _TARGET_AMD64_ +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + PORTABILITY_ASSERT("ArgIteratorTemplate::IsVarArgPassedByRef"); + return FALSE; +#else // FEATURE_UNIX_AMD64_STRUCT_PASSING return IsArgPassedByRef(size); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + #else return (size > ENREGISTERED_PARAMTYPE_MAXSIZE); #endif @@ -426,6 +486,15 @@ public: void GetVASigCookieLoc(ArgLocDesc * pLoc) { WRAPPER_NO_CONTRACT; GetSimpleLoc(GetVASigCookieOffset(), pLoc); } #endif // !_TARGET_X86_ + ArgLocDesc* GetArgLocDescForStructInRegs() + { +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + return m_hasArgLocDescForStructInRegs ? &m_argLocDescForStructInRegs : NULL; +#else + return NULL; +#endif + } + #ifdef _TARGET_ARM_ // Get layout information for the argument that the ArgIterator is currently visiting.
void GetArgLoc(int argOffset, ArgLocDesc *pLoc) { @@ -463,7 +532,7 @@ public: } else { - pLoc->m_idxStack = TransitionBlock::GetArgumentIndexFromOffset(argOffset) - 4; + pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); pLoc->m_cStack = cSlots; } } @@ -509,7 +578,7 @@ public: } else { - pLoc->m_idxStack = TransitionBlock::GetArgumentIndexFromOffset(argOffset) - 8; + pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); pLoc->m_cStack = cSlots; } } #if defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI) // Get layout information for the argument that the ArgIterator is currently visiting. - void GetArgLoc(int argOffset, ArgLocDesc *pLoc) + void GetArgLoc(int argOffset, ArgLocDesc* pLoc) { LIMITED_METHOD_CONTRACT; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (m_hasArgLocDescForStructInRegs) + { + *pLoc = m_argLocDescForStructInRegs; + return; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + + if (argOffset == TransitionBlock::StructInRegsOffset) + { + // We already have the argLocDesc for structs passed in registers; we + // compute it in GetNextOffset, since it is always needed there. + _ASSERTE(false); + return; + } + pLoc->Init(); if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { // Dividing by 8 as size of each register in FloatArgumentRegisters is 8 bytes. pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / 8; - - // UNIXTODO: Passing of structs, HFAs. For now, use the Windows convention. pLoc->m_cFloatReg = 1; - return; } - - // UNIXTODO: Passing of structs, HFAs. For now, use the Windows convention. - int cSlots = 1; - - if (!TransitionBlock::IsStackArgumentOffset(argOffset)) + else if (!TransitionBlock::IsStackArgumentOffset(argOffset)) { pLoc->m_idxGenReg = TransitionBlock::GetArgumentIndexFromOffset(argOffset); - pLoc->m_cGenReg = cSlots; - } + pLoc->m_cGenReg = 1; + } else { - pLoc->m_idxStack = (argOffset - TransitionBlock::GetOffsetOfArgs()) / 8; - pLoc->m_cStack = cSlots; + pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); + pLoc->m_cStack = (GetArgSize() + STACK_ELEM_SIZE - 1) / STACK_ELEM_SIZE; } } -#endif // _TARGET_ARM64_ && UNIX_AMD64_ABI +#endif // _TARGET_AMD64_ && UNIX_AMD64_ABI protected: DWORD m_dwFlags; // Cached flags @@ -559,6 +637,10 @@ protected: CorElementType m_argType; int m_argSize; TypeHandle m_argTypeHandle; +#if defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + ArgLocDesc m_argLocDescForStructInRegs; + bool m_hasArgLocDescForStructInRegs; +#endif // _TARGET_AMD64_ && UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING #ifdef _TARGET_X86_ int m_curOfs; // Current position of the stack iterator #endif #ifdef _TARGET_AMD64_ #ifdef UNIX_AMD64_ABI - int m_idxGenReg; - int m_idxStack; - int m_idxFPReg; + int m_idxGenReg; // Next general register to be assigned a value + int m_idxStack; // Next stack slot to be assigned a value + int m_idxFPReg; // Next floating point register to be assigned a value +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + bool m_fArgInRegisters; // Indicates that the current argument is stored in registers +#endif #else int m_curOfs; // Current position of the stack iterator #endif @@ -843,6 +928,10 @@ int ArgIteratorTemplate<ARGITERATOR_BASE>::GetNextOffset() m_argSize = argSize; m_argTypeHandle = thValueType; +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +
m_hasArgLocDescForStructInRegs = false; +#endif + #ifdef _TARGET_X86_ #ifdef FEATURE_INTERPRETER if (m_fUnmanagedCallConv) @@ -862,7 +951,12 @@ int ArgIteratorTemplate<ARGITERATOR_BASE>::GetNextOffset() return m_curOfs; #elif defined(_TARGET_AMD64_) #ifdef UNIX_AMD64_ABI + + m_fArgInRegisters = true; + int cFPRegs = 0; + int cbArg = StackElemSize(argSize); + int cGenRegs = cbArg / 8; // GP reg size switch (argType) { @@ -879,8 +973,56 @@ int ArgIteratorTemplate<ARGITERATOR_BASE>::GetNextOffset() case ELEMENT_TYPE_VALUETYPE: { - // UNIXTODO: Passing of structs, HFAs. For now, use the Windows convention. - argSize = sizeof(TADDR); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + MethodTable *pMT = m_argTypeHandle.AsMethodTable(); + if (pMT->IsRegPassedStruct()) + { + EEClass* eeClass = pMT->GetClass(); + cGenRegs = 0; + for (int i = 0; i < eeClass->GetNumberEightBytes(); i++) + { + switch (eeClass->GetEightByteClassification(i)) + { + case SystemVClassificationTypeInteger: + case SystemVClassificationTypeIntegerReference: + cGenRegs++; + break; + case SystemVClassificationTypeSSE: + cFPRegs++; + break; + default: + _ASSERTE(false); + break; + } + } + + // Check if we have enough registers available for the struct passing + if ((cFPRegs + m_idxFPReg <= NUM_FLOAT_ARGUMENT_REGISTERS) && (cGenRegs + m_idxGenReg) <= NUM_ARGUMENT_REGISTERS) + { + m_argLocDescForStructInRegs.Init(); + m_argLocDescForStructInRegs.m_cGenReg = cGenRegs; + m_argLocDescForStructInRegs.m_cFloatReg = cFPRegs; + m_argLocDescForStructInRegs.m_idxGenReg = m_idxGenReg; + m_argLocDescForStructInRegs.m_idxFloatReg = m_idxFPReg; + m_argLocDescForStructInRegs.m_eeClass = eeClass; + + m_hasArgLocDescForStructInRegs = true; + + m_idxGenReg += cGenRegs; + m_idxFPReg += cFPRegs; + + return TransitionBlock::StructInRegsOffset; + } + } + + // Set the register counts to indicate that this argument will not be passed in registers + cFPRegs = 0; + cGenRegs = 0; + +#else // FEATURE_UNIX_AMD64_STRUCT_PASSING + argSize = sizeof(TADDR); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + break; } @@ -888,33 +1030,31 @@ int ArgIteratorTemplate<ARGITERATOR_BASE>::GetNextOffset() break; } - int cbArg = StackElemSize(argSize); - int cArgSlots = cbArg / STACK_ELEM_SIZE; - - if (cFPRegs>0) + if ((cFPRegs > 0) && (cFPRegs + m_idxFPReg <= NUM_FLOAT_ARGUMENT_REGISTERS)) { - if (cFPRegs + m_idxFPReg <= 8) - { - int argOfs = TransitionBlock::GetOffsetOfFloatArgumentRegisters() + m_idxFPReg * 8; - m_idxFPReg += cFPRegs; - return argOfs; - } + int argOfs = TransitionBlock::GetOffsetOfFloatArgumentRegisters() + m_idxFPReg * 8; + m_idxFPReg += cFPRegs; + return argOfs; } - else + else if ((cGenRegs > 0) && (m_idxGenReg + cGenRegs <= NUM_ARGUMENT_REGISTERS)) { - if (m_idxGenReg + cArgSlots <= 6) - { - int argOfs = TransitionBlock::GetOffsetOfArgumentRegisters() + m_idxGenReg * 8; - m_idxGenReg += cArgSlots; - return argOfs; - } + int argOfs = TransitionBlock::GetOffsetOfArgumentRegisters() + m_idxGenReg * 8; + m_idxGenReg += cGenRegs; + return argOfs; } - int argOfs = TransitionBlock::GetOffsetOfArgs() + m_idxStack * 8; +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + m_fArgInRegisters = false; +#endif + + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_idxStack * STACK_ELEM_SIZE; + + int cArgSlots = cbArg / STACK_ELEM_SIZE; m_idxStack += cArgSlots; + return argOfs; #else - // Each argument takes exactly one slot on AMD64 + // Each argument takes exactly one slot on AMD64 on Windows int argOfs = m_curOfs; m_curOfs += sizeof(void *); 
return argOfs; @@ -1203,6 +1343,40 @@ void ArgIteratorTemplate<ARGITERATOR_BASE>::ComputeReturnFlags() { _ASSERTE(!thValueType.IsNull()); +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + MethodTable *pMT = thValueType.AsMethodTable(); + if (pMT->IsRegPassedStruct()) + { + EEClass* eeClass = pMT->GetClass(); + + if (eeClass->GetNumberEightBytes() == 1) + { + // Structs occupying just one eightbyte are treated as int / double + if (eeClass->GetEightByteClassification(0) == SystemVClassificationTypeSSE) + { + flags |= sizeof(double) << RETURN_FP_SIZE_SHIFT; + } + } + else + { + // Size of the struct is 16 bytes + flags |= (16 << RETURN_FP_SIZE_SHIFT); + // The lowest two bits of the size encode the order of the int and SSE fields + if (eeClass->GetEightByteClassification(0) == SystemVClassificationTypeSSE) + { + flags |= (1 << RETURN_FP_SIZE_SHIFT); + } + + if (eeClass->GetEightByteClassification(1) == SystemVClassificationTypeSSE) + { + flags |= (2 << RETURN_FP_SIZE_SHIFT); + } + } + + break; + } +#else // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING + #ifdef FEATURE_HFA if (thValueType.IsHFA() && !this->IsVarArg()) { @@ -1229,6 +1403,7 @@ void ArgIteratorTemplate<ARGITERATOR_BASE>::ComputeReturnFlags() if (size <= ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE) break; +#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING } #endif // ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE @@ -1348,22 +1523,32 @@ void ArgIteratorTemplate<ARGITERATOR_BASE>::ForceSigWalk() int maxOffset = TransitionBlock::GetOffsetOfArgs(); - int ofs; + int ofs; while (TransitionBlock::InvalidOffset != (ofs = GetNextOffset())) { int stackElemSize; #ifdef _TARGET_AMD64_ +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (m_fArgInRegisters) + { + // Arguments passed in registers don't consume any stack + continue; + } + + stackElemSize = StackElemSize(GetArgSize()); +#else // FEATURE_UNIX_AMD64_STRUCT_PASSING // All stack arguments take just one stack slot on AMD64 because of arguments bigger // than a stack slot are passed by reference. stackElemSize = STACK_ELEM_SIZE; -#else +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#else // _TARGET_AMD64_ stackElemSize = StackElemSize(GetArgSize()); #if defined(ENREGISTERED_PARAMTYPE_MAXSIZE) if (IsArgPassedByRef()) stackElemSize = STACK_ELEM_SIZE; #endif -#endif +#endif // _TARGET_AMD64_ int endOfs = ofs + stackElemSize; if (endOfs > maxOffset) diff --git a/src/vm/class.cpp b/src/vm/class.cpp index 932f8bed00..f45e6ebbfa 100644 --- a/src/vm/class.cpp +++ b/src/vm/class.cpp @@ -1679,7 +1679,7 @@ CorElementType MethodTable::GetHFAType() default: // This should never happen. MethodTable::IsHFA() should be set only on types - // that have a valid HFA type + // that have a valid HFA type when the flag is used to track HFA status. _ASSERTE(false); return ELEMENT_TYPE_END; } diff --git a/src/vm/class.h b/src/vm/class.h index 758a0dbaee..c53cf8ba72 100644 --- a/src/vm/class.h +++ b/src/vm/class.h @@ -428,21 +428,26 @@ class EEClassLayoutInfo // to its unmanaged counterpart (i.e. no internal reference fields, // no ansi-unicode char conversions required, etc.) Used to // optimize marshaling. - e_BLITTABLE = 0x01, + e_BLITTABLE = 0x01, // Post V1.0 addition: Is this type also sequential in managed memory? - e_MANAGED_SEQUENTIAL = 0x02, + e_MANAGED_SEQUENTIAL = 0x02, // When a sequential/explicit type has no fields, it is conceptually // zero-sized, but actually is 1 byte in length. 
This holds onto this // fact and allows us to revert the 1 byte of padding when another // explicit type inherits from this type. - e_ZERO_SIZED = 0x04, + e_ZERO_SIZED = 0x04, // The size of the struct is explicitly specified in the meta-data. - e_HAS_EXPLICIT_SIZE = 0x08, - + e_HAS_EXPLICIT_SIZE = 0x08, +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF +#ifdef FEATURE_HFA +#error Can't have FEATURE_HFA and FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF defined at the same time. +#endif // FEATURE_HFA + e_NATIVE_PASS_IN_REGISTERS = 0x10, // Flag whether a native struct is passed in registers. +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF #ifdef FEATURE_HFA // HFA type of the unmanaged layout - e_R4_HFA = 0x10, - e_R8_HFA = 0x20, + e_R4_HFA = 0x10, + e_R8_HFA = 0x20, #endif }; @@ -527,6 +532,14 @@ class EEClassLayoutInfo return m_cbPackingSize; } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF + bool IsNativeStructPassedInRegisters() + { + LIMITED_METHOD_CONTRACT; + return (m_bFlags & e_NATIVE_PASS_IN_REGISTERS) != 0; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF + #ifdef FEATURE_HFA bool IsNativeHFA() { @@ -579,6 +592,14 @@ class EEClassLayoutInfo m_bFlags |= (hfaType == ELEMENT_TYPE_R4) ? e_R4_HFA : e_R8_HFA; } #endif +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF + void SetNativeStructPassedInRegisters() + { + LIMITED_METHOD_CONTRACT; + m_bFlags |= e_NATIVE_PASS_IN_REGISTERS; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF + }; @@ -713,6 +734,15 @@ class EEClassOptionalFields SecurityProperties m_SecProps; +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Number of eightBytes in the following arrays + int m_numberEightBytes; + // Classification of the eightBytes + SystemVClassificationType m_eightByteClassifications[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS]; + // Size of the data in the eightBytes + unsigned int m_eightByteSizes[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS]; +#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING + // Set default values for optional fields. inline void Init(); }; @@ -1811,6 +1841,45 @@ public: GetOptionalFields()->m_dwReliabilityContract = dwValue; } +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Get number of eightbytes used by a struct passed in registers. + inline int GetNumberEightBytes() + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(HasOptionalFields()); + return GetOptionalFields()->m_numberEightBytes; + } + + // Get eightbyte classification for the eightbyte with the specified index. + inline SystemVClassificationType GetEightByteClassification(int index) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(HasOptionalFields()); + return GetOptionalFields()->m_eightByteClassifications[index]; + } + + // Get size of the data in the eightbyte with the specified index.
+ inline unsigned int GetEightByteSize(int index) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(HasOptionalFields()); + return GetOptionalFields()->m_eightByteSizes[index]; + } + + // Set the eightByte classification + inline void SetEightByteClassification(int eightByteCount, SystemVClassificationType *eightByteClassifications, unsigned int *eightByteSizes) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(HasOptionalFields()); + GetOptionalFields()->m_numberEightBytes = eightByteCount; + for (int i = 0; i < eightByteCount; i++) + { + GetOptionalFields()->m_eightByteClassifications[i] = eightByteClassifications[i]; + GetOptionalFields()->m_eightByteSizes[i] = eightByteSizes[i]; + } + } +#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING + #ifdef FEATURE_COMINTEROP inline TypeHandle GetCoClassForInterface() { diff --git a/src/vm/class.inl b/src/vm/class.inl index 12c5230fd2..a4c8276476 100644 --- a/src/vm/class.inl +++ b/src/vm/class.inl @@ -53,6 +53,9 @@ inline void EEClassOptionalFields::Init() m_cbModuleDynamicID = MODULE_NON_DYNAMIC_STATICS; m_dwReliabilityContract = RC_NULL; m_SecProps = 0; +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + m_numberEightBytes = 0; +#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING } #endif // !DACCESS_COMPILE diff --git a/src/vm/comdelegate.cpp b/src/vm/comdelegate.cpp index a6c7e063b1..80742cdaca 100644 --- a/src/vm/comdelegate.cpp +++ b/src/vm/comdelegate.cpp @@ -72,37 +72,149 @@ static UINT16 ShuffleOfs(INT ofs, UINT stackSizeDelta = 0) #else // Portable default implementation -// Helpers used when calculating shuffle array entries in GenerateShuffleArray below. - -// Return true if the current argument still has slots left to shuffle in general registers or on the stack -// (currently we never shuffle floating point registers since there's no need). -static bool AnythingToShuffle(ArgLocDesc * pArg) +// Iterator for extracting shuffle entries for an argument described by an ArgLocDesc. +// Used when calculating shuffle array entries in GenerateShuffleArray below. +class ShuffleIterator { - return (pArg->m_cGenReg > 0) || (pArg->m_cStack > 0); -} + // Argument location description + ArgLocDesc* m_argLocDesc; +
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Current eightByte used for struct arguments in registers + int m_currentEightByte; +#endif + // Current general purpose register index (relative to the ArgLocDesc::m_idxGenReg) + int m_currentGenRegIndex; + // Current floating point register index (relative to the ArgLocDesc::m_idxFloatReg) + int m_currentFloatRegIndex; + // Current stack slot index (relative to the ArgLocDesc::m_idxStack) + int m_currentStackSlotIndex; + +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // Get next shuffle offset for struct passed in registers. There has to be at least one offset left. + UINT16 GetNextOfsInStruct() + { + EEClass* eeClass = m_argLocDesc->m_eeClass; + _ASSERTE(eeClass != NULL); + + if (m_currentEightByte < eeClass->GetNumberEightBytes()) + { + SystemVClassificationType eightByte = eeClass->GetEightByteClassification(m_currentEightByte); + unsigned int eightByteSize = eeClass->GetEightByteSize(m_currentEightByte); -// Return an encoded shuffle entry describing a general register or stack offset that needs to be shuffled.
-static UINT16 ShuffleOfs(ArgLocDesc * pArg) -{ - // Shuffle any registers first (the order matters since otherwise we could end up shuffling a stack slot - // over a register we later need to shuffle down as well). - if (pArg->m_cGenReg > 0) - { - pArg->m_cGenReg--; - return (UINT16)(ShuffleEntry::REGMASK | pArg->m_idxGenReg++); + m_currentEightByte++; + + int index; + UINT16 mask = ShuffleEntry::REGMASK; + + if (eightByte == SystemVClassificationTypeSSE) + { + _ASSERTE(m_currentFloatRegIndex < m_argLocDesc->m_cFloatReg); + index = m_argLocDesc->m_idxFloatReg + m_currentFloatRegIndex; + m_currentFloatRegIndex++; + + mask |= ShuffleEntry::FPREGMASK; + if (eightByteSize == 4) + { + mask |= ShuffleEntry::FPSINGLEMASK; + } + } + else + { + _ASSERTE(m_currentGenRegIndex < m_argLocDesc->m_cGenReg); + index = m_argLocDesc->m_idxGenReg + m_currentGenRegIndex; + m_currentGenRegIndex++; + } + + return (UINT16)index | mask; + } + + // There are no more offsets to get, the caller should not have called us + _ASSERTE(false); + return 0; } +#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING - // If we get here we must have at least one stack slot left to shuffle (this method should only be called - // when AnythingToShuffle(pArg) == true). - _ASSERTE(pArg->m_cStack > 0); - pArg->m_cStack--; +public: - // Delegates cannot handle overly large argument stacks due to shuffle entry encoding limitations. - if (pArg->m_idxStack >= ShuffleEntry::REGMASK) - COMPlusThrow(kNotSupportedException); + // Construct the iterator for the ArgLocDesc + ShuffleIterator(ArgLocDesc* argLocDesc) + : + m_argLocDesc(argLocDesc), +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + m_currentEightByte(0), +#endif + m_currentGenRegIndex(0), + m_currentFloatRegIndex(0), + m_currentStackSlotIndex(0) + { + } - return (UINT16)(pArg->m_idxStack++); -} + // Check if there are more offsets to shuffle + bool HasNextOfs() + { + return (m_currentGenRegIndex < m_argLocDesc->m_cGenReg) || +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + (m_currentFloatRegIndex < m_argLocDesc->m_cFloatReg) || +#endif + (m_currentStackSlotIndex < m_argLocDesc->m_cStack); + } + + // Get next offset to shuffle. There has to be at least one offset left. + UINT16 GetNextOfs() + { + int index; + +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + + // Check if the argLocDesc is for a struct in registers + EEClass* eeClass = m_argLocDesc->m_eeClass; + if (eeClass != NULL) + { + return GetNextOfsInStruct(); + } + + // Shuffle float registers first + if (m_currentFloatRegIndex < m_argLocDesc->m_cFloatReg) + { + index = m_argLocDesc->m_idxFloatReg + m_currentFloatRegIndex; + m_currentFloatRegIndex++; + + return (UINT16)index | ShuffleEntry::REGMASK | ShuffleEntry::FPREGMASK; + } +#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING + + // Shuffle any registers first (the order matters since otherwise we could end up shuffling a stack slot + // over a register we later need to shuffle down as well). + if (m_currentGenRegIndex < m_argLocDesc->m_cGenReg) + { + index = m_argLocDesc->m_idxGenReg + m_currentGenRegIndex; + m_currentGenRegIndex++; + + return (UINT16)index | ShuffleEntry::REGMASK; + } + + // If we get here we must have at least one stack slot left to shuffle (this method should only be called + // when HasNextOfs() returns true).
+ if (m_currentStackSlotIndex < m_argLocDesc->m_cStack) + { + index = m_argLocDesc->m_idxStack + m_currentStackSlotIndex; + m_currentStackSlotIndex++; + + // Delegates cannot handle overly large argument stacks due to shuffle entry encoding limitations. + if (index >= ShuffleEntry::REGMASK) + { + COMPlusThrow(kNotSupportedException); + } + + return (UINT16)index; + } + + // There are no more offsets to get, the caller should not have called us + _ASSERTE(false); + return 0; + } +}; #endif @@ -247,8 +359,11 @@ VOID GenerateShuffleArray(MethodDesc* pInvoke, MethodDesc *pTargetMeth, SArray<S sArgPlacerSrc.GetThisLoc(&sArgDst); - entry.srcofs = ShuffleOfs(&sArgSrc); - entry.dstofs = ShuffleOfs(&sArgDst); + ShuffleIterator iteratorSrc(&sArgSrc); + ShuffleIterator iteratorDst(&sArgDst); + + entry.srcofs = iteratorSrc.GetNextOfs(); + entry.dstofs = iteratorDst.GetNextOfs(); pShuffleEntryArray->Append(entry); } @@ -261,8 +376,11 @@ VOID GenerateShuffleArray(MethodDesc* pInvoke, MethodDesc *pTargetMeth, SArray<S sArgPlacerSrc.GetRetBuffArgLoc(&sArgSrc); sArgPlacerDst.GetRetBuffArgLoc(&sArgDst); - entry.srcofs = ShuffleOfs(&sArgSrc); - entry.dstofs = ShuffleOfs(&sArgDst); + ShuffleIterator iteratorSrc(&sArgSrc); + ShuffleIterator iteratorDst(&sArgDst); + + entry.srcofs = iteratorSrc.GetNextOfs(); + entry.dstofs = iteratorDst.GetNextOfs(); // Depending on the type of target method (static vs instance) the return buffer argument may end up // in the same register in both signatures. So we only commit the entry (by moving the entry pointer @@ -271,34 +389,76 @@ VOID GenerateShuffleArray(MethodDesc* pInvoke, MethodDesc *pTargetMeth, SArray<S pShuffleEntryArray->Append(entry); } - // Iterate all the regular arguments. mapping source registers and stack locations to the corresponding - // destination locations. - while ((ofsSrc = sArgPlacerSrc.GetNextOffset()) != TransitionBlock::InvalidOffset) - { - ofsDst = sArgPlacerDst.GetNextOffset(); +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // The shuffle entries are produced in two passes on Unix AMD64. The first pass generates shuffle entries for + // all cases except shuffling a struct argument from the stack to registers, which is performed in the second pass. + // The reason is that if such a struct argument contained a floating point field and it was followed by a + // floating point argument, generating code for transferring the struct from the stack into registers would + // overwrite the xmm register of the floating point argument before it could actually be shuffled. + // For example, consider this case: + // struct S { int x; float y; }; + // void fn(long a, long b, long c, long d, long e, S f, float g); + // src: rdi = this, rsi = a, rdx = b, rcx = c, r8 = d, r9 = e, stack: f, xmm0 = g + // dst: rdi = a, rsi = b, rdx = c, rcx = d, r8 = e, r9 = S.x, xmm0 = S.y, xmm1 = g + for (int pass = 0; pass < 2; pass++) +#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING + { + // Iterate all the regular arguments, mapping source registers and stack locations to the corresponding + // destination locations. + while ((ofsSrc = sArgPlacerSrc.GetNextOffset()) != TransitionBlock::InvalidOffset) + { + ofsDst = sArgPlacerDst.GetNextOffset(); - // Find the argument location mapping for both source and destination signature. A single argument can - // occupy a floating point register (in which case we don't need to do anything, they're not shuffled) - // or some combination of general registers and the stack.
- sArgPlacerSrc.GetArgLoc(ofsSrc, &sArgSrc); - sArgPlacerDst.GetArgLoc(ofsDst, &sArgDst); +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + bool shuffleStructFromStackToRegs = (ofsSrc != TransitionBlock::StructInRegsOffset) && (ofsDst == TransitionBlock::StructInRegsOffset); + if (((pass == 0) && shuffleStructFromStackToRegs) || + ((pass == 1) && !shuffleStructFromStackToRegs)) + { + continue; + } +#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING + // Find the argument location mapping for both source and destination signature. A single argument can + // occupy a floating point register (in which case we don't need to do anything, they're not shuffled) + // or some combination of general registers and the stack. + sArgPlacerSrc.GetArgLoc(ofsSrc, &sArgSrc); + sArgPlacerDst.GetArgLoc(ofsDst, &sArgDst); + + ShuffleIterator iteratorSrc(&sArgSrc); + ShuffleIterator iteratorDst(&sArgDst); + + // Shuffle each slot in the argument (register or stack slot) from source to destination. + while (iteratorSrc.HasNextOfs()) + { + // Locate the next slot to shuffle in the source and destination and encode the transfer into a + // shuffle entry. + entry.srcofs = iteratorSrc.GetNextOfs(); + entry.dstofs = iteratorDst.GetNextOfs(); + + // Only emit this entry if it's not a no-op (i.e. the source and destination locations are + // different). + if (entry.srcofs != entry.dstofs) + pShuffleEntryArray->Append(entry); + } - // Shuffle each slot in the argument (register or stack slot) from source to destination. - while (AnythingToShuffle(&sArgSrc)) + // We should have run out of slots to shuffle in the destination at the same time as the source. + _ASSERTE(!iteratorDst.HasNextOfs()); + } +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (pass == 0) { - // Locate the next slot to shuffle in the source and destination and encode the transfer into a - // shuffle entry. - entry.srcofs = ShuffleOfs(&sArgSrc); - entry.dstofs = ShuffleOfs(&sArgDst); + // Reset the iterator for the 2nd pass + sSigSrc.Reset(); + sSigDst.Reset(); - // Only emit this entry if it's not a no-op (i.e. the source and destination locations are - // different). - if (entry.srcofs != entry.dstofs) - pShuffleEntryArray->Append(entry); - } + sArgPlacerSrc = ArgIterator(&sSigSrc); + sArgPlacerDst = ArgIterator(&sSigDst); - // We should have run out of slots to shuffle in the destination at the same time as the source. - _ASSERTE(!AnythingToShuffle(&sArgDst)); + if (sSigDst.HasThis()) + { + sArgPlacerSrc.GetNextOffset(); + } + } +#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING } entry.srcofs = ShuffleEntry::SENTINEL; @@ -1323,7 +1483,7 @@ OBJECTREF COMDelegate::ConvertToDelegate(LPVOID pCallback, MethodTable* pMT) // Lookup the callsite in the hash, if found, we can map this call back to its managed function. // Otherwise, we'll treat this as an unmanaged callsite. - // Make sure that the pointer doesn't have the value of 1 which is our hash table deleted item marker. + // Make sure that the pointer doesn't have the value of 1 which is our hash table deleted item marker. LPVOID DelegateHnd = (pUMEntryThunk != NULL) && ((UPTR)pUMEntryThunk != (UPTR)1) ? 
COMDelegate::s_pDelegateToFPtrHash->LookupValue((UPTR)pUMEntryThunk, 0) : (LPVOID)INVALIDENTRY; diff --git a/src/vm/comdelegate.h b/src/vm/comdelegate.h index cfb9afa783..ab8ca04338 100644 --- a/src/vm/comdelegate.h +++ b/src/vm/comdelegate.h @@ -211,10 +211,14 @@ void DistributeUnhandledExceptionReliably(OBJECTREF *pDelegate, // signature. struct ShuffleEntry { + // Offset masks and special values enum { - REGMASK = 0x8000, - OFSMASK = 0x7fff, - SENTINEL = 0xffff, + REGMASK = 0x8000, // Register offset bit + FPREGMASK = 0x4000, // Floating point register bit + FPSINGLEMASK = 0x2000, // Single precision floating point register + OFSMASK = 0x7fff, // Mask to get stack offset + OFSREGMASK = 0x1fff, // Mask to get register index + SENTINEL = 0xffff, // Indicates end of shuffle array }; #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) @@ -224,17 +228,11 @@ struct ShuffleEntry }; #else - // Special values: - // -1 - indicates end of shuffle array: stacksizedelta - // == difference in stack size between virtual and static sigs. - // high bit - indicates a register argument: mask it off and - // the result is an offset into ArgumentRegisters. - UINT16 srcofs; union { UINT16 dstofs; //if srcofs != SENTINEL - UINT16 stacksizedelta; //if dstofs == SENTINEL + UINT16 stacksizedelta; //if dstofs == SENTINEL, difference in stack size between virtual and static sigs }; #endif // _TARGET_AMD64_ }; diff --git a/src/vm/compile.cpp b/src/vm/compile.cpp index 5b33792d35..23242df1db 100644 --- a/src/vm/compile.cpp +++ b/src/vm/compile.cpp @@ -76,6 +76,8 @@ #endif #include "tritonstress.h" +#include "argdestination.h" + #ifdef CROSSGEN_COMPILE CompilationDomain * theDomain; #endif @@ -1483,7 +1485,8 @@ void FakeGcScanRoots(MetaSig& msig, ArgIterator& argit, MethodDesc * pMD, BYTE * int argOffset; while ((argOffset = argit.GetNextOffset()) != TransitionBlock::InvalidOffset) { - msig.GcScanRoots(pFrame + argOffset, &FakePromote, &sc, &FakePromoteCarefully); + ArgDestination argDest(pFrame, argOffset, argit.GetArgLocDescForStructInRegs()); + msig.GcScanRoots(&argDest, &FakePromote, &sc, &FakePromoteCarefully); } } @@ -1933,7 +1936,17 @@ BOOL CanDeduplicateCode(CORINFO_METHOD_HANDLE method, CORINFO_METHOD_HANDLE dupl return FALSE; #endif // _TARGET_X86_ - if (pMethod->ReturnsObject() != pDuplicateMethod->ReturnsObject()) + MetaSig::RETURNTYPE returnType = pMethod->ReturnsObject(); + MetaSig::RETURNTYPE returnTypeDuplicate = pDuplicateMethod->ReturnsObject(); + + if (returnType != returnTypeDuplicate) + return FALSE; + + // + // Do not enable deduplication of structs returned in registers + // + + if (returnType == MetaSig::RETVALUETYPE) + return FALSE; // diff --git a/src/vm/crossdomaincalls.cpp b/src/vm/crossdomaincalls.cpp index fa04b57faa..dd695fe5f1 100644 --- a/src/vm/crossdomaincalls.cpp +++ b/src/vm/crossdomaincalls.cpp @@ -1264,7 +1264,7 @@ CrossDomainChannel::BlitAndCall() MetaSig mSig(m_pCliMD, thDeclaringType); ArgIterator argit(&mSig); - int offset; + int offset; while (TransitionBlock::InvalidOffset != (offset = argit.GetNextOffset())) { int regArgNum = TransitionBlock::GetArgumentIndexFromOffset(offset); @@ -2068,7 +2068,7 @@ CrossDomainChannel::MarshalAndCall() CDC_DETERMINE_DECLARING_TYPE(m_pCliMD, TypeHandle(CTPMethodTable::GetMethodTableBeingProxied(m_pFrame->GetThis()))); MetaSig mSig(m_pCliMD, thDeclaringType); ArgIterator argit(&mSig); - int ofs; + int ofs; // NumFixedArgs() doesn't count the "this" object, but SizeOfFrameArgumentArray() does.
dwNumArgs = mSig.NumFixedArgs(); @@ -2141,7 +2141,7 @@ CrossDomainChannel::MarshalAndCall() TADDR pTransitionBlock = m_pFrame->GetTransitionBlock(); for (int argNum = 0; - TransitionBlock::InvalidOffset != (ofs = argit.GetNextOffset()); + TransitionBlock::InvalidOffset != (ofs = argit.GetNextOffset()); argNum++ ) { diff --git a/src/vm/eetwain.cpp b/src/vm/eetwain.cpp index 5df7b6305a..dbbfac9000 100644 --- a/src/vm/eetwain.cpp +++ b/src/vm/eetwain.cpp @@ -18,6 +18,7 @@ #include "gcinfodecoder.h" #endif +#include "argdestination.h" #define X86_INSTR_W_TEST_ESP 0x4485 // test [esp+N], eax #define X86_INSTR_TEST_ESP_SIB 0x24 @@ -4071,7 +4072,10 @@ void promoteVarArgs(PTR_BYTE argsStart, PTR_VASigCookie varArgSig, GCCONTEXT* ct // if skipFixedArgs is false we report all arguments // otherwise we just report the varargs. if (!skipFixedArgs || inVarArgs) - msig.GcScanRoots(pFrameBase + argOffset, ctx->f, ctx->sc); + { + ArgDestination argDest(pFrameBase, argOffset, argit.GetArgLocDescForStructInRegs()); + msig.GcScanRoots(&argDest, ctx->f, ctx->sc); + } } } diff --git a/src/vm/fcall.h b/src/vm/fcall.h index 2bf6080706..8cfcc3e68e 100644 --- a/src/vm/fcall.h +++ b/src/vm/fcall.h @@ -1318,9 +1318,8 @@ typedef UINT16 FC_UINT16_RET; // FC_TypedByRef should be used for TypedReferences in FCall signatures -#ifdef UNIX_AMD64_ABI +#if defined(UNIX_AMD64_ABI) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // Explicitly pass the TypedReferences by reference -// UNIXTODO: Remove once the proper managed calling convention for struct is in place #define FC_TypedByRef TypedByRef& #define FC_DECIMAL DECIMAL& #else diff --git a/src/vm/field.h b/src/vm/field.h index a278c4d12c..9fc5583c2f 100644 --- a/src/vm/field.h +++ b/src/vm/field.h @@ -223,7 +223,6 @@ public: DWORD GetOffset() { LIMITED_METHOD_DAC_CONTRACT; - g_IBCLogger.LogFieldDescsAccess(this); return GetOffset_NoLogging(); } diff --git a/src/vm/fieldmarshaler.h b/src/vm/fieldmarshaler.h index d67637e27c..ee464e4c05 100644 --- a/src/vm/fieldmarshaler.h +++ b/src/vm/fieldmarshaler.h @@ -396,7 +396,7 @@ public: m_dwExternalOffset = dwExternalOffset; } - UINT32 GetExternalOffset() + UINT32 GetExternalOffset() const { LIMITED_METHOD_CONTRACT; return m_dwExternalOffset; diff --git a/src/vm/frames.cpp b/src/vm/frames.cpp index 1c7f2f4348..f4d96e5f5d 100644 --- a/src/vm/frames.cpp +++ b/src/vm/frames.cpp @@ -45,6 +45,8 @@ #include "interpreter.h" #endif // FEATURE_INTERPRETER +#include "argdestination.h" + #if CHECK_APP_DOMAIN_LEAKS #define CHECK_APP_DOMAIN GC_CALL_CHECK_APP_DOMAIN #else @@ -1278,7 +1280,8 @@ void TransitionFrame::PromoteCallerStackHelper(promote_func* fn, ScanContext* sc int argOffset; while ((argOffset = argit.GetNextOffset()) != TransitionBlock::InvalidOffset) { - pmsig->GcScanRoots(dac_cast<PTR_VOID>(pTransitionBlock + argOffset), fn, sc); + ArgDestination argDest(dac_cast<PTR_VOID>(pTransitionBlock), argOffset, argit.GetArgLocDescForStructInRegs()); + pmsig->GcScanRoots(&argDest, fn, sc); } } diff --git a/src/vm/i386/stublinkerx86.cpp b/src/vm/i386/stublinkerx86.cpp index e42f7d792f..b86151243c 100644 --- a/src/vm/i386/stublinkerx86.cpp +++ b/src/vm/i386/stublinkerx86.cpp @@ -4001,16 +4001,49 @@ VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray) { // If source is present in register then destination must also be a register _ASSERTE(pEntry->dstofs & ShuffleEntry::REGMASK); + // Both the srcofs and dstofs must be of the same kind of registers - float or general purpose. 
+ _ASSERTE((pEntry->dstofs & ShuffleEntry::FPREGMASK) == (pEntry->srcofs & ShuffleEntry::FPREGMASK)); - X86EmitMovRegReg(c_argRegs[pEntry->dstofs & ShuffleEntry::OFSMASK], c_argRegs[pEntry->srcofs & ShuffleEntry::OFSMASK]); + int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK; + int srcRegIndex = pEntry->srcofs & ShuffleEntry::OFSREGMASK; + + if (pEntry->srcofs & ShuffleEntry::FPREGMASK) + { + // movdqa dstReg, srcReg + X64EmitMovXmmXmm((X86Reg)(kXMM0 + dstRegIndex), (X86Reg)(kXMM0 + srcRegIndex)); + } + else + { + // mov dstReg, srcReg + X86EmitMovRegReg(c_argRegs[dstRegIndex], c_argRegs[srcRegIndex]); + } } else if (pEntry->dstofs & ShuffleEntry::REGMASK) { // source must be on the stack _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK)); - // mov dstreg, [rax + src] - X86EmitIndexRegLoad(c_argRegs[pEntry->dstofs & ShuffleEntry::OFSMASK], SCRATCH_REGISTER_X86REG, (pEntry->srcofs + 1) * sizeof(void*)); + int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK; + int srcOffset = (pEntry->srcofs + 1) * sizeof(void*); + + if (pEntry->dstofs & ShuffleEntry::FPREGMASK) + { + if (pEntry->dstofs & ShuffleEntry::FPSINGLEMASK) + { + // movss dstReg, [rax + src] + X64EmitMovSSFromMem((X86Reg)(kXMM0 + dstRegIndex), SCRATCH_REGISTER_X86REG, srcOffset); + } + else + { + // movsd dstReg, [rax + src] + X64EmitMovSDFromMem((X86Reg)(kXMM0 + dstRegIndex), SCRATCH_REGISTER_X86REG, srcOffset); + } + } + else + { + // mov dstreg, [rax + src] + X86EmitIndexRegLoad(c_argRegs[dstRegIndex], SCRATCH_REGISTER_X86REG, srcOffset); + } } else { diff --git a/src/vm/ilmarshalers.h b/src/vm/ilmarshalers.h index 5a2453b603..1bd072f417 100644 --- a/src/vm/ilmarshalers.h +++ b/src/vm/ilmarshalers.h @@ -601,7 +601,7 @@ public: nativeSize = wNativeSize; } -#ifndef _TARGET_ARM_ +#if !defined(_TARGET_ARM_) && !(defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) switch (nativeSize) { case 1: typ = ELEMENT_TYPE_U1; break; diff --git a/src/vm/invokeutil.cpp b/src/vm/invokeutil.cpp index ee80056abe..e17458ce1d 100644 --- a/src/vm/invokeutil.cpp +++ b/src/vm/invokeutil.cpp @@ -28,6 +28,7 @@ #include "eeconfig.h" #include "generics.h" #include "runtimehandles.h" +#include "argdestination.h" #ifndef CROSSGEN_COMPILE @@ -130,7 +131,7 @@ void *InvokeUtil::GetIntPtrValue(OBJECTREF pObj) { RETURN *(void **)((pObj)->UnBox()); } -void InvokeUtil::CopyArg(TypeHandle th, OBJECTREF *pObjUNSAFE, void *pArgDst) { +void InvokeUtil::CopyArg(TypeHandle th, OBJECTREF *pObjUNSAFE, ArgDestination *argDest) { CONTRACTL { THROWS; GC_NOTRIGGER; // Caller does not protect object references @@ -140,7 +141,9 @@ void InvokeUtil::CopyArg(TypeHandle th, OBJECTREF *pObjUNSAFE, void *pArgDst) { INJECT_FAULT(COMPlusThrowOM()); } CONTRACTL_END; - + + void *pArgDst = argDest->GetDestinationAddress(); + OBJECTREF rObj = *pObjUNSAFE; MethodTable* pMT; CorElementType oType; @@ -204,12 +207,12 @@ void InvokeUtil::CopyArg(TypeHandle th, OBJECTREF *pObjUNSAFE, void *pArgDst) { case ELEMENT_TYPE_VALUETYPE: { - // If we got the univeral zero...Then assign it and exit.
if (rObj == 0) { - InitValueClass(pArgDst, th.AsMethodTable()); + InitValueClassArg(argDest, th.AsMethodTable()); } else { - if (!th.AsMethodTable()->UnBoxInto(pArgDst, rObj)) + if (!th.AsMethodTable()->UnBoxIntoArg(argDest, rObj)) COMPlusThrow(kArgumentException, W("Arg_ObjObj")); } break; diff --git a/src/vm/invokeutil.h b/src/vm/invokeutil.h index f2acb61f9e..14d7dc8e14 100644 --- a/src/vm/invokeutil.h +++ b/src/vm/invokeutil.h @@ -44,6 +44,7 @@ struct InterfaceMapData #include <poppack.h> class ReflectMethodList; +class ArgDestination; // Structure used to track security access checks efficiently when applied // across a range of methods, fields etc. @@ -114,7 +115,7 @@ class InvokeUtil { public: - static void CopyArg(TypeHandle th, OBJECTREF *obj, void *pArgDst); + static void CopyArg(TypeHandle th, OBJECTREF *obj, ArgDestination *argDest); // Given a type, this routine will convert an return value representing that // type into an ObjectReference. If the type is a primitive, the diff --git a/src/vm/jitinterface.cpp b/src/vm/jitinterface.cpp index ba6aebb3cc..442fb91186 100644 --- a/src/vm/jitinterface.cpp +++ b/src/vm/jitinterface.cpp @@ -58,7 +58,6 @@ #include "runtimehandles.h" #include "sigbuilder.h" #include "openum.h" - #ifdef HAVE_GCCOVER #include "gccover.h" #endif // HAVE_GCCOVER @@ -1651,7 +1650,6 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, DWORD fieldFlags = 0; pResult->offset = pField->GetOffset(); - if (pField->IsStatic()) { #ifdef FEATURE_LEGACYNETCF @@ -1850,7 +1848,6 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, if (!(flags & CORINFO_ACCESS_INLINECHECK)) { - //get the field's type. Grab the class for structs. pResult->fieldType = getFieldTypeInternal(pResolvedToken->hField, &pResult->structType, pResolvedToken->hClass); @@ -2568,9 +2565,82 @@ bool CEEInfo::getSystemVAmd64PassStructInRegisterDescriptor( /*IN*/ CORINFO_CLASS_HANDLE structHnd, /*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr) { - LIMITED_METHOD_CONTRACT; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF) + JIT_TO_EE_TRANSITION(); + + _ASSERTE(structPassInRegDescPtr != nullptr); + TypeHandle th(structHnd); + + // Make sure this is a value type. 
+ if (th.IsValueType()) + { + _ASSERTE(CorInfoType2UnixAmd64Classification(th.GetInternalCorElementType()) == SystemVClassificationTypeStruct); + + MethodTable* methodTablePtr = nullptr; + bool isNativeStruct = false; + if (!th.IsTypeDesc()) + { + methodTablePtr = th.AsMethodTable(); + _ASSERTE(methodTablePtr != nullptr); + } + else if (th.IsTypeDesc()) + { + if (th.IsNativeValueType()) + { + methodTablePtr = th.AsNativeValueType(); + isNativeStruct = true; + _ASSERTE(methodTablePtr != nullptr); + } + else + { + _ASSERTE(false && "Unhandled TypeHandle for struct!"); + } + } + + bool isPassableInRegs = false; + + if (isNativeStruct) + { + isPassableInRegs = methodTablePtr->GetLayoutInfo()->IsNativeStructPassedInRegisters(); + } + else + { + isPassableInRegs = methodTablePtr->IsRegPassedStruct(); + } + + if (!isPassableInRegs) + { + structPassInRegDescPtr->passedInRegisters = false; + } + else + { + structPassInRegDescPtr->passedInRegisters = true; + + SystemVStructRegisterPassingHelper helper((unsigned int)th.GetSize()); + bool result = methodTablePtr->ClassifyEightBytes(&helper, 0, 0); + + structPassInRegDescPtr->eightByteCount = helper.eightByteCount; + _ASSERTE(structPassInRegDescPtr->eightByteCount <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + for (unsigned int i = 0; i < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS; i++) + { + structPassInRegDescPtr->eightByteClassifications[i] = helper.eightByteClassifications[i]; + structPassInRegDescPtr->eightByteSizes[i] = helper.eightByteSizes[i]; + structPassInRegDescPtr->eightByteOffsets[i] = helper.eightByteOffsets[i]; + } + } + } + else + { + structPassInRegDescPtr->passedInRegisters = false; + } + + EE_TO_JIT_TRANSITION(); + + return true; +#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF) return false; +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF) } /*********************************************************************/ diff --git a/src/vm/message.cpp b/src/vm/message.cpp index d8bdb3d2c8..dab78f46e6 100644 --- a/src/vm/message.cpp +++ b/src/vm/message.cpp @@ -752,7 +752,7 @@ FCIMPL2(FC_BOOL_RET, CMessage::Dispatch, MessageObject* pMessageUNSAFE, Object* int ofs; while ((ofs = argit.GetNextOffset()) != TransitionBlock::InvalidOffset) { - if (TransitionBlock::IsFloatArgumentRegisterOffset(ofs)) + if (TransitionBlock::HasFloatRegister(ofs, argit.GetArgLocDescForStructInRegs())) { // Found a floating point argument register. 
The first time we find this we point // pFloatArgumentRegisters to the part of the frame where these values were spilled (we don't do @@ -772,7 +772,7 @@ FCIMPL2(FC_BOOL_RET, CMessage::Dispatch, MessageObject* pMessageUNSAFE, Object* DWORD_PTR dwRegTypeMap = 0; { - int ofs; + int ofs; while ((ofs = argit.GetNextOffset()) != TransitionBlock::InvalidOffset) { int regArgNum = TransitionBlock::GetArgumentIndexFromOffset(ofs); diff --git a/src/vm/method.cpp b/src/vm/method.cpp index 6926ce4b6e..3e7271b1fb 100644 --- a/src/vm/method.cpp +++ b/src/vm/method.cpp @@ -1396,8 +1396,9 @@ COR_ILMETHOD* MethodDesc::GetILHeader(BOOL fAllowOverrides /*=FALSE*/) //******************************************************************************* MetaSig::RETURNTYPE MethodDesc::ReturnsObject( #ifdef _DEBUG - bool supportStringConstructors + bool supportStringConstructors, #endif + MethodTable** pMT ) { CONTRACTL @@ -1439,7 +1440,19 @@ MetaSig::RETURNTYPE MethodDesc::ReturnsObject( if (!thValueType.IsTypeDesc()) { MethodTable * pReturnTypeMT = thValueType.AsMethodTable(); - if(pReturnTypeMT->ContainsPointers()) + if (pMT != NULL) + { + *pMT = pReturnTypeMT; + } + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (pReturnTypeMT->IsRegPassedStruct()) + { + return MetaSig::RETVALUETYPE; + } +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING + + if (pReturnTypeMT->ContainsPointers()) { _ASSERTE(pReturnTypeMT->GetNumInstanceFieldBytes() == sizeof(void*)); return MetaSig::RETOBJ; diff --git a/src/vm/method.hpp b/src/vm/method.hpp index 0f283e5c79..680662b94c 100644 --- a/src/vm/method.hpp +++ b/src/vm/method.hpp @@ -1611,8 +1611,9 @@ public: // does this function return an object reference? MetaSig::RETURNTYPE ReturnsObject( #ifdef _DEBUG - bool supportStringConstructors = false + bool supportStringConstructors = false, #endif + MethodTable** pMT = NULL ); diff --git a/src/vm/methodtable.cpp b/src/vm/methodtable.cpp index de660268e4..e632ce3700 100644 --- a/src/vm/methodtable.cpp +++ b/src/vm/methodtable.cpp @@ -39,9 +39,12 @@ #include "dbginterface.h" #include "comdelegate.h" #include "eventtrace.h" +#include "fieldmarshaler.h" + #ifdef FEATURE_REMOTING #include "remoting.h" #endif + #include "eeprofinterfaces.h" #include "dllimportcallback.h" #include "listlock.h" @@ -2275,6 +2278,916 @@ BOOL MethodTable::IsClassPreInited() #pragma optimize("", on) #endif // _MSC_VER +//======================================================================================== + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF) + +#if defined(_DEBUG) && defined(LOGGING) +static +const char* GetSystemVClassificationTypeName(SystemVClassificationType t) +{ + switch (t) + { + case SystemVClassificationTypeUnknown: return "Unknown"; + case SystemVClassificationTypeStruct: return "Struct"; + case SystemVClassificationTypeNoClass: return "NoClass"; + case SystemVClassificationTypeMemory: return "Memory"; + case SystemVClassificationTypeInteger: return "Integer"; + case SystemVClassificationTypeIntegerReference: return "IntegerReference"; + case SystemVClassificationTypeSSE: return "SSE"; + default: return "ERROR"; + } +}; +#endif // _DEBUG && LOGGING + +// If we have a field classification already, but there is a union, we must merge the classification type of the field. Returns the +// new, merged classification type. 
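+// For example (hypothetical overlap, for illustration only): in a union such as +// union IntOrFloat { int i; float f; }; +// both fields start at offset 0, so the Integer classification of 'i' merges with the SSE classification of 'f'. +// Per the rules below, Integer wins; only SSE merged with SSE stays SSE, and IntegerReference may only merge with IntegerReference.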
+/* static */ +SystemVClassificationType MethodTable::ReClassifyField(SystemVClassificationType originalClassification, SystemVClassificationType newFieldClassification) +{ + _ASSERTE((newFieldClassification == SystemVClassificationTypeInteger) || + (newFieldClassification == SystemVClassificationTypeIntegerReference) || + (newFieldClassification == SystemVClassificationTypeSSE)); + + switch (newFieldClassification) + { + case SystemVClassificationTypeInteger: + // Integer overrides everything; the resulting classification is Integer. Can't merge Integer and IntegerReference. + _ASSERTE((originalClassification == SystemVClassificationTypeInteger) || + (originalClassification == SystemVClassificationTypeSSE)); + + return SystemVClassificationTypeInteger; + + case SystemVClassificationTypeSSE: + // If the old and new classifications are both SSE, then the merge is SSE, otherwise it will be integer. Can't merge SSE and IntegerReference. + _ASSERTE((originalClassification == SystemVClassificationTypeInteger) || + (originalClassification == SystemVClassificationTypeSSE)); + + if (originalClassification == SystemVClassificationTypeSSE) + { + return SystemVClassificationTypeSSE; + } + else + { + return SystemVClassificationTypeInteger; + } + + case SystemVClassificationTypeIntegerReference: + // IntegerReference can only merge with IntegerReference. + _ASSERTE(originalClassification == SystemVClassificationTypeIntegerReference); + return SystemVClassificationTypeIntegerReference; + + default: + _ASSERTE(false); // Unexpected type. + return SystemVClassificationTypeUnknown; + } +} + +// Returns 'true' if the struct is passed in registers, 'false' otherwise. +bool MethodTable::ClassifyEightBytes(SystemVStructRegisterPassingHelperPtr helperPtr, unsigned int nestingLevel, unsigned int startOffsetOfStruct) +{ + CONTRACTL + { + THROWS; + GC_TRIGGERS; + SO_TOLERANT; + MODE_ANY; + } + CONTRACTL_END; + + WORD numIntroducedFields = GetNumIntroducedInstanceFields(); + + // It appears the VM gives a struct with no fields a size of 1. + // Don't pass such a structure in registers. + if (numIntroducedFields == 0) + { + return false; + } + + // No struct register passing with explicit layout. There may be cases where explicit layout may be still + // eligible for register struct passing, but it is hard to tell the real intent. Make it simple and just + // unconditionally disable register struct passing for explicit layout. + if (GetClass()->HasExplicitFieldOffsetLayout()) + { + LOG((LF_JIT, LL_EVERYTHING, "%*s**** ClassifyEightBytes: struct %s has explicit layout; will not be enregistered\n", + nestingLevel * 5, "", this->GetDebugClassName())); + return false; + } +#ifdef _DEBUG + LOG((LF_JIT, LL_EVERYTHING, "%*s**** Classify %s (%p), startOffset %d, total struct size %d\n", + nestingLevel * 5, "", this->GetDebugClassName(), this, startOffsetOfStruct, helperPtr->structSize)); + int fieldNum = -1; +#endif // _DEBUG + + FieldDesc *pField = GetApproxFieldDescListRaw(); + FieldDesc *pFieldEnd = pField + numIntroducedFields; + + for (; pField < pFieldEnd; pField++) + { +#ifdef _DEBUG + ++fieldNum; +#endif // _DEBUG + + DWORD fieldOffset = pField->GetOffset(); + unsigned normalizedFieldOffset = fieldOffset + startOffsetOfStruct; + + unsigned int fieldSize = pField->GetSize(); + _ASSERTE(fieldSize != (unsigned int)-1); + + // The field can't span past the end of the struct. + if ((normalizedFieldOffset + fieldSize) > helperPtr->structSize) + { + _ASSERTE(false && "Invalid struct size.
The size of fields and overall size don't agree"); + return false; + } + + CorElementType fieldType = pField->GetFieldType(); + + SystemVClassificationType fieldClassificationType = CorInfoType2UnixAmd64Classification(fieldType); + +#ifdef _DEBUG + LPCUTF8 fieldName; + pField->GetName_NoThrow(&fieldName); +#endif // _DEBUG + + if (fieldClassificationType == SystemVClassificationTypeStruct) + { + TypeHandle th = pField->GetApproxFieldTypeHandleThrowing(); + _ASSERTE(!th.IsNull()); + MethodTable* pFieldMT = th.GetMethodTable(); + + bool inEmbeddedStructPrev = helperPtr->inEmbeddedStruct; + helperPtr->inEmbeddedStruct = true; + bool structRet = pFieldMT->ClassifyEightBytes(helperPtr, nestingLevel + 1, normalizedFieldOffset); + helperPtr->inEmbeddedStruct = inEmbeddedStructPrev; + + if (!structRet) + { + // If the nested struct says not to enregister, there's no need to continue analyzing at this level. Just return 'do not enregister'. + return false; + } + + continue; + } + + if ((normalizedFieldOffset % fieldSize) != 0) + { + // The spec requires that struct values on the stack from register passed fields expect + // those fields to be at their natural alignment. + + LOG((LF_JIT, LL_EVERYTHING, " %*sxxxx Field %d %s: offset %d (normalized %d), size %d not at natural alignment; not enregistering struct\n", + nestingLevel * 5, "", fieldNum, fieldName, fieldOffset, normalizedFieldOffset, fieldSize)); + return false; + } + + if ((int)normalizedFieldOffset <= helperPtr->largestFieldOffset) + { + // Find the field corresponding to this offset and update the size if needed. + // We assume that either it matches the offset of a previously seen field, or + // it is an out-of-order offset (the VM does give us structs in non-increasing + // offset order sometimes) that doesn't overlap any other field. + + // REVIEW: will the offset ever match a previously seen field offset for cases that are NOT ExplicitLayout? + // If not, we can get rid of this loop, and just assume the offset is from an out-of-order field. We wouldn't + // need to maintain largestFieldOffset, either, since we would then assume all fields are unique. We could + // also get rid of ReClassifyField(). + int i; + for (i = helperPtr->currentUniqueOffsetField - 1; i >= 0; i--) + { + if (helperPtr->fieldOffsets[i] == normalizedFieldOffset) + { + if (fieldSize > helperPtr->fieldSizes[i]) + { + helperPtr->fieldSizes[i] = fieldSize; + } + + helperPtr->fieldClassifications[i] = ReClassifyField(helperPtr->fieldClassifications[i], fieldClassificationType); + + LOG((LF_JIT, LL_EVERYTHING, " %*sxxxx Field %d %s: offset %d (normalized %d), size %d, union with uniqueOffsetField %d, field type classification %s, reclassified field to %s\n", + nestingLevel * 5, "", fieldNum, fieldName, fieldOffset, normalizedFieldOffset, fieldSize, i, + GetSystemVClassificationTypeName(fieldClassificationType), + GetSystemVClassificationTypeName(helperPtr->fieldClassifications[i]))); + + break; + } + // Make sure the field doesn't start in the middle of another field. + _ASSERTE((normalizedFieldOffset < helperPtr->fieldOffsets[i]) || + (normalizedFieldOffset >= helperPtr->fieldOffsets[i] + helperPtr->fieldSizes[i])); + } + + if (i >= 0) + { + // The proper size of the union set of fields has been set above; continue to the next field. + continue; + } + } + else + { + helperPtr->largestFieldOffset = (int)normalizedFieldOffset; + } + + // Set the data for a new field. + + // The new field classification must not have been initialized yet.
+ _ASSERTE(helperPtr->fieldClassifications[helperPtr->currentUniqueOffsetField] == SystemVClassificationTypeNoClass); + + // There are only a few field classifications that are allowed. + _ASSERTE((fieldClassificationType == SystemVClassificationTypeInteger) || + (fieldClassificationType == SystemVClassificationTypeIntegerReference) || + (fieldClassificationType == SystemVClassificationTypeSSE)); + + helperPtr->fieldClassifications[helperPtr->currentUniqueOffsetField] = fieldClassificationType; + helperPtr->fieldSizes[helperPtr->currentUniqueOffsetField] = fieldSize; + helperPtr->fieldOffsets[helperPtr->currentUniqueOffsetField] = normalizedFieldOffset; + + LOG((LF_JIT, LL_EVERYTHING, " %*s**** Field %d %s: offset %d (normalized %d), size %d, currentUniqueOffsetField %d, field type classification %s, chosen field classification %s\n", + nestingLevel * 5, "", fieldNum, fieldName, fieldOffset, normalizedFieldOffset, fieldSize, helperPtr->currentUniqueOffsetField, + GetSystemVClassificationTypeName(fieldClassificationType), + GetSystemVClassificationTypeName(helperPtr->fieldClassifications[helperPtr->currentUniqueOffsetField]))); + + helperPtr->currentUniqueOffsetField++; + _ASSERTE(helperPtr->currentUniqueOffsetField < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT); + } // end per-field for loop + + if (!helperPtr->inEmbeddedStruct) + { + _ASSERTE(nestingLevel == 0); + + // We're at the top level of the recursion, and we're done looking at the fields. + // Now sort the fields by offset and set the output data. + + int sortedFieldOrder[SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT]; + for (unsigned i = 0; i < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT; i++) + { + sortedFieldOrder[i] = -1; + } + + for (unsigned i = 0; i < helperPtr->currentUniqueOffsetField; i++) + { + _ASSERTE(helperPtr->fieldOffsets[i] < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT); + _ASSERTE(sortedFieldOrder[helperPtr->fieldOffsets[i]] == -1); // we haven't seen this field offset yet. + sortedFieldOrder[helperPtr->fieldOffsets[i]] = i; + } + + // Set the layoutSizes (includes holes from alignment of the fields.) + int lastField = -1; + for (unsigned i = 0; i < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT; i++) + { + int ordinal = sortedFieldOrder[i]; + if (ordinal == -1) + { + continue; + } + + if (lastField == -1) + { + lastField = ordinal; + continue; + } + + helperPtr->fieldLayoutSizes[lastField] = helperPtr->fieldOffsets[ordinal] - helperPtr->fieldOffsets[lastField]; + + lastField = ordinal; + } + // Now the last field + _ASSERTE(lastField != -1); // if lastField==-1, then the struct has no fields! + helperPtr->fieldLayoutSizes[lastField] = helperPtr->structSize - helperPtr->fieldOffsets[lastField]; + + // Calculate the eightbytes and their types. + unsigned int accumulatedSizeForEightByte = 0; + unsigned int lastEightByteOffset = 0; + unsigned int currentEightByte = 0; + + for (unsigned i = 0; i < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT; i++) + { + int ordinal = sortedFieldOrder[i]; + if (ordinal == -1) + { + continue; + } + + if ((accumulatedSizeForEightByte + helperPtr->fieldLayoutSizes[ordinal]) > SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES) + { + // Save data for this eightbyte. + helperPtr->eightByteSizes[currentEightByte] = accumulatedSizeForEightByte; + helperPtr->eightByteOffsets[currentEightByte] = lastEightByteOffset; + + // Set up for next eightbyte. 
+ currentEightByte++; + _ASSERTE(currentEightByte < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + + lastEightByteOffset = helperPtr->fieldOffsets[ordinal]; + accumulatedSizeForEightByte = 0; + } + + accumulatedSizeForEightByte += helperPtr->fieldLayoutSizes[ordinal]; + + _ASSERTE(helperPtr->fieldClassifications[ordinal] != SystemVClassificationTypeMemory); + + if (helperPtr->eightByteClassifications[currentEightByte] == helperPtr->fieldClassifications[ordinal]) + { + // Do nothing. The eight-byte is already classified. + } + else if (helperPtr->eightByteClassifications[currentEightByte] == SystemVClassificationTypeNoClass) + { + helperPtr->eightByteClassifications[currentEightByte] = helperPtr->fieldClassifications[ordinal]; + } + else if ((helperPtr->eightByteClassifications[currentEightByte] == SystemVClassificationTypeInteger) || + (helperPtr->fieldClassifications[ordinal] == SystemVClassificationTypeInteger)) + { + _ASSERTE(helperPtr->fieldClassifications[ordinal] != SystemVClassificationTypeIntegerReference); + helperPtr->eightByteClassifications[currentEightByte] = SystemVClassificationTypeInteger; + } + else if ((helperPtr->eightByteClassifications[currentEightByte] == SystemVClassificationTypeIntegerReference) || + (helperPtr->fieldClassifications[ordinal] == SystemVClassificationTypeIntegerReference)) + { + helperPtr->eightByteClassifications[currentEightByte] = SystemVClassificationTypeIntegerReference; + } + else + { + helperPtr->eightByteClassifications[currentEightByte] = SystemVClassificationTypeSSE; + } + } + + helperPtr->eightByteCount = currentEightByte + 1; + helperPtr->eightByteSizes[currentEightByte] = accumulatedSizeForEightByte; + helperPtr->eightByteOffsets[currentEightByte] = lastEightByteOffset; + _ASSERTE(helperPtr->eightByteCount <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + +#ifdef _DEBUG + LOG((LF_JIT, LL_EVERYTHING, " ----\n")); + LOG((LF_JIT, LL_EVERYTHING, " **** Number EightBytes: %d\n", helperPtr->eightByteCount)); + for (unsigned i = 0; i < helperPtr->eightByteCount; i++) + { + LOG((LF_JIT, LL_EVERYTHING, " **** eightByte %d -- classType: %s, eightByteOffset: %d, eightByteSize: %d\n", + i, GetSystemVClassificationTypeName(helperPtr->eightByteClassifications[i]), helperPtr->eightByteOffsets[i], helperPtr->eightByteSizes[i])); + } +#endif // _DEBUG + } + + return true; +} + +// Returns 'true' if the struct is passed in registers, 'false' otherwise. +bool MethodTable::ClassifyEightBytesForNativeStruct(SystemVStructRegisterPassingHelperPtr helperPtr, unsigned int nestingLevel, unsigned int startOffsetOfStruct) +{ + CONTRACTL + { + THROWS; + GC_TRIGGERS; + SO_TOLERANT; + MODE_ANY; + } + CONTRACTL_END; + +#ifdef DACCESS_COMPILE + // No register classification for this case. + return false; +#else // DACCESS_COMPILE + + if (!HasLayout()) + { + return false; + } + + const FieldMarshaler *pFieldMarshaler = GetLayoutInfo()->GetFieldMarshalers(); + UINT numIntroducedFields = GetLayoutInfo()->GetNumCTMFields(); + + // No fields. + if (numIntroducedFields == 0) + { + return false; + } + + // No struct register passing with explicit layout. There may be cases where explicit layout may be still + // eligible for register struct passing, but it is hard to tell the real intent. Make it simple and just + // unconditionally disable register struct passing for explicit layout. 
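+ // For example (illustration only): a struct declared with LayoutKind.Explicit always takes the early-out below + // and is passed on the stack, even when its fields would otherwise classify cleanly into eightbytes.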
+ if (GetClass()->HasExplicitFieldOffsetLayout()) + { + LOG((LF_JIT, LL_EVERYTHING, "%*s**** ClassifyEightBytesForNativeStruct: struct %s has explicit layout; will not be enregistered\n", + nestingLevel * 5, "", this->GetDebugClassName())); + return false; + } +#ifdef _DEBUG + LOG((LF_JIT, LL_EVERYTHING, "%*s**** Classify for native struct %s (%p), startOffset %d, total struct size %d\n", + nestingLevel * 5, "", this->GetDebugClassName(), this, startOffsetOfStruct, helperPtr->structSize)); + int fieldNum = -1; +#endif // _DEBUG + + while (numIntroducedFields--) + { +#ifdef _DEBUG + ++fieldNum; +#endif // _DEBUG + + FieldDesc *pField = pFieldMarshaler->GetFieldDesc(); + CorElementType fieldType = pField->GetFieldType(); + + // Invalid field type. + if (fieldType == ELEMENT_TYPE_END) + { + return false; + } + + DWORD fieldOffset = pFieldMarshaler->GetExternalOffset(); + unsigned normalizedFieldOffset = fieldOffset + startOffsetOfStruct; + + unsigned int fieldNativeSize = pFieldMarshaler->NativeSize(); + if (fieldNativeSize > SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES) + { + // Pass on stack in this case. + return false; + } + + _ASSERTE(fieldNativeSize != (unsigned int)-1); + + // The field can't span past the end of the struct. + if ((normalizedFieldOffset + fieldNativeSize) > helperPtr->structSize) + { + _ASSERTE(false && "Invalid native struct size. The size of fields and overall size don't agree"); + return false; + } + + SystemVClassificationType fieldClassificationType = SystemVClassificationTypeUnknown; + +#ifdef _DEBUG + LPCUTF8 fieldName; + pField->GetName_NoThrow(&fieldName); +#endif // _DEBUG + + // Some NStruct Field Types have extra information and require special handling + NStructFieldType cls = pFieldMarshaler->GetNStructFieldType(); + if (cls == NFT_FIXEDCHARARRAYANSI) + { + fieldClassificationType = SystemVClassificationTypeInteger; + } + else if (cls == NFT_FIXEDARRAY) + { + VARTYPE vtElement = ((FieldMarshaler_FixedArray*)pFieldMarshaler)->GetElementVT(); + switch (vtElement) + { + case VT_EMPTY: + case VT_NULL: + case VT_BOOL: + case VT_I1: + case VT_I2: + case VT_I4: + case VT_I8: + case VT_UI1: + case VT_UI2: + case VT_UI4: + case VT_UI8: + case VT_PTR: + case VT_INT: + case VT_UINT: + case VT_LPSTR: + case VT_LPWSTR: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + case VT_R4: + case VT_R8: + fieldClassificationType = SystemVClassificationTypeSSE; + break; + case VT_DECIMAL: + case VT_DATE: + case VT_BSTR: + case VT_UNKNOWN: + case VT_DISPATCH: + case VT_SAFEARRAY: + case VT_ERROR: + case VT_HRESULT: + case VT_CARRAY: + case VT_USERDEFINED: + case VT_RECORD: + case VT_FILETIME: + case VT_BLOB: + case VT_STREAM: + case VT_STORAGE: + case VT_STREAMED_OBJECT: + case VT_STORED_OBJECT: + case VT_BLOB_OBJECT: + case VT_CF: + case VT_CLSID: + default: + // Not supported. + return false; + } + } +#ifdef FEATURE_COMINTEROP + else if (cls == NFT_INTERFACE) + { + // COMInterop not supported for CORECLR. + _ASSERTE(false && "COMInterop not supported for CORECLR."); + return false; + } +#ifdef FEATURE_CLASSIC_COMINTEROP + else if (cls == NFT_SAFEARRAY) + { + // COMInterop not supported for CORECLR. 
+ _ASSERTE(false && "COMInterop not supported for CORECLR."); + return false; + } +#endif // FEATURE_CLASSIC_COMINTEROP +#endif // FEATURE_COMINTEROP + else if (cls == NFT_NESTEDLAYOUTCLASS) + { + MethodTable* pFieldMT = ((FieldMarshaler_NestedLayoutClass*)pFieldMarshaler)->GetMethodTable(); + + bool inEmbeddedStructPrev = helperPtr->inEmbeddedStruct; + helperPtr->inEmbeddedStruct = true; + bool structRet = pFieldMT->ClassifyEightBytesForNativeStruct(helperPtr, nestingLevel + 1, normalizedFieldOffset); + helperPtr->inEmbeddedStruct = inEmbeddedStructPrev; + + if (!structRet) + { + // If the nested struct says not to enregister, there's no need to continue analyzing at this level. Just return do not enregister. + return false; + } + + continue; + } + else if (cls == NFT_NESTEDVALUECLASS) + { + MethodTable* pFieldMT = ((FieldMarshaler_NestedValueClass*)pFieldMarshaler)->GetMethodTable(); + + bool inEmbeddedStructPrev = helperPtr->inEmbeddedStruct; + helperPtr->inEmbeddedStruct = true; + bool structRet = pFieldMT->ClassifyEightBytesForNativeStruct(helperPtr, nestingLevel + 1, normalizedFieldOffset); + helperPtr->inEmbeddedStruct = inEmbeddedStructPrev; + + if (!structRet) + { + // If the nested struct says not to enregister, there's no need to continue analyzing at this level. Just return do not enregister. + return false; + } + + continue; + } + else if (cls == NFT_COPY1) + { + // The following CorElementTypes are the only ones handled with FieldMarshaler_Copy1. + switch (fieldType) + { + case ELEMENT_TYPE_I1: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + + case ELEMENT_TYPE_U1: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + + default: + // Invalid entry. + return false; // Pass on stack. + } + } + else if (cls == NFT_COPY2) + { + // The following CorElementTypes are the only ones handled with FieldMarshaler_Copy2. + switch (fieldType) + { + case ELEMENT_TYPE_CHAR: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + + case ELEMENT_TYPE_I2: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + + case ELEMENT_TYPE_U2: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + + default: + // Invalid entry. + return false; // Pass on stack. + } + } + else if (cls == NFT_COPY4) + { + // The following CorElementTypes are the only ones handled with FieldMarshaler_Copy4. + switch (fieldType) + { + // At this point, ELEMENT_TYPE_I must be 4 bytes long. Same for ELEMENT_TYPE_U. + case ELEMENT_TYPE_I: + case ELEMENT_TYPE_I4: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + + case ELEMENT_TYPE_U: + case ELEMENT_TYPE_U4: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + + case ELEMENT_TYPE_R4: + fieldClassificationType = SystemVClassificationTypeSSE; + break; + + case ELEMENT_TYPE_PTR: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + + default: + // Invalid entry. + return false; // Pass on stack. + } + } + else if (cls == NFT_COPY8) + { + // The following CorElementTypes are the only ones handled with FieldMarshaler_Copy8. + switch (fieldType) + { + // At this point, ELEMENT_TYPE_I must be 8 bytes long. Same for ELEMENT_TYPE_U. 
+ case ELEMENT_TYPE_I: + case ELEMENT_TYPE_I8: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + + case ELEMENT_TYPE_U: + case ELEMENT_TYPE_U8: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + + case ELEMENT_TYPE_R8: + fieldClassificationType = SystemVClassificationTypeSSE; + break; + + case ELEMENT_TYPE_PTR: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + + default: + // Invalid entry. + return false; // Pass on stack. + } + } + else if (cls == NFT_FIXEDSTRINGUNI) + { + fieldClassificationType = SystemVClassificationTypeInteger; + } + else if (cls == NFT_FIXEDSTRINGANSI) + { + fieldClassificationType = SystemVClassificationTypeInteger; + } + else + { + // All other NStruct Field Types which do not require special handling. + switch (cls) + { +#ifdef FEATURE_COMINTEROP + case NFT_BSTR: + // COMInterop not supported for CORECLR. + _ASSERTE(false && "COMInterop not supported for CORECLR."); + return false; + case NFT_HSTRING: + // COMInterop not supported for CORECLR. + _ASSERTE(false && "COMInterop not supported for CORECLR."); + return false; +#endif // FEATURE_COMINTEROP + case NFT_STRINGUNI: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + case NFT_STRINGANSI: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + case NFT_DELEGATE: + return false; +#ifdef FEATURE_COMINTEROP + case NFT_VARIANT: + _ASSERTE(false && "COMInterop not supported for CORECLR."); + return false; +#endif // FEATURE_COMINTEROP + case NFT_ANSICHAR: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + case NFT_WINBOOL: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + case NFT_CBOOL: + fieldClassificationType = SystemVClassificationTypeInteger; + break; + case NFT_DECIMAL: + return false; + case NFT_DATE: + return false; +#ifdef FEATURE_COMINTEROP + case NFT_VARIANTBOOL: + _ASSERTE(false && "COMInterop not supported for CORECLR."); + return false; + case NFT_CURRENCY: + _ASSERTE(false && "COMInterop not supported for CORECLR."); + return false; +#endif // FEATURE_COMINTEROP + case NFT_ILLEGAL: + return false; + case NFT_SAFEHANDLE: + return false; + case NFT_CRITICALHANDLE: + return false; + default: + return false; + } + } + + if ((normalizedFieldOffset % fieldNativeSize) != 0) + { + // The spec requires that struct values on the stack from register passed fields expect + // those fields to be at their natural alignment. + + LOG((LF_JIT, LL_EVERYTHING, " %*sxxxx Native Field %d %s: offset %d (normalized %d), native size %d not at natural alignment; not enregistering struct\n", + nestingLevel * 5, "", fieldNum, fieldName, fieldOffset, normalizedFieldOffset, fieldNativeSize)); + return false; + } + + if ((int)normalizedFieldOffset <= helperPtr->largestFieldOffset) + { + // Find the field corresponding to this offset and update the size if needed. + // We assume that either it matches the offset of a previously seen field, or + // it is an out-of-order offset (the VM does give us structs in non-increasing + // offset order sometimes) that doesn't overlap any other field.
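+ // For example (illustration only): if fields arrive at offsets 8, 0, 4, the offset-0 and offset-4 fields are seen + // after largestFieldOffset is already 8; the loop below then either matches them to a previously recorded offset + // (a union member) or verifies that they do not overlap any recorded field.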
+ + int i; + for (i = helperPtr->currentUniqueOffsetField - 1; i >= 0; i--) + { + if (helperPtr->fieldOffsets[i] == normalizedFieldOffset) + { + if (fieldNativeSize > helperPtr->fieldSizes[i]) + { + helperPtr->fieldSizes[i] = fieldNativeSize; + } + + helperPtr->fieldClassifications[i] = ReClassifyField(helperPtr->fieldClassifications[i], fieldClassificationType); + + LOG((LF_JIT, LL_EVERYTHING, " %*sxxxx Native Field %d %s: offset %d (normalized %d), native size %d, union with uniqueOffsetField %d, field type classification %s, reclassified field to %s\n", + nestingLevel * 5, "", fieldNum, fieldName, fieldOffset, normalizedFieldOffset, fieldNativeSize, i, + GetSystemVClassificationTypeName(fieldClassificationType), + GetSystemVClassificationTypeName(helperPtr->fieldClassifications[i]))); + + break; + } + // Make sure the field doesn't start in the middle of another field. + _ASSERTE((normalizedFieldOffset < helperPtr->fieldOffsets[i]) || + (normalizedFieldOffset >= helperPtr->fieldOffsets[i] + helperPtr->fieldSizes[i])); + } + + if (i >= 0) + { + // The proper size of the union set of fields has been set above; continue to the next field. + continue; + } + } + else + { + helperPtr->largestFieldOffset = (int)normalizedFieldOffset; + } + + // Set the data for a new field. + + // The new field classification must not have been initialized yet. + _ASSERTE(helperPtr->fieldClassifications[helperPtr->currentUniqueOffsetField] == SystemVClassificationTypeNoClass); + + // There are only a few field classifications that are allowed. + _ASSERTE((fieldClassificationType == SystemVClassificationTypeInteger) || + (fieldClassificationType == SystemVClassificationTypeIntegerReference) || + (fieldClassificationType == SystemVClassificationTypeSSE)); + + helperPtr->fieldClassifications[helperPtr->currentUniqueOffsetField] = fieldClassificationType; + helperPtr->fieldSizes[helperPtr->currentUniqueOffsetField] = fieldNativeSize; + helperPtr->fieldOffsets[helperPtr->currentUniqueOffsetField] = normalizedFieldOffset; + + LOG((LF_JIT, LL_EVERYTHING, " %*s**** Native Field %d %s: offset %d (normalized %d), size %d, currentUniqueOffsetField %d, field type classification %s, chosen field classification %s\n", + nestingLevel * 5, "", fieldNum, fieldName, fieldOffset, normalizedFieldOffset, fieldNativeSize, helperPtr->currentUniqueOffsetField, + GetSystemVClassificationTypeName(fieldClassificationType), + GetSystemVClassificationTypeName(helperPtr->fieldClassifications[helperPtr->currentUniqueOffsetField]))); + + helperPtr->currentUniqueOffsetField++; + ((BYTE*&)pFieldMarshaler) += MAXFIELDMARSHALERSIZE; + _ASSERTE(helperPtr->currentUniqueOffsetField < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT); + + } // end per-field for loop + + if (!helperPtr->inEmbeddedStruct) + { + _ASSERTE(nestingLevel == 0); + + // We're at the top level of the recursion, and we're done looking at the fields. + // Now sort the fields by offset and set the output data. + + int sortedFieldOrder[SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT]; + for (unsigned i = 0; i < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT; i++) + { + sortedFieldOrder[i] = -1; + } + + for (unsigned i = 0; i < helperPtr->currentUniqueOffsetField; i++) + { + _ASSERTE(helperPtr->fieldOffsets[i] < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT); + _ASSERTE(sortedFieldOrder[helperPtr->fieldOffsets[i]] == -1); // we haven't seen this field offset yet. 
+ sortedFieldOrder[helperPtr->fieldOffsets[i]] = i; + } + + // Set the layoutSizes (includes holes from alignment of the fields.) + int lastField = -1; + for (unsigned i = 0; i < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT; i++) + { + int ordinal = sortedFieldOrder[i]; + if (ordinal == -1) + { + continue; + } + + if (lastField == -1) + { + lastField = ordinal; + continue; + } + + helperPtr->fieldLayoutSizes[lastField] = helperPtr->fieldOffsets[ordinal] - helperPtr->fieldOffsets[lastField]; + + lastField = ordinal; + } + // Now the last field + _ASSERTE(lastField != -1); // if lastField==-1, then the struct has no fields! + helperPtr->fieldLayoutSizes[lastField] = helperPtr->structSize - helperPtr->fieldOffsets[lastField]; + + // Calculate the eightbytes and their types. + unsigned int accumulatedSizeForEightByte = 0; + unsigned int lastEightByteOffset = 0; + unsigned int currentEightByte = 0; + + for (unsigned i = 0; i < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT; i++) + { + int ordinal = sortedFieldOrder[i]; + if (ordinal == -1) + { + continue; + } + + if ((accumulatedSizeForEightByte + helperPtr->fieldLayoutSizes[ordinal]) > SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES) + { + // Save data for this eightbyte. + helperPtr->eightByteSizes[currentEightByte] = accumulatedSizeForEightByte; + helperPtr->eightByteOffsets[currentEightByte] = lastEightByteOffset; + + // Set up for next eightbyte. + currentEightByte++; + _ASSERTE(currentEightByte < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + + lastEightByteOffset = helperPtr->fieldOffsets[ordinal]; + accumulatedSizeForEightByte = 0; + } + + accumulatedSizeForEightByte += helperPtr->fieldLayoutSizes[ordinal]; + + _ASSERTE(helperPtr->fieldClassifications[ordinal] != SystemVClassificationTypeMemory); + + if (helperPtr->eightByteClassifications[currentEightByte] == helperPtr->fieldClassifications[ordinal]) + { + // Do nothing. The eight-byte is already classified. 
+ } + else if (helperPtr->eightByteClassifications[currentEightByte] == SystemVClassificationTypeNoClass) + { + helperPtr->eightByteClassifications[currentEightByte] = helperPtr->fieldClassifications[ordinal]; + } + else if ((helperPtr->eightByteClassifications[currentEightByte] == SystemVClassificationTypeInteger) || + (helperPtr->fieldClassifications[ordinal] == SystemVClassificationTypeInteger)) + { + _ASSERTE(helperPtr->fieldClassifications[ordinal] != SystemVClassificationTypeIntegerReference); + helperPtr->eightByteClassifications[currentEightByte] = SystemVClassificationTypeInteger; + } + else if ((helperPtr->eightByteClassifications[currentEightByte] == SystemVClassificationTypeIntegerReference) || + (helperPtr->fieldClassifications[ordinal] == SystemVClassificationTypeIntegerReference)) + { + helperPtr->eightByteClassifications[currentEightByte] = SystemVClassificationTypeIntegerReference; + } + else + { + helperPtr->eightByteClassifications[currentEightByte] = SystemVClassificationTypeSSE; + } + } + + helperPtr->eightByteCount = currentEightByte + 1; + helperPtr->eightByteSizes[currentEightByte] = accumulatedSizeForEightByte; + helperPtr->eightByteOffsets[currentEightByte] = lastEightByteOffset; + _ASSERTE(helperPtr->eightByteCount <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS); + +#ifdef _DEBUG + LOG((LF_JIT, LL_EVERYTHING, " ----\n")); + LOG((LF_JIT, LL_EVERYTHING, " **** Number EightBytes: %d\n", helperPtr->eightByteCount)); + for (unsigned i = 0; i < helperPtr->eightByteCount; i++) + { + LOG((LF_JIT, LL_EVERYTHING, " **** eightByte %d -- classType: %s, eightByteOffset: %d, eightByteSize: %d\n", + i, GetSystemVClassificationTypeName(helperPtr->eightByteClassifications[i]), helperPtr->eightByteOffsets[i], helperPtr->eightByteSizes[i])); + } +#endif // _DEBUG + } + + return true; +#endif // DACCESS_COMPILE +} + +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF) + #if !defined(DACCESS_COMPILE) && !defined(CROSSGEN_COMPILE) //========================================================================================== void MethodTable::AllocateRegularStaticBoxes() @@ -2643,7 +3556,7 @@ void MethodTable::DoRunClassInitThrowing() } description = ".cctor lock"; -#if _DEBUG +#ifdef _DEBUG description = GetDebugClassName(); #endif diff --git a/src/vm/methodtable.h b/src/vm/methodtable.h index 8e6a59b6b3..e4aecf3140 100644 --- a/src/vm/methodtable.h +++ b/src/vm/methodtable.h @@ -53,7 +53,6 @@ class FCallMethodDesc; class EEClass; class EnCFieldDesc; class FieldDesc; -class FieldMarshaler; class JIT_TrialAlloc; struct LayoutRawFieldInfo; class MetaSig; @@ -80,6 +79,7 @@ class ComCallWrapperTemplate; #ifdef FEATURE_COMINTEROP_UNMANAGED_ACTIVATION class ClassFactoryBase; #endif // FEATURE_COMINTEROP_UNMANAGED_ACTIVATION +class ArgDestination; //============================================================================ // This is the in-memory structure of a class and it will evolve. 
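The if/else ladder that closes ClassifyEightBytes above is the complete rule for merging two classifications that land in the same eightbyte. Restated as a free-standing function (a sketch with illustrative names, not the CoreCLR definitions):

enum Class { NoClass, Integer, IntegerReference, SSE };

// Merge one more field's classification into the running classification of
// the eightbyte that contains it, mirroring the ladder above.
Class Merge(Class eightByte, Class field)
{
    if (field == eightByte)
        return eightByte;           // already classified the same way
    if (eightByte == NoClass)
        return field;               // first field seen for this eightbyte
    if (eightByte == Integer || field == Integer)
        return Integer;             // integer beats SSE (the code asserts a reference never merges with a plain integer)
    if (eightByte == IntegerReference || field == IntegerReference)
        return IntegerReference;    // a GC reference keeps the eightbyte integer-like
    return SSE;                     // only SSE classifications remain
}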
@@ -625,6 +625,112 @@ public: typedef DPTR(MethodTableWriteableData) PTR_MethodTableWriteableData; typedef DPTR(MethodTableWriteableData const) PTR_Const_MethodTableWriteableData; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF +inline +SystemVClassificationType CorInfoType2UnixAmd64Classification(CorElementType eeType) +{ + static const SystemVClassificationType toSystemVAmd64ClassificationTypeMap[] = { + SystemVClassificationTypeUnknown, // ELEMENT_TYPE_END + SystemVClassificationTypeUnknown, // ELEMENT_TYPE_VOID + SystemVClassificationTypeInteger, // ELEMENT_TYPE_BOOLEAN + SystemVClassificationTypeInteger, // ELEMENT_TYPE_CHAR + SystemVClassificationTypeInteger, // ELEMENT_TYPE_I1 + SystemVClassificationTypeInteger, // ELEMENT_TYPE_U1 + SystemVClassificationTypeInteger, // ELEMENT_TYPE_I2 + SystemVClassificationTypeInteger, // ELEMENT_TYPE_U2 + SystemVClassificationTypeInteger, // ELEMENT_TYPE_I4 + SystemVClassificationTypeInteger, // ELEMENT_TYPE_U4 + SystemVClassificationTypeInteger, // ELEMENT_TYPE_I8 + SystemVClassificationTypeInteger, // ELEMENT_TYPE_U8 + SystemVClassificationTypeSSE, // ELEMENT_TYPE_R4 + SystemVClassificationTypeSSE, // ELEMENT_TYPE_R8 + SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_STRING + SystemVClassificationTypeInteger, // ELEMENT_TYPE_PTR + SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_BYREF + SystemVClassificationTypeStruct, // ELEMENT_TYPE_VALUETYPE + SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_CLASS + SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_VAR - (type variable) + SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_ARRAY + SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_GENERICINST + SystemVClassificationTypeStruct, // ELEMENT_TYPE_TYPEDBYREF + SystemVClassificationTypeUnknown, // ELEMENT_TYPE_VALUEARRAY_UNSUPPORTED + SystemVClassificationTypeInteger, // ELEMENT_TYPE_I + SystemVClassificationTypeInteger, // ELEMENT_TYPE_U + SystemVClassificationTypeUnknown, // ELEMENT_TYPE_R_UNSUPPORTED + + // put the correct type when we know our implementation + SystemVClassificationTypeInteger, // ELEMENT_TYPE_FNPTR + SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_OBJECT + SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_SZARRAY + SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_MVAR + + SystemVClassificationTypeUnknown, // ELEMENT_TYPE_CMOD_REQD + SystemVClassificationTypeUnknown, // ELEMENT_TYPE_CMOD_OPT + SystemVClassificationTypeUnknown, // ELEMENT_TYPE_INTERNAL + }; + + _ASSERTE(sizeof(toSystemVAmd64ClassificationTypeMap) == ELEMENT_TYPE_MAX); + _ASSERTE(eeType < (CorElementType) sizeof(toSystemVAmd64ClassificationTypeMap)); + // spot check of the map + _ASSERTE((SystemVClassificationType)toSystemVAmd64ClassificationTypeMap[ELEMENT_TYPE_I4] == SystemVClassificationTypeInteger); + _ASSERTE((SystemVClassificationType)toSystemVAmd64ClassificationTypeMap[ELEMENT_TYPE_PTR] == SystemVClassificationTypeInteger); + _ASSERTE((SystemVClassificationType)toSystemVAmd64ClassificationTypeMap[ELEMENT_TYPE_TYPEDBYREF] == SystemVClassificationTypeStruct); + + return (((int)eeType) < ELEMENT_TYPE_MAX) ? (toSystemVAmd64ClassificationTypeMap[eeType]) : SystemVClassificationTypeUnknown; +}; + +#define SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES 8 // Size of an eightbyte in bytes. 
+#define SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT 16 // Maximum number of fields in struct passed in registers + +struct SystemVStructRegisterPassingHelper +{ + SystemVStructRegisterPassingHelper(unsigned int totalStructSize) : + structSize(totalStructSize), + eightByteCount(0), + inEmbeddedStruct(false), + currentUniqueOffsetField(0), + largestFieldOffset(-1) + { + for (int i = 0; i < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS; i++) + { + eightByteClassifications[i] = SystemVClassificationTypeNoClass; + eightByteSizes[i] = 0; + eightByteOffsets[i] = 0; + } + + // Initialize the work arrays + for (int i = 0; i < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT; i++) + { + fieldClassifications[i] = SystemVClassificationTypeNoClass; + fieldSizes[i] = 0; + fieldLayoutSizes[i] = 0; + fieldOffsets[i] = 0; + } + } + + // Input state. + unsigned int structSize; + + // These fields are the output; these are what is computed by the classification algorithm. + unsigned int eightByteCount; + SystemVClassificationType eightByteClassifications[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS]; + unsigned int eightByteSizes[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS]; + unsigned int eightByteOffsets[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS]; + + // Helper members to track state. + bool inEmbeddedStruct; + unsigned int currentUniqueOffsetField; // A virtual field that could encompass many overlapping fields. + int largestFieldOffset; + SystemVClassificationType fieldClassifications[SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT]; + unsigned int fieldSizes[SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT]; + unsigned int fieldLayoutSizes[SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT]; + unsigned int fieldOffsets[SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT]; +}; + +typedef DPTR(SystemVStructRegisterPassingHelper) SystemVStructRegisterPassingHelperPtr; + +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF + //=============================================================================================== // // GC data appears before the beginning of the MethodTable @@ -941,6 +1047,16 @@ public: // during object construction. void CheckRunClassInitAsIfConstructingThrowing(); +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF) + // Helper function for ClassifyEightBytes + static SystemVClassificationType ReClassifyField(SystemVClassificationType originalClassification, SystemVClassificationType newFieldClassification); + + // Builds the internal data structures and classifies struct eightbytes for Amd System V calling convention. 
+ bool ClassifyEightBytes(SystemVStructRegisterPassingHelperPtr helperPtr, unsigned int nestingLevel, unsigned int startOffsetOfStruct); + bool ClassifyEightBytesForNativeStruct(SystemVStructRegisterPassingHelperPtr helperPtr, unsigned int nestingLevel, unsigned int startOffsetOfStruct); + +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF) + // Copy m_dwFlags from another method table void CopyFlags(MethodTable * pOldMT) { @@ -1929,7 +2045,7 @@ public: SetFlag(enum_flag_HasPreciseInitCctors); } -#ifdef FEATURE_HFA +#if defined(FEATURE_HFA) inline bool IsHFA() { LIMITED_METHOD_CONTRACT; @@ -1941,6 +2057,23 @@ public: LIMITED_METHOD_CONTRACT; SetFlag(enum_flag_IsHFA); } +#endif // FEATURE_HFA + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF) + inline bool IsRegPassedStruct() + { + LIMITED_METHOD_CONTRACT; + return !!GetFlag(enum_flag_IsRegStructPassed); + } + + inline void SetRegPassedStruct() + { + LIMITED_METHOD_CONTRACT; + SetFlag(enum_flag_IsRegStructPassed); + } +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF) + +#ifdef FEATURE_HFA CorElementType GetHFAType(); @@ -2642,6 +2775,7 @@ public: OBJECTREF FastBox(void** data); #ifndef DACCESS_COMPILE BOOL UnBoxInto(void *dest, OBJECTREF src); + BOOL UnBoxIntoArg(ArgDestination *argDest, OBJECTREF src); void UnBoxIntoUnchecked(void *dest, OBJECTREF src); #endif @@ -3775,7 +3909,19 @@ private: enum_flag_HasDefaultCtor = 0x00000200, enum_flag_HasPreciseInitCctors = 0x00000400, // Do we need to run class constructors at allocation time? (Not perf important, could be moved to EEClass +#if defined(FEATURE_HFA) +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF) +#error Can't define both FEATURE_HFA and FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF +#endif enum_flag_IsHFA = 0x00000800, // This type is an HFA (Homogenous Floating-point Aggregate) +#endif // FEATURE_HFA + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF) +#if defined(FEATURE_HFA) +#error Can't define both FEATURE_HFA and FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF +#endif + enum_flag_IsRegStructPassed = 0x00000800, // This type is a System V register passed struct. +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF // In a perfect world we would fill these flags using other flags that we already have // which have a constant value for something which has a component size. diff --git a/src/vm/methodtable.inl b/src/vm/methodtable.inl index a993556db6..aa07eea9d1 100644 --- a/src/vm/methodtable.inl +++ b/src/vm/methodtable.inl @@ -1716,6 +1716,32 @@ inline BOOL MethodTable::UnBoxInto(void *dest, OBJECTREF src) } //========================================================================================== +// unbox src into argument, making sure src is of the correct type. 
+
+inline BOOL MethodTable::UnBoxIntoArg(ArgDestination *argDest, OBJECTREF src)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ MODE_COOPERATIVE;
+ }
+ CONTRACTL_END;
+
+ if (Nullable::IsNullableType(TypeHandle(this)))
+ return Nullable::UnBoxIntoArgNoGC(argDest, src, this);
+ else
+ {
+ if (src == NULL || src->GetMethodTable() != this)
+ return FALSE;
+
+ CopyValueClassArg(argDest, src->UnBox(), this, src->GetAppDomain(), 0);
+ }
+ return TRUE;
+}
+
+//==========================================================================================
 // unbox src into dest, No checks are done
 
 inline void MethodTable::UnBoxIntoUnchecked(void *dest, OBJECTREF src)
 
diff --git a/src/vm/methodtablebuilder.cpp b/src/vm/methodtablebuilder.cpp
index e1d2dbb2e5..0e3cb45675 100644
--- a/src/vm/methodtablebuilder.cpp
+++ b/src/vm/methodtablebuilder.cpp
@@ -1897,8 +1897,23 @@ MethodTableBuilder::BuildMethodTableThrowing(
 #ifdef FEATURE_HFA
         CheckForHFA(pByValueClassCache);
 #endif
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+#ifdef FEATURE_HFA
+#error Can't have FEATURE_HFA and FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF defined at the same time.
+#endif // FEATURE_HFA
+ SystemVAmd64CheckForPassStructInRegister();
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
 }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+#ifdef FEATURE_HFA
+#error Can't have FEATURE_HFA and FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF defined at the same time.
+#endif // FEATURE_HFA
+ if (HasLayout())
+ {
+ SystemVAmd64CheckForPassNativeStructInRegister();
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
 #ifdef FEATURE_HFA
 if (HasLayout())
 {
@@ -8429,6 +8444,93 @@ DWORD MethodTableBuilder::GetFieldSize(FieldDesc *pFD)
 return (1 << (DWORD)(DWORD_PTR&)(pFD->m_pMTOfEnclosingClass));
 }
 
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+// Checks whether the struct is enregisterable.
+void MethodTableBuilder::SystemVAmd64CheckForPassStructInRegister()
+{
+ STANDARD_VM_CONTRACT;
+
+ // This method should be called for valuetypes only
+ _ASSERTE(IsValueClass());
+
+ TypeHandle th(GetHalfBakedMethodTable());
+
+ if (th.IsTypeDesc())
+ {
+ // Not an enregisterable managed structure.
+ return;
+ }
+
+ DWORD totalStructSize = bmtFP->NumInstanceFieldBytes;
+
+ // If the total size of the fields is bigger than CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS,
+ // the struct is passed on the stack.
+ if (totalStructSize > CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS)
+ {
+ LOG((LF_JIT, LL_EVERYTHING, "**** SystemVAmd64CheckForPassStructInRegister: struct %s is too big to pass in registers (%d bytes)\n",
+ this->GetDebugClassName(), totalStructSize));
+ return;
+ }
+
+ // Iterate through the fields and make sure they meet requirements to pass in registers
+ SystemVStructRegisterPassingHelper helper((unsigned int)totalStructSize);
+
+ if (GetHalfBakedMethodTable()->ClassifyEightBytes(&helper, 0, 0))
+ {
+ // All the above tests passed. It's a register-passed struct!
+ GetHalfBakedMethodTable()->SetRegPassedStruct();
+
+ StoreEightByteClassification(&helper);
+ }
+}
+
+// Checks whether the struct is enregisterable.
+void MethodTableBuilder::SystemVAmd64CheckForPassNativeStructInRegister()
+{
+ STANDARD_VM_CONTRACT;
+ DWORD totalStructSize = 0;
+
+ // If not a native value type, return.
+ if (!IsValueClass())
+ {
+ return;
+ }
+
+ totalStructSize = GetLayoutInfo()->GetNativeSize();
+
+ // If the total size of the fields is bigger than CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS,
+ // the struct is passed on the stack.
+ if (totalStructSize > CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS)
+ {
+ LOG((LF_JIT, LL_EVERYTHING, "**** SystemVAmd64CheckForPassNativeStructInRegister: struct %s is too big to pass in registers (%d bytes)\n",
+ this->GetDebugClassName(), totalStructSize));
+ return;
+ }
+
+ _ASSERTE(HasLayout());
+
+ // Classify the native layout for this struct.
+
+ // Iterate through the fields and make sure they meet requirements to pass in registers
+ SystemVStructRegisterPassingHelper helper((unsigned int)totalStructSize);
+ if (GetHalfBakedMethodTable()->ClassifyEightBytesForNativeStruct(&helper, 0, 0))
+ {
+ GetLayoutInfo()->SetNativeStructPassedInRegisters();
+ }
+}
+
+// Store the eightbyte classification into the EEClass
+void MethodTableBuilder::StoreEightByteClassification(SystemVStructRegisterPassingHelper* helper)
+{
+ EEClass* eeClass = GetHalfBakedMethodTable()->GetClass();
+ LoaderAllocator* pAllocator = MethodTableBuilder::GetLoaderAllocator();
+ AllocMemTracker* pamTracker = MethodTableBuilder::GetMemTracker();
+ EnsureOptionalFieldsAreAllocated(eeClass, pamTracker, pAllocator->GetLowFrequencyHeap());
+ eeClass->SetEightByteClassification(helper->eightByteCount, helper->eightByteClassifications, helper->eightByteSizes);
+}
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+
 #ifdef FEATURE_HFA
 //---------------------------------------------------------------------------------------
 //
diff --git a/src/vm/methodtablebuilder.h b/src/vm/methodtablebuilder.h
index bc543c1bf8..10ba278535 100644
--- a/src/vm/methodtablebuilder.h
+++ b/src/vm/methodtablebuilder.h
@@ -2980,6 +2980,15 @@ private:
 VOID CheckForNativeHFA();
 
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+ // Checks whether the struct is enregisterable.
+ void SystemVAmd64CheckForPassStructInRegister();
+ void SystemVAmd64CheckForPassNativeStructInRegister();
+ // Store the eightbyte classification into the EEClass
+ void StoreEightByteClassification(SystemVStructRegisterPassingHelper* helper);
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+
 // this accesses the field size which is temporarily stored in m_pMTOfEnclosingClass
 // during class loading. Don't use any other time
 DWORD GetFieldSize(FieldDesc *pFD);
diff --git a/src/vm/object.cpp b/src/vm/object.cpp
index 3b07a12543..25a7109905 100644
--- a/src/vm/object.cpp
+++ b/src/vm/object.cpp
@@ -24,6 +24,7 @@
 #endif
 #include "field.h"
 #include "gcscan.h"
+#include "argdestination.h"
 
 #ifdef FEATURE_COMPRESSEDSTACK
 void* CompressedStackObject::GetUnmanagedCompressedStack()
@@ -1498,6 +1499,31 @@ void CopyValueClassChecked(void* dest, void* src, MethodTable *pMT, AppDomain *p
 EX_END_CATCH(SwallowAllExceptions);
 CopyValueClassUnchecked(dest,src,pMT);
 }
+
+// Copy value class into the argument specified by the argDest, performing an appdomain check first.
+// The destOffset is nonzero when copying values into Nullable<T>, it is the offset +// of the T value inside of the Nullable<T> +void CopyValueClassArgChecked(ArgDestination *argDest, void* src, MethodTable *pMT, AppDomain *pDomain, int destOffset) +{ + STATIC_CONTRACT_DEBUG_ONLY; + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_FORBID_FAULT; + STATIC_CONTRACT_MODE_COOPERATIVE; + + DEBUG_ONLY_FUNCTION; + + FAULT_NOT_FATAL(); + EX_TRY + { + Object::AssignValueTypeAppDomain(pMT, src, pDomain); + } + EX_CATCH + { + } + EX_END_CATCH(SwallowAllExceptions); + CopyValueClassArgUnchecked(argDest, src, pMT, destOffset); +} #endif void STDCALL CopyValueClassUnchecked(void* dest, void* src, MethodTable *pMT) @@ -1563,6 +1589,51 @@ void STDCALL CopyValueClassUnchecked(void* dest, void* src, MethodTable *pMT) } } +// Copy value class into the argument specified by the argDest. +// The destOffset is nonzero when copying values into Nullable<T>, it is the offset +// of the T value inside of the Nullable<T> +void STDCALL CopyValueClassArgUnchecked(ArgDestination *argDest, void* src, MethodTable *pMT, int destOffset) +{ + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_FORBID_FAULT; + STATIC_CONTRACT_MODE_COOPERATIVE; + +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + + if (argDest->IsStructPassedInRegs()) + { + argDest->CopyStructToRegisters(src, pMT->GetNumInstanceFieldBytes(), destOffset); + return; + } + +#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING + // destOffset is only valid for Nullable<T> passed in registers + _ASSERTE(destOffset == 0); + + CopyValueClassUnchecked(argDest->GetDestinationAddress(), src, pMT); +} + +// Initialize the value class argument to zeros +void InitValueClassArg(ArgDestination *argDest, MethodTable *pMT) +{ + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_FORBID_FAULT; + STATIC_CONTRACT_MODE_COOPERATIVE; + +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + + if (argDest->IsStructPassedInRegs()) + { + argDest->ZeroStructInRegisters(pMT->GetNumInstanceFieldBytes()); + return; + } + +#endif + InitValueClass(argDest->GetDestinationAddress(), pMT); +} + #if defined (VERIFY_HEAP) #include "dbginterface.h" @@ -3245,7 +3316,7 @@ BOOL Nullable::UnBox(void* destPtr, OBJECTREF boxedVal, MethodTable* destMT) if (boxedVal == NULL) { - // logicall we are doing *dest->HasValueAddr(destMT) = false; + // Logically we are doing *dest->HasValueAddr(destMT) = false; // We zero out the whole structure becasue it may contain GC references // and these need to be initialized to zero. (could optimize in the non-GC case) InitValueClass(destPtr, destMT); @@ -3302,7 +3373,7 @@ BOOL Nullable::UnBoxNoGC(void* destPtr, OBJECTREF boxedVal, MethodTable* destMT) if (boxedVal == NULL) { - // logicall we are doing *dest->HasValueAddr(destMT) = false; + // Logically we are doing *dest->HasValueAddr(destMT) = false; // We zero out the whole structure becasue it may contain GC references // and these need to be initialized to zero. (could optimize in the non-GC case) InitValueClass(destPtr, destMT); @@ -3328,6 +3399,64 @@ BOOL Nullable::UnBoxNoGC(void* destPtr, OBJECTREF boxedVal, MethodTable* destMT) } //=============================================================================== +// Special Logic to unbox a boxed T as a nullable<T> into an argument +// specified by the argDest. 
+// Does not handle type equivalence (may conservatively return FALSE)
+BOOL Nullable::UnBoxIntoArgNoGC(ArgDestination *argDest, OBJECTREF boxedVal, MethodTable* destMT)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_COOPERATIVE;
+ SO_TOLERANT;
+ }
+ CONTRACTL_END;
+
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (argDest->IsStructPassedInRegs())
+ {
+ // We should only get here if we are unboxing a T as a Nullable<T>
+ _ASSERTE(IsNullableType(destMT));
+
+ // We better have a concrete instantiation, or our field offset asserts are not useful
+ _ASSERTE(!destMT->ContainsGenericVariables());
+
+ if (boxedVal == NULL)
+ {
+ // Logically we are doing *dest->HasValueAddr(destMT) = false;
+ // We zero out the whole structure because it may contain GC references
+ // and these need to be initialized to zero. (could optimize in the non-GC case)
+ InitValueClassArg(argDest, destMT);
+ }
+ else
+ {
+ if (!IsNullableForTypeNoGC(destMT, boxedVal->GetMethodTable()))
+ {
+ // For safety's sake, also allow true nullables to be unboxed normally.
+ // This should not happen normally, but we want to be robust.
+ if (destMT == boxedVal->GetMethodTable())
+ {
+ CopyValueClassArg(argDest, boxedVal->GetData(), destMT, boxedVal->GetAppDomain(), 0);
+ return TRUE;
+ }
+ return FALSE;
+ }
+
+ Nullable* dest = (Nullable*)argDest->GetStructGenRegDestinationAddress();
+ *dest->HasValueAddr(destMT) = true;
+ int destOffset = (BYTE*)dest->ValueAddr(destMT) - (BYTE*)dest;
+ CopyValueClassArg(argDest, boxedVal->UnBox(), boxedVal->GetMethodTable(), boxedVal->GetAppDomain(), destOffset);
+ }
+ return TRUE;
+ }
+
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ return UnBoxNoGC(argDest->GetDestinationAddress(), boxedVal, destMT);
+}
+
+//===============================================================================
 // Special Logic to unbox a boxed T as a nullable<T>
 // Does not do any type checks.
 void Nullable::UnBoxNoCheck(void* destPtr, OBJECTREF boxedVal, MethodTable* destMT)
@@ -3350,7 +3479,7 @@ void Nullable::UnBoxNoCheck(void* destPtr, OBJECTREF boxedVal, MethodTable* dest
 
     if (boxedVal == NULL) 
     {
-        // logicall we are doing *dest->HasValueAddr(destMT) = false;
+        // Logically we are doing *dest->HasValueAddr(destMT) = false;
         // We zero out the whole structure becasue it may contain GC references 
         // and these need to be initialized to zero. 
(could optimize in the non-GC case) InitValueClass(destPtr, destMT); diff --git a/src/vm/object.h b/src/vm/object.h index abf15fa591..5808e6c0eb 100644 --- a/src/vm/object.h +++ b/src/vm/object.h @@ -94,6 +94,8 @@ class CtxStaticData; class DomainAssembly; class AssemblyNative; class WaitHandleNative; +class ArgDestination; + struct RCW; #if CHECK_APP_DOMAIN_LEAKS @@ -702,6 +704,7 @@ inline void ClearObjectReference(OBJECTREF* dst) // CopyValueClass sets a value class field void STDCALL CopyValueClassUnchecked(void* dest, void* src, MethodTable *pMT); +void STDCALL CopyValueClassArgUnchecked(ArgDestination *argDest, void* src, MethodTable *pMT, int destOffset); inline void InitValueClass(void *dest, MethodTable *pMT) { @@ -709,18 +712,24 @@ inline void InitValueClass(void *dest, MethodTable *pMT) ZeroMemoryInGCHeap(dest, pMT->GetNumInstanceFieldBytes()); } +// Initialize value class argument +void InitValueClassArg(ArgDestination *argDest, MethodTable *pMT); + #if CHECK_APP_DOMAIN_LEAKS void SetObjectReferenceChecked(OBJECTREF *dst,OBJECTREF ref, AppDomain *pAppDomain); void CopyValueClassChecked(void* dest, void* src, MethodTable *pMT, AppDomain *pAppDomain); +void CopyValueClassArgChecked(ArgDestination *argDest, void* src, MethodTable *pMT, AppDomain *pAppDomain, int destOffset); #define SetObjectReference(_d,_r,_a) SetObjectReferenceChecked(_d, _r, _a) #define CopyValueClass(_d,_s,_m,_a) CopyValueClassChecked(_d,_s,_m,_a) +#define CopyValueClassArg(_d,_s,_m,_a,_o) CopyValueClassArgChecked(_d,_s,_m,_a,_o) #else #define SetObjectReference(_d,_r,_a) SetObjectReferenceUnchecked(_d, _r) #define CopyValueClass(_d,_s,_m,_a) CopyValueClassUnchecked(_d,_s,_m) +#define CopyValueClassArg(_d,_s,_m,_a,_o) CopyValueClassArgUnchecked(_d,_s,_m,_o) #endif @@ -4649,6 +4658,7 @@ public: static OBJECTREF Box(void* src, MethodTable* nullable); static BOOL UnBox(void* dest, OBJECTREF boxedVal, MethodTable* destMT); static BOOL UnBoxNoGC(void* dest, OBJECTREF boxedVal, MethodTable* destMT); + static BOOL UnBoxIntoArgNoGC(ArgDestination *argDest, OBJECTREF boxedVal, MethodTable* destMT); static void UnBoxNoCheck(void* dest, OBJECTREF boxedVal, MethodTable* destMT); static OBJECTREF BoxedNullableNull(TypeHandle nullableType) { return 0; } diff --git a/src/vm/reflectioninvocation.cpp b/src/vm/reflectioninvocation.cpp index 777b120ad4..d3a3125ed0 100644 --- a/src/vm/reflectioninvocation.cpp +++ b/src/vm/reflectioninvocation.cpp @@ -34,6 +34,7 @@ #endif #include "dbginterface.h" +#include "argdestination.h" // these flags are defined in XXXInfo.cs and only those that are used are replicated here #define INVOCATION_FLAGS_UNKNOWN 0x00000000 @@ -1578,7 +1579,7 @@ FCIMPL4(Object*, RuntimeMethodHandle::InvokeMethod, TypeHandle th = gc.pSig->GetArgumentAt(i); - int ofs = argit.GetNextOffset(); + int ofs = argit.GetNextOffset(); _ASSERTE(ofs != TransitionBlock::InvalidOffset); #ifdef CALLDESCR_REGTYPEMAP @@ -1590,16 +1591,22 @@ FCIMPL4(Object*, RuntimeMethodHandle::InvokeMethod, // least one such argument we point the call worker at the floating point area of the frame (we leave // it null otherwise since the worker can perform a useful optimization if it knows no floating point // registers need to be set up). 
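The hunk below replaces the old "negative offset means floating point register" test with TransitionBlock::HasFloatRegister: once a single struct argument can occupy both a general-purpose and an XMM register, the sign of one offset can no longer encode whether the FP area is needed. A rough sketch of the distinction, where the descriptor type and its field name are assumptions for illustration, not the real ArgLocDesc layout:

struct ArgLocDescSketch
{
    int m_cFloatReg; // assumed: count of eightbytes of this argument placed in XMM registers
};

// Sketch of the new test: when a struct-in-registers descriptor is present,
// the FP area is needed whenever any eightbyte went to an XMM register;
// otherwise fall back to the legacy negative-offset encoding.
bool HasFloatRegisterSketch(int ofs, const ArgLocDescSketch* pLoc)
{
    if (pLoc != nullptr)
        return pLoc->m_cFloatReg > 0;
    return ofs < 0;
}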
- if ((ofs < 0) && (callDescrData.pFloatArgumentRegisters == NULL)) + + if (TransitionBlock::HasFloatRegister(ofs, argit.GetArgLocDescForStructInRegs()) && + (callDescrData.pFloatArgumentRegisters == NULL)) + { callDescrData.pFloatArgumentRegisters = (FloatArgumentRegisters*) (pTransitionBlock + - TransitionBlock::GetOffsetOfFloatArgumentRegisters()); + TransitionBlock::GetOffsetOfFloatArgumentRegisters()); + } #endif UINT structSize = argit.GetArgSize(); bool needsStackCopy = false; - PVOID pArgDst = pTransitionBlock + ofs; + // A boxed Nullable<T> is represented as boxed T. So to pass a Nullable<T> by reference, + // we have to create a Nullable<T> on stack, copy the T into it, then pass it to the callee and + // after returning from the call, copy the T out of the Nullable<T> back to the boxed T. TypeHandle nullableType = NullableTypeOfByref(th); if (!nullableType.IsNull()) { th = nullableType; @@ -1607,17 +1614,21 @@ FCIMPL4(Object*, RuntimeMethodHandle::InvokeMethod, needsStackCopy = true; } #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE - else - if (argit.IsArgPassedByRef()) { + else if (argit.IsArgPassedByRef()) + { needsStackCopy = true; } #endif + ArgDestination argDest(pTransitionBlock, ofs, argit.GetArgLocDescForStructInRegs()); + if(needsStackCopy) { MethodTable * pMT = th.GetMethodTable(); _ASSERTE(pMT && pMT->IsValueType()); + PVOID pArgDst = argDest.GetDestinationAddress(); + PVOID pStackCopy = _alloca(structSize); *(PVOID *)pArgDst = pStackCopy; pArgDst = pStackCopy; @@ -1632,9 +1643,12 @@ FCIMPL4(Object*, RuntimeMethodHandle::InvokeMethod, { pValueClasses = new (_alloca(sizeof(ValueClassInfo))) ValueClassInfo(pStackCopy, pMT, pValueClasses); } + + // We need a new ArgDestination that points to the stack copy + argDest = ArgDestination(pStackCopy, 0, NULL); } - InvokeUtil::CopyArg(th, &(gc.args->m_Array[i]), pArgDst); + InvokeUtil::CopyArg(th, &(gc.args->m_Array[i]), &argDest); } ENDFORBIDGC(); diff --git a/src/vm/siginfo.cpp b/src/vm/siginfo.cpp index 25fe157784..ec023e9d0b 100644 --- a/src/vm/siginfo.cpp +++ b/src/vm/siginfo.cpp @@ -25,6 +25,7 @@ #include "sigbuilder.h" #include "../md/compiler/custattr.h" #include <corhlprpriv.h> +#include "argdestination.h" /*******************************************************************/ const CorTypeInfo::CorTypeInfoEntry CorTypeInfo::info[ELEMENT_TYPE_MAX] = @@ -4976,11 +4977,28 @@ void ReportPointersFromValueType(promote_func *fn, ScanContext *sc, PTR_MethodTa } while (cur >= last); } +void ReportPointersFromValueTypeArg(promote_func *fn, ScanContext *sc, PTR_MethodTable pMT, ArgDestination *pSrc) +{ + WRAPPER_NO_CONTRACT; + + if (!pMT->ContainsPointers()) + return; +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (pSrc->IsStructPassedInRegs()) + { + pSrc->ReportPointersFromStructInRegisters(fn, sc, pMT->GetNumInstanceFieldBytes()); + return; + } +#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING + + ReportPointersFromValueType(fn, sc, pMT, pSrc->GetDestinationAddress()); +} + //------------------------------------------------------------------ // Perform type-specific GC promotion on the value (based upon the // last type retrieved by NextArg()). 
//------------------------------------------------------------------ -VOID MetaSig::GcScanRoots(PTR_VOID pValue, +VOID MetaSig::GcScanRoots(ArgDestination *pValue, promote_func *fn, ScanContext* sc, promote_carefully_func *fnc) @@ -4997,7 +5015,7 @@ VOID MetaSig::GcScanRoots(PTR_VOID pValue, CONTRACTL_END - PTR_PTR_Object pArgPtr = (PTR_PTR_Object)pValue; + PTR_PTR_Object pArgPtr = (PTR_PTR_Object)pValue->GetDestinationAddress(); if (fnc == NULL) fnc = &PromoteCarefully; @@ -5083,7 +5101,7 @@ VOID MetaSig::GcScanRoots(PTR_VOID pValue, } #endif // ENREGISTERED_PARAMTYPE_MAXSIZE - ReportPointersFromValueType(fn, sc, pMT, pArgPtr); + ReportPointersFromValueTypeArg(fn, sc, pMT, pValue); } break; diff --git a/src/vm/siginfo.hpp b/src/vm/siginfo.hpp index 06d3b66a24..586802b1b1 100644 --- a/src/vm/siginfo.hpp +++ b/src/vm/siginfo.hpp @@ -50,6 +50,7 @@ unsigned GetSizeForCorElementType(CorElementType etyp); const ElementTypeInfo* GetElementTypeInfo(CorElementType etyp); class SigBuilder; +class ArgDestination; typedef const struct HardCodedMetaSig *LPHARDCODEDMETASIG; @@ -841,7 +842,7 @@ class MetaSig // Perform type-specific GC promotion on the value (based upon the // last type retrieved by NextArg()). //------------------------------------------------------------------ - VOID GcScanRoots(PTR_VOID pValue, promote_func *fn, + VOID GcScanRoots(ArgDestination *pValue, promote_func *fn, ScanContext* sc, promote_carefully_func *fnc = NULL); //------------------------------------------------------------------ @@ -888,7 +889,7 @@ class MetaSig BOOL IsReturnTypeVoid() const; - enum RETURNTYPE {RETOBJ, RETBYREF, RETNONOBJ}; + enum RETURNTYPE {RETOBJ, RETBYREF, RETNONOBJ, RETVALUETYPE}; CorElementType GetReturnTypeNormalized(TypeHandle * pthValueType = NULL) const; diff --git a/src/vm/stackbuildersink.cpp b/src/vm/stackbuildersink.cpp index bcd8d62f50..5d6aa7bb15 100644 --- a/src/vm/stackbuildersink.cpp +++ b/src/vm/stackbuildersink.cpp @@ -404,13 +404,16 @@ void CallDescrWithObjectArray(OBJECTREF& pServer, #endif #ifdef CALLDESCR_FPARGREGS - // Under CALLDESCR_FPARGREGS -ve offsets indicate arguments in floating point registers. If we have at + // Under CALLDESCR_FPARGREGS we can have arguments in floating point registers. If we have at // least one such argument we point the call worker at the floating point area of the frame (we leave // it null otherwise since the worker can perform a useful optimization if it knows no floating point // registers need to be set up). 
- if (TransitionBlock::IsFloatArgumentRegisterOffset(ofs) && (pFloatArgumentRegisters == NULL)) + if (TransitionBlock::HasFloatRegister(ofs, argit.GetArgLocDescForStructInRegs()) && + (pFloatArgumentRegisters == NULL)) + { pFloatArgumentRegisters = (FloatArgumentRegisters*)(pTransitionBlock + TransitionBlock::GetOffsetOfFloatArgumentRegisters()); + } #endif if (argit.GetArgType() == ELEMENT_TYPE_BYREF) diff --git a/src/vm/threads.cpp b/src/vm/threads.cpp index 065c396929..5e4c05f514 100644 --- a/src/vm/threads.cpp +++ b/src/vm/threads.cpp @@ -2242,6 +2242,9 @@ Thread::Thread() #endif m_pAllLoggedTypes = NULL; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + m_pHijackReturnTypeClass = NULL; +#endif } diff --git a/src/vm/threads.h b/src/vm/threads.h index 0ab550f741..da94c0e2ce 100644 --- a/src/vm/threads.h +++ b/src/vm/threads.h @@ -689,6 +689,9 @@ void InitThreadManager(); EXTERN_C void __stdcall OnHijackObjectTripThread(); // hijacked JIT code is returning an objectref EXTERN_C void __stdcall OnHijackInteriorPointerTripThread(); // hijacked JIT code is returning a byref EXTERN_C void __stdcall OnHijackScalarTripThread(); // hijacked JIT code is returning a non-objectref, non-FP +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +EXTERN_C void __stdcall OnHijackStructInRegsTripThread(); // hijacked JIT code is returning a struct in registers +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #ifdef _TARGET_X86_ EXTERN_C void __stdcall OnHijackFloatingPointTripThread(); // hijacked JIT code is returning an FP value @@ -1017,6 +1020,9 @@ typedef DWORD (*AppropriateWaitFunc) (void *args, DWORD timeout, DWORD option); EXTERN_C void STDCALL OnHijackObjectWorker(HijackArgs * pArgs); EXTERN_C void STDCALL OnHijackInteriorPointerWorker(HijackArgs * pArgs); EXTERN_C void STDCALL OnHijackScalarWorker(HijackArgs * pArgs); +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +EXTERN_C void STDCALL OnHijackStructInRegsWorker(HijackArgs * pArgs); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #endif // FEATURE_HIJACK // This is the code we pass around for Thread.Interrupt, mainly for assertions @@ -1067,7 +1073,9 @@ class Thread: public IUnknown friend void STDCALL OnHijackObjectWorker(HijackArgs *pArgs); friend void STDCALL OnHijackInteriorPointerWorker(HijackArgs *pArgs); friend void STDCALL OnHijackScalarWorker(HijackArgs *pArgs); - +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + friend void STDCALL OnHijackStructInRegsWorker(HijackArgs *pArgs); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #ifdef PLATFORM_UNIX friend void PALAPI HandleGCSuspensionForInterruptedThread(CONTEXT *interruptedContext); #endif // PLATFORM_UNIX @@ -5553,6 +5561,24 @@ public: _ASSERTE(pAllLoggedTypes != NULL ? 
m_pAllLoggedTypes == NULL : TRUE);
 m_pAllLoggedTypes = pAllLoggedTypes;
 }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+private:
+ EEClass* m_pHijackReturnTypeClass;
+public:
+ EEClass* GetHijackReturnTypeClass()
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ return m_pHijackReturnTypeClass;
+ }
+
+ void SetHijackReturnTypeClass(EEClass* pClass)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ m_pHijackReturnTypeClass = pClass;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
 };
 
 // End of class Thread
 
diff --git a/src/vm/threadsuspend.cpp b/src/vm/threadsuspend.cpp
index 10ea699faa..5d414192c4 100644
--- a/src/vm/threadsuspend.cpp
+++ b/src/vm/threadsuspend.cpp
@@ -7260,7 +7260,7 @@ void STDCALL OnHijackInteriorPointerWorker(HijackArgs * pArgs)
 GC_ON_TRANSITIONS (GCOnTransition);
 }
 #endif
- pArgs->ReturnValue = (size_t)ptr;
+ *(size_t*)&pArgs->ReturnValue = (size_t)ptr;
 }
 GCPROTECT_END(); // trashes or here!
@@ -7327,6 +7327,90 @@ void STDCALL OnHijackScalarWorker(HijackArgs * pArgs)
 #endif
 }
 
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+// A hijacked method is returning a struct in registers to its caller.
+// The struct can possibly contain object references that we have to
+// protect.
+void STDCALL OnHijackStructInRegsWorker(HijackArgs * pArgs)
+{
+ CONTRACTL {
+ THROWS;
+ GC_TRIGGERS;
+ SO_TOLERANT;
+ } CONTRACTL_END;
+
+#ifdef HIJACK_NONINTERRUPTIBLE_THREADS
+ Thread *thread = GetThread();
+
+ EEClass* eeClass = thread->GetHijackReturnTypeClass();
+
+ OBJECTREF oref[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS];
+ int orefCount = 0;
+ for (int i = 0; i < eeClass->GetNumberEightBytes(); i++)
+ {
+ if (eeClass->GetEightByteClassification(i) == SystemVClassificationTypeIntegerReference)
+ {
+ oref[orefCount++] = ObjectToOBJECTREF(*(Object **) &pArgs->ReturnValue[i]);
+ }
+ }
+
+#ifdef FEATURE_STACK_PROBE
+ if (GetEEPolicy()->GetActionOnFailure(FAIL_StackOverflow) == eRudeUnloadAppDomain)
+ {
+ RetailStackProbe(ADJUST_PROBE(DEFAULT_ENTRY_PROBE_AMOUNT), thread);
+ }
+#endif
+
+ CONTRACT_VIOLATION(SOToleranceViolation);
+
+ thread->ResetThreadState(Thread::TS_Hijacked);
+
+ // Fix up our caller's stack, so it can resume from the hijack correctly
+ pArgs->ReturnAddress = (size_t)thread->m_pvHJRetAddr;
+
+ // Build a frame so that stack crawling can proceed from here back to where
+ // we will resume execution.
+ FrameWithCookie<HijackFrame> frame((void *)pArgs->ReturnAddress, thread, pArgs);
+
+ GCPROTECT_ARRAY_BEGIN(oref[0], orefCount)
+ {
+#ifdef _DEBUG
+ BOOL GCOnTransition = FALSE;
+ if (g_pConfig->FastGCStressLevel()) {
+ GCOnTransition = GC_ON_TRANSITIONS (FALSE);
+ }
+#endif
+
+#ifdef TIME_SUSPEND
+ g_SuspendStatistics.cntHijackTrap++;
+#endif
+
+ CommonTripThread();
+#ifdef _DEBUG
+ if (g_pConfig->FastGCStressLevel()) {
+ GC_ON_TRANSITIONS (GCOnTransition);
+ }
+#endif
+
+ // Update the references in the returned struct
+ orefCount = 0;
+ for (int i = 0; i < eeClass->GetNumberEightBytes(); i++)
+ {
+ if (eeClass->GetEightByteClassification(i) == SystemVClassificationTypeIntegerReference)
+ {
+ *((OBJECTREF *) &pArgs->ReturnValue[i]) = oref[orefCount++];
+ }
+ }
+ }
+ GCPROTECT_END();
+
+ frame.Pop();
+#else
+ PORTABILITY_ASSERT("OnHijackStructInRegsWorker not implemented on this platform.");
+#endif
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
 #ifndef PLATFORM_UNIX
 // Get the ExecutionState for the specified SwitchIn thread.
Note that this is @@ -7806,11 +7890,19 @@ BOOL Thread::HandledJITCase(BOOL ForTaskSwitchIn) else #endif // _TARGET_X86_ { - MetaSig::RETURNTYPE type = esb.m_pFD->ReturnsObject(); + MethodTable* pMT = NULL; + MetaSig::RETURNTYPE type = esb.m_pFD->ReturnsObject(INDEBUG_COMMA(false) &pMT); if (type == MetaSig::RETOBJ) pvHijackAddr = OnHijackObjectTripThread; else if (type == MetaSig::RETBYREF) pvHijackAddr = OnHijackInteriorPointerTripThread; +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + else if (type == MetaSig::RETVALUETYPE) + { + pThread->SetHijackReturnTypeClass(pMT->GetClass()); + pvHijackAddr = OnHijackStructInRegsTripThread; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING } } @@ -8354,7 +8446,8 @@ void PALAPI HandleGCSuspensionForInterruptedThread(CONTEXT *interruptedContext) // Hijack the return address to point to the appropriate routine based on the method's return type. void *pvHijackAddr = OnHijackScalarTripThread; MethodDesc *pMethodDesc = codeInfo.GetMethodDesc(); - MetaSig::RETURNTYPE type = pMethodDesc->ReturnsObject(); + MethodTable* pMT = NULL; + MetaSig::RETURNTYPE type = pMethodDesc->ReturnsObject(INDEBUG_COMMA(false) &pMT); if (type == MetaSig::RETOBJ) { pvHijackAddr = OnHijackObjectTripThread; @@ -8363,6 +8456,13 @@ void PALAPI HandleGCSuspensionForInterruptedThread(CONTEXT *interruptedContext) { pvHijackAddr = OnHijackInteriorPointerTripThread; } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + else if (type == MetaSig::RETVALUETYPE) + { + pThread->SetHijackReturnTypeClass(pMT->GetClass()); + pvHijackAddr = OnHijackStructInRegsTripThread; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING pThread->HijackThread(pvHijackAddr, &executionState); } diff --git a/tests/src/JIT/SIMD/project.lock.json b/tests/src/JIT/SIMD/project.lock.json index 5a0680001c..6cf037e202 100644 --- a/tests/src/JIT/SIMD/project.lock.json +++ b/tests/src/JIT/SIMD/project.lock.json @@ -242,7 +242,10 @@ "ref/MonoTouch10/_._", "ref/net46/System.Console.dll", "ref/xamarinios10/_._", - "ref/xamarinmac20/_._" + "ref/xamarinmac20/_._", + "ru/System.Console.xml", + "zh-hans/System.Console.xml", + "zh-hant/System.Console.xml" ] }, "System.Diagnostics.Debug/4.0.10": { |
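Taken together, the classification pieces in this change behave like the following standalone sketch (illustrative types only, and fields are assumed not to straddle an eightbyte): a 16-byte struct { double d; int a; int b; } gets eightbyte 0 classified SSE and eightbyte 1 classified Integer, so it would be passed in one XMM register and one general-purpose register.

#include <cstdio>

enum Class { NoClass, Integer, SSE };
struct Field { unsigned offset, size; Class cls; };

int main()
{
    // struct { double d; int a; int b; } -- 16 bytes, which is exactly the CLR
    // limit for register passing (CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS).
    const Field fields[] = { { 0, 8, SSE }, { 8, 4, Integer }, { 12, 4, Integer } };
    Class eightBytes[2] = { NoClass, NoClass };

    for (const Field& f : fields)
    {
        Class& eb = eightBytes[f.offset / 8];   // the eightbyte holding this field
        if (eb == NoClass)
            eb = f.cls;                         // first field of the eightbyte
        else if (eb == Integer || f.cls == Integer)
            eb = Integer;                       // integer dominates SSE
    }

    for (int i = 0; i < 2; i++)
        printf("eightbyte %d -> %s\n", i, eightBytes[i] == SSE ? "SSE (XMM)" : "INTEGER (GPR)");
    return 0;
}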