diff options
author | Brian Sullivan <briansul@microsoft.com> | 2016-04-20 18:28:04 -0700 |
---|---|---|
committer | Brian Sullivan <briansul@microsoft.com> | 2016-04-20 18:28:04 -0700 |
commit | 44bba29a99579b6c93dfa51f40bf79467f548c86 (patch) | |
tree | c16000c86edc455af22a15e480ef9dbb1b4288b6 /src/jit | |
parent | 9e7f0fe5095be446c5dc801e9841e8cfee2c9b74 (diff) | |
parent | f6846c2d17997da6a94d7f5bc8afd913ce10b416 (diff) | |
download | coreclr-44bba29a99579b6c93dfa51f40bf79467f548c86.tar.gz coreclr-44bba29a99579b6c93dfa51f40bf79467f548c86.tar.bz2 coreclr-44bba29a99579b6c93dfa51f40bf79467f548c86.zip |
Merge pull request #4434 from briansull/struct16-abi
ARM64 Work Item 3817, 3524 - Struct16 decomposition
Diffstat (limited to 'src/jit')
-rw-r--r-- | src/jit/codegenarm64.cpp | 555 | ||||
-rw-r--r-- | src/jit/codegencommon.cpp | 27 | ||||
-rw-r--r-- | src/jit/codegeninterface.h | 2 | ||||
-rw-r--r-- | src/jit/codegenlinear.h | 4 | ||||
-rw-r--r-- | src/jit/compiler.h | 6 | ||||
-rw-r--r-- | src/jit/gentree.h | 94 | ||||
-rw-r--r-- | src/jit/lower.cpp | 29 | ||||
-rw-r--r-- | src/jit/lower.h | 4 | ||||
-rw-r--r-- | src/jit/lowerarm64.cpp | 224 | ||||
-rw-r--r-- | src/jit/lsra.cpp | 110 | ||||
-rw-r--r-- | src/jit/morph.cpp | 415 | ||||
-rw-r--r-- | src/jit/target.h | 6 |
12 files changed, 976 insertions, 500 deletions
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 1473171863..a79a1279e8 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -2346,7 +2346,6 @@ void CodeGen::genCodeForBinary(GenTree* treeNode) genProduceReg(treeNode); } - /***************************************************************************** * * Generate code for a single node in the tree. @@ -2652,28 +2651,16 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED)) { assert(!isRegCandidate); - if (targetType == TYP_STRUCT) - { - // At this point any TYP_STRUCT LclVar must be a two register argument - assert(varDsc->lvSize() == 2*TARGET_POINTER_SIZE); - - const BYTE * gcPtrs = varDsc->lvGcLayout; - var_types type0 = compiler->getJitGCType(gcPtrs[0]); - var_types type1 = compiler->getJitGCType(gcPtrs[1]); + // targetType must be a normal scalar type and not a TYP_STRUCT + assert(targetType != TYP_STRUCT); - emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), targetReg, varNum, 0); - emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), REG_NEXT(targetReg), varNum, TARGET_POINTER_SIZE); - } - else // targetType is a normal scalar type and not a TYP_STRUCT - { - instruction ins = ins_Load(targetType); - emitAttr attr = emitTypeSize(targetType); + instruction ins = ins_Load(targetType); + emitAttr attr = emitTypeSize(targetType); - attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); + attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); - emit->emitIns_R_S(ins, attr, targetReg, varNum, 0); - } + emit->emitIns_R_S(ins, attr, targetReg, varNum, 0); genProduceReg(treeNode); } } @@ -2849,10 +2836,6 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) genProduceReg(treeNode); break; - case GT_OBJ: - genCodeForObj(treeNode->AsObj()); - break; - case GT_MULHI: genCodeForMulHi(treeNode->AsOp()); genProduceReg(treeNode); @@ -3174,123 +3157,12 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case 
GT_PUTARG_STK: - { - // Get argument offset on stack. - // Here we cross check that argument offset hasn't changed from lowering to codegen since - // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. - int argOffset = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE; - -#ifdef DEBUG - fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode); - assert(curArgTabEntry); - assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE); -#endif // DEBUG - - GenTreePtr data = treeNode->gtOp.gtOp1; - unsigned varNum; // typically this is the varNum for the Outgoing arg space - -#if FEATURE_FASTTAILCALL - bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea; -#else - const bool putInIncomingArgArea = false; -#endif - // Whether to setup stk arg in incoming or out-going arg area? - // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area. - // All other calls - stk arg is setup in out-going arg area. - if (putInIncomingArgArea) - { - // The first varNum is guaranteed to be the first incoming arg of the method being compiled. - // See lvaInitTypeRef() for the order in which lvaTable entries are initialized. - varNum = 0; -#ifdef DEBUG -#if FEATURE_FASTTAILCALL - // This must be a fast tail call. - assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall()); - - // Since it is a fast tail call, the existence of first incoming arg is guaranteed - // because fast tail call requires that in-coming arg area of caller is >= out-going - // arg area required for tail call. 
- LclVarDsc* varDsc = compiler->lvaTable; - assert(varDsc != nullptr); - assert(varDsc->lvIsRegArg && ((varDsc->lvArgReg == REG_ARG_0) || (varDsc->lvArgReg == REG_FLTARG_0))); -#endif // FEATURE_FASTTAILCALL -#endif - } - else - { - varNum = compiler->lvaOutgoingArgSpaceVar; - } - - // Do we have a TYP_STRUCT argument, if so it must be a 16-byte pass-by-value struct - if (targetType == TYP_STRUCT) - { - // We will use two store instructions that each write a register sized value - - // We must have a multi-reg struct that takes two slots - assert(curArgTabEntry->numSlots == 2); - assert(!data->isContained()); // Impossible to have a contained 16-byte operand - - // We will need to determine the GC type to use for each of the stores - // We obtain the gcPtrs values by examining op1 using getStructGcPtrsFromOp() - - BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; - - compiler->getStructGcPtrsFromOp(data, &gcPtrs[0]); - - var_types type0 = compiler->getJitGCType(gcPtrs[0]); - var_types type1 = compiler->getJitGCType(gcPtrs[1]); - - genConsumeReg(data); - - // Emit two store instructions to store two consecutive registers into the outgoing argument area - getEmitter()->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), data->gtRegNum, varNum, argOffset); - getEmitter()->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), REG_NEXT(data->gtRegNum), varNum, argOffset + TARGET_POINTER_SIZE); - } - else // a normal non-Struct targetType - { - instruction storeIns = ins_Store(targetType); - emitAttr storeAttr = emitTypeSize(targetType); - - // If it is contained then data must be the integer constant zero - if (data->isContained()) - { - assert(data->OperGet() == GT_CNS_INT); - assert(data->AsIntConCommon()->IconValue() == 0); - getEmitter()->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNum, argOffset); - } - else - { - genConsumeReg(data); - getEmitter()->emitIns_S_R(storeIns, storeAttr, data->gtRegNum, varNum, argOffset); - } - } - } + genPutArgStk(treeNode); break; case 
GT_PUTARG_REG: - if (targetType == TYP_STRUCT) - { - // We will need to determine the GC type to use for each of the stores - // We obtain the gcPtrs values by examining op1 using getStructGcPtrsFromOp() - - GenTree *op1 = treeNode->gtOp.gtOp1; - BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; - - compiler->getStructGcPtrsFromOp(op1, &gcPtrs[0]); - - var_types type0 = compiler->getJitGCType(gcPtrs[0]); - var_types type1 = compiler->getJitGCType(gcPtrs[1]); - - // If child node is not already in the registers we need, move it - - genConsumeReg(op1); // for multireg operands - if (targetReg != op1->gtRegNum) - { - inst_RV_RV(ins_Copy(type0), targetReg, op1->gtRegNum, type0); - inst_RV_RV(ins_Copy(type1), REG_NEXT(targetReg), REG_NEXT(op1->gtRegNum), type1); - } - } - else // a normal non-Struct targetType + assert(targetType != TYP_STRUCT); // Any TYP_STRUCT register args should have been removed by fgMorphMultiregStructArg + // We have a normal non-Struct targetType { GenTree *op1 = treeNode->gtOp.gtOp1; // If child node is not already in the register we need, move it @@ -5244,11 +5116,35 @@ void CodeGen::genCallInstruction(GenTreePtr node) if (curArgTabEntry->regNum == REG_STK) continue; - regNumber argReg = curArgTabEntry->regNum; - genConsumeReg(argNode); - if (argNode->gtRegNum != argReg) + // Deal with multi register passed struct args. 
+ if (argNode->OperGet() == GT_LIST) + { + GenTreeArgList* argListPtr = argNode->AsArgList(); + unsigned iterationNum = 0; + regNumber argReg = curArgTabEntry->regNum; + for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++) + { + GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1; + assert(putArgRegNode->gtOper == GT_PUTARG_REG); + + genConsumeReg(putArgRegNode); + + if (putArgRegNode->gtRegNum != argReg) + { + inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg, putArgRegNode->gtRegNum); + } + + argReg = REG_NEXT(argReg); + } + } + else { - inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum); + regNumber argReg = curArgTabEntry->regNum; + genConsumeReg(argNode); + if (argNode->gtRegNum != argReg) + { + inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum); + } } // In the case of a varargs call, @@ -6536,161 +6432,298 @@ CodeGen::genIntrinsic(GenTreePtr treeNode) } //--------------------------------------------------------------------- -// genCodeForObj - generate code for a GT_OBJ node +// genPutArgStk - generate code for a GT_PUTARG_STK node // // Arguments -// treeNode - the GT_OBJ node +// treeNode - the GT_PUTARG_STK node // // Return value: // None // - -void CodeGen::genCodeForObj(GenTreeObj* objNode) +void CodeGen::genPutArgStk(GenTreePtr treeNode) { - assert(objNode->OperGet() == GT_OBJ); - - GenTree* addr = objNode->gtOp.gtOp1; - genConsumeAddress(addr); - - regNumber addrReg = addr->gtRegNum; - regNumber targetReg = objNode->gtRegNum; - var_types targetType = objNode->TypeGet(); - emitter * emit = getEmitter(); - - noway_assert(varTypeIsStruct(targetType)); - noway_assert(targetReg != REG_NA); - - CORINFO_CLASS_HANDLE objClass = objNode->gtObj.gtClass; - int structSize = compiler->info.compCompHnd->getClassSize(objClass); + var_types targetType = treeNode->TypeGet(); + emitter *emit = getEmitter(); - assert(structSize 
<= 2*TARGET_POINTER_SIZE); - BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; - compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); + // Get argument offset on stack. + // Here we cross check that argument offset hasn't changed from lowering to codegen since + // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. + int argOffset = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE; - var_types type0 = compiler->getJitGCType(gcPtrs[0]); - var_types type1 = compiler->getJitGCType(gcPtrs[1]); +#ifdef DEBUG + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode); + assert(curArgTabEntry); + assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE); +#endif // DEBUG - bool hasGCpointers = varTypeIsGC(type0) || varTypeIsGC(type1); + GenTreePtr data = treeNode->gtOp.gtOp1; + unsigned varNum; // typically this is the varNum for the Outgoing arg space - noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES); +#if FEATURE_FASTTAILCALL + bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea; +#else + const bool putInIncomingArgArea = false; +#endif + // Whether to setup stk arg in incoming or out-going arg area? + // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area. + // All other calls - stk arg is setup in out-going arg area. + if (putInIncomingArgArea) + { + // The first varNum is guaranteed to be the first incoming arg of the method being compiled. + // See lvaInitTypeRef() for the order in which lvaTable entries are initialized. + varNum = 0; +#ifdef DEBUG +#if FEATURE_FASTTAILCALL + // This must be a fast tail call. + assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall()); + + // Since it is a fast tail call, the existence of first incoming arg is guaranteed + // because fast tail call requires that in-coming arg area of caller is >= out-going + // arg area required for tail call. 
+ LclVarDsc* varDsc = compiler->lvaTable; + assert(varDsc != nullptr); + assert(varDsc->lvIsRegArg && ((varDsc->lvArgReg == REG_ARG_0) || (varDsc->lvArgReg == REG_FLTARG_0))); +#endif // FEATURE_FASTTAILCALL +#endif + } + else + { + varNum = compiler->lvaOutgoingArgSpaceVar; + } - // For a 16-byte structSize with GC pointers we will use two ldr instruction to load two registers - // ldr x2, [x0] - // ldr x3, [x0] - // - // For a 16-byte structSize with no GC pointers we will use a ldp instruction to load two registers - // ldp x2, x3, [x0] - // - // For a 12-byte structSize we will we will generate two load instructions - // ldr x2, [x0] - // ldr w3, [x0, #8] - // - // When the first instruction has a targetReg that is the same register - // as the source register: addrReg, we set deferLoad to true and - // issue the intructions in the reverse order: - // ldr w3, [x2, #8] - // ldr x2, [x2] - - bool deferLoad = false; - emitAttr deferAttr = EA_PTRSIZE; - int deferOffset = 0; - int remainingSize = structSize; - unsigned structOffset = 0; - var_types nextType = type0; - - // Use the ldp instruction for a struct that is exactly 16-bytes in size - // ldp x2, x3, [x0] - // - if (remainingSize == 2*TARGET_POINTER_SIZE) + if (targetType != TYP_STRUCT) // a normal non-Struct argument { - if (hasGCpointers) - { - // We have GC pointers use two ldr instructions - // - // We do it this way because we can't currently pass or track - // two different emitAttr values for a ldp instruction. + instruction storeIns = ins_Store(targetType); + emitAttr storeAttr = emitTypeSize(targetType); - // Make sure that the first load instruction does not overwrite the addrReg.
- // - if (targetReg != addrReg) - { - getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), targetReg, addrReg, structOffset); - getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), REG_NEXT(targetReg), addrReg, structOffset + TARGET_POINTER_SIZE); - } - else - { - assert(REG_NEXT(targetReg) != addrReg); - getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), REG_NEXT(targetReg), addrReg, structOffset + TARGET_POINTER_SIZE); - getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), targetReg, addrReg, structOffset); - } + // If it is contained then data must be the integer constant zero + if (data->isContained()) + { + assert(data->OperGet() == GT_CNS_INT); + assert(data->AsIntConCommon()->IconValue() == 0); + emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNum, argOffset); } else { - // Use a ldp instruction - - getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, targetReg, REG_NEXT(targetReg), addrReg, structOffset); + genConsumeReg(data); + emit->emitIns_S_R(storeIns, storeAttr, data->gtRegNum, varNum, argOffset); } - remainingSize = 0; // We completely wrote the 16-byte struct } - - while (remainingSize > 0) + else // We have a TYP_STRUCT argument (it also must be a 16-byte multi-reg struct) { - if (remainingSize >= TARGET_POINTER_SIZE) + // We will use two store instructions that each write a register sized value + + // We must have a multi-reg struct that takes two slots + assert(curArgTabEntry->numSlots == 2); + assert(data->isContained()); // We expect that this node was marked as contained in LowerArm64 + + // In lowerArm64 we reserved two internal integer registers for this 16-byte TYP_STRUCT + regNumber loReg = REG_NA; + regNumber hiReg = REG_NA; + genGetRegPairFromMask(treeNode->gtRsvdRegs, &loReg, &hiReg); + assert(loReg != REG_NA); + assert(hiReg != REG_NA); + + // We will need to record the GC type used by each of the load instructions + // so that we use the same type in each of the store instructions + var_types type0 = 
TYP_UNKNOWN; + var_types type1 = TYP_UNKNOWN; + + if (data->OperGet() == GT_OBJ) { - remainingSize -= TARGET_POINTER_SIZE; + GenTree* objNode = data; + GenTree* addrNode = objNode->gtOp.gtOp1; - if ((targetReg != addrReg) || (remainingSize == 0)) + if (addrNode->OperGet() == GT_LCL_VAR_ADDR) { - noway_assert(targetReg != addrReg); - getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(nextType), targetReg, addrReg, structOffset); + // We have a GT_OBJ(GT_LCL_VAR_ADDR) + // + // We will treat this case the same as a GT_LCL_VAR node + // so update 'data' to point this GT_LCL_VAR_ADDR node + // and continue to the codegen for the LCL_VAR node below + // + data = addrNode; } - else + else // We have a GT_OBJ with an address expression { - deferLoad = true; - deferAttr = emitTypeSize(nextType); - deferOffset = structOffset; - } - targetReg = REG_NEXT(targetReg); - structOffset += TARGET_POINTER_SIZE; - nextType = type1; - } - else // (remainingSize < TARGET_POINTER_SIZE) - { - int loadSize = remainingSize; - remainingSize = 0; + // Generate code to load the address that we need into a register + genConsumeAddress(addrNode); - // the left over size is smaller than a pointer and thus can never be a GC type - assert(varTypeIsGC(nextType) == false); + regNumber addrReg = addrNode->gtRegNum; + var_types targetType = objNode->TypeGet(); - var_types loadType = TYP_UINT; - if (loadSize == 1) - { - loadType = TYP_UBYTE; - } - else if (loadSize == 2) - { - loadType = TYP_USHORT; + noway_assert(varTypeIsStruct(targetType)); + + CORINFO_CLASS_HANDLE objClass = objNode->gtObj.gtClass; + int structSize = compiler->info.compCompHnd->getClassSize(objClass); + + assert(structSize <= 2*TARGET_POINTER_SIZE); + + // We obtain the gcPtrs values by examining op1 using getClassGClayout() + + BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; + compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); + + // We need to record the GC type to used for each of the loads + type0 = 
compiler->getJitGCType(gcPtrs[0]); + type1 = compiler->getJitGCType(gcPtrs[1]); + + bool hasGCpointers = varTypeIsGC(type0) || varTypeIsGC(type1); + + noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES); + + // For a 16-byte structSize with GC pointers we will use two ldr instructions to load two registers + // ldr x2, [x0] + // ldr x3, [x0, #8] + // + // For a 16-byte structSize with no GC pointers we will use a ldp instruction to load two registers + // ldp x2, x3, [x0] + // + // For a 12-byte structSize we will generate two load instructions + // ldr x2, [x0] + // ldr w3, [x0, #8] + // + // When the first instruction has a loReg that is the same register + // as the source register: addrReg, we set deferLoad to true and + // issue the instructions in the reverse order: + // ldr w3, [x2, #8] + // ldr x2, [x2] + + bool deferLoad = false; + emitAttr deferAttr = EA_PTRSIZE; + int deferOffset = 0; + int remainingSize = structSize; + unsigned structOffset = 0; + var_types nextType = type0; + + // Use the ldp instruction for a struct that is exactly 16-bytes in size + // ldp x2, x3, [x0] + // + if (remainingSize == 2*TARGET_POINTER_SIZE) + { + if (hasGCpointers) + { + // We have GC pointers, so use two ldr instructions + // + // We do it this way because we can't currently pass or track + // two different emitAttr values for a ldp instruction. + + // Make sure that the first load instruction does not overwrite the addrReg.
+ // + if (loReg != addrReg) + { + emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), loReg, addrReg, structOffset); + emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), hiReg, addrReg, structOffset + TARGET_POINTER_SIZE); + } + else + { + assert(hiReg != addrReg); + emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), hiReg, addrReg, structOffset + TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), loReg, addrReg, structOffset); + } + } + else + { + // Use a ldp instruction + + emit->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, loReg, hiReg, addrReg, structOffset); + } + remainingSize = 0; // We completely wrote the 16-byte struct + } + + regNumber curReg = loReg; + while (remainingSize > 0) + { + if (remainingSize >= TARGET_POINTER_SIZE) + { + remainingSize -= TARGET_POINTER_SIZE; + + if ((curReg == addrReg) && (remainingSize != 0)) + { + deferLoad = true; + deferAttr = emitTypeSize(nextType); + deferOffset = structOffset; + } + else // the typical case + { + emit->emitIns_R_R_I(INS_ldr, emitTypeSize(nextType), curReg, addrReg, structOffset); + } + curReg = hiReg; + structOffset += TARGET_POINTER_SIZE; + nextType = type1; + } + else // (remainingSize < TARGET_POINTER_SIZE) + { + int loadSize = remainingSize; + remainingSize = 0; + + // the left over size is smaller than a pointer and thus can never be a GC type + assert(varTypeIsGC(nextType) == false); + + var_types loadType = TYP_UINT; + if (loadSize == 1) + { + loadType = TYP_UBYTE; + } + else if (loadSize == 2) + { + loadType = TYP_USHORT; + } + else + { + // Need to handle additional loadSize cases here + noway_assert(loadSize == 4); + } + + instruction loadIns = ins_Load(loadType); + emitAttr loadAttr = emitAttr(loadSize); + + // When deferLoad is false, curReg can be the same as addrReg + // because the last instruction is allowed to overwrite addrReg. 
+ // + noway_assert(!deferLoad || (curReg != addrReg)); + + emit->emitIns_R_R_I(loadIns, loadAttr, curReg, addrReg, structOffset); + } + } + + if (deferLoad) + { + curReg = addrReg; + emit->emitIns_R_R_I(INS_ldr, deferAttr, curReg, addrReg, deferOffset); + } } + } - instruction loadIns = ins_Load(loadType); - emitAttr loadAttr = emitAttr(loadSize); + if ((data->OperGet() == GT_LCL_VAR) || (data->OperGet() == GT_LCL_VAR_ADDR)) + { + GenTreeLclVarCommon* varNode = data->AsLclVarCommon(); + unsigned varNum = varNode->gtLclNum; assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &compiler->lvaTable[varNum]; + + // At this point any TYP_STRUCT LclVar must be a 16-byte pass by value argument + assert(varDsc->lvSize() == 2 * TARGET_POINTER_SIZE); + // This struct also must live in the stack frame + assert(varDsc->lvOnFrame); - // When deferLoad is false, targetReg can be the same as addrReg - // because the last instruction is allowed to overwrite addrReg. + // We need to record the GC type to used for each of the loads + // We obtain the GC type values by examining the local's varDsc->lvGcLayout // - noway_assert(!deferLoad || (targetReg != addrReg)); + type0 = compiler->getJitGCType(varDsc->lvGcLayout[0]); + type1 = compiler->getJitGCType(varDsc->lvGcLayout[1]); - getEmitter()->emitIns_R_R_I(loadIns, loadAttr, targetReg, addrReg, structOffset); + emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNum, 0); + emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNum, TARGET_POINTER_SIZE); } - } - if (deferLoad) - { - targetReg = addrReg; - noway_assert(targetReg != addrReg); - getEmitter()->emitIns_R_R_I(INS_ldr, deferAttr, targetReg, addrReg, deferOffset); + // We are required to set these two values above, so that the stores have the same GC type as the loads + assert(type0 != TYP_UNKNOWN); + assert(type1 != TYP_UNKNOWN); + + // Emit two store instructions to store two consecutive registers into the outgoing argument area + 
emit->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), loReg, varNum, argOffset); + emit->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), hiReg, varNum, argOffset + TARGET_POINTER_SIZE); } - genProduceReg(objNode); } diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 94f6be9cee..ab65d576c4 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -564,6 +564,30 @@ regMaskTP CodeGenInterface::genGetRegMask(GenTreePtr tree) return regMask; } +//------------------------------------------------------------------------ +// genGetRegPairFromMask: Given a register mask return the two registers +// specified by the mask. +// +// Arguments: +// regPairMask: a register mask that has exactly two bits set +// Return values: +// pLoReg: the address of where to write the first register +// pHiReg: the address of where to write the second register +// +void CodeGenInterface::genGetRegPairFromMask(regMaskTP regPairMask, regNumber* pLoReg, regNumber* pHiReg) +{ + assert(genCountBits(regPairMask) == 2); + + regMaskTP loMask = genFindLowestBit(regPairMask); // set loMask to a one-bit mask + regMaskTP hiMask = regPairMask - loMask; // set hiMask to the other bit that was in regPairMask + + regNumber loReg = genRegNumFromMask(loMask); // set loReg from loMask + regNumber hiReg = genRegNumFromMask(hiMask); // set hiReg from hiMask + + *pLoReg = loReg; + *pHiReg = hiReg; +} + /***************************************************************************** * TRACKING OF FLAGS @@ -6205,7 +6229,8 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& inst_RV_RV(INS_xorpd, reg, reg, TYP_DOUBLE); fltInitReg = reg; #elif defined(_TARGET_ARM64_) - NYI("Initialize double-precision floating-point register to zero"); + // We will just zero out the entire vector register.
This sets it to a double zero value + getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B); #else // _TARGET_* #error Unsupported or unset target architecture #endif diff --git a/src/jit/codegeninterface.h b/src/jit/codegeninterface.h index 5501dbacac..285b397b8d 100644 --- a/src/jit/codegeninterface.h +++ b/src/jit/codegeninterface.h @@ -143,7 +143,7 @@ protected: regMaskTP genLiveMask (GenTreePtr tree); regMaskTP genLiveMask (VARSET_VALARG_TP liveSet); - + void genGetRegPairFromMask(regMaskTP regPairMask, regNumber* pLoReg, regNumber* pHiReg); // The following property indicates whether the current method sets up diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h index b555646333..dfc5372a02 100644 --- a/src/jit/codegenlinear.h +++ b/src/jit/codegenlinear.h @@ -53,10 +53,6 @@ void genCompareLong(GenTreePtr treeNode); #endif -#ifdef _TARGET_ARM64_ - void genCodeForObj(GenTreeObj* treeNode); -#endif - #ifdef FEATURE_SIMD enum SIMDScalarMoveType { diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 3b53e27ae4..59cdeb6dad 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -8804,7 +8804,11 @@ public: unsigned __int8* offset0, unsigned __int8* offset1); void fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument); -#endif +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + + void fgMorphMultiregStructArgs(GenTreeCall* call); + GenTreePtr fgMorphMultiregStructArg (GenTreePtr arg); + }; // end of class Compiler // Inline methods of CompAllocator. diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 233c5dce78..f2ec935b53 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -3742,55 +3742,73 @@ inline GenTreePtr GenTree::MoveNext() } #ifdef DEBUG +//------------------------------------------------------------------------ +// IsListForMultiRegArg: Given an GenTree node that represents an argument +// enforce (or don't enforce) the following invariant. 
+// +// For LEGACY_BACKEND or architectures that don't support MultiReg args +// we don't allow a GT_LIST at all. +// +// Currently for AMD64 UNIX we allow a limited case where a GT_LIST is +// allowed but every element must be a GT_LCL_FLD. +// +// For the future targets that allow for Multireg args (and this includes +// the current ARM64 target) we allow a GT_LIST of arbitrary nodes, these +// would typically start out as GT_LCL_VARs or GT_LCL_FLDS or GT_INDs, +// but could be changed into constants or GT_COMMA trees by the later +// optimization phases. +// +// Arguments: +// instance method for a GenTree node +// +// Return values: +// true: the GenTree node is accepted as a valid argument +// false: the GenTree node is not accepted as a valid argument +// inline bool GenTree::IsListForMultiRegArg() { if (!IsList()) { - return false; + // We don't have a GT_LIST, so just return true. + return true; } - -#if FEATURE_MULTIREG_ARGS - // We allow a GT_LIST of some nodes as an argument - GenTree* gtListPtr = this; - while (gtListPtr != nullptr) + else // We do have a GT_LIST { - bool allowed = false; -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING - // ToDo: fix UNIX_AMD64 so that we do not generate this kind of a List - if (gtListPtr->Current() == nullptr) - break; +#if defined(LEGACY_BACKEND) || !FEATURE_MULTIREG_ARGS - // Only a list of GT_LCL_FLDs is allowed - if (gtListPtr->Current()->OperGet() == GT_LCL_FLD) - { - allowed = true; - } -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING -#ifdef _TARGET_ARM64_ - // A list of GT_LCL_VARs is allowed - if (gtListPtr->Current()->OperGet() == GT_LCL_VAR) - { - allowed = true; - } - // A list of GT_LCL_FLDs is allowed - else if (gtListPtr->Current()->OperGet() == GT_LCL_FLD) - { - allowed = true; - } -#endif - if (!allowed) + // Not allowed to have a GT_LIST for an argument + // unless we have a RyuJIT backend and FEATURE_MULTIREG_ARGS + + return false; + +#else // we have RyuJIT backend and FEATURE_MULTIREG_ARGS + +#ifdef
FEATURE_UNIX_AMD64_STRUCT_PASSING + // For UNIX ABI we currently only allow a GT_LIST of GT_LCL_FLDs nodes + GenTree* gtListPtr = this; + while (gtListPtr != nullptr) { - return false; + // ToDo: fix UNIX_AMD64 so that we do not generate this kind of a List + // Note the list as currently created is malformed, as the last entry is a nullptr + if (gtListPtr->Current() == nullptr) + break; + + // Only a list of GT_LCL_FLDs is allowed + if (gtListPtr->Current()->OperGet() != GT_LCL_FLD) + { + return false; + } + gtListPtr = gtListPtr->MoveNext(); } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING - gtListPtr = gtListPtr->MoveNext(); - } + // Note that for non-UNIX ABI the GT_LIST may contain any node + // + // We allow this GT_LIST as an argument + return true; - return true; -#else // FEATURE_MULTIREG_ARGS - // Not allowed to have a GT_LIST here unless we have FEATURE_MULTIREG_ARGS - return false; -#endif +#endif // RyuJIT backend and FEATURE_MULTIREG_ARGS + } } #endif // DEBUG diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index 13fc3acf2b..5d80f48fc6 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -1268,8 +1268,35 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP } } else -#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#else // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if FEATURE_MULTIREG_ARGS + if ((info->numRegs > 1) && (arg->OperGet() == GT_LIST)) + { + assert(arg->OperGet() == GT_LIST); + GenTreeArgList* argListPtr = arg->AsArgList(); + + for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++) + { + GenTreePtr curOp = argListPtr->gtOp.gtOp1; + var_types curTyp = curOp->TypeGet(); + + // Create a new GT_PUTARG_REG node with op1 + GenTreePtr newOper = comp->gtNewOperNode(GT_PUTARG_REG, curTyp, curOp); + + // CopyCosts + newOper->CopyCosts(argListPtr->gtOp.gtOp1); + // Splice in the new GT_PUTARG_REG node in the GT_LIST + SpliceInUnary(argListPtr, &argListPtr->gtOp.gtOp1, 
newOper); + } + + // Just return arg. The GT_LIST is not replaced. + // Nothing more to do. + return arg; + } + else +#endif // FEATURE_MULTIREG_ARGS +#endif // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg); } diff --git a/src/jit/lower.h b/src/jit/lower.h index 232c7b2bc0..6381555949 100644 --- a/src/jit/lower.h +++ b/src/jit/lower.h @@ -151,7 +151,9 @@ private: #ifdef FEATURE_SIMD void TreeNodeInfoInitSIMD(GenTree* tree, LinearScan* lsra); #endif // FEATURE_SIMD - +#ifdef _TARGET_ARM64_ + void TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info); +#endif // _TARGET_ARM64_ #if defined(_TARGET_XARCH_) void TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info, unsigned kind); #endif // defined(_TARGET_XARCH_) diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp index 71bfc23632..091c4cc7ee 100644 --- a/src/jit/lowerarm64.cpp +++ b/src/jit/lowerarm64.cpp @@ -143,11 +143,6 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) break; case GT_STORE_LCL_FLD: - info->srcCount = 1; - info->dstCount = 0; - LowerStoreLoc(tree->AsLclVarCommon()); - break; - case GT_STORE_LCL_VAR: info->srcCount = 1; info->dstCount = 0; @@ -584,97 +579,107 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) if (curArgTabEntry->regNum == REG_STK) { // late arg that is not passed in a register - DISPNODE(argNode); assert(argNode->gtOper == GT_PUTARG_STK); - argNode->gtLsraInfo.srcCount = 1; - argNode->gtLsraInfo.dstCount = 0; + + TreeNodeInfoInitPutArgStk(argNode, curArgTabEntry); continue; } - var_types argType = argNode->TypeGet(); - - callHasFloatRegArgs |= varTypeIsFloating(argType); + var_types argType = argNode->TypeGet(); + bool argIsFloat = varTypeIsFloating(argType); + callHasFloatRegArgs |= argIsFloat; regNumber argReg = curArgTabEntry->regNum; - short regCount = 1; - // Default case is that we consume one source; modify this later (e.g. 
for - // promoted structs) - info->srcCount++; + // We will setup argMask to the set of all registers that compose this argument + regMaskTP argMask = 0; - regMaskTP argMask = genRegMask(argReg); argNode = argNode->gtEffectiveVal(); - - if (argNode->TypeGet() == TYP_STRUCT) + + // A GT_LIST has a TYP_VOID, but is used to represent a multireg struct + if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_LIST)) { GenTreePtr actualArgNode = argNode; - if (actualArgNode->gtOper == GT_PUTARG_REG) - { - actualArgNode = actualArgNode->gtOp.gtOp1; - } unsigned originalSize = 0; - bool isPromoted = false; - LclVarDsc* varDsc = nullptr; - if (actualArgNode->gtOper == GT_LCL_VAR) - { - varDsc = compiler->lvaTable + actualArgNode->gtLclVarCommon.gtLclNum; - originalSize = varDsc->lvSize(); - } - else if (actualArgNode->gtOper == GT_MKREFANY) - { - originalSize = 2 * TARGET_POINTER_SIZE; - } - else if (actualArgNode->gtOper == GT_OBJ) + + if (argNode->gtOper == GT_LIST) { - CORINFO_CLASS_HANDLE objClass = actualArgNode->gtObj.gtClass; - originalSize = compiler->info.compCompHnd->getClassSize(objClass); + // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) + GenTreeArgList* argListPtr = argNode->AsArgList(); + + // Initailize the first register and the first regmask in our list + regNumber targetReg = argReg; + regMaskTP targetMask = genRegMask(targetReg); + unsigned iterationNum = 0; + originalSize = 0; + + for (; argListPtr; argListPtr = argListPtr->Rest()) + { + GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1; + assert(putArgRegNode->gtOper == GT_PUTARG_REG); + GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1; + + originalSize += REGSIZE_BYTES; // 8 bytes + + // Record the register requirements for the GT_PUTARG_REG node + putArgRegNode->gtLsraInfo.setDstCandidates(l, targetMask); + putArgRegNode->gtLsraInfo.setSrcCandidates(l, targetMask); + + // To avoid redundant moves, request that the argument child tree be + // computed in the 
register in which the argument is passed to the call. + putArgChild ->gtLsraInfo.setSrcCandidates(l, targetMask); + + // We consume one source for each item in this list + info->srcCount++; + iterationNum++; + + // Update targetReg and targetMask for the next putarg_reg (if any) + targetReg = REG_NEXT(targetReg); + targetMask = genRegMask(targetReg); + } } else { - assert(!"Can't predict unsupported TYP_STRUCT arg kind"); + noway_assert(!"Unsupported TYP_STRUCT arg kind"); } - unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES; - regNumber reg = (regNumber)(argReg + 1); - unsigned remainingSlots = slots - 1; - - if (remainingSlots > 1) - { - NYI_ARM64("Lower - Struct typed arguments (size>16)"); - } + unsigned slots = ((unsigned)(roundUp(originalSize, REGSIZE_BYTES))) / REGSIZE_BYTES; + regNumber curReg = argReg; + regNumber lastReg = argIsFloat ? REG_ARG_FP_LAST : REG_ARG_LAST; + unsigned remainingSlots = slots; - while (remainingSlots > 0 && reg <= REG_ARG_LAST) + while (remainingSlots > 0) { - argMask |= genRegMask(reg); - reg = (regNumber)(reg + 1); + argMask |= genRegMask(curReg); remainingSlots--; - regCount++; - } - if (remainingSlots > 1) - { - NYI_ARM64("Lower - Struct typed arguments (Reg/Stk split)"); - } + if (curReg == lastReg) + break; - short internalIntCount = 0; - if (remainingSlots > 0) - { - // This TYP_STRUCT argument is also passed in the outgoing argument area - // We need a register to address the TYP_STRUCT - // And we may need 2 - internalIntCount = 2; + curReg = REG_NEXT(curReg); } - argNode->gtLsraInfo.internalIntCount = internalIntCount; + + // Struct typed arguments must be fully passed in registers (Reg/Stk split not allowed) + noway_assert(remainingSlots == 0); + argNode->gtLsraInfo.internalIntCount = 0; } + else // A scalar argument (not a struct) + { + // We consume one source + info->srcCount++; - argNode->gtLsraInfo.setDstCandidates(l, argMask); - argNode->gtLsraInfo.setSrcCandidates(l, 
argMask); + argMask |= genRegMask(argReg); + argNode->gtLsraInfo.setDstCandidates(l, argMask); + argNode->gtLsraInfo.setSrcCandidates(l, argMask); - // To avoid redundant moves, have the argument child tree computed in the - // register in which the argument is passed to the call. - if (argNode->gtOper == GT_PUTARG_REG) - { - argNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(argNode)); - } + if (argNode->gtOper == GT_PUTARG_REG) + { + GenTreePtr putArgChild = argNode->gtOp.gtOp1; + + // To avoid redundant moves, request that the argument child tree be + // computed in the register in which the argument is passed to the call. + putArgChild ->gtLsraInfo.setSrcCandidates(l, argMask); + } + } } // Now, count stack args @@ -688,14 +693,29 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) while (args) { GenTreePtr arg = args->gtOp.gtOp1; + + // Skip arguments that havew been moved to the Late Arg list if (!(args->gtFlags & GTF_LATE_ARG)) - { - TreeNodeInfo* argInfo = &(arg->gtLsraInfo); - if (argInfo->dstCount != 0) + { + if (arg->gtOper == GT_PUTARG_STK) { - argInfo->isLocalDefUse = true; + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(tree, arg); + assert(curArgTabEntry); + + assert(curArgTabEntry->regNum == REG_STK); + + TreeNodeInfoInitPutArgStk(arg, curArgTabEntry); + } + else + { + TreeNodeInfo* argInfo = &(arg->gtLsraInfo); + if (argInfo->dstCount != 0) + { + argInfo->isLocalDefUse = true; + } + + argInfo->dstCount = 0; } - argInfo->dstCount = 0; } args = args->gtOp.gtOp2; } @@ -997,6 +1017,60 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) } //------------------------------------------------------------------------ +// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node +// +// Arguments: +// argNode - a GT_PUTARG_STK node +// +// Return Value: +// None. 
+// +// Notes: +// Set the child node(s) to be contained when we have a multireg arg +// +void Lowering::TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info) +{ + assert(argNode->gtOper == GT_PUTARG_STK); + + GenTreePtr putArgChild = argNode->gtOp.gtOp1; + + // Initialize 'argNode' as not contained, as this is both the default case + // and how MakeSrcContained expects to find things setup. + // + argNode->gtLsraInfo.srcCount = 1; + argNode->gtLsraInfo.dstCount = 0; + + // Do we have a TYP_STRUCT argument, if so it must be a 16-byte pass-by-value struct + if (putArgChild->TypeGet() == TYP_STRUCT) + { + // We will use two store instructions that each write a register sized value + + // We must have a multi-reg struct + assert(info->numSlots >= 2); + + // We can use a ldp/stp sequence so we need two internal registers + argNode->gtLsraInfo.internalIntCount = 2; + + if (putArgChild->OperGet() == GT_OBJ) + { + GenTreePtr objChild = putArgChild->gtOp.gtOp1; + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation + // + MakeSrcContained(putArgChild, objChild); + } + } + + // We will generate all of the code for the GT_PUTARG_STK and it's child node + // as one contained operation + // + MakeSrcContained(argNode, putArgChild); + } +} + +//------------------------------------------------------------------------ // TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store. 
// // Arguments: diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 19eee632e2..799b1da34b 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -2543,33 +2543,6 @@ LinearScan::getKillSetForNode(GenTree* tree) break; #endif // PROFILING_SUPPORTED && _TARGET_AMD64_ -#if FEATURE_MULTIREG_ARGS -#ifdef _TARGET_ARM64_ - case GT_PUTARG_REG: - // TODO-Cleanup: Remove this code after Issue #3524 is complete - // - // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64 - // We actually write a second register that isn't being properly tracked - // We can prevent anyone else from being alive at this point by adding - // an extra RefTypeKill for the second register. - // - if (tree->TypeGet() == TYP_STRUCT) - { - TreeNodeInfo info = tree->gtLsraInfo; - regMaskTP dstMask = info.getDstCandidates(this); - - // Make sure that the dstMask represents two consecutive registers - regMaskTP lowRegBit = genFindLowestBit(dstMask); - regMaskTP nextRegBit = lowRegBit << 1; - regMaskTP regPairMask = (lowRegBit | nextRegBit); - - assert(dstMask == regPairMask); - - killMask = nextRegBit; // setup killMask to be the mask for the second register. - } -#endif // _TARGET_ARM64_ -#endif // FEATURE_MULTIREG_ARGS - default: // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE break; @@ -4567,54 +4540,6 @@ LinearScan::tryAllocateFreeReg(Interval *currentInterval, RefPosition *refPositi singleReg = genRegNumFromMask(candidates); regOrder = &singleReg; } -#if FEATURE_MULTIREG_ARGS -#ifdef _TARGET_ARM64_ - // TODO-Cleanup: Remove this code after Issue #3524 is complete - // - // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64 - if (regType == TYP_STRUCT) - { - // We currently use two consecutive registers: - // to pass in argument registers or - // to load and the store into the outgoing arg space - - // TODO: revisit this and remove the limitation that we use two consecutive registers. 
- - // Make sure that we have two consecutive registers available - regMaskTP lowRegBit = genFindLowestBit(candidates); - regMaskTP nextRegBit = lowRegBit << 1; - regMaskTP regPairMask = (lowRegBit | nextRegBit); - - do { - // Are there two consecutive register bits available? - if ((candidates & regPairMask) == regPairMask) - { - // We use the same trick as above when regOrderSize, singleReg and regOrder are set - regOrderSize = 1; - singleReg = genRegNumFromMask(lowRegBit); - regOrder = &singleReg; - break; - } - // setup the next register pair bit - lowRegBit = nextRegBit; - nextRegBit = lowRegBit << 1; // shift left by one bit - regPairMask = (lowRegBit | nextRegBit); - - } while (nextRegBit != 0); // If we shifted out all of the bits then nextRegBit will become zero - // Note that shifting out all of the bits is an error, and we catch it with the following noway_assert - - // Make sure we took the break to exit the while loop - noway_assert(singleReg != REG_NA); - - // Unless we setup singleReg we have to issue an NYI error here - if (singleReg == REG_NA) - { - // Need support for MultiReg sized structs - NYI("Multireg struct - LinearScan::tryAllocateFreeReg"); - } - } -#endif // _TARGET_ARM64_ -#endif // FEATURE_MULTIREG_ARGS for (unsigned i = 0; i < regOrderSize && (candidates != RBM_NONE); i++) { @@ -5116,23 +5041,6 @@ void LinearScan::assignPhysReg( RegRecord * regRec, Interval * interval) } #endif // _TARGET_ARM_ -#if FEATURE_MULTIREG_ARGS_OR_RET -#ifdef _TARGET_ARM64_ - // TODO-Cleanup: Remove this code after Issue #3524 is complete - // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64 - if (interval->registerType == TYP_STRUCT) - { - // We use two consecutive registers: - // to pass in argument registers or - // to load and the store into the outgoing arg space - regNumber nextRegNum = REG_NEXT(regRec->regNum); - RegRecord * nextRegRec = getRegisterRecord(nextRegNum); - - checkAndAssignInterval(nextRegRec, interval); - } -#endif // _TARGET_ARM64_ 
-#endif // FEATURE_MULTIREG_ARGS_OR_RET - interval->physReg = regRec->regNum; interval->isActive = true; if (interval->isLocalVar) @@ -5293,24 +5201,6 @@ void LinearScan::unassignPhysReg( RegRecord * regRec, RefPosition* spillRefPosit } #endif // _TARGET_ARM_ -#if FEATURE_MULTIREG_ARGS_OR_RET -#ifdef _TARGET_ARM64_ - // TODO-Cleanup: Remove this code after Issue #3524 is complete - // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64 - if (assignedInterval->registerType == TYP_STRUCT) - { - - // We use two consecutive registers: - // to pass in argument registers or - // to load and the store into the outgoing arg space - - regNumber nextRegNum = REG_NEXT(regRec->regNum); - RegRecord * nextRegRec = getRegisterRecord(nextRegNum); - checkAndClearInterval(nextRegRec, spillRefPosition); - } -#endif // _TARGET_ARM64_ -#endif // FEATURE_MULTIREG_ARGS_OR_RET - #ifdef DEBUG if (VERBOSE && !dumpTerse) { diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index c38b906a07..567c569223 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -1580,6 +1580,38 @@ void fgArgInfo::ArgsComplete() #endif } } + +#ifndef LEGACY_BACKEND + // For RyuJIT backend we will expand a Multireg arg into a GT_LIST + // with multiple indirections, so here we consider spilling it into a tmp LclVar. 
+ // + // Note that Arm32 is a LEGACY_BACKEND and it defines FEATURE_MULTIREG_ARGS + // so we skip this for ARM32 until it is ported to use RyuJIT backend + // +#if FEATURE_MULTIREG_ARGS + if ((argx->TypeGet() == TYP_STRUCT) && + (curArgTabEntry->numRegs > 1) && + (curArgTabEntry->needTmp == false)) + { + if ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) + { + // Spill multireg struct arguments that have Assignments or Calls embedded in them + curArgTabEntry->needTmp = true; + } + else + { + // We call gtPrepareCost to measure the cost of evaluating this tree + compiler->gtPrepareCost(argx); + + if (argx->gtCostEx > (6 * IND_COST_EX)) + { + // Spill multireg struct arguments that are expensive to evaluate twice + curArgTabEntry->needTmp = true; + } + } + } +#endif // FEATURE_MULTIREG_ARGS +#endif // LEGACY_BACKEND } @@ -2905,7 +2937,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING - bool hasStructArgument = false; + bool hasStructArgument = false; // @TODO-ARM64-UNIX: Eemove this bool during a future refactoring + bool hasMultiregStructArgs = false; for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2) { GenTreePtr * parentArgx = &args->gtOp.gtOp1; @@ -2916,6 +2949,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1); } #endif // FEATURE_MULTIREG_ARGS + argx = fgMorphTree(*parentArgx); *parentArgx = argx; flagsSummary |= argx->gtFlags; @@ -3108,6 +3142,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) { size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc); + if (size > 1) + { + hasMultiregStructArgs = true; + } } #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING size = 1; // On AMD64, 
all primitives fit in a single (64-bit) 'slot' @@ -3121,6 +3159,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) { size = 1; // Large structs are passed by reference (to a copy) } + else if (size == 2) + { + hasMultiregStructArgs = true; + } // Note that there are some additional rules for size=2 structs, // (i.e they cannot be split betwen registers and the stack) } @@ -3411,6 +3453,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #endif // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND) } } + if (size > 1) + { + hasMultiregStructArgs = true; + } } // The 'size' value has now must have been set. (the original value of zero is an invalid value) @@ -3954,7 +4000,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) bool needEvalArgsToTemps = true; - if (lateArgsComputed || (intArgRegNum == 0 && fltArgRegNum == 0 && !hasNonStandardArg && !hasStructArgument)) + if (lateArgsComputed || (intArgRegNum == 0 && fltArgRegNum == 0 && !hasNonStandardArg && !hasStructArgument)) { needEvalArgsToTemps = false; } @@ -3976,8 +4022,22 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // Rewrite the struct args to be passed by value on stack or in registers. 
fgMorphSystemVStructArgs(call, hasStructArgument); + +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING + + // In the future we can migrate UNIX_AMD64 to use this + // method instead of fgMorphSystemVStructArgs +#ifndef LEGACY_BACKEND + // We only build GT_LISTs for MultiReg structs for the RyuJIT backend + if (hasMultiregStructArgs) + { + fgMorphMultiregStructArgs(call); + } +#endif // LEGACY_BACKEND + #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING return call; @@ -4173,6 +4233,336 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen } #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +//----------------------------------------------------------------------------- +// fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and +// call fgMorphMultiregStructArg on each of them. +// +// Arguments: +// call: a GenTreeCall node that has one or more TYP_STRUCT arguments +// +// Notes: +// We only call fgMorphMultiregStructArg for the register passed TYP_STRUCT arguments. +// The call to fgMorphMultiregStructArg will mutate the argument into the GT_LIST form +// whicj is only used for register arguments. +// If this method fails to find any TYP_STRUCT arguments it will assert. 
+// +void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) +{ + GenTreePtr args; + GenTreePtr argx; + bool foundStructArg = false; + unsigned initialFlags = call->gtFlags; + unsigned flagsSummary = 0; + fgArgInfoPtr allArgInfo = call->fgArgInfo; + + // Currently only ARM64 is using this method to morph the MultiReg struct args + // in the future AMD64_UNIX and for HFAs ARM32, will also use this method + // +#ifdef _TARGET_ARM_ + NYI_ARM("fgMorphMultiregStructArgs"); +#endif +#ifdef _TARGET_X86_ + assert("Logic error: no MultiregStructArgs for X86"); +#endif +#ifdef _TARGET_AMD64_ +#if defined(UNIX_AMD64_ABI) + NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)"); +#else +#endif + assert("Logic error: no MultiregStructArgs for Windows X64 ABI"); +#endif + + for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2) + { + // For late arguments the arg tree that is overridden is in the gtCallLateArgs list. + // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.) + // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping + // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself, + // otherwise points to the list in the late args list. 
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0; + fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1); + assert(fgEntryPtr != nullptr); + GenTreePtr argx = fgEntryPtr->node; + GenTreePtr lateList = nullptr; + GenTreePtr lateNode = nullptr; + + if (isLateArg) + { + for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) + { + assert(list->IsList()); + + GenTreePtr argNode = list->Current(); + if (argx == argNode) + { + lateList = list; + lateNode = argNode; + break; + } + } + assert(lateList != nullptr && lateNode != nullptr); + } + + GenTreePtr arg = argx; + + if (arg->TypeGet() == TYP_STRUCT) + { + foundStructArg = true; + + // We don't create GT_LIST for any multireg TYP_STRUCT arguments + if (fgEntryPtr->regNum == REG_STK) + { + continue; + } + + arg = fgMorphMultiregStructArg(arg); + + // Did we replace 'argx' with a new tree? + if (arg != argx) + { + bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0; + fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1); + assert(fgEntryPtr != nullptr); + GenTreePtr argx = fgEntryPtr->node; + GenTreePtr lateList = nullptr; + GenTreePtr lateNode = nullptr; + if (isLateArg) + { + for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) + { + assert(list->IsList()); + + GenTreePtr argNode = list->Current(); + if (argx == argNode) + { + lateList = list; + lateNode = argNode; + break; + } + } + assert(lateList != nullptr && lateNode != nullptr); + } + + fgEntryPtr->node = arg; + if (isLateArg) + { + lateList->gtOp.gtOp1 = arg; + } + else + { + args->gtOp.gtOp1 = arg; + } + } + } + } + + // We should only call this method when we actually have one or more multireg struct args + assert(foundStructArg); + + // Update the flags + call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT); +} + + +//----------------------------------------------------------------------------- +// fgMorphMultiregStructArg: Given a multireg TYP_STRUCT arg 
from a call argument list +// Morph the argument into a set of GT_LIST nodes. +// +// Arguments: +// arg - A GenTree node containing a TYP_STRUCT arg that +// is to be passed in multiple registers +// Notes: +// arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT that is suitable +// for passing in multiple registers. +// If arg is a LclVar we check if it is struct promoted and has the right number of fields +// and if they are at the appropriate offsets we will use the struct promted fields +// in the GT_LIST nodes that we create. +// If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements +// we will use a set of GT_LCL_FLDs nodes to access the various portions of the struct +// this also forces the struct to be stack allocated into the local frame. +// For the GT_OBJ case will clone the address expression and generate two (or more) +// indirections. +// Currently the implementation only handles ARM64 and will NYI for other architectures. +// And for ARM64 we do not ye handle HFA arguments, so only 16-byte struct sizes are supported. +// +GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg) +{ + GenTreeArgList* newArg = nullptr; + assert(arg->TypeGet() == TYP_STRUCT); + GenTreePtr argValue = arg; + +#ifndef _TARGET_ARM64_ + NYI("fgMorphMultiregStructArg non-ARM64 implementation"); +#endif + + // If we have a GT_OBJ of a GT_ADDR then + // we set argValue to the child node ofthe GT_ADDR + if (arg->OperGet() == GT_OBJ) + { + GenTreePtr argAddr = arg->gtOp.gtOp1; + + if (argAddr->OperGet() == GT_ADDR) + { + argValue = argAddr->gtOp.gtOp1; + } + } + // We should still have a TYP_STRUCT + assert(argValue->TypeGet() == TYP_STRUCT); + + // Are we passing a struct LclVar? 
+ // + if (argValue->OperGet() == GT_LCL_VAR) + { + GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon(); + unsigned varNum = varNode->gtLclNum; + assert(varNum < lvaCount); + LclVarDsc* varDsc = &lvaTable[varNum]; + + // At this point any TYP_STRUCT LclVar must be a 16-byte pass by value argument + assert(varDsc->lvSize() == 2 * TARGET_POINTER_SIZE); + + const BYTE * gcPtrs = varDsc->lvGcLayout; + + var_types type0 = getJitGCType(gcPtrs[0]); + var_types type1 = getJitGCType(gcPtrs[1]); + + varDsc->lvIsMultiRegArgOrRet = true; + + // Is this LclVar a promoted struct with exactly two fields? + if ((varDsc->lvPromoted) && (varDsc->lvFieldCnt == 2)) + { + // See if we have two promoted fields that start at offset 0 and 8? + unsigned loVarNum = lvaGetFieldLocal(varDsc, 0); + unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE); + + // Did we find the promoted fields at the necessary offsets? + if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM)) + { + LclVarDsc* loVarDsc = &lvaTable[loVarNum]; + LclVarDsc* hiVarDsc = &lvaTable[hiVarNum]; + + var_types loType = loVarDsc->lvType; + var_types hiType = hiVarDsc->lvType; + + GenTreePtr loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum); + GenTreePtr hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum); + + // Create a new tree for 'arg' + // replace the existing LDOBJ(ADDR(LCLVAR)) + // with a LIST(LCLVAR-LO, LIST(LCLVAR-HI, nullptr)) + // + newArg = gtNewListNode(loLclVar, gtNewArgList(hiLclVar)); + } + } + if (newArg == nullptr) + { + GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon(); + unsigned varNum = varNode->gtLclNum; + assert(varNum < lvaCount); + LclVarDsc* varDsc = &lvaTable[varNum]; + + // + // We weren't able to pass this LclVar using it's struct promted fields + // + // Instead we will create a list of GT_LCL_FLDs nodes to pass this struct + // + lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField)); + + GenTreePtr loLclFld = gtNewLclFldNode(varNum, type0, 0); + 
GenTreePtr hiLclFld = gtNewLclFldNode(varNum, type1, TARGET_POINTER_SIZE); + + // Create a new tree for 'arg' + // replace the existing LDOBJ(ADDR(LCLVAR)) + // with a LIST(LCLFLD-LO, LIST(LCLFLD-HI, nullptr)) + // + newArg = gtNewListNode(loLclFld, gtNewArgList(hiLclFld)); + } + } + // Are we passing a GT_LCL_FLD which contain a 16-byte struct inside it? + // + else if (argValue->OperGet() == GT_LCL_FLD) + { + GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon(); + unsigned varNum = varNode->gtLclNum; + assert(varNum < lvaCount); + LclVarDsc* varDsc = &lvaTable[varNum]; + + unsigned baseOffset = argValue->gtLclFld.gtLclOffs; + unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE; + unsigned requiredSize = baseOffset + (2 * TARGET_POINTER_SIZE); + + // The allocated size of our LocalVar must be at least as big as requiredSize + assert(varDsc->lvSize() >= requiredSize); + + const BYTE * gcPtrs = varDsc->lvGcLayout; + + var_types type0 = getJitGCType(gcPtrs[baseIndex+0]); + var_types type1 = getJitGCType(gcPtrs[baseIndex+1]); + + // + // We create a list of two GT_LCL_FLDs nodes to pass this struct + // + lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField)); + + GenTreePtr loLclFld = gtNewLclFldNode(varNum, type0, baseOffset); + GenTreePtr hiLclFld = gtNewLclFldNode(varNum, type1, baseOffset + TARGET_POINTER_SIZE); + + // Create a new tree for 'arg' + // replace the existing LDOBJ(ADDR(LCLVAR)) + // with a LIST(LCLFLD-LO, LIST(LCLFLD-HI, nullptr)) + // + newArg = gtNewListNode(loLclFld, gtNewArgList(hiLclFld)); + } + // Are we passing a GT_OBJ struct? 
+ // + else if (argValue->OperGet() == GT_OBJ) + { + GenTreeObj* argObj = argValue->AsObj(); + CORINFO_CLASS_HANDLE objClass = argObj->gtClass; + + int structSize = info.compCompHnd->getClassSize(objClass); + assert(structSize <= 2 * TARGET_POINTER_SIZE); + BYTE gcPtrs[2] = { TYPE_GC_NONE, TYPE_GC_NONE }; + info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); + + var_types type0 = getJitGCType(gcPtrs[0]); + var_types type1 = getJitGCType(gcPtrs[1]); + + GenTreePtr baseAddr = argObj->gtOp1; + GenTreePtr baseAddrDup = gtCloneExpr(baseAddr); + noway_assert(baseAddrDup != nullptr); + + var_types addrType = baseAddr->TypeGet(); + GenTreePtr loAddr = baseAddr; + GenTreePtr hiAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(TARGET_POINTER_SIZE, TYP_I_IMPL)); + GenTreePtr loValue = gtNewOperNode(GT_IND, type0, loAddr); + GenTreePtr hiValue = gtNewOperNode(GT_IND, type1, hiAddr); + + // Create a new tree for 'arg' + // replace the existing LDOBJ(EXPR) + // with a LIST(IND(EXPR), LIST(IND(EXPR+8), nullptr)) + // + newArg = gtNewListNode(loValue, gtNewArgList(hiValue)); + } + else + { + assert(!"Missing case in fgMorphMultiregStructArg"); + } + + assert(newArg != nullptr); + +#ifdef DEBUG + if (verbose) + { + printf("fgMorphMultiregStructArg created tree:\n"); + gtDispTree(newArg); + } +#endif + + arg = newArg; // consider calling fgMorphTree(newArg); + return arg; +} + // Make a copy of a struct variable if necessary, to pass to a callee. 
// returns: tree that computes address of the outgoing arg void @@ -15170,13 +15560,13 @@ void Compiler::fgPromoteStructs() JITDUMP("Stopped promoting struct fields, due to too many locals.\n"); break; } -#if FEATURE_MULTIREG_ARGS_OR_RET +#if !FEATURE_MULTIREG_STRUCT_PROMOTE if (varDsc->lvIsMultiRegArgOrRet) { JITDUMP("Skipping V%02u: marked lvIsMultiRegArgOrRet.\n", lclNum); continue; } -#endif // FEATURE_MULTIREG_ARGS_OR_RET +#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE #ifdef FEATURE_SIMD if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic) @@ -15209,7 +15599,6 @@ void Compiler::fgPromoteStructs() lclNum, structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed); continue; } - #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) // TODO-PERF - Only do this when the LclVar is used in an argument context // TODO-ARM64 - HFA support should also eliminate the need for this. @@ -15226,7 +15615,7 @@ void Compiler::fgPromoteStructs() continue; } #endif // _TARGET_AMD64_ || _TARGET_ARM64_ -#if FEATURE_MULTIREG_ARGS +#if !FEATURE_MULTIREG_STRUCT_PROMOTE #if defined(_TARGET_ARM64_) // // For now we currently don't promote structs that could be passed in registers @@ -15238,10 +15627,22 @@ void Compiler::fgPromoteStructs() continue; } #endif // _TARGET_ARM64_ -#endif // FEATURE_MULTIREG_ARGS +#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE if (varDsc->lvIsParam) { +#if FEATURE_MULTIREG_STRUCT_PROMOTE + if (varDsc->lvIsMultiRegArgOrRet) // Is this argument variable holding a value passed in multiple registers? 
+ { + if (structPromotionInfo.fieldCnt != 2) + { + JITDUMP("Not promoting multireg struct local V%02u, because lvIsParam is true and #fields = %d.\n", + lclNum, structPromotionInfo.fieldCnt); + continue; + } + } + else +#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE if (structPromotionInfo.fieldCnt != 1) { JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = %d.\n", diff --git a/src/jit/target.h b/src/jit/target.h index 88ad1b0bb9..4726c7e3da 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -372,6 +372,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC #define FEATURE_FIXED_OUT_ARGS 0 // X86 uses push instructions to pass args #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers + #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers #define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (without ".tail" prefix) made as fast tail calls. #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set @@ -692,6 +693,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define FEATURE_WRITE_BARRIER 1 // Generate the WriteBarrier calls for GC (currently not the x86-style register-customized barriers) #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers + #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. 
#define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set @@ -1124,6 +1126,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers + #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers #define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. #define FEATURE_SET_FLAGS 1 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set @@ -1438,6 +1441,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers + #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers #define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. #define FEATURE_SET_FLAGS 1 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set @@ -1647,6 +1651,8 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_ARG_FIRST REG_R0 #define REG_ARG_LAST REG_R7 + #define REG_ARG_FP_FIRST REG_V0 + #define REG_ARG_FP_LAST REG_V7 #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots #define REG_ARG_0 REG_R0 |