diff options
author | Mikhail Skvortcov <m.skvortcov@partner.samsung.com> | 2016-12-16 13:41:10 +0300 |
---|---|---|
committer | Mikhail Skvortcov <m.skvortcov@partner.samsung.com> | 2016-12-22 19:27:19 +0300 |
commit | ca9f5f7062bdf4970f124d64d8d8e0a10fb26cc3 (patch) | |
tree | 91a47fe5581dc1f2aefbf259be869a3c8d5747e0 /src/jit | |
parent | 2d49c2c743831b7078c4360f28a81ba28fc47a05 (diff) | |
download | coreclr-ca9f5f7062bdf4970f124d64d8d8e0a10fb26cc3.tar.gz coreclr-ca9f5f7062bdf4970f124d64d8d8e0a10fb26cc3.tar.bz2 coreclr-ca9f5f7062bdf4970f124d64d8d8e0a10fb26cc3.zip |
ARM: A step towards the RyuJIT/ARM32 backend.
Diffstat (limited to 'src/jit')
-rw-r--r-- | src/jit/codegenarm.cpp | 683 | ||||
-rw-r--r-- | src/jit/codegenlinear.cpp | 7 | ||||
-rw-r--r-- | src/jit/compiler.cpp | 4 | ||||
-rw-r--r-- | src/jit/emitarm.cpp | 57 | ||||
-rw-r--r-- | src/jit/lower.h | 3 | ||||
-rw-r--r-- | src/jit/lowerarm.cpp | 1075 | ||||
-rw-r--r-- | src/jit/rationalize.cpp | 2 | ||||
-rw-r--r-- | src/jit/target.h | 3 |
8 files changed, 1697 insertions(+), 137 deletions(-)
diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp index 73e51f2ef7..a7bd93115d 100644 --- a/src/jit/codegenarm.cpp +++ b/src/jit/codegenarm.cpp @@ -23,15 +23,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "gcinfo.h" #include "emit.h" -#ifndef JIT32_GCENCODER -#include "gcinfoencoder.h" -#endif - -/***************************************************************************** - * - * Generate code that will set the given register to the integer constant. - */ - +//------------------------------------------------------------------------ +// genSetRegToIcon: Generate code that will set the given register to the integer constant. +// void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags) { // Reg cannot be a FP reg @@ -42,41 +36,36 @@ void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFla // code path. noway_assert(type != TYP_REF || val == 0); - if (val == 0) - { - instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags); - } - else - { - // TODO-CQ: needs all the optimized cases - getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val); - } + instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags); } -/***************************************************************************** - * - * Generate code to check that the GS cookie wasn't thrashed by a buffer - * overrun. If pushReg is true, preserve all registers around code sequence. - * Otherwise, ECX maybe modified. - */ +//------------------------------------------------------------------------ +// genEmitGSCookieCheck: Generate code to check that the GS cookie wasn't thrashed by a buffer overrun. +// void CodeGen::genEmitGSCookieCheck(bool pushReg) { NYI("ARM genEmitGSCookieCheck"); } +//------------------------------------------------------------------------ +// genCallFinally: Generate a call to the finally block. 
+// BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk) { NYI("ARM genCallFinally"); return block; } -// move an immediate value into an integer register - +//------------------------------------------------------------------------ +// genEHCatchRet: void CodeGen::genEHCatchRet(BasicBlock* block) { NYI("ARM genEHCatchRet"); } +//------------------------------------------------------------------------ +// instGen_Set_Reg_To_Imm: Move an immediate value into an integer register. +// void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags) { // reg cannot be a FP register @@ -87,23 +76,60 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs } - if ((imm == 0) && !EA_IS_RELOC(size)) + if (EA_IS_RELOC(size)) + { + getEmitter()->emitIns_R_I(INS_movw, size, reg, imm); + getEmitter()->emitIns_R_I(INS_movt, size, reg, imm); + } + else if (imm == 0) { instGen_Set_Reg_To_Zero(size, reg, flags); } else { - getEmitter()->emitIns_R_I(INS_mov, size, reg, imm); + if (arm_Valid_Imm_For_Mov(imm)) + { + getEmitter()->emitIns_R_I(INS_mov, size, reg, imm, flags); + } + else // We have to use a movw/movt pair of instructions + { + ssize_t imm_lo16 = (imm & 0xffff); + ssize_t imm_hi16 = (imm >> 16) & 0xffff; + + assert(arm_Valid_Imm_For_Mov(imm_lo16)); + assert(imm_hi16 != 0); + + getEmitter()->emitIns_R_I(INS_movw, size, reg, imm_lo16); + + // If we've got a low register, the high word is all bits set, + // and the high bit of the low word is set, we can sign extend + // halfword and save two bytes of encoding. This can happen for + // small magnitude negative numbers 'n' for -32768 <= n <= -1. 
+ + if (getEmitter()->isLowRegister(reg) && (imm_hi16 == 0xffff) && ((imm_lo16 & 0x8000) == 0x8000)) + { + getEmitter()->emitIns_R_R(INS_sxth, EA_2BYTE, reg, reg); + } + else + { + getEmitter()->emitIns_R_I(INS_movt, size, reg, imm_hi16); + } + + if (flags == INS_FLAGS_SET) + getEmitter()->emitIns_R_R(INS_mov, size, reg, reg, INS_FLAGS_SET); + } } + regTracker.rsTrackRegIntCns(reg, imm); } -/***************************************************************************** - * - * Generate code to set a register 'targetReg' of type 'targetType' to the constant - * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call - * genProduceReg() on the target register. - */ +//------------------------------------------------------------------------ +// genSetRegToConst: Generate code to set a register 'targetReg' of type 'targetType' +// to the constant specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. +// +// Notes: +// This does not call genProduceReg() on the target register. +// void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree) { switch (tree->gtOper) @@ -139,18 +165,22 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre } } -/***************************************************************************** - * - * Generate code for a single node in the tree. - * Preconditions: All operands have been evaluated - * - */ +//------------------------------------------------------------------------ +// genCodeForTreeNode Generate code for a single node in the tree. +// +// Preconditions: +// All operands have been evaluated. 
+// void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) { regNumber targetReg = treeNode->gtRegNum; var_types targetType = treeNode->TypeGet(); emitter* emit = getEmitter(); +#ifdef DEBUG + lastConsumedNode = nullptr; +#endif + JITDUMP("Generating: "); DISPNODE(treeNode); @@ -169,10 +199,26 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) genProduceReg(treeNode); break; - case GT_NEG: case GT_NOT: + assert(!varTypeIsFloating(targetType)); + + __fallthrough; + + case GT_NEG: { - NYI("GT_NEG and GT_NOT"); + instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(!treeNode->isContained()); + // The dst can only be a register. + assert(targetReg != REG_NA); + + GenTreePtr operand = treeNode->gtGetOp1(); + assert(!operand->isContained()); + // The src must be a register. + regNumber operandReg = genConsumeReg(operand); + + getEmitter()->emitIns_R_R_I(ins, emitTypeSize(treeNode), targetReg, operandReg, 0); } genProduceReg(treeNode); break; @@ -185,9 +231,10 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_ADD: case GT_SUB: + case GT_MUL: { const genTreeOps oper = treeNode->OperGet(); - if ((oper == GT_ADD || oper == GT_SUB) && treeNode->gtOverflow()) + if ((oper == GT_ADD || oper == GT_SUB || oper == GT_MUL) && treeNode->gtOverflow()) { // This is also checked in the importer. 
NYI("Overflow not yet implemented"); @@ -429,17 +476,11 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case GT_IND: + genConsumeAddress(treeNode->AsIndir()->Addr()); emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode); genProduceReg(treeNode); break; - case GT_MUL: - { - NYI("GT_MUL"); - } - genProduceReg(treeNode); - break; - case GT_MOD: case GT_UDIV: case GT_UMOD: @@ -579,7 +620,68 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_STOREIND: { - NYI("GT_STOREIND"); + GenTreeStoreInd* storeInd = treeNode->AsStoreInd(); + GenTree* data = storeInd->Data(); + GenTree* addr = storeInd->Addr(); + var_types targetType = storeInd->TypeGet(); + + assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet())); + + GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data); + if (writeBarrierForm != GCInfo::WBF_NoBarrier) + { + // data and addr must be in registers. + // Consume both registers so that any copies of interfering + // registers are taken care of. + genConsumeOperands(storeInd->AsOp()); + +#if NOGC_WRITE_BARRIERS + NYI_ARM("NOGC_WRITE_BARRIERS"); +#else + // At this point, we should not have any interference. + // That is, 'data' must not be in REG_ARG_0, + // as that is where 'addr' must go. + noway_assert(data->gtRegNum != REG_ARG_0); + + // addr goes in REG_ARG_0 + if (addr->gtRegNum != REG_ARG_0) + { + inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); + } + + // data goes in REG_ARG_1 + if (data->gtRegNum != REG_ARG_1) + { + inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); + } +#endif // NOGC_WRITE_BARRIERS + + genGCWriteBarrier(storeInd, writeBarrierForm); + } + else // A normal store, not a WriteBarrier store + { + bool reverseOps = ((storeInd->gtFlags & GTF_REVERSE_OPS) != 0); + bool dataIsUnary = false; + + // We must consume the operands in the proper execution order, + // so that liveness is updated appropriately. 
+ if (!reverseOps) + { + genConsumeAddress(addr); + } + + if (!data->isContained()) + { + genConsumeRegs(data); + } + + if (reverseOps) + { + genConsumeAddress(addr); + } + + emit->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(storeInd), storeInd); + } } break; @@ -682,7 +784,14 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case GT_NO_OP: - NYI("GT_NO_OP"); + if (treeNode->gtFlags & GTF_NO_OP_NO) + { + noway_assert(!"GTF_NO_OP_NO should not be set"); + } + else + { + instGen(INS_nop); + } break; case GT_ARR_BOUNDS_CHECK: @@ -733,13 +842,22 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) emit->emitIns_R_L(INS_lea, EA_PTRSIZE, genPendingCallLabel, treeNode->gtRegNum); break; + case GT_CLS_VAR_ADDR: + emit->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0); + genProduceReg(treeNode); + break; + + case GT_IL_OFFSET: + // Do nothing; these nodes are simply markers for debug info. + break; + default: { #ifdef DEBUG char message[256]; _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet())); - notYetImplemented(message, __FILE__, __LINE__); + NYIRAW(message); #else NYI("unimplemented node"); #endif @@ -748,14 +866,20 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) } } -// generate code for the locked operations: -// GT_LOCKADD, GT_XCHG, GT_XADD +//------------------------------------------------------------------------ +// genLockedInstructions: Generate code for the locked operations. +// +// Notes: +// Handles GT_LOCKADD, GT_XCHG, GT_XADD nodes. +// void CodeGen::genLockedInstructions(GenTreeOp* treeNode) { NYI("genLockedInstructions"); } -// generate code for GT_ARR_BOUNDS_CHECK node +//------------------------------------------------------------------------ +// genRangeCheck: generate code for GT_ARR_BOUNDS_CHECK node. 
+// void CodeGen::genRangeCheck(GenTreePtr oper) { noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK); @@ -791,8 +915,9 @@ void CodeGen::genRangeCheck(GenTreePtr oper) genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB); } -// make a temporary indir we can feed to pattern matching routines -// in cases where we don't want to instantiate all the indirs that happen +//------------------------------------------------------------------------ +// indirForm: Make a temporary indir we can feed to pattern matching routines +// in cases where we don't want to instantiate all the indirs that happen. // GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base) { @@ -804,8 +929,9 @@ GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base) return i; } -// make a temporary int we can feed to pattern matching routines -// in cases where we don't want to instantiate +//------------------------------------------------------------------------ +// intForm: Make a temporary int we can feed to pattern matching routines +// in cases where we don't want to instantiate. // GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value) { @@ -817,6 +943,9 @@ GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value) return i; } +//------------------------------------------------------------------------ +// genGetInsForOper: Return instruction encoding of the operation tree. 
+// instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) { instruction ins; @@ -878,21 +1007,331 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) // void CodeGen::genCodeForShift(GenTreePtr tree) { - NYI("genCodeForShift"); + var_types targetType = tree->TypeGet(); + genTreeOps oper = tree->OperGet(); + instruction ins = genGetInsForOper(oper, targetType); + emitAttr size = emitTypeSize(tree); + + assert(tree->gtRegNum != REG_NA); + + GenTreePtr operand = tree->gtGetOp1(); + genConsumeReg(operand); + + GenTreePtr shiftBy = tree->gtGetOp2(); + if (!shiftBy->IsCnsIntOrI()) + { + genConsumeReg(shiftBy); + getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum); + } + else + { + unsigned immWidth = size * BITS_PER_BYTE; + ssize_t shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth - 1); + + getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm); + } + + genProduceReg(tree); } +//------------------------------------------------------------------------ +// genRegCopy: Generate a register copy. 
+// void CodeGen::genRegCopy(GenTree* treeNode) { NYI("genRegCopy"); } -// Produce code for a GT_CALL node +//------------------------------------------------------------------------ +// genCallInstruction: Produce code for a GT_CALL node +// void CodeGen::genCallInstruction(GenTreePtr node) { - NYI("Call not implemented"); + GenTreeCall* call = node->AsCall(); + + assert(call->gtOper == GT_CALL); + + gtCallTypes callType = (gtCallTypes)call->gtCallType; + + IL_OFFSETX ilOffset = BAD_IL_OFFSET; + + // all virtuals should have been expanded into a control expression + assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr); + + // Consume all the arg regs + for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) + { + assert(list->OperIsList()); + + GenTreePtr argNode = list->Current(); + + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy()); + assert(curArgTabEntry); + + if (curArgTabEntry->regNum == REG_STK) + continue; + + // Deal with multi register passed struct args. 
+ if (argNode->OperGet() == GT_FIELD_LIST) + { + GenTreeArgList* argListPtr = argNode->AsArgList(); + unsigned iterationNum = 0; + regNumber argReg = curArgTabEntry->regNum; + for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++) + { + GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1; + assert(putArgRegNode->gtOper == GT_PUTARG_REG); + + genConsumeReg(putArgRegNode); + + if (putArgRegNode->gtRegNum != argReg) + { + inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg, + putArgRegNode->gtRegNum); + } + + argReg = genRegArgNext(argReg); + } + } + else + { + regNumber argReg = curArgTabEntry->regNum; + genConsumeReg(argNode); + if (argNode->gtRegNum != argReg) + { + inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum); + } + } + + // In the case of a varargs call, + // the ABI dictates that if we have floating point args, + // we must pass the enregistered arguments in both the + // integer and floating point registers so, let's do that. + if (call->IsVarargs() && varTypeIsFloating(argNode)) + { + NYI_ARM("CodeGen - IsVarargs"); + } + } + + // Insert a null check on "this" pointer if asked. + if (call->NeedsNullCheck()) + { + const regNumber regThis = genGetThisArgReg(call); + const regNumber tmpReg = genRegNumFromMask(node->gtRsvdRegs); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, regThis, 0); + } + + // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method. 
+ CORINFO_METHOD_HANDLE methHnd; + GenTree* target = call->gtControlExpr; + if (callType == CT_INDIRECT) + { + assert(target == nullptr); + target = call->gtCall.gtCallAddr; + methHnd = nullptr; + } + else + { + methHnd = call->gtCallMethHnd; + } + + CORINFO_SIG_INFO* sigInfo = nullptr; +#ifdef DEBUG + // Pass the call signature information down into the emitter so the emitter can associate + // native call sites with the signatures they were generated from. + if (callType != CT_HELPER) + { + sigInfo = call->callSig; + } +#endif // DEBUG + + // If fast tail call, then we are done. + if (call->IsFastTailCall()) + { + NYI_ARM("fast tail call"); + } + + // For a pinvoke to unmanaged code we emit a label to clear + // the GC pointer state before the callsite. + // We can't utilize the typical lazy killing of GC pointers + // at (or inside) the callsite. + if (call->IsUnmanaged()) + { + genDefineTempLabel(genCreateTempLabel()); + } + + // Determine return value size(s). + ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + emitAttr retSize = EA_PTRSIZE; + + if (call->HasMultiRegRetVal()) + { + NYI_ARM("has multi reg ret val"); + } + else + { + assert(!varTypeIsStruct(call)); + + if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY) + { + retSize = EA_GCREF; + } + else if (call->gtType == TYP_BYREF) + { + retSize = EA_BYREF; + } + } + + // We need to propagate the IL offset information to the call instruction, so we can emit + // an IL to native mapping record for the call, to support managed return value debugging. + // We don't want tail call helper calls that were converted from normal calls to get a record, + // so we skip this hash table lookup logic in that case. 
+ if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall()) + { + (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset); + } + + if (target != nullptr) + { + // For ARM a call target can not be a contained indirection + assert(!target->isContainedIndir()); + + // We have already generated code for gtControlExpr evaluating it into a register. + // We just need to emit "call reg" in this case. + // + assert(genIsValidIntReg(target->gtRegNum)); + + genEmitCall(emitter::EC_INDIR_R, methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr + retSize, ilOffset, target->gtRegNum); + } + else + { + // Generate a direct call to a non-virtual user defined or helper method + assert(callType == CT_HELPER || callType == CT_USER_FUNC); + + void* addr = nullptr; + if (callType == CT_HELPER) + { + // Direct call to a helper method. + CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); + noway_assert(helperNum != CORINFO_HELP_UNDEF); + + void* pAddr = nullptr; + addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); + + if (addr == nullptr) + { + addr = pAddr; + } + } + else + { + // Direct call to a non-virtual user function. 
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY; + if (call->IsSameThis()) + { + aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS); + } + + if ((call->NeedsNullCheck()) == 0) + { + aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL); + } + + CORINFO_CONST_LOOKUP addrInfo; + compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags); + + addr = addrInfo.addr; + } + + assert(addr); + // Non-virtual direct call to known addresses + if (!arm_Valid_Imm_For_BL((ssize_t)addr)) + { + regNumber tmpReg = genRegNumFromMask(node->gtRsvdRegs); + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, tmpReg, (ssize_t)addr); + genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) NULL, retSize, ilOffset, tmpReg); + } + else + { + genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, retSize, ilOffset); + } + } + + // if it was a pinvoke we may have needed to get the address of a label + if (genPendingCallLabel) + { + assert(call->IsUnmanaged()); + genDefineTempLabel(genPendingCallLabel); + genPendingCallLabel = nullptr; + } + + // Update GC info: + // All Callee arg registers are trashed and no longer contain any GC pointers. + // TODO-ARM-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here? 
+ // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other + // registers from RBM_CALLEE_TRASH + assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); + assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); + gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS; + gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS; + + var_types returnType = call->TypeGet(); + if (returnType != TYP_VOID) + { + regNumber returnReg; + + if (call->HasMultiRegRetVal()) + { + assert(pRetTypeDesc != nullptr); + unsigned regCount = pRetTypeDesc->GetReturnRegCount(); + + // If regs allocated to call node are different from ABI return + // regs in which the call has returned its result, move the result + // to regs allocated to call node. + for (unsigned i = 0; i < regCount; ++i) + { + var_types regType = pRetTypeDesc->GetReturnRegType(i); + returnReg = pRetTypeDesc->GetABIReturnReg(i); + regNumber allocatedReg = call->GetRegNumByIdx(i); + if (returnReg != allocatedReg) + { + inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType); + } + } + } + else + { + if (varTypeIsFloating(returnType)) + { + returnReg = REG_FLOATRET; + } + else + { + returnReg = REG_INTRET; + } + + if (call->gtRegNum != returnReg) + { + inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType); + } + } + + genProduceReg(call); + } + + // If there is nothing next, that means the result is thrown away, so this value is not live. + // However, for minopts or debuggable code, we keep it live to support managed return value debugging. + if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) + { + gcInfo.gcMarkRegSetNpt(RBM_INTRET); + } } -// produce code for a GT_LEA subnode +//------------------------------------------------------------------------ +// genLeaInstruction: Produce code for a GT_LEA subnode. 
+// void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) { if (lea->Base() && lea->Index()) @@ -909,9 +1348,12 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) genProduceReg(lea); } -// Generate code to materialize a condition into a register -// (the condition codes must already have been appropriately set) - +//------------------------------------------------------------------------ +// genSetRegToCond: Generate code to materialize a condition into a register. +// +// Preconditions: +// The condition codes must already have been appropriately set. +// void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree) { NYI("genSetRegToCond"); @@ -993,28 +1435,12 @@ void CodeGen::genFloatToIntCast(GenTreePtr treeNode) NYI("Cast"); } -/***************************************************************************** - * - * Create and record GC Info for the function. - */ -#ifdef JIT32_GCENCODER -void* -#else -void -#endif -CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr)) -{ -#ifdef JIT32_GCENCODER - return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr)); -#else - genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr)); -#endif -} - -// TODO-ARM-Cleanup: It seems that the ARM JIT (classic and otherwise) uses this method, so it seems to be -// inappropriately named? - -void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr)) +//------------------------------------------------------------------------ +// genCreateAndStoreGCInfo: Create and record GC Info for the function. 
+// +void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, + unsigned prologSize, + unsigned epilogSize DEBUGARG(void* codePtr)) { IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC()); GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) @@ -1039,20 +1465,73 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface } -/***************************************************************************** - * Emit a call to a helper function. - */ - -void CodeGen::genEmitHelperCall(unsigned helper, - int argSize, - emitAttr retSize -#ifndef LEGACY_BACKEND - , - regNumber callTargetReg /*= REG_NA */ -#endif // !LEGACY_BACKEND - ) +//------------------------------------------------------------------------ +// genEmitHelperCall: Emit a call to a helper function. +// +void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */) { - NYI("Helper call"); + // Can we call the helper function directly + + void *addr = NULL, **pAddr = NULL; + +#if defined(DEBUG) && defined(PROFILING_SUPPORTED) + // Don't ask VM if it hasn't requested ELT hooks + if (!compiler->compProfilerHookNeeded && compiler->opts.compJitELTHookEnabled && + (helper == CORINFO_HELP_PROF_FCN_ENTER || helper == CORINFO_HELP_PROF_FCN_LEAVE || + helper == CORINFO_HELP_PROF_FCN_TAILCALL)) + { + addr = compiler->compProfilerMethHnd; + } + else +#endif + { + addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr); + } + + if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr)) + { + if (callTargetReg == REG_NA) + { + // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but + // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET. 
+ callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET; + } + + // Load the address into a register and call through a register + if (addr) + { + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, callTargetReg, (ssize_t)addr); + } + else + { + getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, callTargetReg, (ssize_t)pAddr); + regTracker.rsTrackRegTrash(callTargetReg); + } + + getEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper), + INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr + argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + BAD_IL_OFFSET, // ilOffset + callTargetReg, // ireg + REG_NA, 0, 0, // xreg, xmul, disp + false, // isJump + emitter::emitNoGChelper(helper), + (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE); + } + else + { + getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper), + INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0, + 0, /* ilOffset, ireg, xreg, xmul, disp */ + false, /* isJump */ + emitter::emitNoGChelper(helper), + (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE); + } + + regTracker.rsTrashRegSet(RBM_CALLEE_TRASH); + regTracker.rsTrashRegsForGCInterruptability(); } #endif // _TARGET_ARM_ diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp index 9713288e08..9cb5204d8d 100644 --- a/src/jit/codegenlinear.cpp +++ b/src/jit/codegenlinear.cpp @@ -906,6 +906,13 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) // Load local variable from its home location. inst_RV_TT(ins, dstReg, unspillTree, 0, attr); +#elif defined(_TARGET_ARM_) + var_types targetType = unspillTree->gtType; + instruction ins = ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)); + emitAttr attr = emitTypeSize(targetType); + + // Load local variable from its home location. 
+ inst_RV_TT(ins, dstReg, unspillTree, 0, attr); #else NYI("Unspilling not implemented for this target architecture."); #endif diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp index 114847c0d0..c4ccf88e09 100644 --- a/src/jit/compiler.cpp +++ b/src/jit/compiler.cpp @@ -4598,6 +4598,10 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD; assert(REG_OPT_RSVD != REG_FP); } + // compRsvdRegCheck() has read out the FramePointerUsed property, but doLinearScan() + // tries to overwrite it later. This violates the PhasedVar rule and triggers an assertion. + // TODO-ARM-Bug?: What is the proper way to handle this situation? + codeGen->resetFramePointerUsedWritePhase(); #ifdef DEBUG // diff --git a/src/jit/emitarm.cpp b/src/jit/emitarm.cpp index 45928ca2d2..1b3ef1bdc7 100644 --- a/src/jit/emitarm.cpp +++ b/src/jit/emitarm.cpp @@ -7536,31 +7536,53 @@ void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node) switch (node->OperGet()) { case GT_IND: - { - GenTree* addr = node->gtGetOp1(); - assert(!addr->isContained()); - codeGen->genConsumeReg(addr); - emitIns_R_R(ins, attr, node->gtRegNum, addr->gtRegNum); - } - break; - case GT_STOREIND: { - GenTree* addr = node->gtGetOp1(); - GenTree* data = node->gtOp.gtOp2; + GenTreeIndir* indir = node->AsIndir(); + GenTree* addr = indir->Addr(); + GenTree* data = indir->gtOp.gtOp2; - assert(!addr->isContained()); - assert(!data->isContained()); - codeGen->genConsumeReg(addr); - codeGen->genConsumeReg(data); + regNumber reg = (node->OperGet() == GT_IND) ? 
node->gtRegNum : data->gtRegNum; - if (addr->OperGet() == GT_CLS_VAR_ADDR) + if (addr->isContained()) { - emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0); + assert(addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA); + + int offset = 0; + DWORD lsl = 0; + + if (addr->OperGet() == GT_LEA) + { + offset = (int)addr->AsAddrMode()->gtOffset; + if (addr->AsAddrMode()->gtScale > 0) + { + assert(isPow2(addr->AsAddrMode()->gtScale)); + BitScanForward(&lsl, addr->AsAddrMode()->gtScale); + } + } + + GenTree* memBase = indir->Base(); + + if (indir->HasIndex()) + { + NYI_ARM("emitInsMov HasIndex"); + } + else + { + // TODO check offset is valid for encoding + emitIns_R_R_I(ins, attr, reg, memBase->gtRegNum, offset); + } } else { - emitIns_R_R(ins, attr, addr->gtRegNum, data->gtRegNum); + if (addr->OperGet() == GT_CLS_VAR_ADDR) + { + emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0); + } + else + { + emitIns_R_R(ins, attr, reg, addr->gtRegNum); + } } } break; @@ -7581,7 +7603,6 @@ void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node) else { assert(!data->isContained()); - codeGen->genConsumeReg(data); emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0); codeGen->genUpdateLife(varNode); } diff --git a/src/jit/lower.h b/src/jit/lower.h index c1cafb4ee8..555b9e26c6 100644 --- a/src/jit/lower.h +++ b/src/jit/lower.h @@ -210,6 +210,9 @@ private: #ifdef _TARGET_ARM64_ void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info); #endif // _TARGET_ARM64_ +#ifdef _TARGET_ARM_ + void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info); +#endif // _TARGET_ARM64_ #ifdef FEATURE_PUT_STRUCT_ARG_STK void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* tree); #endif // FEATURE_PUT_STRUCT_ARG_STK diff --git a/src/jit/lowerarm.cpp b/src/jit/lowerarm.cpp index 5bf23c4199..92467d79d2 100644 --- a/src/jit/lowerarm.cpp +++ b/src/jit/lowerarm.cpp @@ -23,8 +23,6 @@ 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator -// The ARM backend is not yet implemented, so the methods here are all NYI. -// TODO-ARM-NYI: Lowering for ARM. #ifdef _TARGET_ARM_ #include "jit.h" @@ -33,6 +31,109 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "lsra.h" //------------------------------------------------------------------------ +// LowerStoreLoc: Lower a store of a lclVar +// +// Arguments: +// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) +// +// Notes: +// This involves: +// - Setting the appropriate candidates for a store of a multi-reg call return value. +// - Handling of contained immediates and widening operations of unsigneds. +// +void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) +{ + TreeNodeInfo* info = &(storeLoc->gtLsraInfo); + + // Is this the case of var = call where call is returning + // a value in multiple return registers? + GenTree* op1 = storeLoc->gtGetOp1(); + if (op1->IsMultiRegCall()) + { + // backend expects to see this case only for store lclvar. + assert(storeLoc->OperGet() == GT_STORE_LCL_VAR); + + // srcCount = number of registers in which the value is returned by call + GenTreeCall* call = op1->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + info->srcCount = retTypeDesc->GetReturnRegCount(); + + // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1 + regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call); + op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates); + return; + } + + CheckImmedAndMakeContained(storeLoc, op1); + + // Try to widen the ops if they are going into a local var. 
+ if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT)) + { + GenTreeIntCon* con = op1->AsIntCon(); + ssize_t ival = con->gtIconVal; + unsigned varNum = storeLoc->gtLclNum; + LclVarDsc* varDsc = comp->lvaTable + varNum; + + if (varDsc->lvIsSIMDType()) + { + noway_assert(storeLoc->gtType != TYP_STRUCT); + } + unsigned size = genTypeSize(storeLoc); + // If we are storing a constant into a local variable + // we extend the size of the store here + if ((size < 4) && !varTypeIsStruct(varDsc)) + { + if (!varTypeIsUnsigned(varDsc)) + { + if (genTypeSize(storeLoc) == 1) + { + if ((ival & 0x7f) != ival) + { + ival = ival | 0xffffff00; + } + } + else + { + assert(genTypeSize(storeLoc) == 2); + if ((ival & 0x7fff) != ival) + { + ival = ival | 0xffff0000; + } + } + } + + // A local stack slot is at least 4 bytes in size, regardless of + // what the local var is typed as, so auto-promote it here + // unless it is a field of a promoted struct + // TODO-ARM-CQ: if the field is promoted shouldn't we also be able to do this? + if (!varDsc->lvIsStructField) + { + storeLoc->gtType = TYP_INT; + con->SetIconValue(ival); + } + } + } +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitCmp: Lower a GT comparison node. +// +// Arguments: +// tree - the node to lower +// +// Return Value: +// None. +// +void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree) +{ + TreeNodeInfo* info = &(tree->gtLsraInfo); + + info->srcCount = 2; + info->dstCount = 1; + CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2); +} + +//------------------------------------------------------------------------ // LowerCast: Lower GT_CAST(srcType, DstType) nodes. // // Arguments: @@ -57,7 +158,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Note that for the overflow conversions we still depend on helper calls and // don't expect to see them here. 
// i) GT_CAST(float/double, int type with overflow detection) - +// void Lowering::LowerCast(GenTree* tree) { assert(tree->OperGet() == GT_CAST); @@ -104,28 +205,977 @@ void Lowering::LowerCast(GenTree* tree) } } +//------------------------------------------------------------------------ +// LowerRotate: Lower GT_ROL and GT_ROL nodes. +// +// Arguments: +// tree - the node to lower +// +// Return Value: +// None. +// void Lowering::LowerRotate(GenTreePtr tree) { NYI_ARM("ARM Lowering for ROL and ROR"); } -void Lowering::TreeNodeInfoInit(GenTree* stmt) +//------------------------------------------------------------------------ +// LowerGCWriteBarrier: GC lowering helper. +// +// Arguments: +// tree - the node to lower +// +// Return Value: +// None. +// +void Lowering::LowerGCWriteBarrier(GenTree* tree) { - NYI("ARM TreeNodInfoInit"); + GenTreePtr dst = tree; + GenTreePtr addr = tree->gtOp.gtOp1; + GenTreePtr src = tree->gtOp.gtOp2; + + if (addr->OperGet() == GT_LEA) + { + // In the case where we are doing a helper assignment, if the dst + // is an indir through an lea, we need to actually instantiate the + // lea in a register + GenTreeAddrMode* lea = addr->AsAddrMode(); + + short leaSrcCount = 0; + if (lea->Base() != nullptr) + { + leaSrcCount++; + } + if (lea->Index() != nullptr) + { + leaSrcCount++; + } + lea->gtLsraInfo.srcCount = leaSrcCount; + lea->gtLsraInfo.dstCount = 1; + } + +#if NOGC_WRITE_BARRIERS + NYI_ARM("NOGC_WRITE_BARRIERS"); +#else + // For the standard JIT Helper calls + // op1 goes into REG_ARG_0 and + // op2 goes into REG_ARG_1 + // + addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0); + src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1); +#endif // NOGC_WRITE_BARRIERS + + // Both src and dst must reside in a register, which they should since we haven't set + // either of them as contained. 
+ assert(addr->gtLsraInfo.dstCount == 1); + assert(src->gtLsraInfo.dstCount == 1); } -// returns true if the tree can use the read-modify-write memory instruction form -bool Lowering::isRMWRegOper(GenTreePtr tree) +//------------------------------------------------------------------------ +// SetIndirAddrOpCounts: Specify register requirements for address expression +// of an indirection operation. +// +// Arguments: +// indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node +// +void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree) { - return false; + assert(indirTree->OperIsIndir()); + // If this is the rhs of a block copy (i.e. non-enregisterable struct), + // it has no register requirements. + if (indirTree->TypeGet() == TYP_STRUCT) + { + return; + } + + GenTreePtr addr = indirTree->gtGetOp1(); + TreeNodeInfo* info = &(indirTree->gtLsraInfo); + + GenTreePtr base = nullptr; + GenTreePtr index = nullptr; + unsigned cns = 0; + unsigned mul; + bool rev; + bool modifiedSources = false; + + if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr)) + { + GenTreeAddrMode* lea = addr->AsAddrMode(); + base = lea->Base(); + index = lea->Index(); + cns = lea->gtOffset; + + m_lsra->clearOperandCounts(addr); + // The srcCount is decremented because addr is now "contained", + // then we account for the base and index below, if they are non-null. 
+ info->srcCount--; + } + else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) && + !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index))) + { + // An addressing mode will be constructed that may cause some + // nodes to not need a register, and cause others' lifetimes to be extended + // to the GT_IND or even its parent if it's an assignment + + assert(base != addr); + m_lsra->clearOperandCounts(addr); + + GenTreePtr arrLength = nullptr; + + // Traverse the computation below GT_IND to find the operands + // for the addressing mode, marking the various constants and + // intermediate results as not consuming/producing. + // If the traversal were more complex, we might consider using + // a traversal function, but the addressing mode is only made + // up of simple arithmetic operators, and the code generator + // only traverses one leg of each node. + + bool foundBase = (base == nullptr); + bool foundIndex = (index == nullptr); + GenTreePtr nextChild = nullptr; + for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild) + { + nextChild = nullptr; + GenTreePtr op1 = child->gtOp.gtOp1; + GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr; + + if (op1 == base) + { + foundBase = true; + } + else if (op1 == index) + { + foundIndex = true; + } + else + { + m_lsra->clearOperandCounts(op1); + if (!op1->OperIsLeaf()) + { + nextChild = op1; + } + } + + if (op2 != nullptr) + { + if (op2 == base) + { + foundBase = true; + } + else if (op2 == index) + { + foundIndex = true; + } + else + { + m_lsra->clearOperandCounts(op2); + if (!op2->OperIsLeaf()) + { + assert(nextChild == nullptr); + nextChild = op2; + } + } + } + } + assert(foundBase && foundIndex); + info->srcCount--; // it gets incremented below. + } + else if (addr->gtOper == GT_ARR_ELEM) + { + // The GT_ARR_ELEM consumes all the indices and produces the offset. 
+ // The array object lives until the mem access. + // We also consume the target register to which the address is + // computed + + info->srcCount++; + assert(addr->gtLsraInfo.srcCount >= 2); + addr->gtLsraInfo.srcCount -= 1; + } + else + { + // it is nothing but a plain indir + info->srcCount--; // base gets added in below + base = addr; + } + + if (base != nullptr) + { + info->srcCount++; + } + + if (index != nullptr && !modifiedSources) + { + info->srcCount++; + } +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN. +// +// Arguments: +// tree - The node of interest +// +// Return Value: +// None. +// +void Lowering::TreeNodeInfoInitReturn(GenTree* tree) +{ + TreeNodeInfo* info = &(tree->gtLsraInfo); + LinearScan* l = m_lsra; + Compiler* compiler = comp; + + GenTree* op1 = tree->gtGetOp1(); + regMaskTP useCandidates = RBM_NONE; + + info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1; + info->dstCount = 0; + + if (varTypeIsStruct(tree)) + { + NYI_ARM("struct return"); + } + else + { + // Non-struct type return - determine useCandidates + switch (tree->TypeGet()) + { + case TYP_VOID: + useCandidates = RBM_NONE; + break; + case TYP_FLOAT: + useCandidates = RBM_FLOATRET; + break; + case TYP_DOUBLE: + useCandidates = RBM_DOUBLERET; + break; + case TYP_LONG: + useCandidates = RBM_LNGRET; + break; + default: + useCandidates = RBM_INTRET; + break; + } + } + + if (useCandidates != RBM_NONE) + { + tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, useCandidates); + } +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitCall: Set the NodeInfo for a call. +// +// Arguments: +// call - The call node of interest +// +// Return Value: +// None. 
+// +void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) +{ + TreeNodeInfo* info = &(call->gtLsraInfo); + LinearScan* l = m_lsra; + Compiler* compiler = comp; + bool hasMultiRegRetVal = false; + ReturnTypeDesc* retTypeDesc = nullptr; + + info->srcCount = 0; + if (call->TypeGet() != TYP_VOID) + { + hasMultiRegRetVal = call->HasMultiRegRetVal(); + if (hasMultiRegRetVal) + { + // dst count = number of registers in which the value is returned by call + retTypeDesc = call->GetReturnTypeDesc(); + info->dstCount = retTypeDesc->GetReturnRegCount(); + } + else + { + info->dstCount = 1; + } + } + else + { + info->dstCount = 0; + } + + GenTree* ctrlExpr = call->gtControlExpr; + if (call->gtCallType == CT_INDIRECT) + { + // either gtControlExpr != null or gtCallAddr != null. + // Both cannot be non-null at the same time. + assert(ctrlExpr == nullptr); + assert(call->gtCallAddr != nullptr); + ctrlExpr = call->gtCallAddr; + } + + // set reg requirements on call target represented as control sequence. + if (ctrlExpr != nullptr) + { + // we should never see a gtControlExpr whose type is void. + assert(ctrlExpr->TypeGet() != TYP_VOID); + + info->srcCount++; + // In case of fast tail implemented as jmp, make sure that gtControlExpr is + // computed into a register. + if (call->IsFastTailCall()) + { + NYI_ARM("tail call"); + } + } + else + { + info->internalIntCount = 1; + } + + RegisterType registerType = call->TypeGet(); + + // Set destination candidates for return value of the call. 
+ if (hasMultiRegRetVal) + { + assert(retTypeDesc != nullptr); + info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs()); + } + else if (varTypeIsFloating(registerType)) + { + info->setDstCandidates(l, RBM_FLOATRET); + } + else if (registerType == TYP_LONG) + { + info->setDstCandidates(l, RBM_LNGRET); + } + else + { + info->setDstCandidates(l, RBM_INTRET); + } + + // If there is an explicit this pointer, we don't want that node to produce anything + // as it is redundant + if (call->gtCallObjp != nullptr) + { + GenTreePtr thisPtrNode = call->gtCallObjp; + + if (thisPtrNode->gtOper == GT_PUTARG_REG) + { + l->clearOperandCounts(thisPtrNode); + l->clearDstCount(thisPtrNode->gtOp.gtOp1); + } + else + { + l->clearDstCount(thisPtrNode); + } + } + + // First, count reg args + bool callHasFloatRegArgs = false; + + for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) + { + assert(list->OperIsList()); + + GenTreePtr argNode = list->Current(); + + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); + assert(curArgTabEntry); + + if (curArgTabEntry->regNum == REG_STK) + { + // late arg that is not passed in a register + assert(argNode->gtOper == GT_PUTARG_STK); + + TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry); + continue; + } + + var_types argType = argNode->TypeGet(); + bool argIsFloat = varTypeIsFloating(argType); + NYI_IF(argIsFloat, "float argument"); + callHasFloatRegArgs |= argIsFloat; + + regNumber argReg = curArgTabEntry->regNum; + // We will setup argMask to the set of all registers that compose this argument + regMaskTP argMask = 0; + + argNode = argNode->gtEffectiveVal(); + + // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct + if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_FIELD_LIST)) + { + GenTreePtr actualArgNode = argNode; + unsigned originalSize = 0; + + if (argNode->gtOper == GT_FIELD_LIST) + { + // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 
can only occur for HFAs) + GenTreeFieldList* fieldListPtr = argNode->AsFieldList(); + + // Initailize the first register and the first regmask in our list + regNumber targetReg = argReg; + regMaskTP targetMask = genRegMask(targetReg); + unsigned iterationNum = 0; + originalSize = 0; + + for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest()) + { + GenTreePtr putArgRegNode = fieldListPtr->Current(); + assert(putArgRegNode->gtOper == GT_PUTARG_REG); + GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1; + + originalSize += REGSIZE_BYTES; // 8 bytes + + // Record the register requirements for the GT_PUTARG_REG node + putArgRegNode->gtLsraInfo.setDstCandidates(l, targetMask); + putArgRegNode->gtLsraInfo.setSrcCandidates(l, targetMask); + + // To avoid redundant moves, request that the argument child tree be + // computed in the register in which the argument is passed to the call. + putArgChild->gtLsraInfo.setSrcCandidates(l, targetMask); + + // We consume one source for each item in this list + info->srcCount++; + iterationNum++; + + // Update targetReg and targetMask for the next putarg_reg (if any) + targetReg = genRegArgNext(targetReg); + targetMask = genRegMask(targetReg); + } + } + else + { +#ifdef DEBUG + compiler->gtDispTreeRange(BlockRange(), argNode); +#endif + noway_assert(!"Unsupported TYP_STRUCT arg kind"); + } + + unsigned slots = ((unsigned)(roundUp(originalSize, REGSIZE_BYTES))) / REGSIZE_BYTES; + regNumber curReg = argReg; + regNumber lastReg = argIsFloat ? 
REG_ARG_FP_LAST : REG_ARG_LAST; + unsigned remainingSlots = slots; + + while (remainingSlots > 0) + { + argMask |= genRegMask(curReg); + remainingSlots--; + + if (curReg == lastReg) + break; + + curReg = genRegArgNext(curReg); + } + + // Struct typed arguments must be fully passed in registers (Reg/Stk split not allowed) + noway_assert(remainingSlots == 0); + argNode->gtLsraInfo.internalIntCount = 0; + } + else // A scalar argument (not a struct) + { + // We consume one source + info->srcCount++; + + argMask |= genRegMask(argReg); + argNode->gtLsraInfo.setDstCandidates(l, argMask); + argNode->gtLsraInfo.setSrcCandidates(l, argMask); + + if (argNode->gtOper == GT_PUTARG_REG) + { + GenTreePtr putArgChild = argNode->gtOp.gtOp1; + + // To avoid redundant moves, request that the argument child tree be + // computed in the register in which the argument is passed to the call. + putArgChild->gtLsraInfo.setSrcCandidates(l, argMask); + } + } + } + + // Now, count stack args + // Note that these need to be computed into a register, but then + // they're just stored to the stack - so the reg doesn't + // need to remain live until the call. In fact, it must not + // because the code generator doesn't actually consider it live, + // so it can't be spilled. 
+ + GenTreePtr args = call->gtCallArgs; + while (args) + { + GenTreePtr arg = args->gtOp.gtOp1; + + // Skip arguments that have been moved to the Late Arg list + if (!(args->gtFlags & GTF_LATE_ARG)) + { + if (arg->gtOper == GT_PUTARG_STK) + { + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); + assert(curArgTabEntry); + + assert(curArgTabEntry->regNum == REG_STK); + + TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry); + } + else + { + TreeNodeInfo* argInfo = &(arg->gtLsraInfo); + if (argInfo->dstCount != 0) + { + argInfo->isLocalDefUse = true; + } + + argInfo->dstCount = 0; + } + } + args = args->gtOp.gtOp2; + } + + if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) + { + NYI_ARM("float reg varargs"); + } +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node +// +// Arguments: +// argNode - a GT_PUTARG_STK node +// +// Return Value: +// None. +// +// Notes: +// Set the child node(s) to be contained when we have a multireg arg +// +void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info) +{ + assert(argNode->gtOper == GT_PUTARG_STK); + + GenTreePtr putArgChild = argNode->gtOp.gtOp1; + + // Initialize 'argNode' as not contained, as this is both the default case + // and how MakeSrcContained expects to find things setup. 
+ // + argNode->gtLsraInfo.srcCount = 1; + argNode->gtLsraInfo.dstCount = 0; + + // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct + if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST)) + { + // We will use store instructions that each write a register sized value + + if (putArgChild->OperGet() == GT_FIELD_LIST) + { + // We consume all of the items in the GT_FIELD_LIST + argNode->gtLsraInfo.srcCount = info->numSlots; + } + else + { + // We could use a ldp/stp sequence so we need two internal registers + argNode->gtLsraInfo.internalIntCount = 2; + + if (putArgChild->OperGet() == GT_OBJ) + { + GenTreePtr objChild = putArgChild->gtOp.gtOp1; + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation + // + MakeSrcContained(putArgChild, objChild); + } + } + + // We will generate all of the code for the GT_PUTARG_STK and it's child node + // as one contained operation + // + MakeSrcContained(argNode, putArgChild); + } + } + else + { + // We must not have a multi-reg struct + assert(info->numSlots == 1); + } } +//------------------------------------------------------------------------ +// TreeNodeInfoInit: Set the register requirements for RA. +// +// Notes: +// Takes care of annotating the register requirements +// for every TreeNodeInfo struct that maps to each tree node. +// +// Preconditions: +// LSRA has been initialized and there is a TreeNodeInfo node +// already allocated and initialized for every tree in the IR. +// +// Postconditions: +// Every TreeNodeInfo instance has the right annotations on register +// requirements needed by LSRA to build the Interval Table (source, +// destination and internal [temp] register counts). +// This code is refactored originally from LSRA. 
+// +void Lowering::TreeNodeInfoInit(GenTree* tree) +{ + LinearScan* l = m_lsra; + Compiler* compiler = comp; + + unsigned kind = tree->OperKind(); + TreeNodeInfo* info = &(tree->gtLsraInfo); + RegisterType registerType = TypeGet(tree); + + JITDUMP("TreeNodeInfoInit for: "); + DISPNODE(tree); + + switch (tree->OperGet()) + { + GenTree* op1; + GenTree* op2; + + case GT_STORE_LCL_FLD: + case GT_STORE_LCL_VAR: + info->srcCount = 1; + info->dstCount = 0; + LowerStoreLoc(tree->AsLclVarCommon()); + break; + + case GT_NOP: + // A GT_NOP is either a passthrough (if it is void, or if it has + // a child), but must be considered to produce a dummy value if it + // has a type but no child + info->srcCount = 0; + if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr) + { + info->dstCount = 1; + } + else + { + info->dstCount = 0; + } + break; + + case GT_CAST: + { + info->srcCount = 1; + info->dstCount = 1; + + // Non-overflow casts to/from float/double are done using SSE2 instructions + // and that allow the source operand to be either a reg or memop. Given the + // fact that casts from small int to float/double are done as two-level casts, + // the source operand is always guaranteed to be of size 4 or 8 bytes. + var_types castToType = tree->CastToType(); + GenTreePtr castOp = tree->gtCast.CastOp(); + var_types castOpType = castOp->TypeGet(); + if (tree->gtFlags & GTF_UNSIGNED) + { + castOpType = genUnsignedType(castOpType); + } +#ifdef DEBUG + if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType))) + { + NYI_ARM("float cast"); + } +#endif // DEBUG + + if (tree->gtOverflow()) + { + NYI_ARM("overflow checks"); + } + } + break; + + case GT_JTRUE: + info->srcCount = 0; + info->dstCount = 0; + l->clearDstCount(tree->gtOp.gtOp1); + break; + + case GT_JMP: + info->srcCount = 0; + info->dstCount = 0; + break; + + case GT_SWITCH: + // This should never occur since switch nodes must not be visible at this + // point in the JIT. 
+ info->srcCount = 0; + info->dstCount = 0; // To avoid getting uninit errors. + noway_assert(!"Switch must be lowered at this point"); + break; + + case GT_JMPTABLE: + info->srcCount = 0; + info->dstCount = 1; + break; + + case GT_SWITCH_TABLE: + info->srcCount = 2; + info->internalIntCount = 1; + info->dstCount = 0; + break; + + case GT_ASG: + case GT_ASG_ADD: + case GT_ASG_SUB: + noway_assert(!"We should never hit any assignment operator in lowering"); + info->srcCount = 0; + info->dstCount = 0; + break; + + case GT_ADD: + case GT_SUB: + if (varTypeIsFloating(tree->TypeGet())) + { + // overflow operations aren't supported on float/double types. + assert(!tree->gtOverflow()); + + // No implicit conversions at this stage as the expectation is that + // everything is made explicit by adding casts. + assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet()); + + info->srcCount = 2; + info->dstCount = 1; + + break; + } + + __fallthrough; + + case GT_AND: + case GT_OR: + case GT_XOR: + info->srcCount = 2; + info->dstCount = 1; + // Check and make op2 contained (if it is a containable immediate) + CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2); + break; + + case GT_MUL: + if (tree->gtOverflow()) + { + // Need a register different from target reg to check for overflow. 
+ info->internalIntCount = 2; + } + __fallthrough; + + case GT_DIV: + case GT_MULHI: + case GT_UDIV: + { + info->srcCount = 2; + info->dstCount = 1; + } + break; + + case GT_LIST: + case GT_FIELD_LIST: + case GT_ARGPLACE: + case GT_NO_OP: + case GT_START_NONGC: + case GT_PROF_HOOK: + info->srcCount = 0; + info->dstCount = 0; + break; + + case GT_RETURN: + TreeNodeInfoInitReturn(tree); + break; + + case GT_RETFILT: + if (tree->TypeGet() == TYP_VOID) + { + info->srcCount = 0; + info->dstCount = 0; + } + else + { + assert(tree->TypeGet() == TYP_INT); + + info->srcCount = 1; + info->dstCount = 0; + + info->setSrcCandidates(l, RBM_INTRET); + tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET); + } + break; + + case GT_LEA: + { + GenTreeAddrMode* lea = tree->AsAddrMode(); + + GenTree* base = lea->Base(); + GenTree* index = lea->Index(); + unsigned cns = lea->gtOffset; + + // This LEA is instantiating an address, + // so we set up the srcCount and dstCount here. + info->srcCount = 0; + if (base != nullptr) + { + info->srcCount++; + } + if (index != nullptr) + { + info->srcCount++; + } + info->dstCount = 1; + + if ((index != nullptr) && (cns != 0)) + { + NYI_ARM("GT_LEA: index and cns are not nil"); + } + else if (!emitter::emitIns_valid_imm_for_add(cns, INS_FLAGS_DONT_CARE)) + { + NYI_ARM("GT_LEA: invalid imm"); + } + } + break; + + case GT_NEG: + info->srcCount = 1; + info->dstCount = 1; + break; + + case GT_NOT: + info->srcCount = 1; + info->dstCount = 1; + break; + + case GT_LSH: + case GT_RSH: + case GT_RSZ: + case GT_ROR: + { + info->srcCount = 2; + info->dstCount = 1; + + GenTreePtr shiftBy = tree->gtOp.gtOp2; + GenTreePtr source = tree->gtOp.gtOp1; + if (shiftBy->IsCnsIntOrI()) + { + l->clearDstCount(shiftBy); + info->srcCount--; + } + } + break; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + TreeNodeInfoInitCmp(tree); + break; + + case GT_CALL: + TreeNodeInfoInitCall(tree->AsCall()); + break; + + case GT_STOREIND: 
+ { + info->srcCount = 2; + info->dstCount = 0; + GenTree* src = tree->gtOp.gtOp2; + + if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree)) + { + LowerGCWriteBarrier(tree); + break; + } + + SetIndirAddrOpCounts(tree); + } + break; + + case GT_NULLCHECK: + info->dstCount = 0; + info->srcCount = 1; + info->isLocalDefUse = true; + // null check is an indirection on an addr + SetIndirAddrOpCounts(tree); + break; + + case GT_IND: + info->dstCount = 1; + info->srcCount = 1; + SetIndirAddrOpCounts(tree); + break; + + case GT_CATCH_ARG: + info->srcCount = 0; + info->dstCount = 1; + info->setDstCandidates(l, RBM_EXCEPTION_OBJECT); + break; + + case GT_CLS_VAR: + info->srcCount = 0; + // GT_CLS_VAR, by the time we reach the backend, must always + // be a pure use. + // It will produce a result of the type of the + // node, and use an internal register for the address. + + info->dstCount = 1; + assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0); + info->internalIntCount = 1; + break; + + default: +#ifdef DEBUG + JitTls::GetCompiler()->gtDispTree(tree); +#endif + NYI_ARM("TreeNodeInfoInit default case"); + case GT_LCL_FLD: + case GT_LCL_VAR: + case GT_LCL_VAR_ADDR: + case GT_CLS_VAR_ADDR: + case GT_IL_OFFSET: + case GT_CNS_INT: + case GT_PUTARG_REG: + case GT_PUTARG_STK: + info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1; + if (kind & (GTK_CONST | GTK_LEAF)) + { + info->srcCount = 0; + } + else if (kind & (GTK_SMPOP)) + { + if (tree->gtGetOp2() != nullptr) + { + info->srcCount = 2; + } + else + { + info->srcCount = 1; + } + } + else + { + unreached(); + } + break; + } // end switch (tree->OperGet()) + + // We need to be sure that we've set info->srcCount and info->dstCount appropriately + assert((info->dstCount < 2) || tree->IsMultiRegCall()); +} + +//------------------------------------------------------------------------ +// IsCallTargetInRange: Can a call target address be encoded in-place? 
+// +// Return Value: +// True if the addr fits into the range. +// bool Lowering::IsCallTargetInRange(void* addr) { return comp->codeGen->validImmForBL((ssize_t)addr); } -// return true if the immediate can be folded into an instruction, for example small enough and non-relocatable +//------------------------------------------------------------------------ +// IsContainableImmed: Is an immediate encodable in-place? +// +// Return Value: +// True if the immediate can be folded into an instruction, +// for example small enough and non-relocatable. bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) { if (varTypeIsFloating(parentNode->TypeGet())) @@ -185,13 +1235,6 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) if (emitter::emitIns_valid_imm_for_alu(immVal)) return true; break; - - case GT_STORE_LCL_VAR: - // TODO-ARM-Cleanup: not tested yet - NYI_ARM("ARM IsContainableImmed for GT_STORE_LCL_VAR"); - if (immVal == 0) - return true; - break; } } diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp index 7f5a26fa1f..aa7fba68d4 100644 --- a/src/jit/rationalize.cpp +++ b/src/jit/rationalize.cpp @@ -795,7 +795,7 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G BlockRange().Remove(node); break; -#ifdef _TARGET_XARCH_ +#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM_) case GT_CLS_VAR: { // Class vars that are the target of an assignment will get rewritten into diff --git a/src/jit/target.h b/src/jit/target.h index a726525488..8b448a2b7f 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -1226,6 +1226,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED) #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH) #define RBM_CALLEE_TRASH_NOGC (RBM_R2|RBM_R3|RBM_LR) + #define REG_DEFAULT_HELPER_CALL_TARGET REG_R12 #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH) 
#define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH) @@ -1433,6 +1434,8 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_ARG_FIRST REG_R0 #define REG_ARG_LAST REG_R3 + #define REG_ARG_FP_FIRST REG_F0 + #define REG_ARG_FP_LAST REG_F7 #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots #define REG_ARG_0 REG_R0 |